Advertisements
The code
from pyspark.sql.functions import
from pyspark.sql.functions import *
start_date_str = dbutils.widgets.get("startdate")
start_date = to_date(lit(start_date_str), 'yyyy-MM-dd')
end_date = add_months(start_date_str, 12 * 10)
end_date_str = (spark
.range(1)
.select(add_months(start_date, 12 * 10).alias("end_date"))
.collect()[0]["end_date"])
date_range = spark.range(0, (end_date_str - start_date_str).days, 1) \
.withColumn("date", date_add(lit(start_date_str), col("id").cast("date")))
dim_time = date_range \
.withColumn("day_of_week", date_format(col("date"), "EEEE")) \
.withColumn("current_day", when(col("date") == current_date(), 1).otherwise(0)) \
.withColumn("working_day", when(col("day_of_week").isin(["Saturday", "Sunday"]), 0).otherwise(1)) \
.withColumn("month_id", month(col("date")).cast("smallint")) \
.withColumn("month_desc", date_format(col("date"), "MMM")) \
.withColumn("quarter_id", quarter(col("date")).cast("smallint")) \
.withColumn("quarter_desc", concat(lit("Q"), quarter(col("date")).cast("string"))) \
.withColumn("year", year(col("date")).cast("smallint")) \
.select("date", "day_of_week", "current_day", "working_day", "month_id", "month_desc", "quarter_id", "quarter_desc", "year")
TypeError: unsupported operand type(s) for -: ‘datetime.date’ and ‘str’
i want this error to get rectified.
>Solution :
I got the same error when I tried to reproduce the code. The error is within the usage of range
.
- Using the following code, I was able to get what you might be trying to achieve:
from datetime import date
from dateutil.relativedelta import relativedelta
ls = start_date_str.split('-')
start_date = date(int(ls[0]),int(ls[1]),int(ls[2]))
end_date = start_date + relativedelta(months=12*10)
print(start_date,end_date)
date_range = spark.range(0, (end_date - start_date).days, 1) \
.withColumn("date", date_add(lit(start_date), col("id").cast("int")))
dim_time = date_range \
.withColumn("day_of_week", date_format(col("date"), "EEEE")) \
.withColumn("current_day", when(col("date") == current_date(), 1).otherwise(0)) \
.withColumn("working_day", when(col("day_of_week").isin(["Saturday", "Sunday"]), 0).otherwise(1)) \
.withColumn("month_id", month(col("date")).cast("smallint")) \
.withColumn("month_desc", date_format(col("date"), "MMM")) \
.withColumn("quarter_id", quarter(col("date")).cast("smallint")) \
.withColumn("quarter_desc", concat(lit("Q"), quarter(col("date")).cast("string"))) \
.withColumn("year", year(col("date")).cast("smallint")) \
.select("date", "day_of_week", "current_day", "working_day", "month_id", "month_desc", "quarter_id", "quarter_desc", "year")
#display(dim_time)