Problem Statement
🧩 Syntax:
Code:
def Model(params, plot_dir=None):
    """Fit simple exponential smoothing for one forecasting unit and forecast ahead.

    The weekly history is split into train / validation / test windows, the
    smoothing level is chosen by a grid search over 0.0 .. 0.9, and the model
    is refit to produce in-sample predictions, test predictions and a forward
    forecast of ``forecast_weeks`` weeks.

    Args:
        params: dict read with the following keys:
            'product_data' (DataFrame with columns 'YEAR_WEEK',
            'FORECASTING_UNIT_KEY', 'FORECASTING_UNIT#', 'PURCHASE_QTY'),
            'less_data_flag', 'train_split_less_data', 'val_split_less_data',
            'start_train_week', 'end_train_week', 'start_validation_week',
            'end_validation_week', 'start_test_week', 'end_test_week',
            'forecast_weeks'.
        plot_dir: directory (string) in which '<FU#>_SE_FORECAST.png' is saved.
            When None, no plot is produced.

    Returns:
        Tuple ``(forecast_matrix, train_mape_monthy, validation_mape_monthy,
        test_mape_monthy, train_matrix, validation_matrix, test_matrix)``.

    Raises:
        KeyError: if a required key is missing from ``params``.
        IndexError: if one of the configured weeks is not present in the data.
        ValueError: if the calendar file has no rows for a forecast year.

    Side effects:
        Reads 'calendar_dim.csv' via a relative path, prints timing/length
        diagnostics, and writes a PNG when ``plot_dir`` is given.
    """
    train_split_less_data = params['train_split_less_data']
    # The validation share is made cumulative so it can be used directly as
    # the end position of the validation window.
    val_split_less_data = train_split_less_data + params['val_split_less_data']

    # Work on a sorted copy so the caller's frame is never modified.
    input_file = params['product_data'].copy()
    input_file.sort_values('YEAR_WEEK', inplace=True)
    input_file = input_file.reset_index(drop=True)

    # Forecasting-unit level identifiers used as key references.
    FU_name = input_file['FORECASTING_UNIT_KEY'].loc[0]
    FU_no = str(input_file['FORECASTING_UNIT#'].loc[0]) + '_SE'
    less_data_flag = params['less_data_flag']

    # Window boundaries as row positions; every end position is exclusive.
    start_train_index = _week_position(input_file, params['start_train_week'])
    end_train_index = _week_position(input_file, params['end_train_week']) + 1
    start_validation_index = _week_position(input_file, params['start_validation_week'])
    end_validation_index = _week_position(input_file, params['end_validation_week']) + 1
    start_test_index = _week_position(input_file, params['start_test_week'])
    end_test_index = _week_position(input_file, params['end_test_week']) + 1

    forecast_weeks = params['forecast_weeks']

    # With little history the configured weeks are ignored: leading rows with
    # no PURCHASE_QTY are dropped and the windows are derived proportionally.
    if less_data_flag == 'LESS_DATA':
        first_valid = input_file['PURCHASE_QTY'].first_valid_index()
        input_file = input_file.iloc[first_valid:].reset_index(drop=True)
        total_length = len(input_file)
        start_train_index = input_file.index[0]
        end_train_index = int(total_length * train_split_less_data)
        start_validation_index = end_train_index
        end_validation_index = int(total_length * val_split_less_data)
        start_test_index = end_validation_index
        end_test_index = total_length

    # Total weeks in validation and test, and the window used for the final
    # refit (the training window shifted forward by validation + test).
    validation_weeks = end_validation_index - start_validation_index
    test_weeks = end_test_index - start_test_index
    start_retrain_index = start_train_index + validation_weeks + test_weeks
    end_retrain_index = end_train_index + validation_weeks + test_weeks

    # NOTE(review): the source indentation was lost; the fill is assumed to
    # apply to both the regular and the LESS_DATA path - confirm.
    input_file = input_file.fillna(0)

    # Keep train..test only. The copy avoids chained-assignment warnings when
    # the Year/Week columns are added below.
    input_file = input_file.iloc[start_train_index:end_test_index].copy()
    input_file[['Year', 'Week']] = input_file['YEAR_WEEK'].str.split('-', expand=True)
    input_file = input_file[['YEAR_WEEK', 'Year', 'Week', 'PURCHASE_QTY']].copy()

    # NOTE(review): the frame was already cut at start_train_index above, so
    # reusing the absolute positions here is only aligned when
    # start_train_index is 0 - confirm against callers.
    train = input_file.iloc[start_train_index:end_train_index]
    validation = input_file.iloc[start_validation_index:end_validation_index]
    retrain = input_file.iloc[start_retrain_index:end_retrain_index]

    input_file['Year'] = input_file['Year'].astype('int')
    input_file['Week'] = input_file['Week'].astype('int')

    # First forecast week is the week right after the last week in the data.
    year = input_file['Year'].max()
    week_forecast = input_file[input_file['Year'] == year]['Week'].max() + 1

    calendar_dim = pd.read_csv('calendar_dim.csv')  # for testing purposes
    calendar_dim['yearweek'] = calendar_dim['yearweek'].apply(int)
    calendar_dim = calendar_dim[['yearweek', 'Year', 'Week']].drop_duplicates()

    # Walk the calendar week by week, rolling into the next year once the
    # current year's last week (52 or 53 per the calendar) is exceeded.
    week_max = _weeks_in_year(calendar_dim, year)
    forecast_rows = []
    for _ in range(forecast_weeks):
        if week_forecast > week_max:
            year += 1
            week_forecast = 1
            week_max = _weeks_in_year(calendar_dim, year)
        forecast_rows.append((year, week_forecast))
        week_forecast += 1

    forecast = pd.DataFrame(forecast_rows, columns=['Year', 'Week'])
    forecast['Year'] = forecast['Year'].astype(str)
    forecast['Week'] = forecast['Week'].astype(str).str.zfill(3)
    forecast['YEAR_WEEK'] = forecast['Year'] + '-' + forecast['Week']

    # ---- Grid search over the smoothing level ----
    alpha_list = [i / 10 for i in range(0, 10, 1)]
    train_actual = np.asarray(train['PURCHASE_QTY'])
    validation_actual = np.asarray(validation['PURCHASE_QTY'])
    model = SimpleExpSmoothing(train_actual)

    # Rows are collected in a list because DataFrame.append no longer exists
    # in pandas 2.x.
    grid_rows = []
    start_time = time.time()
    for alpha in alpha_list:
        smoothed = model.fit(smoothing_level=alpha, optimized=True)
        train_pred = smoothed.predict(start_train_index, end_train_index - 1)
        validation_pred = smoothed.predict(start_validation_index, end_validation_index - 1)
        grid_rows.append({
            'alpha': alpha,
            'Train_MSE': _mean_squared_error(train_actual, train_pred),
            'validation_MSE': _mean_squared_error(validation_actual, validation_pred),
        })
    print("--- %s seconds ---" % (time.time() - start_time))
    grid_search_df = pd.DataFrame(grid_rows)

    # Both errors are scaled to their maximum so they weigh equally in SUM.
    grid_search_df['validation_MSE'] = _scale_by_max(grid_search_df['validation_MSE'])
    grid_search_df['Train_MSE'] = _scale_by_max(grid_search_df['Train_MSE'])
    grid_search_df['SUM'] = grid_search_df['validation_MSE'] + grid_search_df['Train_MSE']

    # Parameter selection: first alpha with the smallest combined error.
    alpha = grid_search_df.loc[grid_search_df['SUM'].idxmin(), 'alpha']

    # ---- In-sample predictions over train..test with the chosen alpha ----
    prediction_matrix = input_file[['YEAR_WEEK', 'Year', 'Week', 'PURCHASE_QTY']].copy()
    fit = model.fit(smoothing_level=alpha, optimized=True)
    history_pred = fit.predict(start_train_index, end_test_index - 1)
    prediction_matrix['predicted_PURCHASE_QTY'] = history_pred
    print("Length of Train Forecast=", len(history_pred))
    train_matrix = prediction_matrix.iloc[start_train_index:end_train_index].copy()
    validation_matrix = prediction_matrix.iloc[start_validation_index:end_validation_index].copy()
    test_matrix = prediction_matrix.iloc[start_test_index:end_test_index].copy()

    # ---- Test predictions from a model refit on train + validation ----
    model = SimpleExpSmoothing(input_file['PURCHASE_QTY'][start_train_index:end_validation_index])
    fit = model.fit(smoothing_level=alpha, optimized=True)
    test_matrix['predicted_PURCHASE_QTY'] = np.array(
        fit.predict(start_test_index, end_test_index - 1).reset_index(drop=True))

    # ---- Forward forecast from the retrain window ----
    forecast_matrix = forecast[['YEAR_WEEK', 'Year', 'Week']].copy()
    model = SimpleExpSmoothing(retrain['PURCHASE_QTY'])
    fit = model.fit(smoothing_level=alpha, optimized=True)
    # forecast(steps) returns exactly `steps` values; asking for
    # forecast_weeks - 1 left the last forecast row empty.
    forecast_matrix['predicted_PURCHASE_QTY'] = np.asarray(fit.forecast(forecast_weeks))
    print("Length of Forecast=", len(fit.predict(end_test_index, end_test_index + forecast_weeks - 1)))

    def my_plot_func(plot_dir):
        """Plot actuals, predictions and forecast, and save the PNG in plot_dir."""
        colors = cycler('color',
                        ['#EE6666', '#3388BB', '#9988DD',
                         '#EECC55', '#88BB44', '#FFBBBB'])
        tick_spacing = 10
        fig, ax = plt.subplots(1, 1, figsize=(20, 8))
        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
        ax.plot(train_matrix['YEAR_WEEK'], train_matrix['PURCHASE_QTY'], label='Train', color='orange')
        ax.plot(train_matrix['YEAR_WEEK'], train_matrix['predicted_PURCHASE_QTY'], label='Train Predicted', color='blue')
        ax.plot(validation_matrix['YEAR_WEEK'], validation_matrix['predicted_PURCHASE_QTY'], label='Validation Predicted', color='purple')
        ax.plot(validation_matrix['YEAR_WEEK'], validation_matrix['PURCHASE_QTY'], label='Validation', color='yellow')
        ax.plot(test_matrix['YEAR_WEEK'], test_matrix['predicted_PURCHASE_QTY'], label='Test Predicted', color='green')
        ax.plot(test_matrix['YEAR_WEEK'], test_matrix['PURCHASE_QTY'], label='Test', color='violet')
        ax.plot(forecast_matrix['YEAR_WEEK'], forecast_matrix['predicted_PURCHASE_QTY'], label='Forecast', color='red')
        plt.legend(loc='upper left', fontsize=8)
        plt.title(FU_name + '_FORECAST' + '_SE')
        # NOTE(review): these rc settings are applied after the axes were
        # created, so they presumably only affect later figures - confirm.
        plt.rc('axes', facecolor='#E6E6E6', edgecolor='none', axisbelow=True, grid=True, prop_cycle=colors)
        plt.rc('grid', color='w', linestyle='solid')
        plt.xticks(rotation=90)
        plt.savefig(plot_dir + '/' + str(FU_no) + '_FORECAST.png')
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
        return None

    # plot_dir defaults to None; building the output path from None would
    # raise, so plotting is skipped in that case.
    if plot_dir is not None:
        my_plot_func(plot_dir)

    forecast_matrix['FORECASTING_UNIT_KEY'] = FU_name
    forecast_matrix = forecast_matrix[['FORECASTING_UNIT_KEY', 'YEAR_WEEK', 'predicted_PURCHASE_QTY']]

    def month_level_mape(matrix):
        """Return (per-bucket APE, mean APE) over consecutive 4-week buckets.

        Rows are ordered by Year/Week and grouped four at a time; the last
        two rows are put in the bucket of the third-last row so a trailing
        bucket of one or two weeks is merged into the previous one.
        """
        df = matrix.copy()
        df = df.sort_values(by=['Year', 'Week']).reset_index()
        df['count'] = df.index + 1
        df['temp'] = 0
        # Mark the first row of every 4-week bucket, then number the buckets.
        df.loc[(df['count'] - 1) % 4 == 0, 'temp'] = 1
        df['month_count'] = df['temp'].cumsum()
        # With fewer than three rows everything is already in one bucket and
        # the label arithmetic below would go negative.
        if len(df) >= 3:
            df.loc[len(df) - 2, 'month_count'] = df.loc[len(df) - 3, 'month_count']
            df.loc[len(df) - 1, 'month_count'] = df.loc[len(df) - 3, 'month_count']
        df = df.groupby('month_count', as_index=False).agg({
            'predicted_PURCHASE_QTY': 'sum',
            'PURCHASE_QTY': 'sum'
        })
        monthly_ape = np.abs((df['predicted_PURCHASE_QTY'] - df['PURCHASE_QTY']) / df['PURCHASE_QTY'])
        # Buckets with zero actuals give an infinite error and are excluded.
        monthly_ape = monthly_ape[monthly_ape != np.inf]
        monthly_mape = np.mean(monthly_ape)
        return monthly_ape, monthly_mape

    _, train_mape_monthy = month_level_mape(train_matrix)
    _, validation_mape_monthy = month_level_mape(validation_matrix)
    _, test_mape_monthy = month_level_mape(test_matrix)

    output_columns = ['FORECASTING_UNIT_KEY', 'YEAR_WEEK', 'PURCHASE_QTY', 'predicted_PURCHASE_QTY']
    train_matrix['FORECASTING_UNIT_KEY'] = FU_name
    train_matrix = train_matrix[output_columns]
    validation_matrix['FORECASTING_UNIT_KEY'] = FU_name
    validation_matrix = validation_matrix[output_columns]
    test_matrix['FORECASTING_UNIT_KEY'] = FU_name
    test_matrix = test_matrix[output_columns]
    return (forecast_matrix, train_mape_monthy, validation_mape_monthy, test_mape_monthy,
            train_matrix, validation_matrix, test_matrix)


def _week_position(frame, year_week):
    """Return the index label of the first row whose YEAR_WEEK equals year_week."""
    return frame.loc[frame['YEAR_WEEK'] == year_week].index[0]


def _weeks_in_year(calendar_dim, year):
    """Return the highest week number the calendar lists for the given year."""
    weeks = calendar_dim.loc[calendar_dim['Year'] == year, 'Week']
    if weeks.empty:
        raise ValueError('calendar_dim has no weeks for year {}'.format(year))
    return int(weeks.max())


def _mean_squared_error(actual, predicted):
    """Return the mean of the squared differences between actual and predicted."""
    errors = pd.DataFrame({'actual': np.asarray(actual), 'predicted': predicted})
    return np.power(errors['actual'] - errors['predicted'], 2).mean()


def _scale_by_max(values):
    """Divide a Series by its maximum; a zero maximum leaves it unchanged."""
    peak = values.max()
    if peak == 0:
        return values
    return values / peak