def Model(params, plot_dir=None):
    """Tune and run a simple-exponential-smoothing forecast for one forecasting unit.

    The weekly history in ``params['product_data']`` is split into train,
    validation and test windows. ``smoothing_level`` is grid-searched over
    0.0-0.9 using the normalised train + validation MSE, and the selected value
    is used to (a) score the historical windows and (b) produce a forward
    forecast of ``params['forecast_weeks']`` weeks from the retrain window.

    Args:
        params: Dictionary read with the following keys:
            ``product_data`` - DataFrame with the columns
                ``FORECASTING_UNIT_KEY``, ``FORECASTING_UNIT#``, ``YEAR_WEEK``
                (strings split on ``'-'`` into year and week) and
                ``PURCHASE_QTY``.
            ``less_data_flag`` - when equal to ``'LESS_DATA'`` the windows are
                derived from ``train_split_less_data`` /
                ``val_split_less_data`` (fractions of the available rows)
                instead of the week parameters.
            ``train_split_less_data``, ``val_split_less_data`` - row fractions;
                the validation window ends at their sum.
            ``start_train_week``, ``end_train_week``, ``start_validation_week``,
            ``end_validation_week``, ``start_test_week``, ``end_test_week`` -
                ``YEAR_WEEK`` labels bounding each window (inclusive); only read
                when ``less_data_flag`` is not ``'LESS_DATA'``.
            ``forecast_weeks`` - number of future weeks to forecast.
        plot_dir: Directory in which ``<FORECASTING_UNIT#>_SE_FORECAST.png`` is
            saved. When ``None`` no plot is produced.

    Returns:
        Tuple ``(forecast_matrix, train_mape_monthy, validation_mape_monthy,
        test_mape_monthy, train_matrix, validation_matrix, test_matrix)``.
        ``forecast_matrix`` has the columns ``FORECASTING_UNIT_KEY``,
        ``YEAR_WEEK`` and ``predicted_PURCHASE_QTY``; the three ``*_matrix``
        frames additionally carry ``PURCHASE_QTY``.

    Raises:
        IndexError: If one of the window week labels is not present in
            ``YEAR_WEEK`` (non-``LESS_DATA`` mode).

    Side effects:
        Reads ``calendar_dim.csv`` via a relative path, prints timing / length
        diagnostics, and writes a PNG when ``plot_dir`` is given.
    """
    train_split_less_data = params['train_split_less_data']
    # Cumulative fraction of rows covered by train + validation.
    val_split_less_data = train_split_less_data + params['val_split_less_data']
    less_data_flag = params['less_data_flag']
    forecast_weeks = params['forecast_weeks']

    # Work on a chronologically sorted copy so the caller's frame is untouched.
    input_file = params['product_data'].copy()
    input_file = input_file.sort_values('YEAR_WEEK').reset_index(drop=True)

    # Forecasting-unit identifiers used for labelling outputs.
    FU_name = input_file['FORECASTING_UNIT_KEY'].iloc[0]
    FU_no = str(input_file['FORECASTING_UNIT#'].iloc[0]) + '_SE'

    def week_position(week):
        # Row position of the first record for `week`; IndexError if absent.
        return input_file.index[input_file['YEAR_WEEK'] == week][0]

    # ------------------------------------------------------------------
    # Window boundaries (start inclusive, end exclusive)
    # ------------------------------------------------------------------
    if less_data_flag == 'LESS_DATA':
        # Drop the leading rows that have no purchase quantity, then split the
        # remaining rows by fraction. The week parameters are not consulted
        # here, so labels missing from a short history cannot raise.
        first_valid = input_file['PURCHASE_QTY'].first_valid_index()
        input_file = input_file.iloc[first_valid:].reset_index(drop=True)
        total_length = len(input_file)

        start_train_index = 0
        end_train_index = int(total_length * train_split_less_data)
        start_validation_index = end_train_index
        end_validation_index = int(total_length * val_split_less_data)
        start_test_index = end_validation_index
        end_test_index = total_length
    else:
        start_train_index = week_position(params['start_train_week'])
        end_train_index = week_position(params['end_train_week']) + 1
        start_validation_index = week_position(params['start_validation_week'])
        end_validation_index = week_position(params['end_validation_week']) + 1
        start_test_index = week_position(params['start_test_week'])
        end_test_index = week_position(params['end_test_week']) + 1

    validation_weeks = end_validation_index - start_validation_index
    test_weeks = end_test_index - start_test_index

    # The retrain window is the train window shifted forward by the length of
    # the validation and test windows.
    start_retrain_index = start_train_index + validation_weeks + test_weeks
    end_retrain_index = end_train_index + validation_weeks + test_weeks

    # NOTE(review): the collapsed source does not show whether this fill was
    # inside the LESS_DATA branch; it is applied to both paths here - confirm.
    input_file = input_file.fillna(0)

    # ------------------------------------------------------------------
    # Restrict to train..test and re-base every index to the sliced frame
    # ------------------------------------------------------------------
    # After slicing, row 0 is the first training row, so all positions below
    # must be shifted by the original train start. Reusing the absolute
    # positions (as the previous code did) misaligns every window whenever the
    # training window does not start at row 0.
    offset = start_train_index
    input_file = input_file.iloc[start_train_index:end_test_index].reset_index(drop=True)
    train_start = start_train_index - offset
    train_end = end_train_index - offset
    val_start = start_validation_index - offset
    val_end = end_validation_index - offset
    test_start = start_test_index - offset
    test_end = end_test_index - offset
    retrain_start = start_retrain_index - offset
    retrain_end = end_retrain_index - offset

    input_file[['Year', 'Week']] = input_file['YEAR_WEEK'].str.split('-', expand=True)
    input_file = input_file[['YEAR_WEEK', 'Year', 'Week', 'PURCHASE_QTY']].copy()
    input_file['Year'] = input_file['Year'].astype('int')
    input_file['Week'] = input_file['Week'].astype('int')

    train = input_file.iloc[train_start:train_end]
    validation = input_file.iloc[val_start:val_end]
    retrain = input_file.iloc[retrain_start:retrain_end]

    # ------------------------------------------------------------------
    # Calendar of the weeks to forecast
    # ------------------------------------------------------------------
    # The first forecast week is the week after the last week in the data.
    year = input_file['Year'].max()
    week_forecast = input_file.loc[input_file['Year'] == year, 'Week'].max() + 1

    calendar_dim = pd.read_csv('calendar_dim.csv')  # for testing purposes
    calendar_dim['yearweek'] = calendar_dim['yearweek'].apply(int)
    calendar_dim = calendar_dim[['yearweek', 'Year', 'Week']].drop_duplicates()

    def last_week_of(target_year):
        # Highest week number the calendar lists for `target_year` (handles
        # 53-week years); int() raises ValueError if the year is missing.
        return int(calendar_dim.loc[calendar_dim['Year'] == target_year, 'Week'].max())

    week_max = last_week_of(year)
    forecast = pd.DataFrame(index=range(0, forecast_weeks), columns=['Year', 'Week'])
    for i in range(forecast_weeks):
        if week_forecast > week_max:
            # Roll over into the next calendar year.
            year += 1
            week_forecast = 1
            week_max = last_week_of(year)
        forecast.loc[i, 'Week'] = week_forecast
        forecast.loc[i, 'Year'] = year
        week_forecast += 1

    forecast['Year'] = forecast['Year'].astype(str)
    forecast['Week'] = forecast['Week'].astype(str).str.zfill(3)
    forecast['YEAR_WEEK'] = forecast['Year'] + '-' + forecast['Week']

    # ------------------------------------------------------------------
    # Grid search over the smoothing level
    # ------------------------------------------------------------------
    def mean_squared_error(actual, predicted):
        # Built through a DataFrame so mismatched lengths raise instead of
        # broadcasting, and NaNs are skipped by .mean().
        errors = pd.DataFrame({'actual': actual, 'predicted': predicted})
        return np.power(errors['actual'] - errors['predicted'], 2).mean()

    alpha_list = [i / 10 for i in range(0, 10, 1)]
    train_actual = np.asarray(train['PURCHASE_QTY'])
    validation_actual = np.asarray(validation['PURCHASE_QTY'])
    model = SimpleExpSmoothing(train_actual)

    grid_rows = []
    start_time = time.time()
    for alpha in alpha_list:
        smoothed = model.fit(smoothing_level=alpha, optimized=True)
        # predict() takes inclusive positions; positions past the training
        # sample are out-of-sample forecasts.
        train_pred = smoothed.predict(train_start, train_end - 1)
        validation_pred = smoothed.predict(val_start, val_end - 1)
        grid_rows.append({
            'alpha': alpha,
            'Train_MSE': mean_squared_error(train_actual, train_pred),
            'validation_MSE': mean_squared_error(validation_actual, validation_pred),
        })
    print("--- %s seconds ---" % (time.time() - start_time))

    # DataFrame.append was removed in pandas 2.0, so the rows are collected in
    # a list and turned into a frame once.
    grid_search_df = pd.DataFrame(grid_rows)
    # Scale both errors to [0, 1] so they weigh equally in the selection score.
    grid_search_df['validation_MSE'] = grid_search_df['validation_MSE'] / grid_search_df['validation_MSE'].max()
    grid_search_df['Train_MSE'] = grid_search_df['Train_MSE'] / grid_search_df['Train_MSE'].max()
    grid_search_df['SUM'] = grid_search_df['validation_MSE'] + grid_search_df['Train_MSE']

    # First alpha achieving the lowest combined score.
    best_rows = grid_search_df[grid_search_df['SUM'] == grid_search_df['SUM'].min()]
    best_alpha = best_rows['alpha'].iloc[0]

    # ------------------------------------------------------------------
    # Historical predictions with the selected smoothing level
    # ------------------------------------------------------------------
    prediction_matrix = input_file[['YEAR_WEEK', 'Year', 'Week', 'PURCHASE_QTY']].copy()
    fit = model.fit(smoothing_level=best_alpha, optimized=True)
    history_pred = fit.predict(train_start, test_end - 1)
    prediction_matrix['predicted_PURCHASE_QTY'] = history_pred
    print("Length of Train Forecast=", len(history_pred))

    # Explicit copies: these frames get new columns assigned further down.
    train_matrix = prediction_matrix.iloc[train_start:train_end].copy()
    validation_matrix = prediction_matrix.iloc[val_start:val_end].copy()
    test_matrix = prediction_matrix.iloc[test_start:test_end].copy()

    # Test predictions come from a model refit on train + validation.
    model = SimpleExpSmoothing(np.asarray(input_file['PURCHASE_QTY'].iloc[train_start:val_end]))
    fit = model.fit(smoothing_level=best_alpha, optimized=True)
    test_matrix['predicted_PURCHASE_QTY'] = np.asarray(fit.predict(test_start, test_end - 1))

    # ------------------------------------------------------------------
    # Forward forecast
    # ------------------------------------------------------------------
    forecast_matrix = forecast[['YEAR_WEEK', 'Year', 'Week']].copy()
    model = SimpleExpSmoothing(np.asarray(retrain['PURCHASE_QTY']))
    fit = model.fit(smoothing_level=best_alpha, optimized=True)
    # forecast(steps) returns exactly `steps` values; asking for
    # forecast_weeks - 1 left the last forecast week empty (NaN).
    forward_values = np.asarray(fit.forecast(forecast_weeks))
    forecast_matrix['predicted_PURCHASE_QTY'] = forward_values
    print("Length of Forecast=", len(forward_values))

    def my_plot_func(plot_dir):
        # Plot actuals vs. predictions for every window plus the forward
        # forecast, and save the figure under `plot_dir`.
        if plot_dir is None:
            # No output directory was given, so there is nowhere to save to.
            return None
        colors = cycler('color', ['#EE6666', '#3388BB', '#9988DD', '#EECC55', '#88BB44', '#FFBBBB'])
        tick_spacing = 10
        fig, ax = plt.subplots(1, 1, figsize=(20, 8))
        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
        ax.plot(train_matrix['YEAR_WEEK'], train_matrix['PURCHASE_QTY'], label='Train', color='orange')
        ax.plot(train_matrix['YEAR_WEEK'], train_matrix['predicted_PURCHASE_QTY'], label='Train Predicted', color='blue')
        ax.plot(validation_matrix['YEAR_WEEK'], validation_matrix['predicted_PURCHASE_QTY'], label='Validation Predicted', color='purple')
        ax.plot(validation_matrix['YEAR_WEEK'], validation_matrix['PURCHASE_QTY'], label='Validation', color='yellow')
        ax.plot(test_matrix['YEAR_WEEK'], test_matrix['predicted_PURCHASE_QTY'], label='Test Predicted', color='green')
        ax.plot(test_matrix['YEAR_WEEK'], test_matrix['PURCHASE_QTY'], label='Test', color='violet')
        ax.plot(forecast_matrix['YEAR_WEEK'], forecast_matrix['predicted_PURCHASE_QTY'], label='Forecast', color='red')
        plt.legend(loc='upper left', fontsize=8)
        plt.title(FU_name + '_FORECAST' + '_SE')
        # rc settings are applied after drawing, in the same order as before.
        plt.rc('axes', facecolor='#E6E6E6', edgecolor='none', axisbelow=True, grid=True, prop_cycle=colors)
        plt.rc('grid', color='w', linestyle='solid')
        plt.xticks(rotation=90)
        plt.savefig(plot_dir + '/' + str(FU_no) + '_FORECAST.png')
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
        return None

    my_plot_func(plot_dir)

    forecast_matrix['FORECASTING_UNIT_KEY'] = FU_name
    forecast_matrix = forecast_matrix[['FORECASTING_UNIT_KEY', 'YEAR_WEEK', 'predicted_PURCHASE_QTY']]

    def month_level_mape(matrix):
        # Aggregate weekly rows into consecutive 4-week buckets and return
        # (per-bucket absolute percentage errors, their mean).
        df = matrix.sort_values(by=['Year', 'Week']).reset_index(drop=True)
        # A new bucket starts at every 4th row (rows 0, 4, 8, ...).
        df['month_count'] = (df.index % 4 == 0).astype(int).cumsum()
        if len(df) >= 3:
            # Fold the last two rows into the bucket of the third-from-last
            # row. Shorter frames have no such row and are left as they are.
            df.loc[len(df) - 2:, 'month_count'] = df.loc[len(df) - 3, 'month_count']
        df = df.groupby('month_count', as_index=False).agg({
            'predicted_PURCHASE_QTY': 'sum',
            'PURCHASE_QTY': 'sum',
        })
        monthly_ape = np.abs((df['predicted_PURCHASE_QTY'] - df['PURCHASE_QTY']) / df['PURCHASE_QTY'])
        # Buckets with zero actual quantity give an infinite error; drop them.
        monthly_ape = monthly_ape[monthly_ape != np.inf]
        monthly_mape = np.mean(monthly_ape)
        return monthly_ape, monthly_mape

    train_err, train_mape_monthy = month_level_mape(train_matrix)
    validation_err, validation_mape_monthy = month_level_mape(validation_matrix)
    test_err, test_mape_monthy = month_level_mape(test_matrix)

    output_columns = ['FORECASTING_UNIT_KEY', 'YEAR_WEEK', 'PURCHASE_QTY', 'predicted_PURCHASE_QTY']
    train_matrix['FORECASTING_UNIT_KEY'] = FU_name
    train_matrix = train_matrix[output_columns]
    validation_matrix['FORECASTING_UNIT_KEY'] = FU_name
    validation_matrix = validation_matrix[output_columns]
    test_matrix['FORECASTING_UNIT_KEY'] = FU_name
    test_matrix = test_matrix[output_columns]

    return (forecast_matrix, train_mape_monthy, validation_mape_monthy, test_mape_monthy,
            train_matrix, validation_matrix, test_matrix)