Problem Statement

🧩 Syntax:
Code:
def Model(params, plot_dir=None):
    """Fit a simple-exponential-smoothing model for one forecasting unit.

    The history in ``params['product_data']`` is split into train,
    validation and test windows, the smoothing level is chosen by a grid
    search over 0.0-0.9, and a forward forecast of
    ``params['forecast_weeks']`` weeks is produced.

    Parameters
    ----------
    params : dict
        Keys read by this function:

        * ``'product_data'`` - DataFrame with the columns ``'YEAR_WEEK'``
          (string that is split on ``'-'`` into year and week),
          ``'FORECASTING_UNIT_KEY'``, ``'FORECASTING_UNIT#'`` and
          ``'PURCHASE_QTY'``.
        * ``'less_data_flag'`` - when equal to ``'LESS_DATA'`` the windows
          are derived from ``'train_split_less_data'`` and
          ``'val_split_less_data'`` (fractions of the usable history)
          instead of the explicit week boundaries.
        * ``'start_train_week'``, ``'end_train_week'``,
          ``'start_validation_week'``, ``'end_validation_week'``,
          ``'start_test_week'``, ``'end_test_week'`` - ``YEAR_WEEK`` values
          that delimit the windows (end weeks are inclusive).
        * ``'forecast_weeks'`` - number of weeks to forecast.
    plot_dir : str, optional
        Directory in which ``<FORECASTING_UNIT#>_SE_FORECAST.png`` is saved.
        When ``None`` (the default) no plot is produced.

    Returns
    -------
    tuple
        ``(forecast_matrix, train_mape_monthy, validation_mape_monthy,
        test_mape_monthy, train_matrix, validation_matrix, test_matrix)``.
        The matrices are DataFrames keyed by ``'FORECASTING_UNIT_KEY'`` and
        ``'YEAR_WEEK'``; the MAPE values are computed on 4-week buckets.

    Raises
    ------
    IndexError
        If one of the boundary weeks is not present in ``'YEAR_WEEK'``.

    Notes
    -----
    Side effects: reads ``calendar_dim.csv`` (columns ``yearweek``,
    ``Year``, ``Week``) from the current working directory, prints timing
    and length diagnostics, and optionally writes a PNG.
    """

    def _position_of(frame, year_week):
        # Row position of the first record for the given YEAR_WEEK value.
        return frame.loc[frame['YEAR_WEEK'] == year_week].index[0]

    def _mean_squared_error(actual, predicted):
        # Mean of squared errors; pandas' mean() skips NaN entries.
        errors = pd.DataFrame({'actual': actual, 'predicted': predicted})
        return np.power(errors['actual'] - errors['predicted'], 2).mean()

    def _last_week_of(calendar, calendar_year):
        # Highest week number the calendar lists for the given year
        # (this is what accounts for 53-week years).
        return int(calendar.loc[calendar['Year'] == calendar_year, 'Week'].max())

    train_fraction = params['train_split_less_data']
    # Cumulative share of the history covered by train + validation.
    train_val_fraction = train_fraction + params['val_split_less_data']

    # Reading the input frame; work on a copy so the caller's data is untouched.
    input_file = params['product_data'].copy()
    input_file.sort_values('YEAR_WEEK', inplace=True)
    input_file = input_file.reset_index(drop=True)

    # Forecasting-unit level identifiers used as key references.
    FU_name = input_file['FORECASTING_UNIT_KEY'].reset_index(drop=True)[0]
    FU_no = str(input_file['FORECASTING_UNIT#'].reset_index(drop=True)[0]) + '_SE'
    less_data_flag = params['less_data_flag']

    # Window boundaries as row positions; every end position is exclusive.
    start_train_index = _position_of(input_file, params['start_train_week'])
    end_train_index = _position_of(input_file, params['end_train_week']) + 1

    start_validation_index = _position_of(input_file, params['start_validation_week'])
    end_validation_index = _position_of(input_file, params['end_validation_week']) + 1

    start_test_index = _position_of(input_file, params['start_test_week'])
    end_test_index = _position_of(input_file, params['end_test_week']) + 1

    forecast_weeks = params['forecast_weeks']

    # Changing train/validation/test when less data is available: drop the
    # leading rows without a PURCHASE_QTY and split what remains by fraction.
    if less_data_flag == 'LESS_DATA':
        first_valid = input_file['PURCHASE_QTY'].first_valid_index()
        input_file = input_file[first_valid:len(input_file)].reset_index(drop=True)
        total_length = len(input_file)

        start_train_index = input_file.index[0]
        end_train_index = int(total_length * train_fraction)

        start_validation_index = end_train_index
        end_validation_index = int(total_length * train_val_fraction)

        start_test_index = end_validation_index
        end_test_index = total_length

    # Total weeks in validation and test.
    validation_weeks = end_validation_index - start_validation_index
    test_weeks = end_test_index - start_test_index

    # Data used for the forward forecast: the training window shifted
    # forward by the validation and test lengths.
    start_retrain_index = start_train_index + validation_weeks + test_weeks
    end_retrain_index = end_train_index + validation_weeks + test_weeks

    input_file = input_file.fillna(0)

    # Filtering from train start to test end. The frame is re-indexed from 0,
    # so every boundary is rebased by the same offset; without this the
    # positions below would be shifted whenever training does not start at
    # the first row.
    offset = start_train_index
    input_file = input_file.iloc[start_train_index:end_test_index].reset_index(drop=True)
    train_start = 0
    train_end = end_train_index - offset
    validation_start = start_validation_index - offset
    validation_end = end_validation_index - offset
    test_start = start_test_index - offset
    test_end = end_test_index - offset
    retrain_start = start_retrain_index - offset
    retrain_end = end_retrain_index - offset

    input_file[['Year', 'Week']] = input_file['YEAR_WEEK'].str.split('-', expand=True)
    input_file = input_file[['YEAR_WEEK', 'Year', 'Week', 'PURCHASE_QTY']].copy()
    input_file['Year'] = input_file['Year'].astype('int')
    input_file['Week'] = input_file['Week'].astype('int')

    # Splitting the dataset into train, validation and retrain windows.
    train = input_file.iloc[train_start:train_end]
    validation = input_file.iloc[validation_start:validation_end]
    retrain = input_file.iloc[retrain_start:retrain_end]

    # Creating the forecast calendar: it starts the week after the last
    # week present in the filtered history.
    year = input_file['Year'].max()
    week_forecast = input_file[input_file['Year'] == year]['Week'].max() + 1

    # NOTE(review): the calendar path is hard-coded and was marked
    # "for testing purposes" - confirm where production should read it from.
    calendar_dim = pd.read_csv('calendar_dim.csv')
    calendar_dim['yearweek'] = calendar_dim['yearweek'].apply(int)
    calendar_dim = calendar_dim[['yearweek', 'Year', 'Week']].drop_duplicates()

    week_max = _last_week_of(calendar_dim, year)
    forecast_years = []
    forecast_week_numbers = []
    for _ in range(forecast_weeks):
        if week_forecast > week_max:
            # Roll over into the next calendar year.
            year += 1
            week_forecast = 1
            week_max = _last_week_of(calendar_dim, year)
        forecast_years.append(year)
        forecast_week_numbers.append(week_forecast)
        week_forecast += 1

    forecast = pd.DataFrame(
        {'Year': forecast_years, 'Week': forecast_week_numbers},
        index=range(forecast_weeks),
    )
    forecast['Year'] = forecast['Year'].astype(str)
    # NOTE(review): weeks are zero-padded to three characters here; confirm
    # this matches the YEAR_WEEK format of the input data.
    forecast['Week'] = forecast['Week'].astype(str).str.zfill(3)
    forecast['YEAR_WEEK'] = forecast['Year'] + '-' + forecast['Week']

    # Grid search over the smoothing level.
    alpha_list = [i / 10 for i in range(0, 10, 1)]
    train_actual = np.asarray(train['PURCHASE_QTY'])
    validation_actual = np.asarray(validation['PURCHASE_QTY'])
    model = SimpleExpSmoothing(train_actual)

    # Rows are collected in a list because DataFrame.append was removed in
    # pandas 2.0.
    grid_rows = []
    start_time = time.time()
    for alpha in alpha_list:
        smoothed = model.fit(smoothing_level=alpha, optimized=True)
        train_pred = smoothed.predict(train_start, train_end - 1)
        validation_pred = smoothed.predict(validation_start, validation_end - 1)
        grid_rows.append({
            'alpha': alpha,
            'Train_MSE': _mean_squared_error(train_actual, train_pred),
            'validation_MSE': _mean_squared_error(validation_actual, validation_pred),
        })
    print("--- %s seconds ---" % (time.time() - start_time))

    grid_search_df = pd.DataFrame(grid_rows)
    # Both errors are scaled by their maximum so they weigh equally in the sum.
    grid_search_df['validation_MSE'] = (
        grid_search_df['validation_MSE'] / grid_search_df['validation_MSE'].max()
    )
    grid_search_df['Train_MSE'] = (
        grid_search_df['Train_MSE'] / grid_search_df['Train_MSE'].max()
    )
    grid_search_df['SUM'] = grid_search_df['validation_MSE'] + grid_search_df['Train_MSE']

    # Parameter selection: first alpha with the lowest combined error.
    best_rows = grid_search_df[grid_search_df['SUM'] == grid_search_df['SUM'].min()]
    best_alpha = best_rows['alpha'].iloc[0]

    # Predictions over train..test from the model fitted on the train window.
    prediction_matrix = input_file[['YEAR_WEEK', 'Year', 'Week', 'PURCHASE_QTY']].copy()
    fit = model.fit(smoothing_level=best_alpha, optimized=True)
    history_prediction = fit.predict(train_start, test_end - 1)
    prediction_matrix['predicted_PURCHASE_QTY'] = history_prediction
    print("Length of Train Forecast=", len(history_prediction))

    # Explicit copies so the column assignments below act on independent frames.
    train_matrix = prediction_matrix.iloc[train_start:train_end].copy()
    validation_matrix = prediction_matrix.iloc[validation_start:validation_end].copy()
    test_matrix = prediction_matrix.iloc[test_start:test_end].copy()

    # Test predictions come from a model refitted on train + validation.
    model = SimpleExpSmoothing(input_file['PURCHASE_QTY'].iloc[train_start:validation_end])
    fit = model.fit(smoothing_level=best_alpha, optimized=True)
    test_matrix['predicted_PURCHASE_QTY'] = np.asarray(
        fit.predict(test_start, test_end - 1)
    )

    # Forward forecast from the retrain window.
    forecast_matrix = forecast[['YEAR_WEEK', 'Year', 'Week']].copy()
    model = SimpleExpSmoothing(retrain['PURCHASE_QTY'])
    fit = model.fit(smoothing_level=best_alpha, optimized=True)

    # One value per forecast row: forecast(steps) returns exactly `steps`
    # values, so asking for forecast_weeks - 1 left the last week empty.
    forecast_values = np.asarray(fit.forecast(forecast_weeks))
    forecast_matrix['predicted_PURCHASE_QTY'] = forecast_values
    print("Length of Forecast=", len(forecast_values))

    def my_plot_func(plot_dir):
        # Plot actuals and predictions for every window and save the PNG.
        colors = cycler('color',
                    ['#EE6666', '#3388BB', '#9988DD',
                     '#EECC55', '#88BB44', '#FFBBBB'])
        tick_spacing = 10
        fig, ax = plt.subplots(1, 1, figsize=(20, 8))
        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
        ax.plot(train_matrix['YEAR_WEEK'], train_matrix['PURCHASE_QTY'], label='Train', color='orange')
        ax.plot(train_matrix['YEAR_WEEK'], train_matrix['predicted_PURCHASE_QTY'], label='Train Predicted', color='blue')
        ax.plot(validation_matrix['YEAR_WEEK'], validation_matrix['predicted_PURCHASE_QTY'], label='Validation Predicted', color='purple')
        ax.plot(validation_matrix['YEAR_WEEK'], validation_matrix['PURCHASE_QTY'], label='Validation', color='yellow')
        ax.plot(test_matrix['YEAR_WEEK'], test_matrix['predicted_PURCHASE_QTY'], label='Test Predicted', color='green')
        ax.plot(test_matrix['YEAR_WEEK'], test_matrix['PURCHASE_QTY'], label='Test', color='violet')
        ax.plot(forecast_matrix['YEAR_WEEK'], forecast_matrix['predicted_PURCHASE_QTY'], label='Forecast', color='red')
        plt.legend(loc='upper left', fontsize=8)
        plt.title(FU_name+'_FORECAST'+'_SE')
        # NOTE(review): these rc settings are applied after the axes exist,
        # so they look like they only style figures created later - confirm.
        plt.rc('axes', facecolor='#E6E6E6', edgecolor='none', axisbelow=True, grid=True, prop_cycle=colors)
        plt.rc('grid', color='w', linestyle='solid')
        plt.xticks(rotation=90)
        plt.savefig(plot_dir+'/'+str(FU_no)+'_FORECAST.png')
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
        return None

    # plot_dir defaults to None; plotting is skipped instead of failing on
    # None + '/'.
    if plot_dir is not None:
        my_plot_func(plot_dir)

    forecast_matrix['FORECASTING_UNIT_KEY'] = FU_name
    forecast_matrix = forecast_matrix[['FORECASTING_UNIT_KEY', 'YEAR_WEEK', 'predicted_PURCHASE_QTY']]

    def month_level_mape(matrix):
        # Absolute percentage errors on consecutive 4-week buckets, and
        # their mean.
        df = matrix.copy()
        df = df.sort_values(by=['Year', 'Week']).reset_index()
        df['count'] = df.index + 1
        df['temp'] = 0
        # Every 4th row (1st, 5th, 9th, ...) opens a new bucket.
        df.loc[(df['count'] - 1) % 4 == 0, 'temp'] = 1
        df['month_count'] = df['temp'].cumsum()

        # The last two rows are folded into the bucket of the third-from-last
        # row. Frames shorter than three rows have only one bucket, so there
        # is nothing to fold (and no row to read the bucket from).
        if len(df) >= 3:
            df.loc[len(df) - 2, 'month_count'] = df.loc[len(df) - 3, 'month_count']
            df.loc[len(df) - 1, 'month_count'] = df.loc[len(df) - 3, 'month_count']

        df = df.groupby('month_count', as_index=False).agg({
                'predicted_PURCHASE_QTY': 'sum',
                'PURCHASE_QTY': 'sum'
                })

        monthly_ape = np.abs((df['predicted_PURCHASE_QTY'] - df['PURCHASE_QTY']) / df['PURCHASE_QTY'])
        # Buckets with zero actuals give an infinite error; they are excluded.
        monthly_ape = monthly_ape[monthly_ape != np.inf]
        monthly_mape = np.mean(monthly_ape)
        return monthly_ape, monthly_mape

    train_err, train_mape_monthy = month_level_mape(train_matrix)
    validation_err, validation_mape_monthy = month_level_mape(validation_matrix)
    test_err, test_mape_monthy = month_level_mape(test_matrix)

    output_columns = ['FORECASTING_UNIT_KEY', 'YEAR_WEEK', 'PURCHASE_QTY', 'predicted_PURCHASE_QTY']
    train_matrix['FORECASTING_UNIT_KEY'] = FU_name
    train_matrix = train_matrix[output_columns]
    validation_matrix['FORECASTING_UNIT_KEY'] = FU_name
    validation_matrix = validation_matrix[output_columns]
    test_matrix['FORECASTING_UNIT_KEY'] = FU_name
    test_matrix = test_matrix[output_columns]

    return(forecast_matrix,train_mape_monthy,validation_mape_monthy,test_mape_monthy,train_matrix,validation_matrix,test_matrix)