# Data cleaning and graphing code
#
# Syntax: Python
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

import json
import pandas as pd
import numpy as np
import scipy.stats as sp

# Load the league data. The loop further down reads a 'Name' and a 'Data'
# entry from every item of this list.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so that
# non-ASCII text decodes the same way on every platform instead of depending
# on the locale's default encoding.
with open("file.json", "r", encoding="utf-8") as f:
    LeagueMasterList = json.load(f)

def double_std(array):
    """Return twice the standard deviation of ``array``.

    The deviation is computed by ``np.std`` with its default arguments.
    The aggregation code in this file looks the result up under the column
    label ``'double_std'``, so the function name must stay as it is.
    """
    spread = np.std(array)
    return spread * 2

# Data-quality thresholds applied to every league's rows.
MIN_SQUAD_COUNT = 10     # keep rows whose SquadCount is strictly above this
MAX_PLAUSIBLE_AGE = 50   # average ages must lie strictly between 0 and this
# NOTE(review): the comment that used to sit on the season filter said
# "less than 9 clubs" while the code compared against 5. The value that was
# actually executed (5) is kept here -- confirm the intended cutoff.
MIN_CLUBS_PER_SEASON = 5


def _clean_league_frame(records):
    """Build a cleaned DataFrame from one league's raw 'Data' records.

    Steps, in order:
      1. cast SquadCount to int;
      2. drop rows with a blank AverageAgeSquad or SquadCount <= MIN_SQUAD_COUNT;
      3. cast PlayersUsed to int;
      4. cast AverageAgeSquad, then AverageAgeStartingXI, to float and keep
         only rows where each lies strictly between 0 and MAX_PLAUSIBLE_AGE;
      5. cast AverageAge, SquadCount and SeasonStartYear to float;
      6. drop every season left with fewer than MIN_CLUBS_PER_SEASON rows.

    Returns an independent DataFrame (possibly empty). Values that cannot be
    converted still raise from ``astype``.
    """
    df = pd.DataFrame(records)
    if df.empty:
        return df

    df['SquadCount'] = df['SquadCount'].astype('int')

    has_squad_age = df['AverageAgeSquad'] != ''
    big_enough_squad = df['SquadCount'] > MIN_SQUAD_COUNT
    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of the unfiltered data (SettingWithCopyWarning).
    df = df[has_squad_age & big_enough_squad].copy()

    df['PlayersUsed'] = df['PlayersUsed'].astype('int')

    # Each age column is converted and range-checked before the next one is
    # converted, so only rows that survived the earlier checks are cast.
    for column in ('AverageAgeSquad', 'AverageAgeStartingXI'):
        df[column] = df[column].astype('float')
        in_range = (df[column] > 0) & (df[column] < MAX_PLAUSIBLE_AGE)
        df = df[in_range].copy()

    for column in ('AverageAge', 'SquadCount', 'SeasonStartYear'):
        df[column] = df[column].astype('float')

    # Remove seasons that have too few remaining rows.
    season_sizes = df['SeasonStartYear'].value_counts()
    sparse_seasons = season_sizes[season_sizes < MIN_CLUBS_PER_SEASON].index
    return df[~df['SeasonStartYear'].isin(sparse_seasons)].copy()


def _season_stats(df, column):
    """Aggregate ``column`` per SeasonStartYear.

    Returns a DataFrame with the columns 'SeasonStartYear', 'mean',
    'double_std' and 'sem', one row per season.
    """
    # Group with the season as the index and restore it as a column with
    # reset_index(); combining as_index=False with a list of aggregations does
    # not give the same layout across pandas versions. The string "mean" is
    # used because passing np.mean to agg() is deprecated.
    per_season = df.groupby('SeasonStartYear')[column]
    return per_season.agg(['mean', double_std, sp.sem]).reset_index()


# One figure per league. To plot a single league only, filter
# LeagueMasterList on item['Name'] before this loop.
for league in LeagueMasterList:
    df = _clean_league_frame(league['Data'])
    if df.empty:
        # Nothing survived cleaning, so there is nothing to aggregate or plot.
        continue

    dfSquad = _season_stats(df, 'AverageAgeSquad')
    dfXI = _season_stats(df, 'AverageAgeStartingXI')
    # Add 0.5 so the starting-XI markers sit beside, not on top of, the
    # squad markers of the same season.
    dfXI['SeasonStartYear'] = dfXI['SeasonStartYear'] + 0.5

    fig = plt.figure(figsize=(20, 20), dpi=380)

    # The scatter previously referenced an undefined name `ax` (NameError).
    # NOTE(review): it is placed in the lower half of the 2x1 layout because
    # the error-bar panel uses position 211 -- confirm this is the intended
    # location.
    ax = fig.add_subplot(212)
    df.plot.scatter('SeasonStartYear', 'AverageAgeStartingXI', color='DarkGreen', ax=ax, alpha=0.3, label='AvgAgeStartingXI')

    # Upper panel: per-season means with error bars of two standard deviations.
    ax2 = fig.add_subplot(211)
    ax2.set_xlim(left=1925, right=2025)
    ax2.errorbar(dfSquad['SeasonStartYear'], dfSquad['mean'], color='tab:orange', fmt='o', label='AvgAgeSquad', yerr=dfSquad['double_std'], ecolor='tab:orange', capsize=4, elinewidth=3)
    ax2.errorbar(dfXI['SeasonStartYear'], dfXI['mean'], color='tab:blue', fmt='o', label='AvgAgeStartingXI', yerr=dfXI['double_std'], ecolor='tab:blue', capsize=4, elinewidth=3)
    # Axes methods are used instead of plt.* so these settings land on the
    # error-bar panel regardless of which axes is current.
    ax2.legend(loc='upper left')
    ax2.set_title(league['Name'])
    ax2.grid(True)