"""Plot per-season average squad and starting-XI ages for every league in file.json.

For each league entry the script cleans the raw rows, drops seasons with too
few clubs, draws a scatter of the raw starting-XI ages, and draws an errorbar
chart (mean +/- two standard deviations) of squad age and starting-XI age per
season.
"""

import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sp
from matplotlib.pyplot import figure

# Rows whose SquadCount is not strictly greater than this are treated as
# high-error data and discarded.
MIN_SQUAD_COUNT = 10

# Average ages must lie strictly between 0 and this value to be kept.
MAX_PLAUSIBLE_AGE = 50

# Seasons with fewer than this many remaining clubs are discarded.
MIN_CLUBS_PER_SEASON = 5

# Horizontal shift applied to the starting-XI series so that its markers sit
# next to, rather than on top of, the squad markers for the same season.
XI_X_OFFSET = 0.5


def double_std(array):
    """Return twice the standard deviation of *array* as computed by ``np.std``."""
    return np.std(array) * 2


def _clean_league_frame(df):
    """Cast the numeric columns of *df* and drop blank or implausible rows."""
    df = df.copy()
    df["SquadCount"] = df["SquadCount"].astype("int")

    # Blank squad ages cannot be cast to float, so remove them first.
    df = df[df["AverageAgeSquad"] != ""]
    df = df[df["SquadCount"] > MIN_SQUAD_COUNT].copy()

    df["PlayersUsed"] = df["PlayersUsed"].astype("int")

    # Cast-then-filter one age column at a time: the starting-XI column is
    # only cast for rows that survived the squad-age filter.
    for age_column in ("AverageAgeSquad", "AverageAgeStartingXI"):
        df[age_column] = df[age_column].astype("float")
        plausible = (df[age_column] > 0) & (df[age_column] < MAX_PLAUSIBLE_AGE)
        df = df[plausible].copy()

    for column in ("AverageAge", "SquadCount", "SeasonStartYear"):
        df[column] = df[column].astype("float")
    return df


def _drop_sparse_seasons(df, min_clubs=MIN_CLUBS_PER_SEASON):
    """Return *df* without the seasons that have fewer than *min_clubs* rows."""
    clubs_per_season = df["SeasonStartYear"].value_counts()
    sparse_seasons = clubs_per_season[clubs_per_season < min_clubs].index
    return df[~df["SeasonStartYear"].isin(sparse_seasons)]


def _summarise_by_season(df, column):
    """Return per-season ``mean``, ``double_std`` and ``sem`` of *column*.

    The result has ``SeasonStartYear`` as an ordinary column. Grouping with
    the default ``as_index=True`` and resetting the index afterwards avoids
    relying on how a given pandas version treats ``as_index=False`` together
    with a list of aggregations.
    """
    return (
        df.groupby("SeasonStartYear")[column]
        .agg(mean="mean", double_std=double_std, sem=sp.sem)
        .reset_index()
    )


def _plot_season_errorbars(ax, summary, label, color):
    """Draw mean +/- ``double_std`` markers for one summary table on *ax*."""
    ax.errorbar(
        summary["SeasonStartYear"],
        summary["mean"],
        color=color,
        fmt="o",
        label=label,
        yerr=summary["double_std"],
        ecolor=color,
        capsize=4,
        elinewidth=3,
    )


def main():
    """Load ``file.json`` and build the age charts for every league in it."""
    with open("file.json", "r", encoding="utf-8") as f:
        league_master_list = json.load(f)

    for league in league_master_list:
        df = pd.DataFrame(league["Data"])
        if df.empty:
            # No rows means no columns to clean or plot.
            continue

        df = _drop_sparse_seasons(_clean_league_frame(df))

        # pandas creates the axes itself here; the script never defined the
        # ``ax`` it used to pass, which raised NameError.
        # NOTE(review): if this scatter was meant to share axes with another
        # plot, create that Axes first and pass it via ``ax=``.
        df.plot.scatter(
            "SeasonStartYear",
            "AverageAgeStartingXI",
            color="DarkGreen",
            alpha=0.3,
            label="AvgAgeStartingXI",
        )

        squad_summary = _summarise_by_season(df, "AverageAgeSquad")
        xi_summary = _summarise_by_season(df, "AverageAgeStartingXI")
        xi_summary["SeasonStartYear"] = xi_summary["SeasonStartYear"] + XI_X_OFFSET

        fig = figure(figsize=(20, 20), dpi=380)
        ax = fig.add_subplot(211)
        ax.set_xlim(left=1925, right=2025)
        _plot_season_errorbars(ax, squad_summary, "AvgAgeSquad", "tab:orange")
        _plot_season_errorbars(ax, xi_summary, "AvgAgeStartingXI", "tab:blue")
        ax.legend(loc="upper left")
        ax.set_title(league["Name"])
        ax.grid(True)

    # NOTE(review): the figures are neither shown nor saved by this script;
    # add plt.show() or fig.savefig(...) if it is run non-interactively.


if __name__ == "__main__":
    main()