Global structure economy: A vision of GDP per capita and population
economy
python
Explores the global distribution of total GDP through a multidimensional perspective, offering a comprehensive view of income distribution across the worldwide population.
Published
Jun 21, 2025
Keywords
value chains
Summary
This animation illustrates how the hegemony exerted by certain nations has aimed to preserve an unequal income distribution by capturing the greater share of the value chains over the past decades.
Code
# Libraries# =====================================================================import requestsimport wbgapi as wbimport pandas as pdimport numpy as npimport seaborn as snsimport matplotlib.pyplot as pltimport matplotlib.animation as animationimport matplotlib.ticker as ticker# Data Extraction (Countries)# =====================================================================# Extract JSON and bring data to a dataframeurl ='https://raw.githubusercontent.com/guillemmaya92/world_map/main/Dim_Country.json'response = requests.get(url)data = response.json()df = pd.DataFrame(data)df = pd.DataFrame.from_dict(data, orient='index').reset_index()df_countries = df.rename(columns={'index': 'ISO3'})# Data Extraction - WBD (1960-1980)# ========================================================# To use the built-in plotting methodindicator = ['NY.GDP.PCAP.CD', 'SP.POP.TOTL']countries = df_countries['ISO3'].tolist()data_range =range(1960, 2024)data = wb.data.DataFrame(indicator, countries, data_range, numericTimeKeys=True, labels=False, columns='series').reset_index()df_wb = data.rename(columns={'economy': 'ISO3','time': 'Year','SP.POP.TOTL': 'LP','NY.GDP.PCAP.CD': 'NGDPDPC'})# Adjust LP and filter before 1980df_wb['LP'] = df_wb['LP'] /1000000df_wb = df_wb[df_wb['Year'] <1980]# Data Extraction - IMF (1980-2030)# =====================================================================#Parametroparameters = ['LP', 'NGDPDPC']# Create an empty listrecords = []# Iterar sobre cada parámetrofor parameter in parameters:# Request URL url =f"https://www.imf.org/external/datamapper/api/v1/{parameter}" response = requests.get(url) data = response.json() values = data.get('values', {})# Iterate over each country and yearfor country, years in values.get(parameter, {}).items():for year, value in years.items(): records.append({'Parameter': parameter,'ISO3': country,'Year': int(year),'Value': float(value) })# Create dataframedf_imf = pd.DataFrame(records)# Pivot Parameter to columns and filter nullsdf_imf = df_imf.pivot(index=['ISO3', 'Year'], columns='Parameter', values='Value').reset_index()# Filter after 2024df_imf = df_imf[df_imf['Year'] >=1980]# Data Manipulation# =====================================================================# Concat and filter dataframesdf = pd.concat([df_wb, df_imf], ignore_index=True)df = df.dropna(subset=['NGDPDPC', 'LP'], how='any')# Merge queriesdf = df.merge(df_countries, how='left', left_on='ISO3', right_on='ISO3')df = df[['ISO3', 'Country', 'Year', 'LP', 'NGDPDPC', 'Analytical', 'Region']]df = df[df['Region'].notna()]# Create a listdfs = []# Interpolate monthly datafor iso3 in df['ISO3'].unique(): temp_df = df[df['ISO3'] == iso3].copy() temp_df['Date'] = pd.to_datetime(temp_df['Year'], format='%Y') temp_df = temp_df[['Date', 'LP', 'NGDPDPC']] temp_df = temp_df.set_index('Date').resample('ME').mean().interpolate(method='linear').reset_index() temp_df['ISO3'] = iso3 temp_df['Year'] = temp_df['Date'].dt.year dfs.append(temp_df)# Concat dataframes df = pd.concat(dfs, ignore_index=True)# Filter nulls and orderdf = df.sort_values(by=['Year', 'NGDPDPC'])# Calculate 'left accrual widths'df['LP_Cum'] = df.groupby('Date')['LP'].cumsum()df['LP_Per'] = df['LP'] / df.groupby('Date')['LP'].transform('sum')df['LP_Cum_Per'] = df['LP_Cum'] / df.groupby('Date')['LP_Cum'].transform('max')df['Left'] = df['LP_Cum_Per'] - df['LP_Per']# Calculate GDP Average weighted by Population and partitioned by Yeardf['AVG_Weight'] = df.groupby('Date')['NGDPDPC'].transform(lambda x: np.average(x, weights=df.loc[x.index, 'LP']))# Add a total GDP column and cummulative itdf['GDP'] = df['NGDPDPC'] * df['LP']df['GDPcum'] = df.groupby('Date')['GDP'].cumsum()df['GDP_Per'] = df['GDP'] / df.groupby('Date')['GDP'].transform('sum')df['NGDPDPC_Change'] = ((df['NGDPDPC'] / df.groupby('ISO3')['NGDPDPC'].transform('first')) -1) *100# Copy a df sample to calculate a mediandf_sample = df.copy()columns = df.columnsdf_sample = np.repeat(df_sample.values, df_sample['LP'].astype(int), axis=0)df_sample = pd.DataFrame(df_sample, columns=columns)df_sample.dropna(subset=['NGDPDPC'], inplace=True)df_sample['NGDPDPC'] = pd.to_numeric(df_sample['NGDPDPC'], errors='coerce')df_sample['GDPcum'] = df_sample.groupby('Date')['NGDPDPC'].transform('cumsum')# Function to get median rowdef medianrow(group): medianrow =len(group) //2# Índice de la mitadreturn group.iloc[medianrow]df_sample = df_sample.groupby('Date').apply(medianrow).reset_index(drop=True)# Transform columnsdf_sample = df_sample[['Date', 'NGDPDPC', 'GDPcum']].reset_index(drop=True)df_sample = df_sample.rename(columns={'NGDPDPC': 'Median', 'GDPcum': 'Median_Total'})# Merge queriesdf = df.merge(df_sample, how='left', on='Date')# Add percentage bottom 50df['Median_Total_Per'] = df['Median_Total'] / df.groupby('Date')['GDP'].transform('sum')print(df)# Data Visualization# =====================================================================# Seaborn figure stylesns.set(style="whitegrid")fig, ax = plt.subplots(figsize=(16, 9))# Create a palettepalette = sns.color_palette("coolwarm", as_cmap=True).reversed()# Function to refresh animationdef update(date): plt.clf() subset = df[df['Date'] == date] subset_usa = subset[subset['ISO3'] =='USA'].copy()# Normalize GDPcum in a range [0, 1] gdp_min = subset['GDPcum'].min() gdp_max = subset_usa['GDPcum'].max() norm = plt.Normalize(gdp_min, gdp_max) colors = palette(norm(subset['GDPcum']))# Create a Matplotlib plot bars = plt.bar(subset['Left'], subset['NGDPDPC'], width=subset['LP_Per'], color=colors, alpha=1, align='edge', edgecolor='grey', linewidth=0.1)# Configuration grid and labels plt.text(0, 1.05, 'Distribution of Global GDP', fontsize=13, fontweight='bold', ha='left', transform=plt.gca().transAxes) plt.text(0, 1.02, 'Evolution of Countries from 1960 to 2029', fontsize=9, color='#262626', ha='left', transform=plt.gca().transAxes) plt.xlim(0, subset['LP_Cum_Per'].max()) plt.ylim(0, subset_usa['NGDPDPC'].max() *1.05) plt.grid(axis='x') plt.grid(axis='y', linestyle='--', linewidth=0.5, color='lightgray') plt.xlabel('Cumulative Global Population (%)', fontsize=10, fontweight='bold') plt.ylabel('GDP per capita (US$)', fontsize=10, fontweight='bold') plt.tick_params(axis='x', labelsize=9) plt.tick_params(axis='y', labelsize=9) plt.gca().xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(x*100):,}%')) plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(round(x, -3)):,}')) plt.xticks(np.linspace(0, subset['LP_Cum_Per'].max(), 5)) plt.yticks(np.linspace(0, subset_usa['NGDPDPC'].max() *1.05, 8))# Add Labels to relevant countriesfor bar, value, country inzip(bars, subset['GDP'], subset['ISO3']):if country in ['CHN', 'IND', 'USA', 'IDN', 'PAK', 'NGA', 'BRA', 'BGD', 'RUS', 'MEX', 'JPN', 'VNM', 'DEU', 'GBR']: plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height(),f'{country}\n{''}', ha='center', va='bottom', fontsize=7, color='grey')# Add Median Line and Label median = subset['Median'].max() median_total = subset.iloc[0]['Median_Total'] median_total_per = subset.iloc[0]['Median_Total_Per'] maxis = subset_usa['NGDPDPC'].max() plt.axhline( y=median, color='darkred', linestyle='--', linewidth=0.5) plt.text( x=subset['Left'].max() *0.02, y=median + (maxis *0.06), s=f'GDP Capita Median: {median:,.0f}', color='darkred', verticalalignment='bottom', horizontalalignment='left', fontsize=10, weight='bold') plt.gca().text( subset['Left'].max() *0.02, median + (maxis *0.04),f'Bottom 50 - Total GDP: {median_total:,.0f} MM ({median_total_per *100:.2f}%)', ha='left', va='center', fontsize=9, color='#737373')# Add USA Line and Label pibc_usa = subset_usa.iloc[0]['NGDPDPC'] pib_usa = subset_usa.iloc[0]['GDP'] pib_usa_per = subset_usa.iloc[0]['GDP_Per'] plt.axhline( y=pibc_usa, color='darkblue', linestyle='--', linewidth=0.5) plt.text( x=subset['Left'].max() *0.02, y=pibc_usa *0.95, s=f'GDP Capita USA: {pibc_usa:,.0f}', color='darkblue', fontsize=10, verticalalignment='bottom', horizontalalignment='left', weight='bold') plt.gca().text( subset['Left'].max() *0.02, pibc_usa *0.93,f'USA - Total GDP: {pib_usa:,.0f} MM ({pib_usa_per *100:.2f}%)', ha='left', va='center', fontsize=9, color='#737373')# Cover countries going outside plot formatted_date = date.strftime('%Y-%m') plt.text(1, 1.15, f'{formatted_date}', transform=plt.gca().transAxes, fontsize=22, ha='right', va='top', fontweight='bold', color='white', bbox=dict(facecolor='white', edgecolor='none', boxstyle='square,pad=1'))# Add Year label plt.text(1, 1.06, f'{formatted_date}', transform=plt.gca().transAxes, fontsize=22, ha='right', va='top', fontweight='bold', color='#D3D3D3', bbox=dict(facecolor='white', edgecolor='none', boxstyle='square,pad=0.3'))# Add Data Source plt.text(0, -0.1, 'Data Source: IMF World Economic Outlook Database, 2024 | World Bank national accounts data, and OECD National Accounts data files.', transform=plt.gca().transAxes, fontsize=8, color='gray')# Add author plt.text(1, -0.1, '@guillemmaya.com', transform=plt.gca().transAxes, fontsize=8, color='#212121', ha='right')# Add label "poorest" and "richest" plt.text(0, -0.065, 'Poorest', transform=ax.transAxes, fontsize=12, fontweight='bold', color='darkred', ha='left', va='center') plt.text(0.95, -0.065, 'Richest', transform=ax.transAxes, fontsize=12, fontweight='bold', color='darkblue', va='center')# Configurate animationdates =sorted(df['Date'].unique())ani = animation.FuncAnimation(fig, update, frames=dates, repeat=False, interval=500, blit=False)# Save the animation :)ani.save('C:/Users/guill/Downloads/FIG_GDP_Capita_Bars_1960.webp', writer='imagemagick', fps=15)# Print it!plt.show()