Print

60 of 100: Area chart in matplotlib

At the beginning of the year I challenged myself to create all 100 visualizations using python and matplotlib from the 1 dataset, 100 visualizations project, and I am sharing with you the code for all the visualizations.

Note: Data Viz Project is copyright Ferdio and available under a Creative Commons Attribution – Non Commercial – No Derivatives 4.0 International license. I asked Ferdio and they told me they used a Design tool to create all the plots.

Collaborate

There are a ton of improvements that can be made on the code, so let me know in the comments any improvements you make and I will update the post accordingly!

This is the original viz that we are trying to recreate in matplotlib:

Import the packages

We will need the following packages:

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LinearSegmentedColormap

import numpy as np
import pandas as pd

from svgpathtools import svg2paths
from svgpath2mpl import parse_path

Generate the data

We could actually go from numpy to matplotlib, but most data projects use pandas to transform the data, so I am using a pandas dataframe as the starting point.


# Colour pairs; each inner list matches one country's 2004 and 2022 entries
# in color_dict below.
gradient_colors = [
    ['#CE5C46', '#DB9386'],
    ['#5678D7', '#8DADF2'],
    ['#303653', '#5C6077'],
]

# Colour lookup keyed by (year, country).
color_dict = {
    (2004, "Norway"): "#303653",
    (2022, "Norway"): "#5C6077",
    (2004, "Denmark"): "#CE5C46",
    (2022, "Denmark"): "#DB9386",
    (2004, "Sweden"): "#5678D7",
    (2022, "Sweden"): "#8DADF2",
}

# Colours for the text elements and the separator lines of the chart.
xy_ticklabel_color = '#101628'
xlabel_color = "#101628"
grand_totals_color = "#101628"
grid_color = "#F2F3F4"
datalabels_color = "#ffffff"

# Raw input: one row per (country, year) with the number of sites.
data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Denmark", "Denmark", "Norway", "Norway", "Sweden", "Sweden"],
    "sites": [4, 10, 5, 8, 13, 15],
}
df = pd.DataFrame(data)
index  year  countries  sites
0      2004  Sweden     13
1      2022  Sweden     15
2      2004  Denmark    4
3      2022  Denmark    10
4      2004  Norway     5
5      2022  Norway     8

We need to create the subtotals for each country, the year labels and the percentage change, and then sort the data.

# Two-digit year label with a leading apostrophe, e.g. 2004 -> '04.
df['year_lbl'] = "'" + df['year'].astype(str).str[-2:]

# Total number of sites per country, summed over all of that country's rows.
df['sub_total'] = df.groupby('countries')['sites'].transform('sum')

# Percentage change in sites from one row to the next within each country.
# GroupBy.pct_change keeps the original index, so the result aligns with df
# by label instead of relying on the positional order of a NumPy array.
# Scale to percent before rounding so the multiplication cannot reintroduce
# floating-point noise into the value that is later printed on the chart.
df['pct_change'] = (
    df.groupby('countries', sort=False)['sites'].pct_change().mul(100).round(1)
)

# The first row of each country has no predecessor and is NaN; fill it from
# the next row. DataFrame.bfill() replaces the deprecated
# fillna(method='bfill').
df = df.bfill()

# Custom sort: countries appear in the order given by sort_order_dict.
sort_order_dict = {"Denmark": 1, "Sweden": 2, "Norway": 3}
df = df.sort_values(
    by=['countries'],
    key=lambda x: x.map(sort_order_dict),
    # Rows of the same country tie on the key; a stable sort guarantees
    # their existing relative order (2004 before 2022) is kept.
    kind='stable',
)

# Add the color based on the color dictionary, looked up by (year, country).
df['color'] = df.set_index(['year', 'countries']).index.map(color_dict.get)
index  year  countries  sites  year_lbl  sub_total  pct_change  color
0      2004  Denmark    4      '04       14         150.0       #CE5C46
1      2022  Denmark    10     '22       14         150.0       #DB9386
4      2004  Sweden     13     '04       28         15.4        #5678D7
5      2022  Sweden     15     '22       28         15.4        #8DADF2
2      2004  Norway     5      '04       13         60.0        #303653
3      2022  Norway     8      '22       13         60.0        #5C6077

Create the heritage symbol

# Read the SVG file; only the attributes of its first path are used below.
icon_path, attributes = svg2paths('flags/Unesco_World_Heritage_logo_notext_transparent.svg')

# Convert the SVG path data (the "d" attribute) into a matplotlib path object.
icon_marker = parse_path(attributes[0]['d'])

# Shift the vertices so that their mean sits at the origin.
icon_marker.vertices -= icon_marker.vertices.mean(axis=0)

# Rotate by 180 degrees, then mirror along x, applied one after the other.
for _transform in (
    mpl.transforms.Affine2D().rotate_deg(180),
    mpl.transforms.Affine2D().scale(-1, 1),
):
    icon_marker = icon_marker.transformed(_transform)

Define the variables

# Distinct values of each column, in order of first appearance in df.
countries = df["countries"].unique()
years = df["year"].unique()
year_labels = df["year_lbl"].unique()
site_cmap = df["sites"]


def _strip_whole_decimal(value):
    """Return value as an int when it is a whole number, otherwise unchanged."""
    if float(value).is_integer():
        return int(value)
    return value


# Percentage labels: whole numbers lose their decimals, others keep them.
pcts = [_strip_whole_decimal(num) for num in df["pct_change"]]

# Colour of every second row, starting with the first one.
line_colors = df["color"].iloc[::2]

# Per-country list of colours, with countries in order of appearance.
color = df.groupby("countries", sort=False)["color"].apply(list)
gradient_colors = color.to_list()

Plot the chart

# One panel per country, all sharing the y axis.
# squeeze=False makes plt.subplots always return a 2-D array of Axes, even for
# a single country (where it would otherwise return a bare Axes object with no
# .ravel()); flattening it gives a 1-D array in every case.
fig, axes = plt.subplots(ncols=len(countries), nrows=1, sharey=True, figsize=(10, 6),
                         facecolor="#FFFFFF", squeeze=False)
axes = axes.ravel()
fig.tight_layout(pad=0.1)

# pcts[1::2] takes every second percentage starting from the second row.
for country, line_color, pct_change, gradient_color, ax in zip(countries, line_colors, pcts[1::2], gradient_colors, axes):
    sites = df[df.countries == country]['sites'].tolist()
    cm = LinearSegmentedColormap.from_list('Temperature Map', gradient_color)

    # Draw an invisible area; fill_between returns a polygon collection that
    # is only used for its outline.
    polygon = ax.fill_between(year_labels, 0, sites, lw=0, color='none')
    # Collect the vertices of the polygon to get its bounding box.
    verts = np.vstack([p.vertices for p in polygon.get_paths()])
    # Paint a vertical gradient image over the polygon's bounding box ...
    gradient = ax.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap=cm, aspect='auto', origin='lower',
                         extent=[verts[:, 0].min(), verts[:, 0].max(), verts[:, 1].min(), verts[:, 1].max()])
    # ... and clip it to the polygon so only the area under the line is filled.
    gradient.set_clip_path(polygon.get_paths()[0], transform=ax.transData)

    # Line with markers on top of the gradient area.
    ax.plot(year_labels, sites, '-o', ms=8, mec="w", clip_on=False, color=line_color, zorder=1)
    ax.set_ylim(0, 16)
    ax.set_xlim(-0.1, 1.1)
    ax.tick_params(length=0, labelsize=12, colors=xy_ticklabel_color, pad=23)
    ax.set_yticks([])
    ax.spines[['top', 'left', 'bottom', 'right']].set_visible(False)
    ax.set_xlabel(f'{country}', size=20, color=xlabel_color, weight='bold', ha="center")
    ax.xaxis.set_label_coords(0.5, -0.15)
    # Heritage icon drawn as a marker below the axes (clip_on=False keeps it visible).
    ax.plot(0.5, -4, marker=icon_marker, color=line_color, markersize=32, clip_on=False)
    # Separator line on the left of the panel; drawn outside the x limits,
    # hence clip_on=False.
    ax.axvline(-0.2, -0.2, 1.1, clip_on=False, color=grid_color)
    # Percentage change label with an up-pointing triangle above it.
    ax.annotate(f'  \u25B2\n\n{pct_change}%', xy=(0.5, 1), color=datalabels_color, size=28, weight="bold", ha="center")

    # Value label one unit above each data point.
    for i, lb in enumerate(sites):
        ax.annotate(lb, xy=(i, lb + 1), size=12, color=xlabel_color, ha="center", va="center")

# Closing separator line on the right of the last panel. Index the array
# explicitly instead of relying on the loop variable leaking out of the loop.
axes[-1].axvline(1.2, -0.2, 1.1, clip_on=False, color=grid_color)

The result:

60 of 100: Area chart in matplotlib
Was this helpful?

Reader Interactions

Leave a Reply

Your email address will not be published. Required fields are marked *

Table of Contents