Print

Tornado Charts in matplotlib

In this tutorial I will show you how to create Tornado charts using Python and Matplotlib. For more matplotlib charts, check out the gallery:

1 dataset 100 matplotlib visualizations
Python dataviz gallery, matplotlib viz gallery

Important notes:

1. These are my personal notes, so apologies if some explanations and notations are missing.

Tornado charts in matplotlib

This is what we will be creating:

  1. Tornado chart with legend in the middle
  2. Tornado chart with legend on top of the bar
  3. Polar tornado chart
  4. Not quite a tornado bar

Tornado chart in matplotlib with legend in the middle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.transforms import blended_transform_factory

Add data and columns:

# Bar color for each survey year.
color_dict = {2022: "#A54836", 2004: "#5375D4"}

# Palette for tick labels, titles, grid lines and data labels.
xy_ticklabel_color = "#101628"
title_color = "#101628"
grid_color = "#C8C9C9"
datalabels_color = "#101628"

data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Sweden", "Sweden", "Denmark", "Denmark", "Norway", "Norway"],
    "sites": [13, 15, 4, 10, 5, 8],
}

df = pd.DataFrame(data)

# Custom sort: `key` is applied to each sort column separately, so a single
# dict can rank both the year values and the country names.
sort_order_dict = {"Denmark": 2, "Sweden": 3, "Norway": 1, 2004: 4, 2022: 5}
df = df.sort_values(
    by=["year", "countries"],
    key=lambda column: column.map(sort_order_dict),
)

# Map each row's year to its bar color. (An earlier statement looked up
# (year, country) tuples in color_dict, which is keyed by year only, so it
# produced nothing but None and was overwritten here; it has been removed.)
df["color"] = df["year"].map(color_dict)
df
   year countries  sites    color
4  2004    Norway      5  #5375D4
2  2004   Denmark      4  #5375D4
0  2004    Sweden     13  #5375D4
5  2022    Norway      8  #A54836
3  2022   Denmark     10  #A54836
1  2022    Sweden     15  #A54836

Define variables

# Distinct values (in order of first appearance in the sorted frame) and the
# full sites column, as consumed by the plotting code.
years = df["year"].unique()
sites = df["sites"]
countries = df["countries"].unique()
colors = df["color"].unique()

Plot the chart

# One subplot per year, side by side; w_pad leaves a gap between the two
# panels where the country names are written.
fig, axes = plt.subplots(ncols=len(years), figsize=(10, 4))
fig.tight_layout(w_pad=10)

# -1 makes the first year's bars grow to the left, +1 makes the second
# year's bars grow to the right.
direction = [-1, 1]

for year, sign, color, ax in zip(years, direction, colors, axes.ravel()):
    temp_df = df[df.year == year]
    ax.barh(
        temp_df.countries,
        temp_df.sites * sign,
        align="center",
        height=0.6,
        facecolor=color,
    )

    # Value labels, one unit beyond the end of each bar. The values must come
    # from this year's rows: zipping against the whole-frame `sites` series
    # labelled the second panel with the first year's numbers.
    for bar, site in zip(ax.patches, temp_df.sites):
        ax.text(
            bar.get_width() + sign,
            bar.get_height() / 2 + bar.get_y(),
            site,
            ha="center", va="center", color=xy_ticklabel_color, size=11,
        )

    # Year heading above the last (top-most) bar of the panel.
    top_bar = ax.patches[-1]
    ax.text(
        top_bar.get_width(),
        top_bar.get_height() + 0.5 + top_bar.get_y(),
        year, size=16, weight="bold",
        ha="center", va="center", color=xy_ticklabel_color,
    )

    ax.set_axis_off()

# Blend the coordinate systems: x in figure coordinates (0.5 = horizontal
# center of the figure), y in the data coordinates of the last panel drawn
# (`ax` still refers to it after the loop).
trans = blended_transform_factory(x_transform=fig.transFigure, y_transform=ax.transData)

# Country names in the gap between the panels, one per bar row.
for i, country in enumerate(countries):
    ax.annotate(
        country, xy=[0.5, i], xycoords=trans,
        ha="center", va="center", color=xy_ticklabel_color,
    )
74 of 100: Radial bar chart in matplotlib

Tornado chart in matplotlib no middle space

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Add data and columns:

# Bar color for each survey year.
color_dict = {2022: "#A54836", 2004: "#5375D4"}

# Palette for tick labels, titles, grid lines and data labels.
xy_ticklabel_color = "#101628"
title_color = "#101628"
grid_color = "#C8C9C9"
datalabels_color = "#101628"

data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Sweden", "Sweden", "Denmark", "Denmark", "Norway", "Norway"],
    "sites": [13, 15, 4, 10, 5, 8],
}

df = pd.DataFrame(data)

# Custom sort: `key` is applied to each sort column separately, so a single
# dict can rank both the year values and the country names.
sort_order_dict = {"Denmark": 2, "Sweden": 3, "Norway": 1, 2004: 4, 2022: 5}
df = df.sort_values(
    by=["year", "countries"],
    key=lambda column: column.map(sort_order_dict),
)

# Map each row's year to its bar color. (An earlier statement looked up
# (year, country) tuples in color_dict, which is keyed by year only, so it
# produced nothing but None and was overwritten here; it has been removed.)
df["color"] = df["year"].map(color_dict)
df
   year countries  sites    color
4  2004    Norway      5  #5375D4
2  2004   Denmark      4  #5375D4
0  2004    Sweden     13  #5375D4
5  2022    Norway      8  #A54836
3  2022   Denmark     10  #A54836
1  2022    Sweden     15  #A54836

Define variables

# Distinct values (in order of first appearance in the sorted frame) and the
# full sites column, as consumed by the plotting code.
years = df["year"].unique()
sites = df["sites"]
countries = df["countries"].unique()
colors = df["color"].unique()

Plot the chart

fig, ax = plt.subplots(figsize=(10, 4))

# Both years share one axes and start at x = 0: the first year grows to the
# right (+1), the second to the left (-1).
direction = [1, -1]
for year, sign, color in zip(years, direction, colors):
    temp_df = df[df.year == year]
    ax.barh(
        temp_df.countries,
        temp_df.sites * sign,
        align="center",
        height=0.6,
        facecolor=color,
    )

n_countries = len(countries)

# ax.patches holds the bars in drawing order (all of the first year, then all
# of the second year), which is also the row order of `sites`. Each value
# label is pushed one unit outwards from the end of its bar.
offset_labels = [sign for sign in direction for _ in range(n_countries)]
for bar, site, off in zip(ax.patches, sites, offset_labels):
    ax.text(
        bar.get_x() + bar.get_width() + off,
        bar.get_height() / 2 + bar.get_y(),
        site,
        ha="center", va="center", color=xy_ticklabel_color, size=11,
    )

# Country name just above each bar row, anchored at the center line x = 0.
# zip stops after the first year's bars, so each row is labelled once.
for bar, country in zip(ax.patches, countries):
    label_y = bar.get_y() + bar.get_height() + 0.05
    ax.text(0, label_y, s=f"{country}", size=11, color=xy_ticklabel_color)

# Year heading above the last bar of each year, aligned so the text sits over
# that year's own side of the chart.
alignments = ["right", "left"]
for bar, year, align in zip(ax.patches[n_countries - 1::n_countries], years, alignments):
    ax.text(
        bar.get_x() + bar.get_width(),
        bar.get_height() + bar.get_y() + 0.3,
        year, size=16, weight="bold", ha=align,
    )

ax.set_axis_off()
74 of 100: Radial bar chart in matplotlib

Not quite a tornado chart but useful…

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Add data and columns:

# Bar color for each survey year.
color_dict = {2022: "#A54836", 2004: "#5375D4"}

# Palette for tick labels, titles, grid lines and data labels.
xy_ticklabel_color = "#101628"
title_color = "#101628"
grid_color = "#C8C9C9"
datalabels_color = "#101628"

data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Sweden", "Sweden", "Denmark", "Denmark", "Norway", "Norway"],
    "sites": [13, 15, 4, 10, 5, 8],
}

df = pd.DataFrame(data)

# Custom sort: `key` is applied to each sort column separately, so a single
# dict can rank both the year values and the country names.
sort_order_dict = {"Denmark": 2, "Sweden": 3, "Norway": 1, 2004: 4, 2022: 5}
df = df.sort_values(
    by=["year", "countries"],
    key=lambda column: column.map(sort_order_dict),
)

# Map each row's year to its bar color. (An earlier statement looked up
# (year, country) tuples in color_dict, which is keyed by year only, so it
# produced nothing but None and was overwritten here; it has been removed.)
df["color"] = df["year"].map(color_dict)
df
   year countries  sites    color
4  2004    Norway      5  #5375D4
2  2004   Denmark      4  #5375D4
0  2004    Sweden     13  #5375D4
5  2022    Norway      8  #A54836
3  2022   Denmark     10  #A54836
1  2022    Sweden     15  #A54836

Define variables

# Distinct country names in the order they appear in the sorted frame.
countries = df["countries"].unique()
sites = df["sites"]
# One color per row, so every bar can be colored individually.
colors = df["color"]
# Every third row; with three countries per year this is one color per year.
color_legend = df["color"][::3]
# y positions 1..len(countries), repeated twice (once per year), as floats so
# that a fractional shift can be applied.
y_axis = np.tile(np.arange(1, len(countries) + 1, dtype=float), 2)
# Nudge the first three bars up by 0.2 so they are offset from the next three.
y_axis[:3] += 0.2

Plot the chart

# Line2D builds the legend handles below; it is not among the imports listed
# for this example, so it is imported here to avoid a NameError.
from matplotlib.lines import Line2D

fig, ax = plt.subplots(figsize=(7, 5), facecolor="#FFFFFF")

# One bar per row of df, at the manually computed y positions.
ax.barh(y_axis, df.sites, height=0.5, color=colors, zorder=2)

# Write each bar's value in white, just right of x = 0 and near the top edge
# of the bar.
for bar in ax.patches:
    ax.text(
        0.1,
        bar.get_height() - 0.15 + bar.get_y(),
        bar.get_width(), size=10, color="w",
    )

# The axes are switched off below, so the country names are placed by hand to
# the left of the bars.
for i, country in enumerate(countries):
    ax.text(
        -4, i + 1, country,
        ha="left", verticalalignment="center",
        color=xy_ticklabel_color, size=12,
    )

# Legend handles: one square marker per entry of color_legend, with no
# connecting line.
lines = [
    Line2D([0], [0], color=c, marker="s", mec="w", linestyle="", markersize=12)
    for c in color_legend
]

plt.legend(
    lines, df.year.unique(), labelcolor=xy_ticklabel_color,
    bbox_to_anchor=(0.5, -0.3), loc="lower center",
    ncols=3, frameon=False, fontsize=10,
)

ax.set_axis_off()
27 of 100: Clustered bar chart in matplotlib
Was this helpful?

Reader Interactions

Leave a Reply

Your email address will not be published. Required fields are marked *

Table of Contents