Artem Smirnov / Apr 15 2024
BBC Visual and Data Journalism Cookbook for Lets-Plot
The notebook is based on the BBC Visual and Data Journalism cookbook for R graphics, but uses Lets-Plot instead of ggplot2.
Data is extracted from the gapminder R package.
The cookbook below should hopefully help anyone who wants to make graphics like these:

pip install lets-plot==4.3.1rc1
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete

# Configure Lets-Plot's HTML output before any chart is built.
LetsPlot.setup_html()

# Load the gapminder table used by every chart below and preview it.
df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/gapminder.csv"
)
df.head()
# Base line width shared by the theme and by the chart cells.
line_size = 1.4


def bbc_theme(show_x_axis=True):
    """Build the BBC-style Lets-Plot theme used by the charts in this cookbook.

    When ``show_x_axis`` is true, axis lines are drawn at twice ``line_size``
    and the returned spec also carries a ``coord_cartesian`` whose y-range
    starts at 0 plus a ``scale_y_continuous`` with ``expand=[.15, 0]``.
    When it is false, axis lines are blank and only the theme is returned.
    """

    def styled_text(title=False, subtitle=False, size=21):
        # Title and subtitle presets override the requested size; the
        # subtitle preset is applied last, so it wins for size and margin.
        face, margin = None, None
        if title:
            size, face, margin = 33, "bold", [11, 0, 0, 0]
        if subtitle:
            size, margin = 26, [9, 0, 0, 0]
        return element_text(family="Helvetica", face=face, size=size, margin=margin)

    options = dict(
        plot_title=styled_text(title=True),
        plot_subtitle=styled_text(subtitle=True),
        legend_position='top',
        legend_background='blank',
        legend_title='blank',
        legend_text=styled_text(),
        axis_title='blank',
        axis_text=styled_text(),
        axis_text_x=element_text(margin=[20, 20]),
        axis_text_y=element_text(margin=[10, 5]),
        axis_ticks='blank',
        axis_line=element_line(size=2*line_size) if show_x_axis else 'blank',
        axis_ontop_x=True,
        panel_grid_minor='blank',
        panel_grid_major_y=element_line(size=line_size*6/5, color='#CBCBCB'),
        panel_grid_major_x='blank',
        panel_background='blank',
        strip_text=element_text(size=26, hjust=0),
    )
    bbc = theme(**options)

    if not show_x_axis:
        return bbc
    # Anchor the y-axis at zero and add headroom on the expanded side.
    return bbc + (coord_cartesian(ylim=[0, None]) + scale_y_continuous(expand=[.15, 0]))
Make a line chart
# Single-country line chart: life expectancy in Malawi by year.
line_df = df[df.country == "Malawi"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(
        color='#1380A1',
        size=line_size,
        tooltips=layer_tooltips().format("@year", "d"),
    )
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in Malawi 1952-2007")
)
# Same line chart as above, this time filtered to China.
line_df = df[df.country == "China"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(
        color='#1380A1',
        size=line_size,
        tooltips=layer_tooltips().format("@year", "d"),
    )
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in China 1952-2007")
)
Make a multiple line chart
# Two-country comparison; the plot object is kept because later cells extend it.
multiple_line_df = df[df.country.isin(["China", "United States"])]
multiple_line_plot = (
    ggplot(multiple_line_df, aes('year', 'lifeExp', color='country'))
    + geom_line(size=line_size, tooltips=layer_tooltips().format("@year", "d"))
    + scale_x_continuous(format='d')
    + scale_color_manual(values=['#FAAB18', '#1380A1'])
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in China and the US")
)
multiple_line_plot
Add color scheme (flavor)
# Apply the high-contrast dark flavor, with extra plot margin and a larger canvas.
(
    multiple_line_plot
    + theme(plot_margin=[20, 30])
    + flavor_high_contrast_dark()
    + ggsize(700, 500)
)
Make a bar chart
# Five African countries with the highest life expectancy in 2007.
bar_df = (
    df[(df.year == 2007) & (df.continent == "Africa")]
    .sort_values(by=['lifeExp'], ascending=False)
    .head(5)
)
# The plot object is kept because later cells flip, rescale and label it.
bars_plot = (
    ggplot(bar_df, aes(as_discrete('country', order=1), 'lifeExp'))
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + bbc_theme()
    + ggsize(640, 480)
    + labs(title="Reunion is highest", subtitle="Highest African life expectancy, 2007")
)
bars_plot
Make a stacked bar chart
# Population per continent in 2007, split into life-expectancy bands and
# expressed as a percentage of each continent's total.
stacked_df = (
    df[df.year == 2007]
    .assign(
        lifeExpGrouped=lambda d: pd.cut(
            d.lifeExp,
            bins=pd.IntervalIndex.from_tuples([(0, 50), (50, 65), (65, 80), (80, 90)]),
        ).cat.rename_categories(["Under 50", "50-65", "65-80", "80+"])
    )
    .rename(columns={'pop': 'continentPop'})
    .groupby(['continent', 'lifeExpGrouped'], observed=False)
    .continentPop.sum()
    .reset_index()
    # observed=False emits every continent/band pair; drop the empty ones.
    .query('continentPop > 0')
    .assign(
        # transform('sum') returns the continent total aligned on this frame's
        # own index, so the share no longer depends on the row order of a
        # groupby-apply result being copied in positionally via `.values`.
        continentPopPercentage=lambda d: 100 * (
            d.continentPop / d.groupby('continent').continentPop.transform('sum')
        )
    )
)
(
    ggplot(stacked_df, aes('continent', 'continentPopPercentage', fill='lifeExpGrouped'))
    + geom_bar(
        stat='identity',
        size=0,
        tooltips=layer_tooltips().line('@continentPop').format('@continentPop', ',d'),
    )
    + scale_y_continuous(breaks=list(range(0, 101, 25)), format='{d}%')
    + scale_fill_viridis()
    + bbc_theme()
    + theme(
        legend_justification=[0, 1],
        legend_position=[-.02, 1.02],
        legend_direction='horizontal',
    )
    + ggsize(640, 480)
    + labs(
        title="How life expectancy varies",
        subtitle="% of population by life expectancy band, 2007",
    )
)
Make a grouped bar chart
# Five countries with the largest life-expectancy gain between 1967 and 2007,
# reshaped back to long form so each year becomes its own bar.
grouped_bar_df = pd.melt(
    df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']]
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap', ascending=False)
    .head(5)
    .reset_index(),
    id_vars=['country', 'gap'],
    value_vars=[1967, 2007],
    value_name='lifeExp',
)
(
    ggplot(
        grouped_bar_df,
        aes(
            as_discrete('country', order=1),
            'lifeExp',
            group='year',
            fill=as_discrete('year'),
        ),
    )
    + geom_bar(stat='identity', position='dodge')
    + bbc_theme()
    + scale_fill_manual(values=['#1380A1', '#FAAB18'])
    + ggsize(720, 480)
    + labs(title="We're living longer", subtitle="Biggest life expectancy rise, 1967-2007")
)
Make a dumbbell chart
# Ten countries with the largest 1967 -> 2007 gain, one row per country.
dumbbell_df = (
    df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']]
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap')
    .tail(10)
    .reset_index()
)
# The pivoted year columns are integers; turn every column label into a
# string so the aesthetics below can refer to '1967' and '2007'.
dumbbell_df.columns = dumbbell_df.columns.map(str)
(
    ggplot(dumbbell_df)
    + geom_segment(
        aes(x='1967', xend='2007', y='country', yend='country'),
        color='#DDDDDD',
        size=3,
    )
    + geom_point(aes(x='1967', y='country'), color='#FAAB18', size=5)
    + geom_point(aes(x='2007', y='country'), color='#1380A1', size=5)
    + bbc_theme(show_x_axis=False)
    + ggsize(640, 400)
    + labs(title="We're living longer", subtitle="Biggest life expectancy rise, 1967-2007")
)
Make a histogram
# Distribution of life expectancy across countries in 2007.
hist_df = df[df.year == 2007]
breaks = list(range(40, 91, 10))
# Only the last tick (90) carries the unit.
labels = [f'{y} years' if y == 90 else str(y) for y in breaks]
(
    ggplot(hist_df, aes('lifeExp'))
    + geom_histogram(binwidth=5, color='white', fill='#1380A1')
    + scale_x_continuous(limits=[35, 95], breaks=breaks, labels=labels)
    + bbc_theme()
    + ggsize(600, 450)
    + labs(
        title="How life expectancy varies",
        subtitle="Distribution of life expectancy in 2007",
    )
)
Make changes to the legend
Remove the legend
# Hide the legend by switching off the guide on the color scale.
(
    multiple_line_plot
    + scale_color_manual(values=['#FAAB18', '#1380A1'], guide='none')
)
# Alternative: hide the legend through the theme.
(
    multiple_line_plot
    + theme(legend_position='none')
)
Change the position of the legend
# Move the legend to the right-hand side of the plot.
(
    multiple_line_plot
    + theme(legend_position='right')
)
Make changes to the axes
Flip the coordinates of a plot
# Flip the axes and keep the flipped plot for the cells that follow.
bars_plot += coord_flip()
bars_plot
Change the plot limits
# Override the y limits for this rendering only; bars_plot itself is not rebound.
(
    bars_plot
    + ylim(0, 500)
)
Change the axis text manually
# Custom tick labels: only the last tick (80) carries the unit.
breaks = list(range(0, 81, 20))
labels = [f'{y} years' if y == 80 else str(y) for y in breaks]
bars_plot = bars_plot + scale_y_continuous(
    limits=[0, 85],
    breaks=breaks,
    labels=labels,
)
bars_plot
Add axis ticks
# Add tick marks on the x axis only.
(
    multiple_line_plot
    + theme(
        axis_ticks_x=element_line(color='#333333'),
        axis_ticks_length_x=10,
    )
)
Add annotations
Add an annotation
# Place a fixed text annotation at data coordinates (1980, 45).
(
    multiple_line_plot
    + geom_text(
        x=1980,
        y=45,
        label="I'm an annotation!",
        hjust=0,
        vjust=0.5,
        color='#555555',
        fill='white',
        family="Helvetica",
        size=10,
    )
)
# Rebind multiple_line_plot: add a multi-line annotation, replace the legend
# with labels placed at x=2007, and widen the x range to 1950-2011.
multiple_line_plot = (
    multiple_line_plot
    + geom_label(
        x=1980,
        y=45,
        label="I'm quite a long\nannotation over\nthree rows",
        hjust=0,
        vjust=0.5,
        color='#555555',
        fill='white',
        family="Helvetica",
        size=10,
        label_size=0,
    )
    + theme(legend_position='none')
    + xlim(1950, 2011)
    + geom_label(
        x=2007,
        y=79,
        label="US",
        hjust=0,
        vjust=0.5,
        color='#1380A1',
        fill='white',
        family="Helvetica",
        size=10,
        label_size=0,
    )
    + geom_label(
        x=2007,
        y=72,
        label="China",
        hjust=0,
        vjust=0.5,
        color='#FAAB18',
        fill='white',
        family="Helvetica",
        size=10,
        label_size=0,
    )
)
multiple_line_plot
Add labels based on your data
# Label each bar with its value, right-aligned and nudged back by 1.5 units.
(
    bars_plot
    + geom_text(
        aes('country', 'lifeExp', label='lifeExp'),
        label_format='d',
        hjust=1,
        nudge_y=-1.5,
        color='white',
        family="Helvetica",
        size=10,
    )
)
Add left-aligned labels to bar charts
# Label each bar with its value at a fixed y=4, left-aligned.
(
    bars_plot
    + geom_text(
        aes(x='country', label='lifeExp'),
        y=4,
        label_format='d',
        hjust=0,
        color='white',
        family="Helvetica",
        size=10,
    )
)
Add a line
# Straight segment from (1979, 45) to (1965, 43).
(
    multiple_line_plot
    + geom_segment(
        x=1979,
        y=45,
        xend=1965,
        yend=43,
        color='#555555',
        size=line_size*3/4,
    )
)
Add a curved line
# Same endpoints as the straight segment, drawn as a curve.
(
    multiple_line_plot
    + geom_curve(
        x=1979,
        y=45,
        xend=1965,
        yend=43,
        color='#555555',
        curvature=-0.2,
        size=line_size*3/4,
    )
)
Add an arrow
# The same curve, with an arrow specification attached.
(
    multiple_line_plot
    + geom_curve(
        x=1979,
        y=45,
        xend=1965,
        yend=43,
        color='#555555',
        curvature=-0.2,
        size=line_size*3/4,
        arrow=arrow(),
    )
)
Add a line across the whole plot
# Dashed red horizontal line at y=10.
(
    multiple_line_plot
    + geom_hline(
        yintercept=10,
        size=line_size,
        color='red',
        linetype='dashed',
    )
)
Work with small multiples
Facets
# Total population per continent and year, excluding the Americas.
# The column is selected with ['pop'] rather than attribute access: `pop` is
# also the name of a DataFrame method, so `.pop` only resolves to the column
# through the groupby's attribute fallback.
facet_df = (
    df[df.continent != "Americas"]
    .groupby(['continent', 'year'])['pop']
    .sum()
    .to_frame()
    .reset_index()
)
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0)
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    + facet_wrap('continent', ncol=5)
    + scale_y_continuous(
        breaks=[0, 2000000000, 4000000000],
        labels=['0', '2bn', '4bn'],
        limits=[0, 4000000010],
    )
    + bbc_theme()
    + theme(legend_position='none', axis_text_x=element_blank())
    + ggsize(600, 420)
    + labs(title="Asia's rapid growth", subtitle="Population growth by continent, 1952-2007")
)
Free scales
# Same area chart with free scales per facet; tick labels are hidden on both axes.
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0)
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    + facet_wrap('continent', scales='free')
    + bbc_theme()
    + theme(
        legend_position='none',
        axis_text_x=element_blank(),
        axis_text_y=element_blank(),
    )
    + ggsize(600, 400)
    + labs(
        title="It's all relative",
        subtitle="Relative population growth by continent, 1952-2007",
    )
)
Do something else entirely
Increase or decrease margins
# Override the subtitle margin for this rendering.
(
    bars_plot
    + theme(plot_subtitle=element_text(margin=[0, 0, 75, 0]))
)
Reorder bars by size
# Order the country axis by the lifeExp value instead of by country.
(
    ggplot(bar_df, aes(as_discrete('country', order_by='lifeExp', order=1), 'lifeExp'))
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + bbc_theme()
    + ggsize(600, 450)
    + coord_flip()
    + labs(title="Reunion is highest", subtitle="Highest African life expectancy, 2007")
)