BBC Visual and Data Journalism Cookbook for Lets-Plot

This notebook is based on the BBC Visual and Data Journalism cookbook for R graphics, but uses Lets-Plot instead of ggplot2.

Data is extracted from the gapminder R package.

The cookbook below should hopefully help anyone who wants to make graphics like these:

pip install lets-plot
9.0s
import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
LetsPlot.setup_html(isolated_frame=True)
1.4s
df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/gapminder.csv")
df.head()
0.4s
def lets_plot_bbc_style():
    """Build the theme shared by the charts in this notebook.

    Title, legend and axis text use the colour '#222222'. The legend is
    placed on top and its title is blanked. Axis titles, ticks and lines,
    the minor grid, the major x grid and the panel background are all set
    to 'blank'; the major y grid lines are drawn in '#CBCBCB' and the strip
    background is filled white.

    Returns:
        The result of ``theme(...)``, to be added to a plot with ``+``.
    """
    dark_text = element_text(color='#222222')
    hidden = 'blank'
    settings = {
        'plot_title': dark_text,
        'legend_position': 'top',
        'legend_title': hidden,
        'legend_text': dark_text,
        'axis_title': hidden,
        'axis_text': dark_text,
        'axis_ticks': hidden,
        'axis_line': hidden,
        'panel_grid_minor': hidden,
        'panel_grid_major_y': element_line(color='#CBCBCB'),
        'panel_grid_major_x': hidden,
        'panel_background': hidden,
        'strip_background': element_rect(fill='white'),
    }
    return theme(**settings)
0.1s

Make a line chart

# Life expectancy by year for a single country: Malawi.
line_df = df[df['country'] == "Malawi"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(color='#1380A1', size=1)
    # Dark horizontal line at y = 0.
    + geom_hline(yintercept=0, size=1, color='#333333')
    + lets_plot_bbc_style()
    + labs(title="Living longer: Life expectancy in Malawi 1952-2007")
)
0.5s
# Same line chart as for Malawi, this time for China.
line_df = df[df['country'] == "China"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(color='#1380A1', size=1)
    # Dark horizontal line at y = 0.
    + geom_hline(yintercept=0, size=1, color='#333333')
    + lets_plot_bbc_style()
    + labs(title="Living longer: Life expectancy in China 1952-2007")
)
0.4s

Make a multiple line chart

# One life-expectancy line per country, coloured by country.
multiple_line_df = df[df['country'].isin(["China", "United States"])]
multiple_line_plot = (
    ggplot(multiple_line_df, aes('year', 'lifeExp', color='country'))
    + geom_line(size=1)
    # Dark horizontal line at y = 0; spelled `color=` like every other
    # layer in this notebook (the original used `colour=` here only).
    + geom_hline(yintercept=0, size=1, color='#333333')
    + scale_y_continuous(breaks=list(range(0, 81, 20)))
    + scale_color_manual(values=['#FAAB18', '#1380A1'])
    + lets_plot_bbc_style()
    + labs(title="Living longer: Life expectancy in China and the US")
)
# Later cells reuse `multiple_line_plot`; a bare name as the last statement
# displays it in the notebook.
multiple_line_plot
0.6s

Make a bar chart

# Top five African rows by life expectancy in 2007.
bar_df = (
    df.loc[(df['year'] == 2007) & (df['continent'] == "Africa")]
    .sort_values(by=['lifeExp'], ascending=False)
    .head(5)
)
bars_plot = (
    ggplot(bar_df, aes(as_discrete('country', order=1), 'lifeExp'))
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    # Thick dark line at y = 0.
    + geom_hline(yintercept=0, size=2, color='#333333')
    + lets_plot_bbc_style()
    + labs(title="Reunion is highest: Highest African life expectancy, 2007")
)
# `bars_plot` is extended by later cells.
bars_plot
0.6s

Make a stacked bar chart

# For 2007: the percentage of each continent's population that falls into
# each life-expectancy band.
stacked_df = (
    df[df['year'] == 2007]
    .assign(
        # Bin life expectancy into four bands and give them readable labels.
        # The lambda parameter is named `frame` so it does not shadow the
        # notebook-level `df`.
        lifeExpGrouped=lambda frame: pd.cut(
            frame['lifeExp'],
            bins=pd.IntervalIndex.from_tuples([(0, 50), (50, 65), (65, 80), (80, 90)]),
        ).cat.rename_categories(["Under 50", "50-65", "65-80", "80+"])
    )
    .rename(columns={'pop': 'continentPop'})
    # `lifeExpGrouped` is categorical; observed=False is spelled out so every
    # continent/band pair is produced on any pandas version. Pairs with no
    # rows sum to 0 and are dropped by the query below.
    .groupby(['continent', 'lifeExpGrouped'], observed=False)['continentPop']
    .sum()
    .reset_index()
    .query('continentPop > 0')
    .assign(
        # transform('sum') returns the per-continent total aligned to the
        # frame's own index. The previous groupby(...).apply(...) form returns
        # a result prefixed with the group keys on pandas >= 2.0, which no
        # longer aligns with the frame when assigned as a column.
        continentPopPercentage=lambda frame: 100 * (
            frame['continentPop']
            / frame.groupby('continent')['continentPop'].transform('sum')
        )
    )
)
# Stacked bars: one bar per continent, filled by life-expectancy band.
(
    ggplot(stacked_df, aes('continent', 'continentPopPercentage', fill='lifeExpGrouped'))
    # The tooltip shows the percentage value of the hovered segment.
    + geom_bar(stat='identity', tooltips=layer_tooltips().line('@continentPopPercentage'))
    + lets_plot_bbc_style()
    # Breaks every 25 units, labelled with a trailing percent sign.
    + scale_y_continuous(breaks=list(range(0, 101, 25)), format='{d}%')
    + scale_fill_brewer(type='qual', palette='Set1', direction=-1)
    + geom_hline(yintercept=0, size=2, color='#333333')
    + labs(title="How life expectancy varies: % of population by life expectancy band, 2007")
)
0.6s

Make a grouped bar chart

# Five countries with the largest life-expectancy gain between 1967 and
# 2007, melted back to long form (one row per country and year).
grouped_bar_df = (
    df.loc[df['year'].isin([1967, 2007]), ['country', 'year', 'lifeExp']]
    # Wide form: one row per country, one column per year.
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap', ascending=False)
    .head(5)
    .reset_index()
    .melt(
        id_vars=['country', 'gap'],
        value_vars=[1967, 2007],
        value_name='lifeExp',
    )
)
# Dodged bars: the two years are drawn side by side for each country.
(
    ggplot(
        grouped_bar_df,
        aes(
            as_discrete('country', order=1),
            'lifeExp',
            group='year',
            fill=as_discrete('year'),
        ),
    )
    + geom_bar(stat='identity', position='dodge')
    + geom_hline(yintercept=0, size=2, color='#333333')
    + lets_plot_bbc_style()
    + scale_fill_manual(values=['#1380A1', '#FAAB18'])
    + labs(title="We're living longer: Biggest life expectancy rise, 1967-2007")
)
0.5s

Make a dumbbell chart

# Wide table (one row per country, one column per year) holding the ten
# countries with the largest 1967 -> 2007 gain, sorted by ascending gap.
dumbbell_df = (
    df.loc[df['year'].isin([1967, 2007]), ['country', 'year', 'lifeExp']]
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap')
    .tail(10)
    .reset_index()
)
# Dumbbell chart: a grey segment joining the 1967 and 2007 values of each
# country, with a coloured point at either end.
(
    ggplot(dumbbell_df)
    + geom_segment(
        aes(x='1967', xend='2007', y='country', yend='country'),
        color='#DDDDDD',
        size=3,
    )
    + geom_point(aes(x='1967', y='country'), color='#FAAB18', size=5)
    + geom_point(aes(x='2007', y='country'), color='#1380A1', size=5)
    + lets_plot_bbc_style()
    + labs(title="We're living longer: Biggest life expectancy rise, 1967-2007")
)
0.4s

Make a histogram

# Distribution of life expectancy across the 2007 rows.
hist_df = df[df['year'] == 2007]
# Breaks at 40, 50, ..., 90; only the last one carries the unit.
breaks = list(range(40, 91, 10))
labels = [f'{y} years' if y == 90 else str(y) for y in breaks]
(
    ggplot(hist_df, aes('lifeExp'))
    + geom_histogram(binwidth=5, color='white', fill='#1380A1')
    + geom_hline(yintercept=0, size=2, color='#333333')
    + lets_plot_bbc_style()
    + scale_x_continuous(limits=[35, 95], breaks=breaks, labels=labels)
    + labs(title = "How life expectancy varies: Distribution of life expectancy in 2007")
)
0.5s

Make changes to the legend

Remove the legend

# Option 1: add the colour scale again with guide='none'.
(
    multiple_line_plot
    + scale_color_manual(values=['#FAAB18', '#1380A1'], guide='none')
)
0.4s
# Option 2: set legend_position='none' in the theme.
(
    multiple_line_plot
    + theme(legend_position='none')
)
0.4s

Change the position of the legend

# Override the theme's legend_position='top' with 'right'.
(
    multiple_line_plot
    + theme(legend_position='right')
)
0.5s

Make changes to the axes

Flip the coordinates of a plot

# Add coord_flip() to the bar chart. `bars_plot` is rebound, so every later
# cell that uses it starts from the flipped version.
bars_plot = (
    bars_plot
    + coord_flip()
)
bars_plot
0.4s

Change the plot limits

# Set the y limits to 0..500; displayed only, `bars_plot` is not rebound.
(
    bars_plot
    + ylim(0, 500)
)
0.4s

Change the axis text manually

# Breaks at 0, 20, ..., 80; only the last label carries the unit.
breaks = list(range(0, 81, 20))
labels = [f'{y} years' if y == 80 else str(y) for y in breaks]
# Rebinds `bars_plot`, so later cells inherit this y scale.
bars_plot = (
    bars_plot
    + scale_y_continuous(limits=[0, 85], breaks=breaks, labels=labels)
)
bars_plot
0.5s

Add axis ticks

# Set the x-axis ticks to a '#333333' line (lets_plot_bbc_style() sets
# axis_ticks='blank').
(
    multiple_line_plot
    + theme(axis_ticks_x=element_line(color='#333333'))
)
0.6s

Add annotations

Add an annotation

# Add a fixed text annotation at x=1980, y=45. `multiple_line_plot` is
# rebound, so the annotation is carried into all later cells.
multiple_line_plot = (
    multiple_line_plot
    + geom_text(
        x=1980, y=45, label="I'm an annotation!",
        hjust=1, vjust=0.5, color='#555555', fill='white',
        family="Helvetica", size=10,
    )
)
multiple_line_plot
0.4s
# Set legend_position='none' and add a text label for each line at x=2007,
# coloured like that line. The x limits are set to 1950..2011.
multiple_line_plot = (
    multiple_line_plot
    + theme(legend_position='none')
    + xlim(1950, 2011)
    + geom_text(
        x=2007, y=79, label="US",
        hjust=1, vjust=0.5, color='#1380A1', fill='white',
        family="Helvetica", size=10,
    )
    + geom_text(
        x=2007, y=72, label="China",
        hjust=1, vjust=0.5, color='#FAAB18', fill='white',
        family="Helvetica", size=10,
    )
)
multiple_line_plot
0.5s

Add labels based on your data

# Label each bar with its own lifeExp value, positioned at that value and
# formatted with 'd'.
(
    bars_plot
    + geom_text(
        aes('country', 'lifeExp', label='lifeExp'),
        label_format='d', hjust=0, color='white',
        family="Helvetica", size=10,
    )
)
0.4s

Add left-aligned labels to bar charts

# Same labels, but all placed at the fixed position y=4 rather than at the
# lifeExp value.
(
    bars_plot
    + geom_text(
        aes(x='country', label='lifeExp'), y=4,
        label_format='d', hjust=1, color='white',
        family="Helvetica", size=10,
    )
)
0.3s

Add a line

# Straight segment from (1979, 45) to (1965, 43).
(
    multiple_line_plot
    + geom_segment(x=1979, y=45, xend=1965, yend=43, color='#555555', size=.5)
)
0.5s

Add an arrow

# Same segment as the plain line, with arrow=arrow() added.
(
    multiple_line_plot
    + geom_segment(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555', size=.5, arrow=arrow(),
    )
)
0.5s

Add a line across the whole plot

# Dashed red horizontal line at y = 10.
(
    multiple_line_plot
    + geom_hline(yintercept=10, size=1, color='red', linetype='dashed')
)
0.4s

Work with small multiples

# Total population per continent and year, excluding the Americas.
# The column is selected as ['pop'] rather than `.pop`: attribute access for
# a column called "pop" collides with the name of the DataFrame.pop method,
# so the bracket form is the unambiguous spelling.
facet_df = (
    df[df['continent'] != "Americas"]
    .groupby(['continent', 'year'])['pop']
    .sum()
    .reset_index()
)
# One area-chart panel per continent.
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, color='white')
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    + facet_wrap('continent', ncol=5)
    # Breaks at 0, 2e9 and 4e9, labelled in billions.
    + scale_y_continuous(
        breaks=[0, 2000000000, 4000000000],
        labels=['0', '2bn', '4bn'],
    )
    + lets_plot_bbc_style()
    + geom_hline(yintercept=0, size=2, color='#333333')
    # No legend and no x-axis text.
    + theme(legend_position='none', axis_text_x=element_blank())
    + labs(title = "Asia's rapid growth: Population growth by continent, 1952-2007")
    + ggsize(800, 400)
)
0.5s

Reorder bars by size

# Flipped bar chart with the country axis ordered by lifeExp
# (order_by='lifeExp') instead of by country name.
(
    ggplot(
        bar_df,
        aes(as_discrete('country', order_by='lifeExp', order=1), 'lifeExp'),
    )
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + geom_hline(yintercept=0, size=2, color='#333333')
    + scale_y_continuous(breaks=list(range(0, 81, 20)))
    + lets_plot_bbc_style()
    + coord_flip()
    + labs(title="Reunion is highest: Highest African life expectancy, 2007")
)
0.5s
Runtimes (1)