BBC Visual and Data Journalism Cookbook for Lets-Plot

This notebook is based on the BBC Visual and Data Journalism cookbook for R graphics, but uses Lets-Plot instead of ggplot2.

Data is extracted from the gapminder R package.

The cookbook below should hopefully help anyone who wants to make graphics like these:

pip install lets-plot==4.3.1rc1
3.3s
import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
0.0s
# Initialise Lets-Plot's HTML output so the plots below render inline in the notebook.
LetsPlot.setup_html()
0.0s
# Load the gapminder dataset from the Lets-Plot docs repository; every recipe below filters this frame.
df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/gapminder.csv")
# Preview the first rows.
df.head()
0.1s
# Base stroke width; the theme and the line-based recipes derive their sizes from it.
line_size = 1.4


def bbc_theme(show_x_axis=True):
    """Build the BBC-style look shared by the plots in this notebook.

    Args:
        show_x_axis: When true (the default), axis lines are drawn with width
            ``2 * line_size`` and the result also carries
            ``coord_cartesian(ylim=[0, None])`` and
            ``scale_y_continuous(expand=[.15, 0])``. When false, axis lines
            are blank and only the theme is returned.

    Returns:
        The ``theme(...)`` spec alone, or the theme combined with the
        coordinate system and y scale described above.
    """
    def helvetica(size=21, face=None, margin=None):
        # Every themed text element uses Helvetica; only size/face/margin vary.
        return element_text(family="Helvetica", face=face, size=size, margin=margin)

    axis_line = element_line(size=2*line_size) if show_x_axis else 'blank'

    look = theme(
        plot_title=helvetica(size=33, face="bold", margin=[11, 0, 0, 0]),
        plot_subtitle=helvetica(size=26, margin=[9, 0, 0, 0]),
        legend_position='top',
        legend_background='blank',
        legend_title='blank',
        legend_text=helvetica(),
        axis_title='blank',
        axis_text=helvetica(),
        axis_text_x=element_text(margin=[20, 20]),
        axis_text_y=element_text(margin=[10, 5]),
        axis_ticks='blank',
        axis_line=axis_line,
        axis_ontop_x=True,
        panel_grid_minor='blank',
        panel_grid_major_y=element_line(size=line_size*6/5, color='#CBCBCB'),
        panel_grid_major_x='blank',
        panel_background='blank',
        strip_text=element_text(size=26, hjust=0),
    )

    if not show_x_axis:
        return look

    # Keep the coord + scale pair grouped before attaching it to the theme.
    return look + (coord_cartesian(ylim=[0, None]) + scale_y_continuous(expand=[.15, 0]))
0.0s

Make a line chart

# Life expectancy over time for a single country.
line_df = df[df.country == "Malawi"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(
        color='#1380A1',
        size=line_size,
        # Show the year in tooltips with the 'd' format.
        tooltips=layer_tooltips().format("@year", "d"),
    )
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in Malawi 1952-2007")
)
0.3s
# Same recipe as the previous chart, for China; line_df is rebound.
line_df = df[df.country == "China"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(
        color='#1380A1',
        size=line_size,
        tooltips=layer_tooltips().format("@year", "d"),
    )
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in China 1952-2007")
)
0.2s

Make a multiple line chart

# One line per country, coloured by the 'country' column; the plot is kept in
# multiple_line_plot because later recipes extend it.
multiple_line_df = df[df.country.isin(["China", "United States"])]
multiple_line_plot = (
    ggplot(multiple_line_df, aes('year', 'lifeExp', color='country'))
    + geom_line(size=line_size, tooltips=layer_tooltips().format("@year", "d"))
    + scale_x_continuous(format='d')
    + scale_color_manual(values=['#FAAB18', '#1380A1'])
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in China and the US")
)
multiple_line_plot
0.3s

Add color scheme (flavor)

# Layer a colour flavor, plot margins and a larger canvas on top of the existing plot.
(
    multiple_line_plot
    + theme(plot_margin=[20, 30])
    + flavor_high_contrast_dark()
    + ggsize(700, 500)
)
0.3s

Make a bar chart

# Five African countries with the highest life expectancy in 2007.
bar_df = (
    df[(df.year == 2007) & (df.continent == "Africa")]
    .sort_values(by=['lifeExp'], ascending=False)
    .head(5)
)
# 'country' is mapped as a discrete variable with an explicit ordering (order=1).
# The plot is kept in bars_plot because later recipes extend it.
bars_plot = (
    ggplot(bar_df, aes(as_discrete('country', order=1), 'lifeExp'))
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + bbc_theme()
    + ggsize(640, 480)
    + labs(title="Reunion is highest", subtitle="Highest African life expectancy, 2007")
)
bars_plot
0.3s

Make a stacked bar chart

# Population per (continent, life-expectancy band) in 2007, plus each band's
# share of its continent's population.
stacked_df = (
    df[df.year == 2007]
    .assign(
        # Bucket life expectancy into four labelled bands.
        lifeExpGrouped=lambda d: pd.cut(
            d.lifeExp,
            bins=pd.IntervalIndex.from_tuples([(0, 50), (50, 65), (65, 80), (80, 90)]),
        ).cat.rename_categories(["Under 50", "50-65", "65-80", "80+"])
    )
    # 'pop' collides with the DataFrame.pop method under attribute access, so rename it.
    .rename(columns={'pop': 'continentPop'})
    .groupby(['continent', 'lifeExpGrouped'], observed=False)
    .continentPop.sum()
    .reset_index()
    # observed=False emits every continent/band combination; drop the empty ones.
    .query('continentPop > 0')
    .assign(
        # transform('sum') returns the continent total aligned to each row's
        # index, so the percentage does not depend on the row order of the frame.
        continentPopPercentage=lambda d: 100 * (
            d.continentPop / d.groupby('continent').continentPop.transform('sum')
        )
    )
)
(
    ggplot(stacked_df, aes('continent', 'continentPopPercentage', fill='lifeExpGrouped'))
    + geom_bar(
        stat='identity',
        size=0,
        # Tooltip shows the absolute population, formatted with thousands separators.
        tooltips=layer_tooltips().line('@continentPop').format('@continentPop', ',d'),
    )
    + scale_y_continuous(breaks=list(range(0, 101, 25)), format='{d}%')
    + scale_fill_viridis()
    + bbc_theme()
    + theme(legend_justification=[0, 1], legend_position=[-.02, 1.02], legend_direction='horizontal')
    + ggsize(640, 480)
    + labs(title="How life expectancy varies", subtitle="% of population by life expectancy band, 2007")
)
0.3s

Make a grouped bar chart

# Five countries with the largest life-expectancy gain between 1967 and 2007,
# reshaped to long form (one row per country and year) for dodged bars.
grouped_bar_df = pd.melt(
    df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']]
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap', ascending=False)
    .head(5)
    .reset_index(),
    id_vars=['country', 'gap'],
    value_vars=[1967, 2007],
    # No var_name is given; the plot below reads the melted variable column as 'year'.
    value_name='lifeExp',
)
(
    ggplot(
        grouped_bar_df,
        aes(
            as_discrete('country', order=1),
            'lifeExp',
            group='year',
            fill=as_discrete('year'),
        ),
    )
    + geom_bar(stat='identity', position='dodge')
    + bbc_theme()
    + scale_fill_manual(values=['#1380A1', '#FAAB18'])
    + ggsize(720, 480)
    + labs(title="We're living longer", subtitle="Biggest life expectancy rise, 1967-2007")
)
0.3s

Make a dumbbell chart

# Wide table (one row per country, one column per year) for the ten countries
# with the largest 1967 -> 2007 gain, sorted by ascending gap.
dumbbell_df = (
    df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']]
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap')
    .tail(10)
    .reset_index()
)
# Year columns are integers after the pivot; convert them to strings so aes()
# can refer to them as '1967' and '2007'.
dumbbell_df.columns = dumbbell_df.columns.map(str)
(
    ggplot(dumbbell_df)
    # Grey connector between the two years, then one dot per year.
    + geom_segment(
        aes(x='1967', xend='2007', y='country', yend='country'),
        color='#DDDDDD',
        size=3,
    )
    + geom_point(aes(x='1967', y='country'), color='#FAAB18', size=5)
    + geom_point(aes(x='2007', y='country'), color='#1380A1', size=5)
    + bbc_theme(show_x_axis=False)
    + ggsize(640, 400)
    + labs(title="We're living longer", subtitle="Biggest life expectancy rise, 1967-2007")
)
0.3s

Make a histogram

# Distribution of life expectancy across all countries in 2007.
hist_df = df[df.year == 2007]
breaks = list(range(40, 91, 10))
# Only the last break (90) carries the unit; the rest are bare numbers.
labels = [str(y) if y != 90 else '{0} years'.format(y) for y in breaks]
(
    ggplot(hist_df, aes('lifeExp'))
    + geom_histogram(binwidth=5, color='white', fill='#1380A1')
    + scale_x_continuous(limits=[35, 95], breaks=breaks, labels=labels)
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="How life expectancy varies", subtitle="Distribution of life expectancy in 2007")
)
0.2s

Make changes to the legend

Remove the legend

# Option 1: re-declare the colour scale with the same values and guide='none'.
multiple_line_plot + scale_color_manual(values=['#FAAB18', '#1380A1'], guide='none')
0.3s
# Option 2: turn the legend off through the theme.
multiple_line_plot + theme(legend_position='none')
0.2s

Change the position of the legend

# bbc_theme() sets legend_position='top'; this later theme setting asks for 'right' instead.
multiple_line_plot + theme(legend_position='right')
0.2s

Make changes to the axes

Flip the coordinates of a plot

# Add coord_flip() to the bar chart. bars_plot is rebound here, so every later
# cell that uses bars_plot builds on the flipped chart.
bars_plot = bars_plot + coord_flip()
bars_plot
0.2s

Change the plot limits

# Set explicit y limits of 0 to 500; the result is displayed only, bars_plot itself is unchanged.
bars_plot + ylim(0, 500)
0.2s

Change the axis text manually

# Custom tick positions and labels for the value scale; only the last break (80) carries the unit.
breaks = list(range(0, 81, 20))
labels = [str(y) if y != 80 else '{0} years'.format(y) for y in breaks]
# bars_plot is rebound so the following recipes inherit this scale.
bars_plot = (
    bars_plot
    + scale_y_continuous(limits=[0, 85], breaks=breaks, labels=labels)
)
bars_plot
0.2s

Add axis ticks

# bbc_theme() blanks all axis ticks; set them again for the x axis only, with an explicit length.
(
    multiple_line_plot
    + theme(
        axis_ticks_x=element_line(color='#333333'),
        axis_ticks_length_x=10,
    )
)
0.3s

Add annotations

Add an annotation

# Place a text annotation at fixed data coordinates (x=1980, y=45), left-aligned (hjust=0).
(
    multiple_line_plot
    + geom_text(
        x=1980, y=45, label="I'm an annotation!",
        hjust=0, vjust=0.5, color='#555555', fill='white',
        family="Helvetica", size=10,
    )
)
0.2s
# Multi-line annotation, plus "US" and "China" labels at x=2007 with the legend
# turned off. multiple_line_plot is rebound, so the recipes that follow
# build on this annotated version.
multiple_line_plot = (
    multiple_line_plot
    + geom_label(
        x=1980, y=45, label="I'm quite a long\nannotation over\nthree rows",
        hjust=0, vjust=0.5, color='#555555', fill='white',
        family="Helvetica", size=10, label_size=0,
    )
    + theme(legend_position='none')
    # Extend the x range to 2011, past the labels placed at x=2007.
    + xlim(1950, 2011)
    + geom_label(
        x=2007, y=79, label="US",
        hjust=0, vjust=0.5, color='#1380A1', fill='white',
        family="Helvetica", size=10, label_size=0,
    )
    + geom_label(
        x=2007, y=72, label="China",
        hjust=0, vjust=0.5, color='#FAAB18', fill='white',
        family="Helvetica", size=10, label_size=0,
    )
)
multiple_line_plot
0.2s

Add labels based on your data

# Label each bar with its value, positioned at the bar's own value with
# hjust=1 and a nudge of -1.5 along y.
(
    bars_plot
    + geom_text(
        aes('country', 'lifeExp', label='lifeExp'),
        label_format='d', hjust=1, nudge_y=-1.5, color='white',
        family="Helvetica", size=10,
    )
)
0.3s

Add left-aligned labels to bar charts

# Same labels, but with a constant y=4 instead of the bar's value, so they all
# share one position along the value axis.
(
    bars_plot
    + geom_text(
        aes(x='country', label='lifeExp'),
        y=4,
        label_format='d', hjust=0, color='white',
        family="Helvetica", size=10,
    )
)
0.3s

Add a line

# Straight segment from (1979, 45) to (1965, 43).
(
    multiple_line_plot
    + geom_segment(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555', size=line_size*3/4,
    )
)
0.2s

Add a curved line

# Same endpoints as the straight segment, drawn as a curve (curvature=-0.2).
(
    multiple_line_plot
    + geom_curve(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555', curvature=-0.2, size=line_size*3/4,
    )
)
0.2s

Add an arrow

# The curve again, this time with an arrow specification attached.
(
    multiple_line_plot
    + geom_curve(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555', curvature=-0.2, size=line_size*3/4,
        arrow=arrow(),
    )
)
0.2s

Add a line across the whole plot

# Dashed red horizontal line at y=10.
(
    multiple_line_plot
    + geom_hline(
        yintercept=10, size=line_size, color='red', linetype='dashed',
    )
)
0.2s

Work with small multiples

Facets
# Total population per continent and year, excluding the Americas.
# The column is selected with ['pop'] rather than attribute access because
# 'pop' is also the name of a DataFrame method.
facet_df = (
    df[df.continent != "Americas"]
    .groupby(['continent', 'year'])['pop']
    .sum()
    .reset_index()
)
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0)
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    # One panel per continent, all on the same row.
    + facet_wrap('continent', ncol=5)
    # The upper limit sits just above the last break (4bn) -- presumably so that
    # break stays inside the limits.
    + scale_y_continuous(
        breaks=[0, 2000000000, 4000000000],
        labels=['0', '2bn', '4bn'],
        limits=[0, 4000000010],
    )
    + bbc_theme()
    + theme(legend_position='none', axis_text_x=element_blank())
    + ggsize(600, 420)
    + labs(title="Asia's rapid growth", subtitle="Population growth by continent, 1952-2007")
)
0.3s
Free scales
# Same facets with scales='free'. Tick labels are blanked on both axes.
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0)
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    + facet_wrap('continent', scales='free')
    + bbc_theme()
    + theme(
        legend_position='none',
        axis_text_x=element_blank(),
        axis_text_y=element_blank(),
    )
    + ggsize(600, 400)
    + labs(title="It's all relative", subtitle="Relative population growth by continent, 1952-2007")
)
0.3s

Do something else entirely

Increase or decrease margins

# Re-specify the subtitle's text element with a larger margin (third value set to 75).
bars_plot + theme(plot_subtitle=element_text(margin=[0, 0, 75, 0]))
0.2s

Reorder bars by size

# Order the discrete 'country' axis by the 'lifeExp' value (order_by) rather than by country.
(
    ggplot(
        bar_df,
        aes(as_discrete('country', order_by='lifeExp', order=1), 'lifeExp'),
    )
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + bbc_theme()
    + ggsize(600, 450)
    + coord_flip()
    + labs(title="Reunion is highest", subtitle="Highest African life expectancy, 2007")
)
0.3s
Runtimes (1)