Artem Smirnov / Apr 15 2024
BBC Visual and Data Journalism Cookbook for Lets-Plot
This notebook is based on the BBC Visual and Data Journalism cookbook for R graphics, but uses Lets-Plot instead of ggplot2.
Data is extracted from the gapminder R package.
The cookbook below should hopefully help anyone who wants to make graphics like these:
pip install lets-plot==4.3.1rc1
3.3s
import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
0.0s
# Set up Lets-Plot's HTML output before any plot is displayed in the notebook.
LetsPlot.setup_html()
0.0s
# Load the gapminder table from the lets-plot-docs repository and preview it.
gapminder_url = (
    "https://raw.githubusercontent.com/"
    "JetBrains/lets-plot-docs/master/data/gapminder.csv"
)
df = pd.read_csv(gapminder_url)
df.head()
0.1s
# Base line thickness; bbc_theme() and the chart layers below derive their sizes from it.
line_size = 1.4
def bbc_theme(show_x_axis=True):
    """Build the BBC-style Lets-Plot theme used by the charts in this cookbook.

    Args:
        show_x_axis: When true, axis lines are drawn at ``2*line_size`` and the
            returned object also carries ``coord_cartesian(ylim=[0, None])``
            and ``scale_y_continuous(expand=[.15, 0])``. When false, axis
            lines are blank and only the theme is returned.

    Returns:
        The theme, plus the coordinate and y-scale settings when
        ``show_x_axis`` is true.
    """

    def helvetica(size=21, face=None, margin=None):
        # All text elements built here share the Helvetica family.
        return element_text(family="Helvetica", face=face, size=size, margin=margin)

    if show_x_axis:
        axis_line_style = element_line(size=2*line_size)
    else:
        axis_line_style = 'blank'

    styled = theme(
        # Titles
        plot_title=helvetica(size=33, face="bold", margin=[11, 0, 0, 0]),
        plot_subtitle=helvetica(size=26, margin=[9, 0, 0, 0]),
        # Legend
        legend_position='top',
        legend_background='blank',
        legend_title='blank',
        legend_text=helvetica(),
        # Axes
        axis_title='blank',
        axis_text=helvetica(),
        axis_text_x=element_text(margin=[20, 20]),
        axis_text_y=element_text(margin=[10, 5]),
        axis_ticks='blank',
        axis_line=axis_line_style,
        axis_ontop_x=True,
        # Grid and panel: only major horizontal grid lines are kept.
        panel_grid_minor='blank',
        panel_grid_major_y=element_line(size=line_size*6/5, color='#CBCBCB'),
        panel_grid_major_x='blank',
        panel_background='blank',
        # Facet strip labels
        strip_text=element_text(size=26, hjust=0),
    )

    if show_x_axis:
        # Start the y range at 0 and set the y-scale expansion.
        styled += coord_cartesian(ylim=[0, None]) + scale_y_continuous(expand=[.15, 0])
    return styled
0.0s
Make a line chart
# Single-series line chart: life expectancy in Malawi.
line_df = df[df["country"] == "Malawi"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(
        color='#1380A1',
        size=line_size,
        # Apply the "d" format to the year field in tooltips.
        tooltips=layer_tooltips().format("@year", "d"),
    )
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in Malawi 1952-2007")
)
0.3s
# Same line chart as for Malawi, this time for China.
line_df = df.loc[df.country == "China"]
(
    ggplot(line_df, aes('year', 'lifeExp'))
    + geom_line(
        color='#1380A1',
        size=line_size,
        tooltips=layer_tooltips().format("@year", "d"),
    )
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in China 1952-2007")
)
0.2s
Make a multiple line chart
# Two-country comparison; the plot is stored in `multiple_line_plot` because
# later cells add layers and theme tweaks to it.
multiple_line_df = df[df["country"].isin(["China", "United States"])]
multiple_line_plot = (
    ggplot(multiple_line_df, aes('year', 'lifeExp', color='country'))
    + geom_line(size=line_size, tooltips=layer_tooltips().format("@year", "d"))
    + scale_x_continuous(format='d')
    + scale_color_manual(values=['#FAAB18', '#1380A1'])
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title="Living longer", subtitle="Life expectancy in China and the US")
)
multiple_line_plot
0.3s
Add color scheme (flavor)
# Re-skin the two-line chart with the high-contrast dark flavor.
(
    multiple_line_plot
    + theme(plot_margin=[20, 30])
    + flavor_high_contrast_dark()
    + ggsize(700, 500)
)
0.3s
Make a bar chart
# The five African countries with the highest life expectancy in 2007.
africa_2007 = df[(df["year"] == 2007) & (df["continent"] == "Africa")]
bar_df = africa_2007.sort_values(by=['lifeExp'], ascending=False).head(5)

# Kept in `bars_plot` because later cells flip and annotate it.
bars_plot = (
    ggplot(bar_df, aes(as_discrete('country', order=1), 'lifeExp'))
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + bbc_theme()
    + ggsize(640, 480)
    + labs(title="Reunion is highest", subtitle="Highest African life expectancy, 2007")
)
bars_plot
0.3s
Make a stacked bar chart
# Share of each continent's 2007 population that falls into each
# life-expectancy band.
life_exp_bins = pd.IntervalIndex.from_tuples([(0, 50), (50, 65), (65, 80), (80, 90)])
stacked_df = (
    df[df.year == 2007]
    .assign(
        # Bucket life expectancy into the four bands and give them readable names.
        lifeExpGrouped=lambda d: pd.cut(d.lifeExp, bins=life_exp_bins)
        .cat.rename_categories(["Under 50", "50-65", "65-80", "80+"])
    )
    .rename(columns={'pop': 'continentPop'})
    .groupby(['continent', 'lifeExpGrouped'], observed=False)['continentPop']
    .sum()
    .reset_index()
    # observed=False also emits empty continent/band combinations; drop them.
    .query('continentPop > 0')
    .assign(
        # transform('sum') returns the continent total aligned to each row's
        # index, so the division does not depend on the rows being ordered
        # by continent (the previous apply(...).values form did).
        continentPopPercentage=lambda d: 100 * (
            d['continentPop'] / d.groupby('continent')['continentPop'].transform('sum')
        )
    )
)

(
    ggplot(stacked_df, aes('continent', 'continentPopPercentage', fill='lifeExpGrouped'))
    + geom_bar(
        stat='identity',
        size=0,
        tooltips=layer_tooltips().line('@continentPop').format('@continentPop', ',d'),
    )
    + scale_y_continuous(breaks=list(range(0, 101, 25)), format='{d}%')
    + scale_fill_viridis()
    + bbc_theme()
    + theme(legend_justification=[0, 1], legend_position=[-.02, 1.02], legend_direction='horizontal')
    + ggsize(640, 480)
    + labs(title="How life expectancy varies", subtitle="% of population by life expectancy band, 2007")
)
0.3s
Make a grouped bar chart
# Five countries with the largest life-expectancy gain between 1967 and 2007,
# reshaped to one row per country and year for dodged bars.
life_exp_by_year = df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']].pivot(
    index='country', columns='year', values='lifeExp'
)
top_gainers = (
    life_exp_by_year
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap', ascending=False)
    .head(5)
    .reset_index()
)
grouped_bar_df = top_gainers.melt(
    id_vars=['country', 'gap'],
    value_vars=[1967, 2007],
    value_name='lifeExp',
)

(
    ggplot(
        grouped_bar_df,
        aes(as_discrete('country', order=1), 'lifeExp',
            group='year', fill=as_discrete('year')),
    )
    + geom_bar(stat='identity', position='dodge')
    + bbc_theme()
    + scale_fill_manual(values=['#1380A1', '#FAAB18'])
    + ggsize(720, 480)
    + labs(title="We're living longer", subtitle="Biggest life expectancy rise, 1967-2007")
)
0.3s
Make a dumbbell chart
# Ten countries with the largest 1967-2007 life-expectancy gain, one row each,
# with the two years as separate columns.
dumbbell_df = (
    df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']]
    .pivot(index='country', columns='year', values='lifeExp')
    .assign(gap=lambda wide: wide[2007] - wide[1967])
    .sort_values(by='gap')
    .tail(10)
    .reset_index()
)
# The pivoted year columns are addressed as ints above; the aes() mappings
# below refer to them by string name.
dumbbell_df.columns = dumbbell_df.columns.map(str)

(
    ggplot(dumbbell_df)
    # Grey connector between the two years.
    + geom_segment(
        aes(x='1967', xend='2007', y='country', yend='country'),
        color='#DDDDDD',
        size=3,
    )
    + geom_point(aes(x='1967', y='country'), color='#FAAB18', size=5)
    + geom_point(aes(x='2007', y='country'), color='#1380A1', size=5)
    + bbc_theme(show_x_axis=False)
    + ggsize(640, 400)
    + labs(title="We're living longer", subtitle="Biggest life expectancy rise, 1967-2007")
)
0.3s
Make a histogram
# Distribution of life expectancy across countries in 2007.
hist_df = df[df["year"] == 2007]
breaks = list(range(40, 91, 10))
# Only the 90 tick carries the "years" unit.
labels = [f'{y} years' if y == 90 else str(y) for y in breaks]
(
    ggplot(hist_df, aes('lifeExp'))
    + geom_histogram(binwidth=5, color='white', fill='#1380A1')
    + scale_x_continuous(limits=[35, 95], breaks=breaks, labels=labels)
    + bbc_theme()
    + ggsize(600, 450)
    + labs(title = "How life expectancy varies", subtitle="Distribution of life expectancy in 2007")
)
0.2s
Make changes to the legend
Remove the legend
# Remove the legend by adding a colour scale with guide='none'.
multiple_line_plot + scale_color_manual(values=['#FAAB18', '#1380A1'], guide='none')
0.3s
# Alternative: remove the legend through the theme.
multiple_line_plot + theme(legend_position='none')
0.2s
Change the position of the legend
# Override the legend_position='top' set in bbc_theme() with 'right'.
multiple_line_plot + theme(legend_position='right')
0.2s
Make changes to the axes
Flip the coordinates of a plot
# Rebind bars_plot with coord_flip() added; later cells build on this flipped version.
bars_plot = bars_plot + coord_flip()
bars_plot
0.2s
Change the plot limits
# Apply y limits of 0 to 500 to the bar chart (displayed only; bars_plot is not rebound).
bars_plot + ylim(0, 500)
0.2s
Change the axis text manually
# Custom tick labels for the value axis; only the 80 tick carries the unit.
breaks = list(range(0, 81, 20))
labels = [f'{y} years' if y == 80 else str(y) for y in breaks]
bars_plot = (
    bars_plot
    + scale_y_continuous(limits=[0, 85], breaks=breaks, labels=labels)
)
bars_plot
0.2s
Add axis ticks
# Re-enable x-axis ticks, which bbc_theme() blanks out.
(
    multiple_line_plot
    + theme(
        axis_ticks_x=element_line(color='#333333'),
        axis_ticks_length_x=10,
    )
)
0.3s
Add annotations
Add an annotation
# Place a single text annotation at fixed data coordinates.
(
    multiple_line_plot
    + geom_text(
        x=1980, y=45,
        label="I'm an annotation!",
        hjust=0, vjust=0.5,
        color='#555555', fill='white',
        family="Helvetica", size=10,
    )
)
0.2s
# Replace the legend with direct labels. This rebinds `multiple_line_plot`,
# so the cells that follow start from the annotated version.
label_style = dict(
    hjust=0, vjust=0.5, fill='white',
    family="Helvetica", size=10, label_size=0,
)
multiple_line_plot = (
    multiple_line_plot
    + geom_label(
        x=1980, y=45,
        label="I'm quite a long\nannotation over\nthree rows",
        color='#555555',
        **label_style,
    )
    + theme(legend_position='none')
    + xlim(1950, 2011)
    # Series labels at x=2007, coloured like the line scale.
    + geom_label(x=2007, y=79, label="US", color='#1380A1', **label_style)
    + geom_label(x=2007, y=72, label="China", color='#FAAB18', **label_style)
)
multiple_line_plot
0.2s
Add labels based on your data
# Label each bar with its value, positioned at the bar's value and nudged by -1.5.
(
    bars_plot
    + geom_text(
        aes('country', 'lifeExp', label='lifeExp'),
        label_format='d',
        hjust=1,
        nudge_y=-1.5,
        color='white',
        family="Helvetica",
        size=10,
    )
)
0.3s
Add left-aligned labels to bar charts
# Same value labels, but pinned to a constant y=4 instead of the bar's value.
(
    bars_plot
    + geom_text(
        aes(x='country', label='lifeExp'),
        y=4,
        label_format='d',
        hjust=0,
        color='white',
        family="Helvetica",
        size=10,
    )
)
0.3s
Add a line
# Straight connector from (1979, 45) to (1965, 43).
(
    multiple_line_plot
    + geom_segment(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555',
        size=line_size*3/4,
    )
)
0.2s
Add a curved line
# Curved connector between the same two points.
(
    multiple_line_plot
    + geom_curve(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555',
        curvature = -0.2,
        size=line_size*3/4,
    )
)
0.2s
Add an arrow
# Curved connector with an arrow specification attached.
(
    multiple_line_plot
    + geom_curve(
        x=1979, y=45, xend=1965, yend=43,
        color='#555555',
        curvature = -0.2,
        size=line_size*3/4,
        arrow=arrow(),
    )
)
0.2s
Add a line across the whole plot
# Dashed red horizontal reference line at y=10.
(
    multiple_line_plot
    + geom_hline(
        yintercept=10,
        size=line_size,
        color='red',
        linetype='dashed',
    )
)
0.2s
Work with small multiples
Facets
# Total population per continent and year, excluding the Americas.
# The column is selected with ['pop'] rather than `.pop`: `pop` is also the
# name of a DataFrame method, so attribute-style column access is ambiguous.
facet_df = (
    df[df.continent != "Americas"]
    .groupby(['continent', 'year'])['pop']
    .sum()
    .to_frame()
    .reset_index()
)

# One area panel per continent.
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0)
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    + facet_wrap('continent', ncol=5)
    + scale_y_continuous(
        breaks=[0, 2000000000, 4000000000],
        labels=['0', '2bn', '4bn'],
        limits=[0, 4000000010],
    )
    + bbc_theme()
    + theme(legend_position='none', axis_text_x=element_blank())
    + ggsize(600, 420)
    + labs(title="Asia's rapid growth", subtitle="Population growth by continent, 1952-2007")
)
0.3s
Free scales
# Same faceted areas, with scales='free' and both axis texts hidden.
(
    ggplot()
    + geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0)
    + scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300'])
    + facet_wrap('continent', scales='free')
    + bbc_theme()
    + theme(
        legend_position='none',
        axis_text_x=element_blank(),
        axis_text_y=element_blank(),
    )
    + ggsize(600, 400)
    + labs(title="It's all relative", subtitle="Relative population growth by continent, 1952-2007")
)
0.3s
Do something else entirely
Increase or decrease margins
# Override the subtitle margin set by bbc_theme().
(
    bars_plot
    + theme(
        plot_subtitle=element_text(margin=[0, 0, 75, 0]),
    )
)
0.2s
Reorder bars by size
# Rebuild the bar chart with the country axis ordered by lifeExp instead of by name.
(
    ggplot(
        bar_df,
        aes(as_discrete('country', order_by='lifeExp', order=1), 'lifeExp'),
    )
    + geom_bar(stat='identity', position='identity', fill='#1380A1')
    + bbc_theme()
    + ggsize(600, 450)
    + coord_flip()
    + labs(title="Reunion is highest", subtitle="Highest African life expectancy, 2007")
)
0.3s