Micah P. Dombrowski / Mar 18 2020

COVID-19 Growth Analysis

Adapted from analyses by Thomas Wiecki.

Visualizations of the growth of COVID-19.

Data

#hide%matplotlib inlineimport numpy as npimport matplotlib.pyplot as pltimport matplotlibimport pandas as pdimport seaborn as snsimport requestsimport iosns.set_context('talk')plt.style.use('seaborn-whitegrid')

5.4s

Python

#hide
def load_timeseries(name,
                    base_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series'):
    """Download one JHU CSSE time series and return it in long format.

    Parameters
    ----------
    name : str
        Series name used in the file name (e.g. ``'Confirmed'``); its
        lower-cased form is stored in the ``type`` column.
    base_url : str
        Directory URL that contains ``time_series_19-covid-{name}.csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` with columns ``country``, ``state``, ``type`` and
        ``cases``. For every country that has state-level rows, extra rows
        named ``'<country> (total)'`` hold the per-date sum over its states
        (``state`` is NaN on those rows).

    Raises
    ------
    requests.HTTPError
        If the download does not return a successful status code.
    """
    # Thanks to kasparthommen for the suggestion to directly download
    # NOTE(review): the upstream file naming may have changed since this was
    # written -- confirm the URL still resolves.
    url = f'{base_url}/time_series_19-covid-{name}.csv'
    response = requests.get(url, timeout=30)
    # Fail loudly instead of feeding an error page to the CSV parser.
    response.raise_for_status()
    df = pd.read_csv(io.StringIO(response.text),
                     index_col=['Country/Region', 'Province/State', 'Lat', 'Long'])
    df['type'] = name.lower()
    df.columns.name = 'date'

    # Wide (one column per date) -> long (one row per date), dropping Lat/Long.
    df = (df.set_index('type', append=True)
            .reset_index(['Lat', 'Long'], drop=True)
            .stack()
            .reset_index()
            .set_index('date')
         )
    df.index = pd.to_datetime(df.index)
    df.columns = ['country', 'state', 'type', 'cases']

    # Move HK to country level
    is_hong_kong = df.state == 'Hong Kong'
    df.loc[is_hong_kong, 'country'] = 'Hong Kong'
    df.loc[is_hong_kong, 'state'] = np.nan

    # Aggregate large countries split by states. Only `cases` is summed:
    # summing the string `state` column would concatenate names on pandas
    # versions where `numeric_only` defaults to False.
    totals = (df.loc[df.state.notna()]
                .groupby(['country', 'date', 'type'])[['cases']]
                .sum()
                .rename(index=lambda x: x + ' (total)', level=0)
                .reset_index(level=['country', 'type']))
    return pd.concat([df, totals])


df_confirmed = load_timeseries('Confirmed')

# Estimated critical cases
p_crit = .05
df_confirmed = df_confirmed.assign(cases_crit=df_confirmed.cases * p_crit)

# Compute days relative to when 100 confirmed cases was crossed.
# Rows below the threshold get negative offsets, rows at/above it count up
# from 0; this relies on each country's rows being in chronological order.
df_confirmed.loc[:, 'days_since_100'] = np.nan
for country in df_confirmed.country.unique():
    in_country = df_confirmed.country == country
    n_below = int((in_country & (df_confirmed.cases < 100)).sum())
    n_reached = int((in_country & (df_confirmed.cases >= 100)).sum())
    df_confirmed.loc[in_country, 'days_since_100'] = np.arange(-n_below, n_reached)

# Shared footer for all figures. The text is stored under the key `s`
# because the plotting cells look it up by that name.
annotate_kwargs = dict(
    s='Based on COVID Data Repository by Johns Hopkins CSSE ({})\nBy Thomas Wiecki'.format(df_confirmed.index.max().strftime('%B %d, %Y')),
    xy=(0.05, 0.01), xycoords='figure fraction', fontsize=10)

3.1s

Python

Info

#hide
# Country names seem to change quite a bit, so list every distinct name
# currently present in the data, one per line.
country_names = df_confirmed['country'].unique()
for c in country_names:
    print(c)

0.5s

Python

Date of last data update.

# Latest date present in the data (the same value the figure footers use).
# The day is formatted via `.day` because the `%e` strftime directive is not
# available on every platform.
'{0.day} {0:%B, %Y}'.format(df_confirmed.index.max())

0.0s

Python

'15 March, 2020'

Growth Analysis

#hide
# Countries to plot, grouped so that each group shares one line style.
# Names carrying a ' (total)' suffix refer to the per-country aggregates of
# state-level rows.
european_countries = [
    'Italy',
    'Germany',
    'France (total)',
    'Spain',
    'United Kingdom (total)',
    'Iran',
]
large_engl_countries = [
    'US (total)',
    'Canada (total)',
    'Australia (total)',
]
asian_countries = [
    'Singapore',
    'Japan',
    'Korea, South',
    'Hong Kong',
]
south_american_countries = [
    'Argentina',
    'Brazil',
    'Colombia',
    'Chile',
]

# Paired element-wise: the i-th group is drawn with the i-th line style.
country_groups = [
    european_countries,
    large_engl_countries,
    asian_countries,
    south_american_countries,
]
line_styles = ['-', ':', '--', '-.']

0.0s

Python

#collapse-hide
def plot_countries(df, countries, min_cases=100, ls='-', col='cases', ax=None):
    """Plot one line per country, starting where it reached `min_cases`.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with at least `country`, `cases` and `col` columns.
    countries : iterable of str
        Country names to draw; names with no qualifying rows are skipped.
    min_cases : int
        Only rows with `cases >= min_cases` are plotted.
    ls : str
        Matplotlib line style shared by all countries in this call.
    col : str
        Column whose values are plotted on the y axis.
    ax : matplotlib axes, optional
        Axes to draw on; when None, pandas draws on the current axes.
    """
    for country in countries:
        df_country = df.loc[(df.country == country) & (df.cases >= min_cases)]
        if df_country.empty:
            continue
        # reset_index() replaces the date index with 0, 1, 2, ... so the
        # x axis counts rows since the threshold was reached.
        df_country.reset_index()[col].plot(label=country, ls=ls, ax=ax)


sns.set_palette(sns.hls_palette(8, l=.45, s=.8)) # 8 countries max
fig, ax = plt.subplots(figsize=(12, 8))

for countries, ls in zip(country_groups, line_styles):
    plot_countries(df_confirmed, countries, ls=ls, ax=ax)

# Reference curve: 100 cases growing 33% per day.
x = np.linspace(0, plt.xlim()[1] - 1)
ax.plot(x, 100 * (1.33) ** x, ls='--', color='k', label='33% daily growth')

ax.set(yscale='log',
       title='Exponential growth of COVID-19 across countries',
       xlabel='Days from first 100 confirmed cases',
       ylabel='Confirmed cases (log scale)')
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))

# Pass the footer text positionally: Matplotlib renamed annotate's first
# parameter from `s` to `text`, so the keyword form is version-dependent.
footer_kwargs = dict(annotate_kwargs)
footer_text = footer_kwargs.pop('s')
ax.annotate(footer_text, **footer_kwargs)
sns.despine();

1.8s

Python

#hide
# This creates a preview image for the blog post and home page
preview_path = '/results/covid-growth.png'
fig.savefig(preview_path)

0.3s

Python

#collapse-hide
# Same comparison as the log-scale figure, on a linear y axis capped at
# 30000 cases.
fig, ax = plt.subplots(figsize=(12, 8))

for countries, ls in zip(country_groups, line_styles):
    plot_countries(df_confirmed, countries, ls=ls)

# Reference curve: 100 cases growing 33% per day.
x = np.linspace(0, plt.xlim()[1] - 1)
ax.plot(x, 100 * (1.33) ** x, ls='--', color='k', label='33% daily growth')

ax.set(title='Exponential growth of COVID-19 across countries',
       xlabel='Days from first 100 confirmed cases',
       ylabel='Confirmed cases', ylim=(0, 30000))
ax.legend(bbox_to_anchor=(1.0, 1.0))

# Pass the footer text positionally: Matplotlib renamed annotate's first
# parameter from `s` to `text`, so the keyword form is version-dependent.
footer_kwargs = dict(annotate_kwargs)
footer_text = footer_kwargs.pop('s')
ax.annotate(footer_text, **footer_kwargs)
sns.despine();

1.3s

Python

#collapse-hide
smooth_days = 4
fig, ax = plt.subplots(figsize=(14, 8))

# Daily relative change, smoothed with a rolling mean. Both steps run inside
# each country's group so the smoothing window never mixes the tail of one
# country with the start of the next. Values are assigned positionally
# because the date index repeats across countries.
df_confirmed['pct_change'] = (
    df_confirmed
    .groupby('country')
    .cases
    .transform(lambda cases: cases.pct_change().rolling(smooth_days).mean())
    .to_numpy()
)

above_100 = df_confirmed.loc[df_confirmed.cases > 100]
for countries, ls in zip(country_groups, line_styles):
    for country in countries:
        growth = above_100.loc[above_100.country == country]
        # Skip names absent from the data instead of raising on lookup.
        if growth.empty:
            continue
        (growth.set_index('days_since_100')['pct_change']
               .plot(ax=ax, ls=ls, label=country))

ax.set(ylim=(0, 1),
       xlim=(0, 20),
       title='Are we seeing changes in daily growth rate?',
       xlabel='Days from first 100 confirmed cases',
       ylabel='Daily percent change (smoothed over {} days)'.format(smooth_days),
)
# Reference level: 33% daily growth.
ax.axhline(.33, ls='--', color='k')
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
sns.despine()

# Pass the footer text positionally: Matplotlib renamed annotate's first
# parameter from `s` to `text`, so the keyword form is version-dependent.
footer_kwargs = dict(annotate_kwargs)
footer_text = footer_kwargs.pop('s')
ax.annotate(footer_text, **footer_kwargs);

1.1s

Python

#collapse-hide
sns.set_palette(sns.hls_palette(8, l=.45, s=.8)) # 8 countries max
fig, ax = plt.subplots(figsize=(12, 8))

# 28000 ICU beds total, 80% occupied
icu_germany = 28000
icu_germany_free = .2

# Estimated critical cases in Germany from the first day above 100 cases.
df_tmp = df_confirmed.loc[lambda x: (x.country == 'Germany') & (x.cases > 100)].cases_crit
df_tmp.plot(ax=ax)

# Reference curve on a daily date axis: one exponent step per calendar day,
# so the line really is 33% growth per day over the 30 plotted dates.
x = np.arange(30)
pd.Series(index=pd.date_range(df_tmp.index[0], periods=30),
          data=100 * p_crit * (1.33) ** x).plot(ax=ax, ls='--', color='k', label='33% daily growth')

ax.axhline(icu_germany, color='.3', ls='-.', label='Total ICU beds')
ax.axhline(icu_germany * icu_germany_free, color='.5', ls=':', label='Free ICU beds')
ax.set(yscale='log',
       title='When will Germany run out of ICU beds?',
       ylabel='Expected critical cases (assuming {:.0f}% critical)'.format(100 * p_crit),
)
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
sns.despine()

# Pass the footer text positionally: Matplotlib renamed annotate's first
# parameter from `s` to `text`, so the keyword form is version-dependent.
footer_kwargs = dict(annotate_kwargs)
footer_text = footer_kwargs.pop('s')
ax.annotate(footer_text, **footer_kwargs);

1.1s

Python

About

Updated daily by GitHub Actions.

This visualization was made by Thomas Wiecki [1].

[1]: Data are sourced from the "2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository by Johns Hopkins CSSE" GitHub repository. The visualization recreates the (pay-walled) plot in the Financial Times. This code is provided under the BSD-3 License. Link to original notebook.

COVID-19 Growth Analysis

Data

Info

Growth Analysis

About

Runtimes (1)