# Interactive Map - Confirmed Cases in the US by State
Interactive Visualizations of The Count and Growth of COVID-19 in the US. # Setup ```python id=31bddf69-0b98-4a56-9d7e-bc4af1b08606 #hide import numpy as np import pandas as pd import altair as alt alt.data_transformers.disable_max_rows() ``` ```python id=3cc75c6e-8ea7-4b4b-b6bf-00c0c4a41845 #hide url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv' confirmed_df = pd.read_csv(url) cols = confirmed_df.columns confirmed_df = confirmed_df.astype({'Province/State': 'str','Country/Region':str}) #confirmed_df.head() ``` ```python id=7940c7cf-ffb9-4b3c-83e4-2254f0e06696 #hide dcols = cols[cols.str.match(pat='\d+\/\d+\/\d+', case=False)] rcols = cols[~cols.isin(dcols)] df = pd.melt(confirmed_df, id_vars=rcols, value_vars=dcols, var_name='date', value_name='confirmed_count') df.columns = df.columns.str.lower().str.replace('/','_') df['date'] = pd.to_datetime(df['date']) df = df.astype({'province_state': 'str','country_region':str}) #clean df.loc[df.province_state.isna(),'province_state'] = '' ``` ```python id=792d9f18-bdbe-4f3d-96bf-b76e1e9a8655 #hide abbr2state = { 'AK': 'Alaska', 'AL': 'Alabama', 'AR': 'Arkansas', 'AS': 'American Samoa', 'AZ': 'Arizona', 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DC': 'District of Columbia', 'DE': 'Delaware', 'FL': 'Florida', 'GA': 'Georgia', 'GU': 'Guam', 'HI': 'Hawaii', 'IA': 'Iowa', 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'MA': 'Massachusetts', 'MD': 'Maryland', 'ME': 'Maine', 'MI': 'Michigan', 'MN': 'Minnesota', 'MO': 'Missouri', 'MP': 'Northern Mariana Islands', 'MS': 'Mississippi', 'MT': 'Montana', 'NA': 'National', 'NC': 'North Carolina', 'ND': 'North Dakota', 'NE': 'Nebraska', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NV': 'Nevada', 'NY': 'New York', 'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'PR': 'Puerto Rico', 'RI': 'Rhode Island', 'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah', 'VA': 'Virginia', 'VI': 'Virgin Islands', 'VT': 'Vermont', 'WA': 'Washington', 'WI': 'Wisconsin', 'WV': 'West Virginia', 'WY': 'Wyoming' } state2abbr = {s:a for a,s in abbr2state.items()} ``` ```python id=8632d636-5b97-4bb0-af40-7dd276e52529 #hide us_df = df[(df.country_region == 'US')].copy() def clean_state(col): if ',' in col: return col.split(', ', 1)[1] else: return state2abbr.get(col) us_df['state'] = us_df.province_state.apply(clean_state) us_df['state'] = us_df.apply(lambda x: 'DP' if x['province_state'] == 'Diamond Princess' else x['state'], axis=1) us_df.confirmed_count.fillna(0, inplace=True) us_df.rename(columns={'country_region':'country'}, inplace=True) us_daily_df = (us_df .groupby(['state','country','date']) .agg( confirmed_count=('confirmed_count','sum'), lat=('lat','mean'), long=('long','mean')) ) us_daily_df['new_cases'] = us_daily_df.confirmed_count.diff() us_daily_df.loc[us_daily_df.new_cases < 0, 'new_cases'] = 0 #us_daily_df['cs'] = us_daily_df.sort_values('date').groupby(['state','country']).confirmed_count.cumsum() us_daily_df = us_daily_df.reset_index() #us_daily_df.head() #active = use the recent date state_df = us_daily_df.sort_values('date').groupby(['state','country']).tail(1) #state_df.head() #https://github.com/altair-viz/altair/issues/1005#issuecomment-403237407 def to_altair_datetime(dt): return alt.DateTime(year=dt.year, month=dt.month, date=dt.day, hours=dt.hour, minutes=dt.minute, seconds=dt.second, milliseconds=0.001 * dt.microsecond) ``` ```python id=f23411cb-31f5-4a0b-ac03-6acfdee93171 #hide states_data = 'https://vega.github.io/vega-datasets/data/us-10m.json' states = alt.topo_feature(states_data, feature='states') selector = alt.selection_single(empty='none', fields=['state'], nearest=True, init={'state':'CA'}) curr_date = state_df.date.max().date().strftime('%Y-%m-%d') dmax = (us_daily_df.date.max() + pd.DateOffset(days=3)) dmin = us_daily_df.date.min() # US states background background = alt.Chart(states).mark_geoshape( fill='lightgray', stroke='white' ).properties( width=500, height=400 ).project('albersUsa') points = alt.Chart(state_df).mark_circle().encode( longitude='long:Q', latitude='lat:Q', size=alt.Size('confirmed_count:Q', title= 'Number of Confirmed Cases'), color=alt.value('steelblue'), tooltip=['state:N','confirmed_count:Q'] ).properties( title=f'Total Confirmed Cases by State as of {curr_date}' ).add_selection(selector) timeseries = alt.Chart(us_daily_df).mark_bar().properties( width=500, height=350, title="New Cases by Day", ).encode( x=alt.X('date:T', title='Date', timeUnit='yearmonthdate', axis=alt.Axis(format='%y/%m/%d', labelAngle=-30), scale=alt.Scale(domain=[to_altair_datetime(dmin), to_altair_datetime(dmax)])), y=alt.Y('new_cases:Q', axis=alt.Axis(title='# of New Cases',titleColor='steelblue'), ), color=alt.Color('state:O'), tooltip=['state:N','date:T','confirmed_count:Q', 'new_cases:Q'] ).transform_filter( selector ).add_selection(alt.selection_single() ) timeseries_cs = alt.Chart(us_daily_df).mark_line(color='red').properties( width=500, height=350, ).encode( x=alt.X('date:T', title='Date', timeUnit='yearmonthdate', axis=alt.Axis(format='%y/%m/%d', labelAngle=-30), scale=alt.Scale(domain=[to_altair_datetime(dmin), to_altair_datetime(dmax)])), y=alt.Y('confirmed_count:Q', #scale=alt.Scale(type='log'), axis=alt.Axis(title='# of Confirmed Cases', titleColor='red'), ), ).transform_filter( selector ).add_selection(alt.selection_single(nearest=True) ) final_chart = alt.vconcat( background + points, alt.layer(timeseries, timeseries_cs).resolve_scale(y='independent'), ).resolve_scale( color='independent', shape='independent', ).configure( padding={'left':10, 'bottom':40} ).configure_axis( labelFontSize=10, labelPadding=10, titleFontSize=12, ).configure_view( stroke=None ) ``` # Click On State To Filter Chart Below ```python id=1c87f32b-3aea-4ec0-9b19-16155990b1da #hide_input final_chart ``` [result][nextjournal#output#1c87f32b-3aea-4ec0-9b19-16155990b1da#result] NB: Cruise ship, the "Diamond Princess" is represented by state = DP Prepared by [Asif Imran](https://twitter.com/5sigma)\[1\] \[1\]: Source: ["2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository by Johns Hopkins CSSE"](https://systems.jhu.edu/research/public-health/ncov/) [GitHub repository](https://github.com/CSSEGISandData/COVID-19). This code is provided under the [BSD-3 License](https://github.com/twiecki/covid19/blob/master/LICENSE). [nextjournal#output#1c87f32b-3aea-4ec0-9b19-16155990b1da#result]: