# COVID research
# Import packages & data
import pandas as pd
import seaborn as sns
0.0s
Python
# Select data source
# select just one results_source file
# --> EDIT
# Keep exactly one `results_source = ...` line active; each alternative reads a
# different survey export. "ID odpowiedzi" (the response-ID column) becomes
# the DataFrame index, so it is not counted among the data columns.
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
# The path must be a string literal: a bare results_institutions.csv is parsed
# as an attribute access on an undefined name and raises NameError.
results_source = pd.read_csv("results_institutions.csv", index_col="ID odpowiedzi")
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
# don't edit
def filter_city(results, city='All'):
    """Return the survey rows for one city, or all rows when *city* is 'All'.

    The city column is located by name prefix: the first column whose name
    starts with 'W jakim mieście' is compared against *city*.

    Args:
        results: DataFrame of survey responses.
        city: 'All' (default) to skip filtering, or one of 'Katowice',
            'Lublin', 'Wrocław'.

    Returns:
        ``results`` itself when *city* is 'All'; otherwise a DataFrame holding
        only the rows whose city column equals *city*.

    Raises:
        ValueError: if *city* is not one of the accepted values.
        IndexError: if no column name starts with 'W jakim mieście'.
    """
    # Compare by value: `is` only matched when the interpreter happened to
    # reuse the same string object for 'All'.
    if city == 'All':
        return results

    if city in ('Katowice', 'Lublin', 'Wrocław'):
        city_column = results.columns[results.columns.str.startswith('W jakim mieście')][0]
        return results[results[city_column] == city]

    raise ValueError('Value needs to be one of: "All / Katowice / Lublin / Wrocław"')
def aggr_in_cols(df, first_col, last_col, normalize):
    """Tabulate how often each value occurs in a run of adjacent columns.

    Args:
        df: DataFrame to summarise.
        first_col: Position of the first column to include.
        last_col: Position of the last column to include (inclusive).
        normalize: If truthy, report percentages of each column's non-null
            answers, rounded to two decimals; otherwise report raw counts.

    Returns:
        DataFrame indexed by the distinct values found, with one column per
        selected input column. A value that never occurs in a given column
        shows up as NaN there.
    """
    # `last_col + 1` makes the positional slice inclusive of last_col.
    selected = df.iloc[:, first_col:last_col + 1]
    result = selected.apply(pd.Series.value_counts, normalize=normalize)
    if normalize:
        # Vectorised replacement for the deprecated DataFrame.applymap call:
        # fractions -> percentages with two decimals.
        result = result.mul(100).round(2)
    return result
0.4s
Python
# CHECK COLUMN NUMBER
# Print every column's position next to its name; the positions are what the
# col1 / col2 / start / end settings further down refer to.
for idx, r in enumerate(results_source.columns):
    print(idx, r)
0.4s
Python
# --> FILTER VALUES FOR ALL OR ONE CITY
city = 'All'  # e.g. city = 'Katowice'

# don't edit
results = filter_city(results_source, city)

# --> SELECT VARIABLES TO COMPARE (CHECK THE COL NUM ABOVE)
col1 = 202  # column position used for the rows of the cross-tabulation
col2 = 0    # column position used for the columns of the cross-tabulation

# don't edit
# Cross-tabulate the two chosen columns. Positional selection via iloc keeps
# this working even if two columns happen to share the same name.
crosst = pd.crosstab(
    results.iloc[:, col1],
    results.iloc[:, col2],
    margins=True,
    normalize='columns',  # or: 'index' / 'all'
)
# Fractions -> percentages with two decimals (vectorised; replaces the
# deprecated DataFrame.applymap call).
crosst = crosst.mul(100).round(2)
crosst
0.2s
Python
# Generate visualisation
# Draw the cross-tabulation as an annotated heatmap without a colour bar.
viz = sns.heatmap(
    crosst,
    annot=True,
    cbar=False,
    cmap="YlGnBu",
)
# Replace the axis labels with the short placeholders 'col1' / 'col2'.
viz.set_ylabel('col1')
viz.set_xlabel('col2')
viz.plot()
0.5s
Python
# % for values in multiple columns
# insert col numbers here
start = 1       # position of the first column to summarise
end = 5         # position of the last column to summarise (inclusive)
percent = True  # True -> percentages, False -> raw counts

aggr_in_cols(results, first_col=start, last_col=end, normalize=percent)
0.2s
Python