COVID research
Import packages & data
import pandas as pd
import seaborn as sns
Python
Select data source
# select just one results_source file
# --> EDIT
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")
results_source = pd.read_csv("results_institutions.csv", index_col="ID odpowiedzi")
# results_source = pd.read_csv(_, index_col="ID odpowiedzi")


# don't edit
def filter_city(results, city='All'):
    """Return the survey rows for one city, or all rows.

    Args:
        results: DataFrame holding the survey answers. It must contain a
            column whose name starts with 'W jakim mieście'; that column is
            compared against ``city``.
        city: 'All' to return ``results`` unchanged, or one of 'Katowice',
            'Lublin', 'Wrocław' to keep only the rows for that city.

    Returns:
        ``results`` itself for 'All', otherwise the filtered DataFrame.

    Raises:
        ValueError: if ``city`` is not one of the accepted values.
        IndexError: if a city is requested but no column name starts with
            'W jakim mieście'.
    """
    # Compare by value: identity comparison (`is`) against a string literal
    # is implementation-dependent and emits a SyntaxWarning on Python 3.8+.
    if city == 'All':
        return results

    if city not in ('Katowice', 'Lublin', 'Wrocław'):
        raise ValueError('Value needs to be one of: "All / Katowice / Lublin / Wrocław"')

    # The first column whose header starts with the city question holds the
    # respondent's city.
    city_column = results.columns[results.columns.str.startswith('W jakim mieście')][0]
    return results[results[city_column] == city]


def aggr_in_cols(df, first_col, last_col, normalize):
    """Count the values in a positional range of columns.

    Args:
        df: DataFrame to aggregate.
        first_col: position of the first column to include.
        last_col: position of the last column to include (inclusive).
        normalize: if truthy, return percentages rounded to two decimals
            instead of raw counts.

    Returns:
        DataFrame indexed by the distinct values, with one column per
        selected column of ``df``.
    """
    selected = df.iloc[:, first_col:last_col + 1]
    result = selected.apply(pd.Series.value_counts, normalize=normalize)
    if normalize:
        # Vectorised fraction -> percent conversion; avoids
        # DataFrame.applymap, which is deprecated since pandas 2.1.
        result = result.mul(100).round(2)
    return result
Python
# CHECK COLUMN NUMBER
# Print every column position next to its name so the positions can be used
# when selecting columns in the cells below.
for idx, column_name in enumerate(results_source.columns):
    print(idx, column_name)
Python
# --> FILTER VALUES FOR ALL OR ONE CITY
city = 'All'  # e.g. city = 'Katowice'

# don't edit
results = filter_city(results_source, city)

# --> SELECT VARIABLES TO COMPARE (CHECK THE COL NUM ABOVE)
col1 = 202
col2 = 0

# don't edit
# Cross-tabulate the two selected columns: col1 supplies the rows and col2
# the columns of the resulting table.
crosst = pd.crosstab(
    results[results.columns[col1]],
    results[results.columns[col2]],
    margins=True,
    normalize='columns',  # or: 'index' / 'all'
)
# Convert fractions to percentages with two decimals (vectorised; avoids
# DataFrame.applymap, which is deprecated since pandas 2.1).
crosst = crosst.mul(100).round(2)
crosst
Python
# Generate visualisation
# Draw the cross-tabulation as an annotated heatmap without a colour bar.
viz = sns.heatmap(crosst, cmap="YlGnBu", annot=True, cbar=False)
# NOTE(review): the axis labels are the literal strings 'col1' / 'col2',
# not the names of the selected columns - confirm this is intended.
viz.set_ylabel('col1')
viz.set_xlabel('col2')
viz.plot()
Python
# % for values in multiple columns
# insert col numbers here
start = 1
end = 5
percent = True

# Value counts for the columns at positions start..end of the filtered
# results; `percent` is passed as the `normalize` flag.
aggr_in_cols(results, start, end, percent)
Python