COVID research

Import packages & data

import pandas as pd
import seaborn as sns
0.0s
Python
results_all_joined.csv
260.70 KB
results_institutions.csv
151.66 KB
results_jdg_freelance.csv
155.59 KB
results_business.csv
64.54 KB
results_ngo.csv
133.17 KB

Select data source

# Select exactly one results_source file.
# --> EDIT: keep one read_csv line active and leave the others commented out.
# The file names below are the CSV files attached to this notebook.
# results_source = pd.read_csv("results_all_joined.csv", index_col="ID odpowiedzi")
# results_source = pd.read_csv("results_jdg_freelance.csv", index_col="ID odpowiedzi")
# results_source = pd.read_csv("results_business.csv", index_col="ID odpowiedzi")
# results_source = pd.read_csv("results_ngo.csv", index_col="ID odpowiedzi")
# The path must be a quoted string; a bare file name is parsed as an
# attribute lookup on an undefined variable and raises NameError.
results_source = pd.read_csv("results_institutions.csv", index_col="ID odpowiedzi")
# don't edit
def filter_city(results, city='All'):
    """Return the survey rows for one city, or every row when ``city`` is 'All'.

    The city column is located by its header: the first column whose label
    starts with 'W jakim mieście' is used.

    Args:
        results: DataFrame of survey answers.
        city: 'All' to keep every row, or one of 'Katowice', 'Lublin',
            'Wrocław' to keep only rows whose city answer equals it.

    Returns:
        ``results`` itself for 'All', otherwise a DataFrame restricted to
        the rows of the requested city.

    Raises:
        ValueError: if ``city`` is not one of the supported values.
        IndexError: if a city is requested but no column label starts with
            'W jakim mieście'.
    """
    # Compare by value: ``is`` tests object identity, which only matches
    # equal strings when the interpreter happens to intern them.
    if city == 'All':
        return results
    if city not in ('Katowice', 'Lublin', 'Wrocław'):
        raise ValueError('Value needs to be one of: "All / Katowice / Lublin / Wrocław"')
    city_col = results.columns[results.columns.str.startswith('W jakim mieście')][0]
    return results[results[city_col] == city]
        
def aggr_in_cols(df, first_col, last_col, normalize):
    """Count how often each value occurs in a positional range of columns.

    Args:
        df: DataFrame to aggregate.
        first_col: Position of the first column to include.
        last_col: Position of the last column to include (inclusive).
        normalize: If truthy, report relative frequencies as percentages
            rounded to two decimals instead of raw counts.

    Returns:
        DataFrame indexed by the distinct values found, with one column per
        selected input column.
    """
    # ``last_col + 1`` because iloc slicing excludes the stop position.
    selected = df.iloc[:, first_col:last_col + 1]
    counts = selected.apply(pd.Series.value_counts, normalize=normalize)
    if normalize:
        # Vectorised scaling and rounding; DataFrame.applymap is deprecated
        # in recent pandas releases.
        counts = (counts * 100).round(2)
    return counts
0.4s
Python
# CHECK COLUMN NUMBER
# Print each column's position next to its header so the positions can be
# used for the column selections further down.
for position, column_name in enumerate(results_source.columns):
    print(position, column_name)
0.4s
Python
# --> FILTER VALUES FOR ALL OR ONE CITY
city = 'All'  # e.g. city = 'Katowice'

# don't edit
results = filter_city(results_source, city)

# --> SELECT VARIABLES TO COMPARE (CHECK THE COL NUM ABOVE)
col1 = 202  # position of the column used for the crosstab rows
col2 = 0  # position of the column used for the crosstab columns

# don't edit
# Cross-tabulate the two selected columns. With normalize='columns' every
# column of the table sums to 1; margins=True adds the 'All' totals.
crosst = pd.crosstab(
    results.iloc[:, col1],  # positional lookup also works with duplicate headers
    results.iloc[:, col2],
    margins=True,
    normalize='columns',  # or: 'index' / 'all'
)
# Express the fractions as percentages with two decimals (vectorised;
# DataFrame.applymap is deprecated in recent pandas releases).
crosst = (crosst * 100).round(2)
crosst
0.2s
Python
# Generate visualisation: heatmap of the percentage crosstab, with each
# cell annotated by its value and no colour bar.
viz = sns.heatmap(crosst, cmap="YlGnBu", annot=True, cbar=False)
# Label the axes with the headers of the compared columns instead of the
# literal variable names: crosstab rows (y axis) come from col1 and
# crosstab columns (x axis) from col2.
viz.set_ylabel(results.columns[col1])
viz.set_xlabel(results.columns[col2])
0.5s
Python
# Value breakdown across a run of adjacent columns.
# --> insert column positions here (both ends are included)
start = 1
end = 5
# True: percentages rounded to two decimals; False: raw counts
percent = True
aggr_in_cols(results, first_col=start, last_col=end, normalize=percent)
0.2s
Python
Runtimes (1)