Avi Drucker / May 27 2024 / Published
Module 4 - Tutorial - Data Visualization
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
0.0s
# Data file attached to the notebook: Pokemon.csv (7.99 KB)
location = "Pokemon.csv"
# The encoding is passed explicitly; per the original author it is needed to
# read the file's 1st column, whose header is the pound/hashtag (#) symbol.
# NOTE(review): '#' itself is plain ASCII, so the encoding is presumably
# required by other non-ASCII bytes in the file - confirm against the data.
df = pd.read_csv(location, encoding="ISO-8859-1")
df.head()
0.5s
# Give the '#' column a plain-text label ('Number') so it doesn't cause
# further issues in the cells that follow.
df = df.rename(columns={'#': 'Number'})
0.0s
# Validate the rename: preview the first rows and check that the former '#'
# column is now labelled 'Number'.
df.head()
0.3s
# Index the rows by Pokemon number instead of the default positional index,
# then preview the result.
df = df.set_index('Number')
df.head()
0.3s
# Scatterplot of Defense against Attack, one point per row of df.
sns.lmplot(data=df, x='Attack', y='Defense')
0.8s
# Same Attack/Defense scatterplot with more detail added.
sns.lmplot(
    data=df,
    x='Attack',
    y='Defense',
    hue='Stage',    # color by evolution stage
    fit_reg=False,  # remove regression line
)
0.7s
# Boxplot over the whole dataframe, showing the distribution of each
# characteristic side by side.
# NOTE: df still contains the Total, Stage and Legendary columns at this
# point; they are dropped in the next step before the stats-only boxplot.
sns.boxplot(data=df)
0.6s
# Build a stats-only view of each Pokemon by removing the Total, Stage and
# Legendary columns; df itself is left unchanged.
stats_df = df.drop(columns=['Total', 'Stage', 'Legendary'])
stats_df.head()
0.3s
# Boxplot restricted to stats_df, i.e. df without the Total, Stage and
# Legendary columns.
sns.boxplot(data=stats_df)
0.5s
# Create a larger (13 x 7 inch) figure and draw onto its axes explicitly.
fig, ax = plt.subplots(figsize=(13, 7))
# Violin plot: distribution of attack power for each primary type.
sns.violinplot(data=df, x='Type 1', y='Attack', ax=ax)
0.7s
# Color palette for the plots, one hex color per Pokemon type.
# Hex values are from Bulbapedia
# (https://bulbapedia.bulbagarden.net/wiki/Category:Type_color_templates).
# The mapping is written type -> color for readability; the plots consume the
# colors as a plain list, in the insertion order shown here.
_type_color_by_name = {
    'Grass': '#78C850',
    'Fire': '#F08030',
    'Water': '#6890F0',
    'Bug': '#A8B820',
    'Normal': '#A8A878',
    'Poison': '#A040A0',
    'Electric': '#F8D030',
    'Ground': '#E0C068',
    'Fairy': '#EE99AC',
    'Fighting': '#C03028',
    'Psychic': '#F85888',
    'Rock': '#B8A038',
    'Ghost': '#705898',
    'Ice': '#98D8D8',
    'Dragon': '#7038F8',
}
pkmn_type_colors = list(_type_color_by_name.values())
0.0s
# Violin plot of Attack per primary type, drawn with the Pokemon color palette
# on a 13 x 7 inch figure.
fig, ax = plt.subplots(figsize=(13, 7))
sns.violinplot(data=df, x='Type 1', y='Attack', palette=pkmn_type_colors, ax=ax)
0.7s
# Swarm plot: every Pokemon drawn as its own point, grouped by primary type,
# on a 10 x 6 inch figure.
fig, ax = plt.subplots(figsize=(10, 6))
sns.swarmplot(data=df, x='Type 1', y='Attack', palette=pkmn_type_colors, ax=ax)
0.6s
# Overlay a swarm plot on a violin plot of Attack per primary type.
plt.figure(figsize=(13, 7))

# Both layers plot the same columns of the same dataframe.
attack_by_type = dict(x='Type 1', y='Attack', data=df)

# Violin layer: inner=None removes the bars inside each violin so the
# individual points stay readable.
sns.violinplot(inner=None, palette=pkmn_type_colors, **attack_by_type)

# Swarm layer: black, slightly transparent points on top of the violins.
sns.swarmplot(color='k', alpha=0.7, **attack_by_type)

plt.title('Attack by Type')
0.7s
Visualize all Types by Stat
# Reshape stats_df from wide to long: Name, Type 1 and Type 2 are kept as
# identifier columns, and every remaining column is unpivoted into a 'Stat'
# column (the stat's name) plus a 'value' column (its corresponding value).
melt_df = stats_df.melt(id_vars=['Name', 'Type 1', 'Type 2'], var_name='Stat')
melt_df.head()
0.3s
# The melted dataframe has 6 times the number of rows of the original stats
# dataframe: 6 rows per Pokemon, one for each stat type.
for frame in (stats_df, melt_df):
    print(frame.shape)
0.3s
# Swarm plot of every stat value from the melted dataframe, colored by
# primary type, on a 10 x 6 inch figure.
plt.figure(figsize=(10, 6))
sns.swarmplot(data=melt_df, x='Stat', y='value', hue='Type 1')
2.0s
This swarm plot is a bit messy, so let's clean it up.
# Tidier version of the stats swarm plot on a 13 x 7 inch figure.
plt.figure(figsize=(13, 7))
sns.swarmplot(
    data=melt_df,
    x='Stat',
    y='value',
    hue='Type 1',
    dodge=True,                # separate points on chart by hue
    palette=pkmn_type_colors,
)
# Fix the y-axis range to 0-260.
plt.ylim(0, 260)
# Put the legend on the right: its upper-left corner is anchored at the
# axes' top-right corner (1, 1). 'upper left' is the named form of loc=2.
plt.legend(bbox_to_anchor=(1, 1), loc='upper left')
13.9s
# Show the pairwise correlation of the stats as an annotated heatmap.
# stats_df still carries the text columns Name, Type 1 and Type 2, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0 (older
# versions silently dropped them). Selecting the numeric columns explicitly
# gives the same matrix on every pandas version.
corr = stats_df.select_dtypes(include='number').corr()
# vmin=-1 pins the lower end of the color scale to the minimum possible
# correlation; annot=True writes each coefficient into its cell.
sns.heatmap(corr, vmin=-1, annot=True)
0.7s
# Histogram of the distribution of attack power, with a KDE curve overlaid.
# sns.distplot is deprecated (since seaborn 0.11) and scheduled for removal;
# histplot with kde=True and stat='density' is its documented replacement.
# kde_kws cut=3 lets the KDE curve extend past the data range, as distplot's
# curve did. Requires seaborn >= 0.11.
sns.histplot(df['Attack'], kde=True, stat='density', kde_kws={'cut': 3})
0.4s
# Bar plot of how often each primary type (Type 1) occurs, drawn with the
# Pokemon color palette.
sns.countplot(data=df, x='Type 1', palette=pkmn_type_colors)
# Tilt the type names so neighbouring labels don't overlap.
plt.xticks(rotation=-45)
0.6s
# Swarm plot of Attack per primary type, with one panel (column) per
# evolution stage and points colored by stage.
# sns.factorplot was renamed to catplot in seaborn 0.9 and removed in 0.12;
# kind='swarm' is passed explicitly, so the plot is the same as before.
g = sns.catplot(
    data=df,
    x='Type 1',
    y='Attack',
    hue='Stage',
    col='Stage',
    kind='swarm',
)
# Rotate x-axis labels on ALL charts: g is the grid returned by catplot, so
# the call applies to every panel rather than only the last one drawn.
g.set_xticklabels(rotation=-45)
3.2s
Source: https://elitedatascience.com/python-seaborn-tutorial