Module 4 - Tutorial - Data Visualization

Back to the course outline

import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
0.0s
Pokemon.csv
7.99 KB Download
# Path to the dataset. It has to be a quoted string: the bare token
# Pokemon.csv is parsed as attribute access on an undefined name and raises
# NameError before anything is read.
location = "Pokemon.csv"
# Read with an explicit ISO-8859-1 encoding. The tutorial attributes this to
# the first column's pound/hashtag (#) header.
# NOTE(review): '#' itself is plain ASCII, so the encoding is presumably
# needed for other characters in the file -- confirm against the CSV.
df = pd.read_csv(location, encoding="ISO-8859-1")
# Preview the first rows of the freshly loaded frame.
df.head()
0.5s
# The first column is literally named "#"; rename it in place to "Number" so
# later cells can refer to it by a plain identifier-like label.
df.rename({'#': 'Number'}, axis='columns', inplace=True)
0.0s
# Sanity check: show the first five rows to confirm the column was renamed.
df.head(n=5)
0.3s
# Use each Pokemon's number as the row label (done in place on df).
df.set_index(keys='Number', inplace=True)
# Preview the re-indexed frame.
df.head(n=5)
0.3s
# Scatter plot of Defense (y) against Attack (x) for every row of df.
sns.lmplot(data=df, x='Attack', y='Defense')
0.8s
# Same Attack/Defense scatter, now without the fitted line and with the
# points colored by evolution stage.
sns.lmplot(
    data=df,
    x='Attack',
    y='Defense',
    hue='Stage',    # one color per evolution stage
    fit_reg=False,  # scatter only: suppress the regression line
)
0.7s
# Box plot drawn straight from the full dataframe (no columns removed yet).
sns.boxplot(
    data=df,
)
0.6s
# Build the stats-only frame: a copy of df without the Total, Stage and
# Legendary columns.
stats_df = df.drop(columns=['Total', 'Stage', 'Legendary'])
# Preview the reduced frame.
stats_df.head(n=5)
0.3s
# Box plot of the stats-only frame (Total, Stage and Legendary excluded).
sns.boxplot(
    data=stats_df,
)
0.5s
# Open a wider 13x7 figure so the per-type violins have room.
plt.subplots(figsize=(13, 7))
# One violin per primary type, showing how Attack is distributed within it.
sns.violinplot(data=df, x='Type 1', y='Attack')
0.7s
# Color palette for the Pokemon types, as an ordered list of hex strings.
# Hex values come from Bulbapedia
# (https://bulbapedia.bulbagarden.net/wiki/Category:Type_color_templates).
# The type names are spelled out as keys purely for readability; only the
# ordered hex values end up in the list.
pkmn_type_colors = list({
    'Grass': '#78C850',
    'Fire': '#F08030',
    'Water': '#6890F0',
    'Bug': '#A8B820',
    'Normal': '#A8A878',
    'Poison': '#A040A0',
    'Electric': '#F8D030',
    'Ground': '#E0C068',
    'Fairy': '#EE99AC',
    'Fighting': '#C03028',
    'Psychic': '#F85888',
    'Rock': '#B8A038',
    'Ghost': '#705898',
    'Ice': '#98D8D8',
    'Dragon': '#7038F8',
}.values())
0.0s
# Attack-by-type violins again, this time colored with the Pokemon type
# palette, on a 13x7 figure.
plt.subplots(figsize=(13, 7))
sns.violinplot(data=df, x='Type 1', y='Attack', palette=pkmn_type_colors)
0.7s
# Swarm plot: every Pokemon is an individual point, grouped by primary type
# and colored with the type palette, on a 10x6 figure.
plt.subplots(figsize=(10, 6))
sns.swarmplot(data=df, x='Type 1', y='Attack', palette=pkmn_type_colors)
0.6s
# Overlay a swarm plot on a violin plot of Attack by primary type.
plt.figure(figsize=(13, 7))
# Background layer: colored violins with their inner bars switched off so
# they do not compete with the points drawn on top.
sns.violinplot(
    data=df, x='Type 1', y='Attack',
    palette=pkmn_type_colors,
    inner=None,
)
# Foreground layer: the individual Pokemon as black, slightly transparent dots.
sns.swarmplot(
    data=df, x='Type 1', y='Attack',
    alpha=0.7,
    color='k',
)
plt.title('Attack by Type')
0.7s

Visualize all Types by Stat

# Reshape stats_df from wide to long: the identifier columns are repeated on
# every row, each remaining column name goes into "Stat", and its number goes
# into the default "value" column.
melt_df = stats_df.melt(
    id_vars=['Name', 'Type 1', 'Type 2'],  # identifier columns to keep
    var_name='Stat',                       # holds the former column names
)
melt_df.head(n=5)
0.3s
# Compare shapes: the melted frame has one row per Pokemon per stat, i.e.
# 6 times as many rows as the stats frame it was built from.
print(stats_df.shape, melt_df.shape, sep='\n')
0.3s
# Swarm plot of the melted frame on a 10x6 figure: one column of points per
# stat, colored by primary type.
plt.figure(figsize=(10, 6))
sns.swarmplot(data=melt_df, x='Stat', y='value', hue='Type 1')
2.0s

This swarm plot is a bit messy; let's clean it up.

# Tidier version of the stat swarm plot on a larger 13x7 figure.
plt.figure(figsize=(13, 7))
sns.swarmplot(
    data=melt_df, x='Stat', y='value', hue='Type 1',
    palette=pkmn_type_colors,  # Pokemon type colors
    dodge=True,                # split the points of each stat by hue level
)
# Fix the y-axis range to 0..260.
plt.ylim(0, 260)
# Anchor the legend's upper-left corner at the axes' upper-right corner so it
# sits to the right of the plot instead of covering the points.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
13.9s
# Show the pairwise correlation of the stats as an annotated heatmap.
# stats_df still carries the string columns Name, Type 1 and Type 2; recent
# pandas versions raise instead of silently skipping them in corr(), so limit
# the frame to its numeric columns first (this form works on old and new
# pandas alike).
corr = stats_df.select_dtypes(include='number').corr()
# vmin=-1 pins the lower end of the color scale; annot=True prints each
# coefficient inside its cell.
sns.heatmap(corr, vmin=-1, annot=True)
0.7s
# Histogram of the distribution of attack power with a KDE curve on top.
# sns.distplot is deprecated; histplot with kde=True and density-normalized
# bars is its replacement for this histogram-plus-curve view.
sns.histplot(data=df, x='Attack', kde=True, stat='density')
0.4s
# Bar chart of how many Pokemon have each primary type, in type colors.
sns.countplot(data=df, x='Type 1', palette=pkmn_type_colors)
# Tilt the type names so neighbouring tick labels do not overlap.
plt.xticks(rotation=-45)
0.6s
# Swarm plot of Attack by primary type, with one panel (and one color) per
# evolution stage.
# sns.factorplot was renamed to sns.catplot and is no longer available in
# current seaborn; since kind is given explicitly, the call is otherwise
# unchanged.
g = sns.catplot(
    data=df,
    x='Type 1',
    y='Attack',
    hue='Stage',
    col='Stage',   # one facet per stage
    kind='swarm',
)
# Rotate the x-axis labels on ALL panels of the grid.
g.set_xticklabels(rotation=-45)
3.2s

Back to the course outline

Source: https://elitedatascience.com/python-seaborn-tutorial

Runtimes (1)