An Introduction to seaborn
Dr. Atif Tahir, Dr. Abdul Aziz and Dr. Nuaman Durrani
Introduction
• Seaborn is a complement to matplotlib and not a replacement for it
• It is built on top of matplotlib
• Seaborn comes with a number of customized themes and a high-level
interface for controlling the look of matplotlib figures
Cont…
• Functions that visualize matrices of data and use clustering algorithms
to discover structure in those matrices
• A function to plot statistical time series data with flexible estimation
and representation of uncertainty around the estimate
• High-level abstractions for structuring grids of plots that let you easily
build complex visualizations
• For reference and more details, please
visit: https://seaborn.pydata.org/introduction.html
Commands to Play with Seaborn
1- Show seaborn Plots:
plt.show() to display seaborn plots
2- Use seaborn With Matplotlib Defaults:
set(), set_style(), set_context(), and set_palette()
3- Use seaborn’s colors as a colormap in matplotlib:
color_palette() with n_colors as the number of colors with the argument
4- Scale Seaborn Plots For Other Contexts:
set_context() to control the plot elements.
The four predefined contexts are “paper”, “notebook”, “talk” and
“poster”.
5- Set The Figure Size in Seaborn:
plt.subplots()
6- Add A Title:
set_title()
Loading A Built-in Seaborn
Data Set
import seaborn as sns
import matplotlib.pyplot as plt
# Load iris data
iris = sns.load_dataset("iris")
# Construct iris plot
sns.swarmplot(x="species", y="petal_length", data=iris)
# Show plot
plt.show()
Loading Your Data from Pandas DataFrame
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Load in data
tips = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-
data/master/tips.csv")
# Create violinplot
sns.violinplot(x = "total_bill", data=tips)
# Show the plot
plt.show()
1- Regression Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.lmplot(x='total_bill', y='tip', data=tips)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')sns.
lmplot(x='total_bill', y='tip', hue='sex', data=tips, palette = 'bright')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')sns.
lmplot(x='total_bill', y='tip', hue='sex', data=tips, col = 'sex')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()# Load the example tips dataset
iris = sns.load_dataset("iris")
# Plot tip as a function of toal bill across days
g = sns.lmplot(x="sepal_length", y="sepal_width",
hue="species",truncate=True, size=5, data=iris)
# Use more informative axis labels than are provided by default
g.set_axis_labels("Sepal length (mm)", "Sepal width (mm)")
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="ticks")
# Load the example dataset for Anscombe's quartet
df = sns.load_dataset("anscombe")
# Show the results of a linear regression within each dataset
sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=df,col_wrap=2,
ci=None, palette="muted", size=4,scatter_kws={"s": 50, "alpha": 1})
plt.show()
2- Distribution Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.distplot(tips['total_bill'], kde=False, bins=30)
plt.show()
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", palette="muted", color_codes=True)
rs = np.random.RandomState(10)
# Set up the matplotlib figure
f, axes = plt.subplots(2, 2, figsize=(7, 7), sharex=True)
sns.despine(left=True)
# Generate a random univariate dataset
d = rs.normal(size=100)
# Plot a simple histogram with binsize determined automatically
sns.distplot(d, kde=False, color="b", ax=axes[0, 0])
# Plot a kernel density estimate and rug plot
sns.distplot(d, hist=False, rug=True, color="r", ax=axes[0, 1])
# Plot a filled kernel density estimate
sns.distplot(d, hist=False, color="g", kde_kws={"shade": True}, ax=axes[1, 0])
# Plot a historgram and kernel density estimate
sns.distplot(d, color="m", ax=axes[1, 1])
plt.setp(axes, yticks=[])
plt.tight_layout()
plt.show()
3- Joint Plot
• We can combine different types of plots together to create a join plot
such as scatter, hex, regression…etc.
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.jointplot(x= 'total_bill', y = 'tip', data=tips, kind='scatter‘)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.jointplot(x= 'total_bill', y = 'tip', data=tips, kind='regression')
plt.show()
4- Pair Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.pairplot(tips)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.pairplot(tips, hue='sex', palette='coolwarm')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="ticks")
df = sns.load_dataset("iris")
sns.pairplot(df, hue="species")
plt.show()
5- Bar Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.barplot(x = 'sex', y='total_bill', data=tips)
plt.show()
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")
# Load the example planets dataset
planets = sns.load_dataset("planets")
# Make a range of years to show categories with no observations
years = np.arange(2000, 2015)
# Draw a count plot to show the number of planets discovered each year
g = sns.factorplot(x="year", data=planets,
kind="count",palette="BuPu", size=6, aspect=1.5, order=years)
g.set_xticklabels(step=2)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")
# Initialize the matplotlib
figuref, ax = plt.subplots(figsize=(6, 15))
# Load the example car crash dataset
crashes = sns.load_dataset("car_crashes").sort_values("total", ascending=False)
# Plot the total crashes
sns.set_color_codes("pastel")
sns.barplot(x="total", y="abbrev", data=crashes,label="Total", color="b")
# Plot the crashes where alcohol was involved
sns.set_color_codes("muted")
sns.barplot(x="alcohol", y="abbrev", data=crashes,label="Alcohol-involved", color="b")
# Add a legend and informative axis label
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(xlim=(0, 24), ylabel="",xlabel="Automobile collisions per billion miles")
sns.despine(left=True, bottom=True)
plt.show()
6- Count Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.countplot(x = 'sex', data = tips)
plt.show()
7- Box Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.boxplot(x = 'day', y='total_bill', data=tips, palette='rainbow')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.boxplot(x = 'day', y='total_bill', hue='smoker', data=tips,
palette='rainbow')
plt.show()
8- Violin Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.violinplot(x = 'day', y='total_bill', data=tips, palette='rainbow')
plt.show()
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()
# Create a random dataset across several variables
rs = np.random.RandomState(0)
n, p = 40, 8
d = rs.normal(0, 2, (n, p))
d += np.log(np.arange(1, p + 1)) * -5 + 10
# Use cubehelix to get a custom sequential palette
pal = sns.cubehelix_palette(p, rot=-.5, dark=.3)
# Show each distribution with both violins and points
sns.violinplot(data=d, palette=pal, inner="points")
plt.show()
9- Heat Plot
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
tips.corr()
sns.heatmap(tips.corr(), cmap='coolwarm', annot=True)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
flights = sns.load_dataset('flights')
pvflights = flights.pivot_table(values='passengers', index='month',
columns='year')
sns.heatmap(pvflights,
linecolor='white', linewidths=1)
plt.show()