Graphs
Matplotlib is a low-level graph plotting library in
Python that serves as a visualization utility.
Scatter Plot
import matplotlib.pyplot as plt
# Paired sample data: x[i] is plotted against y[i], so both lists must
# have the same length (13 values each).
x = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]
y = [99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86]

# Draw one dot per (x, y) pair, then display the figure.
plt.scatter(x, y)
plt.show()
Compare
import matplotlib.pyplot as plt
import numpy as np
# Day one: the age and speed of 13 cars.
x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])
plt.scatter(x, y)

# Day two: the age and speed of 15 cars.
x = np.array([2, 2, 8, 1, 15, 8, 12, 9, 7, 3, 11, 4, 7, 14, 12])
y = np.array([100, 105, 84, 105, 90, 99, 90, 95, 94, 100, 79, 112, 91, 80, 85])
plt.scatter(x, y)

# Both scatter() calls draw onto the same figure, so a single show()
# displays the two data sets together for comparison.
plt.show()
Colors
import matplotlib.pyplot as plt
import numpy as np
# First data set, drawn with a named color.
x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])
plt.scatter(x, y, color = 'hotpink')

# Second data set, drawn with a hexadecimal color value.
x = np.array([2, 2, 8, 1, 15, 8, 12, 9, 7, 3, 11, 4, 7, 14, 12])
y = np.array([100, 105, 84, 105, 90, 99, 90, 95, 94, 100, 79, 112, 91, 80, 85])
plt.scatter(x, y, color = '#88c999')

# Show both data sets on the same figure.
plt.show()
Size
import matplotlib.pyplot as plt
import numpy as np
x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])

# One marker size per point; the array has the same length as x and y.
sizes = np.array([20, 50, 100, 200, 500, 1000, 60, 90, 10, 300, 600, 800, 75])

# The s argument sets the size of each individual dot.
plt.scatter(x, y, s=sizes)
plt.show()
Alpha
You can adjust the transparency of the dots with the alpha argument.
import matplotlib.pyplot as plt
import numpy as np
x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])

# One marker size per point; the array has the same length as x and y.
sizes = np.array([20, 50, 100, 200, 500, 1000, 60, 90, 10, 300, 600, 800, 75])

# alpha=0.5 makes the dots half transparent, so overlapping dots stay visible.
plt.scatter(x, y, s=sizes, alpha=0.5)
plt.show()
Histogram
A histogram is a graph showing frequency distributions.
It is a graph showing the number of observations within each given interval.
import matplotlib.pyplot as plt
import numpy as np
# Draw 250 samples from a normal distribution with mean 170 and
# standard deviation 10.
x = np.random.normal(loc=170, scale=10, size=250)

# Count how many samples fall into each interval and draw the bars.
plt.hist(x)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Seed NumPy's global random generator so every run draws the same sample.
np.random.seed(42)

# 1000 random values from the standard normal distribution.
data = np.random.randn(1000)

# Histogram appearance: 30 bins, black bar outlines, slightly transparent fill.
hist_style = dict(bins=30, edgecolor='black', alpha=0.7)
plt.hist(data, **hist_style)

# Axis labels and title.
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram Example')

# Display the finished figure.
plt.show()
Bar chart
import matplotlib.pyplot as plt
import numpy as np
# Category labels and the bar height for each category.
x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])

plt.bar(x, y)
plt.show()

# Second example: plain Python lists work just as well as NumPy arrays.
x = ["APPLES", "BANANAS"]
y = [400, 350]

plt.bar(x, y)
# Without this call the second chart is built but never displayed.
plt.show()
Horizontal Bars
import matplotlib.pyplot as plt
import numpy as np
# Categories and the value belonging to each category.
labels = ["A", "B", "C", "D"]
values = [3, 8, 1, 10]
x = np.array(labels)
y = np.array(values)

# barh() takes the bar positions first and the bar lengths ("width") second,
# so the categories end up on the vertical axis.
plt.barh(x, width=y)
plt.show()
Bar Color
import matplotlib.pyplot as plt
import numpy as np
# Categories and the value belonging to each category.
labels = ["A", "B", "C", "D"]
values = [3, 8, 1, 10]
x = np.array(labels)
y = np.array(values)

# The color keyword sets the fill color of all bars.
plt.bar(x, height=y, color="red")
plt.show()
Color Names Supported by All Browsers
All modern browsers support 140 standard color names, and Matplotlib
accepts these same names (for example "red" or "hotpink") as color values.
Bar Width
import matplotlib.pyplot as plt
import numpy as np
# Categories and the value belonging to each category.
labels = ["A", "B", "C", "D"]
values = [3, 8, 1, 10]
x = np.array(labels)
y = np.array(values)

# width controls how wide each vertical bar is drawn.
plt.bar(x, height=y, width=0.1)
plt.show()
The default width value is 0.8
Bar Height
import matplotlib.pyplot as plt
import numpy as np
# Categories and the value belonging to each category.
labels = ["A", "B", "C", "D"]
values = [3, 8, 1, 10]
x = np.array(labels)
y = np.array(values)

# For horizontal bars the thickness is set with height, not width;
# the second positional argument of barh() is the bar length.
plt.barh(x, width=y, height=0.1)
plt.show()
The default height value is 0.8
• Histograms visualize quantitative (numerical) data,
whereas bar charts display categorical variables.
Creating Pie Charts
import matplotlib.pyplot as plt
import numpy as np
# One value per wedge of the pie.
wedge_values = [35, 25, 25, 15]
y = np.array(wedge_values)

# Draw one wedge per value and display the chart.
plt.pie(y)
plt.show()
Labels
import matplotlib.pyplot as plt
import numpy as np
# One value per wedge, and one label per wedge in the same order.
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

# The labels argument attaches a text label to each wedge.
plt.pie(y, labels = mylabels)
plt.show()
Explode
import matplotlib.pyplot as plt
import numpy as np
# One value per wedge, and one label per wedge in the same order.
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

# One offset per wedge: only the first wedge ("Apples") is pulled out
# from the center; 0 leaves a wedge in place.
myexplode = [0.2, 0, 0, 0]

plt.pie(y, labels = mylabels, explode = myexplode)
plt.show()
Shadow
import matplotlib.pyplot as plt
import numpy as np
# One value per wedge, and one label per wedge in the same order.
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

# One offset per wedge: only the first wedge ("Apples") is pulled out.
myexplode = [0.2, 0, 0, 0]

# shadow = True draws a drop shadow beneath the pie.
plt.pie(y, labels = mylabels, explode = myexplode, shadow = True)
plt.show()
Legend
import matplotlib.pyplot as plt
import numpy as np
# One value per wedge, and one label per wedge in the same order.
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

plt.pie(y, labels = mylabels)

# legend() adds a box listing each wedge's label.
plt.legend()
plt.show()
Legend With Header
import matplotlib.pyplot as plt
import numpy as np
# One value per wedge, and one label per wedge in the same order.
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

plt.pie(y, labels = mylabels)

# The title argument puts a header line at the top of the legend box.
plt.legend(title = "Four Fruits:")
plt.show()
Line
import numpy as np
import matplotlib.pyplot as plt
# Ten paired observations: x is plotted on the horizontal axis,
# y on the vertical axis.
x = np.array([80, 85, 90, 95, 100, 105, 110, 115, 120, 125])
y = np.array([240, 250, 260, 270, 280, 290, 300, 310, 320, 330])

# Title and axis labels.
plt.title("Sports Watch Data")
plt.xlabel("Average Pulse")
plt.ylabel("Calorie Burnage")

plt.plot(x, y)

# axis = 'x' draws only the grid lines that belong to the x-axis ticks
# (the vertical lines).
plt.grid(axis = 'x')
plt.show()
Linestyle
import matplotlib.pyplot as plt
import numpy as np
# Only y-values are given, so the points are plotted against their
# index positions 0, 1, 2, 3.
ypoints = np.array([3, 8, 1, 10])

# Dotted line.
plt.plot(ypoints, linestyle = 'dotted')
plt.show()

# Dashed line.
plt.plot(ypoints, linestyle = 'dashed')
plt.show()

# Shorter syntax: ls is an alias for linestyle and ':' means 'dotted'.
plt.plot(ypoints, ls = ':')
# Each variant needs its own show(); otherwise it is plotted but never displayed.
plt.show()
Plotting Without Line
import matplotlib.pyplot as plt
import numpy as np
# Two points: (1, 3) and (8, 10).
x_values = [1, 8]
y_values = [3, 10]
xpoints = np.array(x_values)
ypoints = np.array(y_values)

# The format string 'o' draws a round marker at each point and no
# connecting line.
plt.plot(xpoints, ypoints, 'o')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Four points: (1, 3), (2, 8), (6, 1) and (8, 10).
x_values = [1, 2, 6, 8]
y_values = [3, 8, 1, 10]
xpoints = np.array(x_values)
ypoints = np.array(y_values)

# With no format string, plot() connects the points with a line
# in the order given.
plt.plot(xpoints, ypoints)
plt.show()
• A heat map is a two-dimensional representation of data
in which various values are represented by colors. A
simple heat map provides an immediate visual
summary of information across two axes, allowing users
to quickly grasp the most important or relevant data
points.
• A heatmap is a graphical representation of data where
values are depicted using colors. The data is typically
arranged in a grid or matrix format, with each cell
assigned a color based on its value.
Basic Heatmap
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
# Build a 10x10 matrix of random integers. randint() excludes the upper
# bound, so the values range from 1 to 99.
data = np.random.randint(1, 100, size=(10, 10))

print("The data to be plotted:\n")
print(data)

# Draw the matrix as a heatmap: one colored cell per matrix entry.
hm = sn.heatmap(data)

# Display the plotted heatmap.
plt.show()
Anchoring the colormap
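If we set the vmin value to 30 and the vmax value to 70, the color scale spans only the range 30 to 70: every cell is still drawn,
but values at or below 30 all get the lowest color and values at or above 70 all get the highest color. This is called anchoring the colormap.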
# importing the modules
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
# generating 2-D 10x10 matrix of random numbers
# from 1 to 99 (the upper bound of randint is exclusive)
data = np.random.randint(low=1,
                         high=100,
                         size=(10, 10))

# setting the parameter values: the color scale is anchored to the
# range [vmin, vmax] instead of the data's own minimum and maximum
vmin = 30
vmax = 70

# plotting the heatmap
hm = sn.heatmap(data=data,
                vmin=vmin,
                vmax=vmax)

# displaying the plotted heatmap
plt.show()
Choosing the colormap
We’ll be using the tab20 colormap.
# importing the modules
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
# Build a 10x10 matrix of random integers. randint() excludes the upper
# bound, so the values range from 1 to 99.
data = np.random.randint(low=1, high=100, size=(10, 10))

# Name of the colormap used to translate values into colors.
cmap = "tab20"

# Draw the heatmap with the chosen colormap.
hm = sn.heatmap(data=data, cmap=cmap)

# Display the plotted heatmap.
plt.show()
Displaying the cell values
# importing the modules (so this example runs on its own, like the others)
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

# generating 2-D 10x10 matrix of random numbers
# from 1 to 99 (the upper bound of randint is exclusive)
data = np.random.randint(low=1,
                         high=100,
                         size=(10, 10))

# setting the parameter values: annot=True writes each cell's value
# inside the cell
annot = True

# plotting the heatmap
hm = sn.heatmap(data=data,
                annot=annot)

# displaying the plotted heatmap
plt.show()
Crime rate in city
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Generate synthetic crime rate data (10x10 city grid).
# Seeding makes the generated data identical on every run.
np.random.seed(42)
city_size = (10, 10)  # Grid representing city blocks
# Poisson distribution for crime occurrences (integer counts, mean 5)
crime_data = np.random.poisson(lam=5, size=city_size)

# Create the heatmap. annot=True prints each count in its cell, and
# fmt="d" formats those counts as integers.
plt.figure(figsize=(8, 6))
sns.heatmap(crime_data, cmap="Reds", annot=True, fmt="d", linewidths=0.5,
            cbar=True)

# Labels and title
plt.title("Crime Rate Heatmap of a City")
plt.xlabel("City Blocks (X-axis)")
plt.ylabel("City Blocks (Y-axis)")

# Show the plot
plt.show()
Temperature variation
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Generate synthetic temperature data (10x10 grid representing a region).
# Seeding makes the generated data identical on every run.
np.random.seed(42)
region_size = (10, 10)  # Grid representing different parts of the region
# Temperatures in °C, drawn uniformly from the range 15 to 40
temperature_data = np.random.uniform(low=15, high=40, size=region_size)

# Create the heatmap. annot=True prints each value in its cell, and
# fmt=".1f" shows one decimal place.
plt.figure(figsize=(8, 6))
sns.heatmap(temperature_data, cmap="coolwarm", annot=True, fmt=".1f",
            linewidths=0.5, cbar=True)

# Labels and title
plt.title("Temperature Variation Heatmap Across a Region")
plt.xlabel("Region Grid (X-axis)")
plt.ylabel("Region Grid (Y-axis)")

# Show the plot
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Read the salary data set. file_path must point at the CSV file on disk.
file_path = "Salaries.csv"
df = pd.read_csv(file_path)

# Pick out the two columns that are compared against each other.
years_since_phd = df["yrs.since.phd"]
salary = df["salary"]

# Scatter plot of years since PhD (horizontal) against salary (vertical).
# alpha=0.5 keeps overlapping points distinguishable.
plt.figure(figsize=(8, 6))
plt.scatter(years_since_phd, salary, alpha=0.5, color='b')

# Axis labels, title and background grid.
plt.xlabel("Years Since PhD")
plt.ylabel("Salary")
plt.title("Scatter Plot of Salary vs. Years Since PhD")
plt.grid(True)

plt.show()