In [1]: import numpy as np
import pandas as pd
In [2]: csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# The remote file is read as header-less data, so the column labels are
# supplied here. Passing `names=` makes pandas treat the first line as data,
# which is why a separate `header=None` read is not needed -- the CSV is
# downloaded exactly once.
col_names = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width','Species']
df = pd.read_csv(csv_url, names = col_names)
In [3]: df.head(n=5)  # quick look at the first five rows to confirm the column names were applied
Out[3]: Sepal_Length Sepal_Width Petal_Length Petal_Width Species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
In [4]: column = len(df.columns)  # number of columns in the frame
In [5]: df.info()  # print the index range plus per-column non-null counts and dtypes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Sepal_Length 150 non-null float64
1 Sepal_Width 150 non-null float64
2 Petal_Length 150 non-null float64
3 Petal_Width 150 non-null float64
4 Species 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
In [6]: np.unique(df.loc[:, 'Species'])  # sorted distinct class labels
Out[6]: array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)
In [7]: import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
In [8]: fig, axes = plt.subplots(2, 2, figsize=(16, 8))
axes[0,0].set_title("Distribution of First Column")
axes[0,0].hist(df["Sepal_Length"]);
axes[0,1].set_title("Distribution of Second Column")
axes[0,1].hist(df["Sepal_Width"]);
axes[1,0].set_title("Distribution of Third Column")
axes[1,0].hist(df["Petal_Length"]);
axes[1,1].set_title("Distribution of Fourth Column")
axes[1,1].hist(df["Petal_Width"]);
In [10]: feature_cols = ["Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width"]
# One Series per numeric feature, in the order the boxes are drawn.
data_to_plot = [df[name] for name in feature_cols]
sns.set_style("whitegrid")
# Create a fresh figure/axes pair. `plt.figure(1, ...)` would hand back an
# already-open figure number 1 (ignoring the requested size) if one exists.
fig, ax = plt.subplots(figsize=(12, 8))
# Creating the boxplot
bp = ax.boxplot(data_to_plot)
# Without explicit labels the boxes are only numbered 1-4 on the x-axis.
ax.set_xticklabels(feature_cols)
ax.set_title("Box plot of Iris features");
In [ ]: