NAME : SANDBHOR SHREYAS SUNIL Div : B Batch : C ROLLNO : 40
# Colab-only: opens a browser file picker and uploads the chosen file(s)
# into the runtime's working directory.
from google.colab import files
uploaded = files.upload()
Choose Files Toyota.csv
Toyota.csv(text/csv) - 60530 bytes, last modified: 3/28/2023 - 100% done
Saving Toyota.csv to Toyota.csv
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib
inline import
numpy as np
cars_data = pd.read_csv('Toyota.csv')
cars_data.head(5)
Price Age KM FuelType HP MetColor Automatic CC Doors Weight
0 13500 23 46986 Diesel 90 1 0 2000 3 1165
1 13750 23 72937 Diesel 90 1 0 2000 3 1165
2 13950 24 41711 Diesel 90 1 0 2000 3 1165
3 14950 26 48000 Diesel 90 0 0 2000 3 1165
4 13750 30 38500 Diesel 90 0 0 2000 3 1170
Next steps: Generate code with cars_data
View recommended plots
# Dimensions of the frame as (rows, columns).
cars_data.shape
(1436, 10)
# Per-column summary: non-null counts and dtypes, plus memory usage.
cars_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1436 entries, 0 to 1435
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype
 0   Price      1436 non-null   int64
 1   Age        1436 non-null   int64
 2   KM         1436 non-null   int64
 3   FuelType   1436 non-null   object
 4   HP         1436 non-null   int64
 5   MetColor   1436 non-null   int64
 6   Automatic  1436 non-null   int64
 7   CC         1436 non-null   int64
 8   Doors      1436 non-null   int64
 9   Weight     1436 non-null   int64
dtypes: int64(9), object(1)
memory usage: 112.3+ KB
# True if at least one cell anywhere in the frame is missing.
cars_data.isnull().values.any()
False
# Shape before dropping rows with missing values, for comparison.
cars_data.shape
(1436, 10)
# Drop every row that contains a missing value, modifying the frame in place,
# then re-check the shape to see how many rows were removed.
cars_data.dropna(axis=0, inplace=True)
cars_data.shape
(1436, 10)
# Scatter plot of price against age, one green point per car.
plt.scatter(cars_data['Age'], cars_data['Price'], c='green')
plt.title('Scatter plot of Price vs Age of the cars')
plt.xlabel('Age(months)')
plt.ylabel('Price(Euros)')
plt.show()
# Histogram of kilometres driven using matplotlib's default binning;
# returns (bin counts, bin edges, bar container).
plt.hist(cars_data['KM'])
(array([122., 331., 417., 301., 140., 64., 30., 17., 12.,
2.]), array([1.000000e+00, 2.430090e+04, 4.860080e+04,
7.290070e+04,
9.720060e+04, 1.215005e+05, 1.458004e+05, 1.701003e+05,
1.944002e+05, 2.187001e+05, 2.430000e+05]),
<BarContainer object of 10 artists>)
# Same histogram with five bins, red bars and white edges so bins are distinct.
plt.hist(cars_data['KM'], color='red', edgecolor='white', bins=5)
plt.title('Histogram of Kilometer')
plt.xlabel('Kilometer')
plt.ylabel('Frequency')
plt.show()
# Frequency of each fuel type, most common first.
cars_data['FuelType'].value_counts()
FuelType
Petrol 1264
Diesel 155
CNG 17
Name: count, dtype: int64
# Frequency of each door count, most common first.
cars_data['Doors'].value_counts()
Doors
5 674
3 622
4 138
2 2
Name: count, dtype: int64
# Bar chart of door-count frequencies; counts[i] is the number of cars
# with Doors[i] doors (taken from the value_counts() output).
counts = [674, 622, 138, 2]
Doors = (5, 3, 4, 2)
index = np.arange(len(Doors))  # one x position per category
plt.bar(index, counts, color=['red', 'blue', 'cyan', 'red'])
<BarContainer object of 4 artists>
# Derive the categories and their frequencies from the data rather than
# hard-coding them: the previous literals (979, 120, 12) did not match this
# dataset's value_counts() (1264, 155, 17).
fuel_counts = cars_data['FuelType'].value_counts()
counts = fuel_counts.tolist()  # counts of each category
fuelType = tuple(fuel_counts.index)  # category labels, same order as counts
index = np.arange(len(fuelType))  # one x position per category
index
array([0, 1, 2])
# index = x positions of the fuel types, counts = heights of the bars
plt.bar(index, counts, color=['red', 'blue', 'cyan'])
plt.title('Bar plot of fuel types')
plt.xlabel('Fuel Types')
plt.ylabel('Frequency')
# index = locations of the xticks, fuelType = labels of the xticks
plt.xticks(index, fuelType, rotation=90)
plt.show()
# Number of distinct door-count values in the data.
cars_data['Doors'].nunique()
4
import seaborn as sns
# Use seaborn's dark-grid theme for the plots that follow.
sns.set(style='darkgrid')
# Price vs Age scatter with a fitted regression line overlaid.
sns.regplot(x=cars_data['Age'],y=cars_data['Price'],fit_reg=True)
<Axes: xlabel='Age', ylabel='Price'>
# Same scatter without the regression line (fit_reg=False).
sns.regplot(x=cars_data['Age'],y=cars_data['Price'],fit_reg=False)
<Axes: xlabel='Age', ylabel='Price'>
# Scatter only, drawing each point with a star marker.
sns.regplot(x=cars_data['Age'],y=cars_data['Price'],marker='*',fit_reg=False)
<Axes: xlabel='Age', ylabel='Price'>
# Price vs Age with a separate colour and regression fit per fuel type.
sns.lmplot(x='Age',y='Price',data=cars_data,fit_reg=True,hue='FuelType',legend=True,palette="Set2")
<seaborn.axisgrid.FacetGrid at 0x7ace3d82f850>
sns.distplot(cars_data['Age'],kde=False)
<ipython-input-29-0f8bc2d269a0>:1: UserWarning:
`distplot` is a deprecated function and will be removed in seaborn v0.14.0.
Please adapt your code to use either `displot` (a figure-level
function with similar flexibility) or `histplot` (an axes-level
function for histograms).
For a guide to updating your code to use the new functions,
please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
sns.distplot(cars_data['Age'],kde=False)
<Axes: xlabel='Age'>
sns.distplot(cars_data['Age'])
<ipython-input-30-67ef1d320a1e>:1: UserWarning:
`distplot` is a deprecated function and will be removed in seaborn v0.14.0.
Please adapt your code to use either `displot` (a figure-level
function with similar flexibility) or `histplot` (an axes-level
function for histograms).
For a guide to updating your code to use the new functions,
please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
sns.distplot(cars_data['Age'])
<Axes: xlabel='Age', ylabel='Density'>
sns.distplot(cars_data['Age'],kde=False,bins=5)
<ipython-input-31-8d160a8673c0>:1: UserWarning:
`distplot` is a deprecated function and will be removed in seaborn v0.14.0.
Please adapt your code to use either `displot` (a figure-level
function with similar flexibility) or `histplot` (an axes-level
function for histograms).
For a guide to updating your code to use the new functions,
please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
sns.distplot(cars_data['Age'],kde=False,bins=5)
<Axes: xlabel='Age'>
# Bar chart of the number of cars per fuel type.
sns.countplot(x='FuelType',data=cars_data)
<Axes: xlabel='FuelType', ylabel='count'>
# Cars per fuel type, split into side-by-side bars by the Automatic flag.
sns.countplot(x='FuelType',data=cars_data,hue="Automatic")
# The same breakdown as a table: rows = Automatic, columns = FuelType.
pd.crosstab(index=cars_data['Automatic'],columns=cars_data['FuelType'],dropna=True)
FuelType CNG Diesel Petrol
Automatic
0 16 155 1185
1 1 0 79