ML Programs
ML Programs
ipynb - Colaboratory
import numpy as np
a=[1,2,3] #creating a list, list is mutable
print(a)
a.append(4) #lists can grow in place
print(a)
b=(3,4) #creating a tuple, tuple is immutable, i.e., it cannot be modified after creation
#The set definition was missing, so c.add() raised NameError; these initial
#values are taken from the recorded output {1, 3, 5, 7, 11}.
c={1,3,5,7} #creating a set, set holds unique elements
c.add(11)
print(c)
Output :
[1, 2, 3]
[1, 2, 3, 4]
{1, 3, 5, 7, 11}
{1: 'study', 2: 'play', 3: 'sleep'}
dict_keys([1, 2, 3])
dict_values(['study', 'play', 'sleep'])
#Creating Arrays
import numpy as np
# 1-D array from a list and 2-D array from a list of tuples.
a = np.array([1, 2, 3])
b = np.array([(1, 2, 3), (4, 5, 6)])

# Constant-filled arrays: zeros, ones, identity matrix and an arbitrary fill value.
for filled in (np.zeros(3), np.ones((3, 4)), np.eye(5), np.full((2, 3), 8)):
    print(filled)

# Random draws, printed in the same order they are generated.
uniform_vector = np.random.random(5)
print(uniform_vector)
uniform_matrix = np.random.rand(2, 3)
print(uniform_matrix)
single_int = np.random.randint(1, 10)
print(single_int)

# Evenly spaced integers 0..9.
print(np.arange(0, 10, 1))
Output :
[0. 0. 0.]
[[1. 1. 1. 1.]
[1. 1. 1. 1.]
[1. 1. 1. 1.]]
[[1. 0. 0. 0. 0.]
[0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0.]
[0. 0. 0. 1. 0.]
[0. 0. 0. 0. 1.]]
[[8 8 8]
[8 8 8]]
[0 1 2 3 4 5 6 7 8 9]
#Inspecting Properties
import numpy as np
data1=np.array([[1,2,3],[4,5,6],[7,8,9]])
print(data1)
print(np.size(data1)) #Returns total number of elements in the array
print(np.ndim(data1)) #Returns number of dimensions of the array
#Returns a tuple of integers representing the size of the array in each dimension
print(np.shape(data1))
data2=np.array([9,7,1,2])
print(data2.dtype) #Element type of the array (int64 in the recorded output)
Output :
[[1 2 3]
[4 5 6]
[7 8 9]]
9
2
(3, 3)
int64
#Copying/Sorting/Reshaping
import numpy as np
a = np.array([1, 2, 3, 4])
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Independent copy of `a` held in new memory.
s = np.copy(a)
print(s)

# 2-D -> 1-D, then 3x3 -> 9x1 column.
flat_b = b.flatten()
print(flat_b)
column_b = b.reshape((9, 1))
print(column_b)

# np.resize builds a new 2x2 array from the leading elements of b.
print(np.resize(b, (2, 2)))
Output :
[1 2 3 4]
[1 2 3 4 5 6 7 8 9]
[[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]]
[[1 2]
[3 4]]
#Adding/Removing Elements
b1=np.array([1,2,3,4,5])
print(np.append(b1,3)) #Appends values to the end of the array.
print(np.insert(b1,3,6)) #Inserts value 6 into the array before index 3.
b2=np.array([[4,-2,1],[1,-3,0],[2,0,-1]])
print(b2)
b3=np.insert(b2,1,2,axis=1) #Inserts a column of all 2's at index 1 of the array
print(b3)
#np.delete returns a new array; b2 itself is left unchanged.
print(np.delete(b2,1,axis=0)) #Deletes row at index 1 of the array
print(np.delete(b2,0,axis=1)) #Deletes column at index 0 of the array
Output :
[1 2 3 4 5 3]
[1 2 3 6 4 5]
[[ 4 -2 1]
[ 1 -3 0]
[ 2 0 -1]]
[[ 4 2 -2 1]
[ 1 2 -3 0]
[ 2 2 0 -1]]
[[ 4 -2 1]
[ 2 0 -1]]
[[-2 1]
[-3 0]
[ 0 -1]]
#Combining/Splitting
import numpy as np
a1 = np.array([[1, 2, 3], [3, 4, 5], [6, 7, 8]])
print(a1)
b1 = np.array([[5, 6, 7], [7, 8, 9], [1, 2, 3]])
print(b1)

# Join the two matrices row-wise (axis 0) and column-wise (axis 1).
pair = (a1, b1)
c1 = np.concatenate(pair, axis=0)
d1 = np.concatenate(pair, axis=1)
for joined in (c1, d1):
    print(joined)

# Split a1 into a single section, first along columns and then along rows.
for splitter in (np.hsplit, np.vsplit):
    print(splitter(a1, 1))
Output :
[[1 2 3]
[3 4 5]
[6 7 8]]
[[5 6 7]
[7 8 9]
[1 2 3]]
[[1 2 3]
[3 4 5]
[6 7 8]
[5 6 7]
[7 8 9]
[1 2 3]]
[[1 2 3 5 6 7]
[3 4 5 7 8 9]
[6 7 8 1 2 3]]
[array([[1, 2, 3],
[3, 4, 5],
[6, 7, 8]])]
[array([[1, 2, 3],
[3, 4, 5],
[6, 7, 8]])]
#Indexing/Slicing/Subsetting
import numpy as np
a=np.array([1,2,3,4,5,6,7])
a[3]=0 #Assigns the array element at index 3 the value of 0
print(a[2:5]) #Returns the elements at indices 2,3,4 (the stop index 5 is excluded)
b=np.array([[1,2,3],[4,5,6],[7,8,9]])
b[1,2]=-12 #Assigning the value -12 to element at index [1][2]
print(b)
print(b[1,:]) #Row at index 1
print(b[:,2]) #Column at index 2
print(b[0:2]) #Rows at indices 0 and 1
print(b[:,1:2]) #Column at index 1, kept as a 2D (3x1) array
print(b[:,[1,2]]) #selecting multiple columns at a time
print(b[[0,2],:]) #selecting multiple rows at a time
print(b<5) #Returns array with boolean values
print(b[b<5]) #Returns array elements smaller than 5
print(b.T) #Returns transpose of the array
Output :
[3 0 5]
[[ 1 2 3]
[ 4 5 -12]
[ 7 8 9]]
[ 4 5 -12]
[ 3 -12 9]
[[ 1 2 3]
[ 4 5 -12]]
[[2]
[5]
[8]]
[[ 2 3]
[ 5 -12]
[ 8 9]]
[[1 2 3]
[7 8 9]]
[[ True True True]
[ True False True]
[False False False]]
[ 1 2 3 4 -12]
[[ 1 4 7]
[ 2 5 8]
[ 3 -12 9]]
#Scalar Math
data1 = np.array([3, 1, 2, -4, 5])
print(data1)

# Each scalar is broadcast across every element of the array.
plus_one = data1 + 1
minus_two = data1 - 2
negated = data1 * -1
print(plus_one, minus_two, negated)
Output :
[ 3 1 2 -4 5]
[ 4 2 3 -3 6] [ 1 -1 0 -6 3] [-3 -1 -2 4 -5]
#Vector Math
a1=np.array([2.7,3.1,-4.3,-5.8])
a2=np.array([1,0,9,7])
#Element-wise arithmetic between two arrays of the same length
print((np.add(a1,a2)),(np.subtract(a1,a2)),
(np.multiply(a1,a2)))
print(np.array_equal(a1,a2)) #True only if both arrays have the same shape and elements
#Natural log of each element in the array; the negative entries of a1
#yield nan (with a RuntimeWarning), as seen in the recorded output.
print(np.log(a1))
print(np.abs(a1)) #Absolute value of each element in the array
print(np.ceil(a1)) #Rounds up to the nearest int
a3=[1.7,2.1,3.6,5.3,6.2,9.5]
print(np.floor(a3)) #Rounds down to the nearest int
print(np.round(a3)) #Rounds to the nearest integer
Output :
[3.7 3.1 4.7 1.2] [ 1.7 3.1 -13.3 -12.8] [ 2.7 0. -38.7
-40.6]
False
[0.99325177 1.13140211 nan nan]
[2.7 3.1 4.3 5.8]
[ 3. 4. -4. -5.]
[1. 2. 3. 5. 6. 9.]
[ 2. 2. 4. 5. 6. 10.]
#Statistics
a1=np.array([1,2,3,7,8]) #creates a numpy array
#Returns minimum, maximum and sum of the array.
print(np.min(a1),np.max(a1),np.sum(a1))
#Returns mean, variance and standard deviation of the array.
print(np.mean(a1),np.var(a1),np.std(a1))
a2=np.array([[1,2,3],[4,5,6],[7,8,9]])
print(np.var(a2,axis=1)) #Returns the variance of each row of the array.
#Returns the correlation coefficient matrix. a2[1:] (rows 1-2) and a2[2:] (row 2)
#are stacked as three variables, hence the 3x3 result.
print(np.corrcoef(a2[1:],a2[2:]))
Output :
1 8 21
4.2 7.760000000000001 2.785677655436824
[0.66666667 0.66666667 0.66666667]
[[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]]
2.pandas_lib.ipynb – Colaboratory
import pandas as pd
import io
#`uploaded` is presumably the dict returned by google.colab files.upload() in an
#earlier cell -- TODO confirm; it is not defined in the visible code.
df=pd.read_csv(io.BytesIO(uploaded['enjoysport.csv']))
print(df)
#The opening of this dictionary was lost; the Age, Height(in ft) and
#Qualification columns are restored from the DataFrame printed in the recorded output.
data2={"Age":[25,45,22,36,29,60],
"Height(in ft)":[5.6,6.1,4.9,5.7,5.1,5.9],
"Qualification":['B.Tech','B.Tech','M.Phil','Ph.D','B.Sc','CA'],
"Salary":[18000,90000,20000,50000,40000,100000],
"Married":[False,True,True,False,True,True]}
#Converts Dictionary into a DataFrame with specified index.
data2=pd.DataFrame(data2,index=['Ram','Krishna','Sita','Prasad','Gayatri','Shankar'])
print(data2,type(data2))
Output :
x 1
y 7
z 2
dtype: int64 <class 'pandas.core.series.Series'>
7
Age Height(in ft) Qualification Salary Married
Ram 25 5.6 B.Tech 18000 False
Krishna 45 6.1 B.Tech 90000 True
Sita 22 4.9 M.Phil 20000 True
Prasad 36 5.7 Ph.D 50000 False
Gayatri 29 5.1 B.Sc 40000 True
Shankar 60 5.9 CA 100000 True
<class 'pandas.core.frame.DataFrame'>
CodeText
Output :
<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Ram to Shankar
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 6 non-null int64
1 Height(in ft) 6 non-null float64
2 Qualification 6 non-null object
3 Salary 6 non-null int64
4 Married 6 non-null bool
dtypes: bool(1), float64(1), int64(2), object(1)
memory usage: 246.0+ bytes
Age Height(in ft) Salary
Output :
Index(['Age', 'Height(in ft)', 'Qualification', 'Salary',
'Married'], dtype='object')
Index(['Ram', 'Krishna', 'Sita', 'Prasad', 'Gayatri', 'Shankar'],
dtype='object')
[[25 5.6 'B.Tech' 18000 False]
[45 6.1 'B.Tech' 90000 True]
[22 4.9 'M.Phil' 20000 True]
[36 5.7 'Ph.D' 50000 False]
[29 5.1 'B.Sc' 40000 True]
[60 5.9 'CA' 100000 True]]
Age Height(in ft) Qualification Salary Married
Ram 25 5.6 B.Tech 18000 False
Krishna 45 6.1 B.Tech 90000 True
Sita 22 4.9 M.Phil 20000 True
Prasad 36 5.7 Ph.D 50000 False
Gayatri 29 5.1 B.Sc 40000 True
Age Height(in ft) Qualification Salary Married
Ram 25 5.6 B.Tech 18000 False
Krishna 45 6.1 B.Tech 90000 True
Output :
Ram 18000
Krishna 90000
Sita 20000
Prasad 50000
Gayatri 40000
Shankar 100000
Name: Salary, dtype: int64
Age Height(in ft) Qualification Salary Married
Krishna 45 6.1 B.Tech 90000 True
Sita 22 4.9 M.Phil 20000 True
Prasad 36 5.7 Ph.D 50000 False
Gayatri 29 5.1 B.Sc 40000 True
Age Height(in ft) Qualification Salary Married
Ram 25 5.6 B.Tech 18000 False
Krishna 45 6.1 B.Tech 90000 True
Age Height(in ft) Qualification Salary Married
Prasad 36 5.7 Ph.D 50000 False
Gayatri 29 5.1 B.Sc 40000 True
Shankar 60 5.9 CA 100000 True
Krishna B.Tech
Sita M.Phil
Name: Qualification, dtype: object
Height(in ft) Qualification
Ram 5.6 B.Tech
Krishna 6.1 B.Tech
Height(in ft) Qualification
Ram 5.6 B.Tech
Krishna 6.1 B.Tech
Ram True
Krishna False
Sita True
Prasad True
Gayatri True
Shankar False
Name: Age, dtype: bool
Age Height(in ft) Qualification Salary Married
Ram 25 5.6 B.Tech 18000 False
Sita 22 4.9 M.Phil 20000 True
Prasad 36 5.7 Ph.D 50000 False
Gayatri 29 5.1 B.Sc 40000 True
Output :
Height(in ft) Qualification Salary Married
Ram 5.6 B.Tech 18000 False
Krishna 6.1 B.Tech 90000 True
Sita 4.9 M.Phil 20000 True
Prasad 5.7 Ph.D 50000 False
Gayatri 5.1 B.Sc 40000 True
Shankar 5.9 CA 100000 True
Age Height(in ft) Qualification Salary Married
Ram 25 5.6 B.Tech 18000 False
Krishna 45 6.1 B.Tech 90000 True
Prasad 36 5.7 Ph.D 50000 False
Gayatri 29 5.1 B.Sc 40000 True
Shankar 60 5.9 CA 100000 True
Output :
Age Height(in ft) Qualification Salary Married Address
Ram 25 5.6 B.Tech 18000 False Kkd
Krishna 45 6.1 B.Tech 90000 True Rjy
Sita 22 4.9 M.Phil 20000 True Bpt
Prasad 36 5.7 Ph.D 50000 False Slo
Gayatri 29 5.1 B.Sc 40000 True Ong
Shankar 60 5.9 CA 100000 True Bza
Age Height(in ft) Qualification Salary Married Address
Ram 25 5.6 B.Tech 18000 False Kkd
Krishna 45 6.1 B.Tech 90000 True Rjy
Sita 22 4.9 M.Phil 20000 True Bpt
Prasad 36 5.7 Ph.D 50000 False Slo
Gayatri 29 5.1 B.Sc 40000 True Ong
Shankar 60 5.9 CA 100000 True Bza
6 18 5.2 MCA 10000 False vskp
Output :
Age Height(in ft) Qualification Salary Married Address
Sita 22 4.9 M.Phil 20000 True Bpt
6 18 5.2 MCA 10000 False vskp
Krishna 45 6.1 B.Tech 90000 True Rjy
Ram 25 5.6 B.Tech 18000 False Kkd
Gayatri 29 5.1 B.Sc 40000 True Ong
Shankar 60 5.9 CA 100000 True Bza
Prasad 36 5.7 Ph.D 50000 False Slo
Age Height(in ft) Qualification Salary Married Address
6 18 5.2 MCA 10000 False vskp
Ram 25 5.6 B.Tech 18000 False Kkd
Sita 22 4.9 M.Phil 20000 True Bpt
Gayatri 29 5.1 B.Sc 40000 True Ong
Prasad 36 5.7 Ph.D 50000 False Slo
Krishna 45 6.1 B.Tech 90000 True Rjy
Shankar 60 5.9 CA 100000 True Bza
Age Height(in ft) Salary Married Address
Qualification
B.Sc 1 1 1 1 1
B.Tech 2 2 2 2 2
CA 1 1 1 1 1
M.Phil 1 1 1 1 1
MCA 1 1 1 1 1
Ph.D 1 1 1 1 1
3.matplot_lib.ipynb – Colaboratory
import numpy as np
import matplotlib.pyplot as plt
def _plot_curve(xs, ys, size, heading, y_text):
    """Draw ys against xs on a new figure of the given size with title and axis labels."""
    plt.figure(figsize=size)
    plt.plot(xs, ys)
    plt.title(heading)
    plt.xlabel("x")
    return plt.ylabel(y_text)


# 100 sample points on [0, 10], shared by all four curves.
x = np.linspace(0, 10, 100)
y = x * x
_plot_curve(x, y, (4, 2), 'Square function', "$x^2$")
_plot_curve(x, np.sin(x), (5, 5), 'sin(x)', "sin(x)")
_plot_curve(x, np.tan(x), (6, 3), 'Tangent function', "tan(x)")
_plot_curve(x, np.exp(x), (3, 3), 'Exponential function', "e^x")
Output :
Output :
[7.2 9.6 5.1 1. 4.3 2.8 9.4 7.4 3.7 8.7]
[1.36 1.39 2.55 5.56 9.02 6.75 6.84 7.4 9.88 7.3 ]
Text(0, 0.5, 'y')
#Bar Plot
#Beverage names and the quantity sold of each (parallel arrays).
items=np.array(['Coke','Pepsi','Fanta','Maaza','Mirinda'])
qty=np.array([100,85,20,30,45])
plt.bar(items,qty) #One bar per beverage, bar height = quantity
plt.title('Sales')
plt.xlabel('Beverages')
plt.ylabel('Qty Sold')
Output :
Text(0, 0.5, 'Qty Sold')
#Pie Plot
#autopct is used to label each wedge with its numerical value.
plt.pie(qty,labels=items,autopct='%0.1f')
plt.title("% of Sales")
Output :
Text(0.5, 1.0, '% of Sales')
#Histogram
import numpy as np
from matplotlib import pyplot as plt
# 60 random marks in [0, 100), bucketed by the grade boundaries below.
marks = np.random.randint(0, 100, 60)
grade_intervals = [0, 30, 50, 80, 100]
plt.hist(marks, bins=grade_intervals)
plt.title('Student Grades')
plt.xlabel('Percentage')
plt.ylabel('No.of Students')
Output :
Text(0, 0.5, 'No.of Students')
#Box Plot
#180 random marks per subject, each subject with its own lower bound.
math_marks=np.random.randint(10,100,180)
phy_marks=np.random.randint(0,100,180)
chem_marks=np.random.randint(30,100,180)
marks=[math_marks,phy_marks,chem_marks]
#One box per subject, labelled in the same order as `marks`.
plt.boxplot(marks,labels=['Maths','Physics','Chemistry'])
Output :
import pandas as pd
import numpy as np
import io
# Parse the uploaded file's bytes as CSV and show the resulting table.
# `uploaded` is presumably the mapping returned by files.upload() -- TODO confirm.
raw_csv = uploaded['walkinghyp.csv']
df = pd.read_csv(io.BytesIO(raw_csv))
print(df)
def train(c,t):
    """Return the most specific hypothesis consistent with the positive examples.

    Starts from a copy of the first example labelled "Yes", then replaces with
    '?' every attribute on which a later "Yes" example disagrees. Examples with
    any other label are ignored.

    Args:
        c: sequence of examples; each example is a sequence of attribute
            values that supports ``.copy()`` (list or NumPy row).
        t: sequence of labels aligned with ``c``; "Yes" marks a positive example.

    Returns:
        The generalized hypothesis, same type as one example of ``c``.

    Raises:
        ValueError: if no example is labelled "Yes".
    """
    specific_hypothesis = None
    for example, label in zip(c, t):
        if label != "Yes":
            continue
        if specific_hypothesis is None:
            # Copy so that generalizing does not mutate the caller's data.
            specific_hypothesis = example.copy()
            continue
        for x, seen in enumerate(specific_hypothesis):
            if example[x] != seen:
                specific_hypothesis[x] = '?'
    if specific_hypothesis is None:
        raise ValueError('no positive ("Yes") example to build a hypothesis from')
    return specific_hypothesis
#`d` and `target` were not defined in the visible code. They are derived here
#from df: every column but the last as attributes, the last column as the label
#(matches the six-attribute hypothesis in the recorded output).
d=df.iloc[:,:-1].values
target=df.iloc[:,-1].values
print("The final hypothesis is:",train(d,target))
Output :
Saving walkinghyp.csv to walkinghyp.csv
Time Weather Temperature Company Humidity Wind Goes
0 Morning Sunny Warm Yes Mild Strong Yes
1 Evening Rainy Cold No Mild Normal No
2 Morning Sunny Moderate Yes Normal Normal Yes
3 Evening Sunny Cold Yes High Strong Yes
The final hypothesis is: ['?' 'Sunny' '?' 'Yes' '?' '?']
5.Candidate_Elimination.ipynb – Colaboratory
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import io
# `uploaded` is presumably the mapping returned by files.upload() -- TODO confirm.
df = pd.read_csv(io.BytesIO(uploaded['MBA Salary.csv']))

# Second-to-last column on the x axis, last column on the y axis,
# each reshaped to a single-column 2-D array.
x = df.iloc[:, -2].values.reshape(-1, 1)
y = df.iloc[:, -1].values.reshape(-1, 1)
plt.scatter(x, y)
Output :
<matplotlib.collections.PathCollection at 0x7f0a25da7490>
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import io
#`uploaded` is presumably the mapping returned by files.upload() -- TODO confirm.
data=pd.read_csv(io.BytesIO(uploaded['Iris.csv']))
print(data)
x=data.values[:,1:5] #Columns 1-4 as features (column 0 is skipped)
y=data.values[:,-1] #Last column as the class label
y=y.reshape(-1,1)
#print(y)
#print(x)
#Hold out 30% of the rows for testing
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)
#Perform training with GiniIndex
clf_gini=DecisionTreeClassifier(criterion='gini',random_state=100,max_depth=3)
clf_gini.fit(x_train,y_train)
#Perform training with Entropy
clf_entropy=DecisionTreeClassifier(criterion='entropy',random_state=100,max_depth=3)
clf_entropy.fit(x_train,y_train)
#Only the entropy-based tree is evaluated below
y_pred=clf_entropy.predict(x_test)
print("Confusion Matrix:",confusion_matrix(y_test, y_pred))
print ("Accuracy :",accuracy_score(y_test,y_pred)*100)
Output :
Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files
uploaded=files.upload()
data=pd.read_csv('Iris.csv')
data.head()
#Encoding the categorical column
data=data.replace({"Species": {"Iris-setosa":1,"Iris-versicolor":2,"Iris-virginica":3}})
#Visualize the new dataset
data.head()
#plt.figure(1)
sns.heatmap(data.corr())
plt.title('Correlation On iris Classes')
#Features are the columns between the first and the last. Column 0 is skipped:
#it appears to be the row Id (see the printed dataset), which is not a
#measurement and would leak the class because rows are ordered by species.
x = data.iloc[:,1:-1]
y = data.iloc[:, -1].values
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)
#Create the SVM model
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
#Fit the model for the data
classifier.fit(x_train, y_train)
#Make the prediction
y_pred = classifier.predict(x_test)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
from sklearn.metrics import accuracy_score
print ("Accuracy:",accuracy_score(y_test,y_pred)*100)
Output :
[[13 0 0]
[ 0 16 0]
[ 0 0 9]]
Accuracy : 100.0
10. K-MEANS CLUSTERING
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
import io
# Parse the uploaded file's bytes as CSV and show the resulting table.
# `uploaded` is presumably the mapping returned by files.upload() -- TODO confirm.
raw_csv = uploaded['Iris.csv']
dataset = pd.read_csv(io.BytesIO(raw_csv))
print(dataset)
Output :
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
\
0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
.. ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8
Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica