3/14/25, 4:36 PM L-3 (Data Frame Part 2).ipynb - Colab
Python Data Frames Part 2
# import libraries
import numpy as np
import pandas as pd
# Build a dictionary that maps each column name to a pandas Series.
d1 = dict(
    Name=pd.Series([
        'Tina', 'Jatin', 'Ritu', 'Vinish', 'Sita', 'Ritu',
        'Sita', 'Ritu', 'Darvid', 'Gurpreet', 'Beena', 'Adesh',
    ]),
    Age=pd.Series([25, 26, 25, 23, 31, 29, 30, 34, 40, 30, 51, 46]),
    Rating=pd.Series([
        4.23, 3.24, 3.98, 2.56, 3.20, 4.6,
        3.8, 3.78, 2.98, 4.80, 4.10, 3.65,
    ]),
)

# Turn the dictionary of Series into a DataFrame and show it.
df = pd.DataFrame(data=d1)
print(df)

# Summarise only the object (string) columns: count, unique, top and freq.
print(df.describe(include=['object']))
Name Age Rating
0 Tina 25 4.23
1 Jatin 26 3.24
2 Ritu 25 3.98
3 Vinish 23 2.56
4 Sita 31 3.20
5 Ritu 29 4.60
6 Sita 30 3.80
7 Ritu 34 3.78
8 Darvid 40 2.98
9 Gurpreet 30 4.80
10 Beena 51 4.10
11 Adesh 46 3.65
Name
count 12
unique 9
top Ritu
freq 3
# Summary statistics for the numeric columns only (Age and Rating):
# count, mean, std, min, quartiles and max.
print(df.describe(include=['number']))
Age Rating
count 12.000000 12.000000
mean 32.500000 3.743333
std 8.816307 0.661628
min 23.000000 2.560000
25% 25.750000 3.230000
50% 30.000000 3.790000
75% 35.500000 4.132500
max 51.000000 4.800000
# include='all' reports every column in one table; statistics that do not
# apply to a column's type are shown as NaN.
print(df.describe(include='all'))
Name Age Rating
count 12 12.000000 12.000000
unique 9 NaN NaN
top Ritu NaN NaN
freq 3 NaN NaN
mean NaN 32.500000 3.743333
std NaN 8.816307 0.661628
min NaN 23.000000 2.560000
25% NaN 25.750000 3.230000
50% NaN 30.000000 3.790000
75% NaN 35.500000 4.132500
max NaN 51.000000 4.800000
# Pandas DataFrame.pop(): deletes a COLUMN from the dataframe in place and
# returns the removed column.
#
# `marks` is not created anywhere else in this notebook, so it is built here
# from the values shown in this cell's output. Without it the cell fails with
# NameError on a fresh kernel.
marks = pd.DataFrame({
    'RollNo': [1, 2, 3, 4, 5],
    'Name': ['Arnab', 'Kritika', 'Divyam', 'Vivaan', 'Aaaroosh'],
    'Eco': [18, 23, 51, 40, 18],
    'Maths': [57, 45, 37, 60, 27],
})
print(marks)
# The returned column is not needed here; only the in-place removal matters.
marks.pop("Maths")
print("\n Changed Frame\n")
print(marks)
RollNo Name Eco Maths
0 1 Arnab 18 57
1 2 Kritika 23 45
2 3 Divyam 51 37
3 4 Vivaan 40 60
4 5 Aaaroosh 18 27
Changed Frame
https://colab.research.google.com/drive/1r50IfA2defohJdwiqmLmTdqZ_JIP0wf2#printMode=true 1/5
3/14/25, 4:36 PM L-3 (Data Frame Part 2).ipynb - Colab
RollNo Name Eco
0 1 Arnab 18
1 2 Kritika 23
2 3 Divyam 51
3 4 Vivaan 40
4 5 Aaaroosh 18
# Dataframe.drop() - Does not change the dataframe unless inplace = True
# A list of index labels is passed and the rows corresponding to those labels are dropped using drop()
# inplace=True: the rows labelled 1 and 4 are removed from marks itself.
marks.drop([1,4], inplace = True)
print(marks)
# Dataframe.drop() for dropping a column
# No inplace=True here: marks is left untouched and the result of drop() is
# discarded, so the print below still shows the Eco column.
marks.drop("Eco", axis=1)
print(marks)
RollNo Name Eco
0 1 Arnab 18
2 3 Divyam 51
3 4 Vivaan 40
# Same call with inplace=True: the Eco column is now removed from marks itself.
marks.drop("Eco", axis=1, inplace = True)
print(marks)
RollNo Name
0 1 Arnab
2 3 Divyam
3 4 Vivaan
# Extracting rows using Pandas .loc[] and .iloc[]
# Create a sample student dataset with 5 columns: age, section, city, gender
# and favourite colour. It mixes numerical and categorical variables.
# Missing favourite colours are stored as np.nan, the canonical spelling;
# NumPy 2.0 removed the capitalised aliases such as np.NaN.
data = pd.DataFrame({
    'age': [10, 22, 13, 21, 12, 11, 17],
    'section': ['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    'city': ['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    'gender': ['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    'favourite_color': ['red', np.nan, 'yellow', np.nan, 'black', 'green', 'red'],
})
print(data)
age section city gender favourite_color
0 10 A Gurgaon M red
1 22 B Delhi F NaN
2 13 C Mumbai F yellow
3 21 B Delhi M NaN
4 12 B Mumbai M black
5 11 A Delhi M green
6 17 A Mumbai F red
print(data.loc[1:3]) # .loc slices by label and includes BOTH endpoints, so rows 1, 2 and 3 are displayed
age section city gender favourite_color
1 22 B Delhi F NaN
2 13 C Mumbai F yellow
3 21 B Delhi M NaN
# Passing a list of labels to .loc selects exactly those rows.
print(data.loc[[1,4,5]]) # list of row labels
age section city gender favourite_color
1 22 B Delhi F NaN
4 12 B Mumbai M black
5 11 A Delhi M green
# select all rows with a condition in a column
# The boolean mask data.age >= 15 keeps the rows where it is True.
# NOTE(review): the heading printed below says "greater than 15" but the
# filter is >= 15 (inclusive). No row has age exactly 15, so the output is the
# same either way - confirm which wording is intended.
print("\nData with age greater than 15\n")
print(data.loc[data.age >= 15])
Data with age greater than 15
age section city gender favourite_color
1 22 B Delhi F NaN
3 21 B Delhi M NaN
6 17 A Mumbai F red
https://colab.research.google.com/drive/1r50IfA2defohJdwiqmLmTdqZ_JIP0wf2#printMode=true 2/5
3/14/25, 4:36 PM L-3 (Data Frame Part 2).ipynb - Colab
# select rows with multiple conditions
# Each comparison is wrapped in parentheses and the two boolean masks are
# combined element-wise with & (a row is kept only when both are True).
print(data.loc[(data.age >= 12) & (data.gender == 'M')])
age section city gender favourite_color
3 21 B Delhi M NaN
4 12 B Mumbai M black
# select few columns with a condition
# First argument of .loc filters the rows, second lists the columns to keep.
# The bare expression is shown as the cell result in the notebook.
data.loc[(data.age >= 12), ['city', 'gender']]
city gender
1 Delhi F
2 Mumbai F
3 Delhi M
4 Mumbai M
6 Mumbai F
# Overwrite one column on the rows that match a condition.
# The frame is printed before and after so the change is easy to spot.
print(data)
data.loc[data['age'] >= 12, ['section']] = 'M'
print(data)
age section city gender favourite_color
0 10 A Gurgaon M red
1 22 B Delhi F NaN
2 13 C Mumbai F yellow
3 21 B Delhi M NaN
4 12 B Mumbai M black
5 11 A Delhi M green
6 17 A Mumbai F red
age section city gender favourite_color
0 10 A Gurgaon M red
1 22 M Delhi F NaN
2 13 M Mumbai F yellow
3 21 M Delhi M NaN
4 12 M Mumbai M black
5 11 A Delhi M green
6 17 M Mumbai F red
# Update several columns at once on the matching rows: the values on the
# right-hand side are assigned to the listed columns in the same order.
data.loc[data['age'] >= 20, ['section', 'city']] = ['S', 'Pune']
print(data)
age section city gender favourite_color
0 10 A Gurgaon M red
1 22 S Pune F NaN
2 13 M Mumbai F yellow
3 21 S Pune M NaN
4 12 M Mumbai M black
5 11 A Delhi M green
6 17 M Mumbai F red
# Replace the default integer row labels with the letters 'a' to 'g'.
data.index = list('abcdefg')
print(data)
age section city gender favourite_color
a 10 A Gurgaon M red
b 22 S Pune F NaN
c 13 M Mumbai F yellow
d 21 S Pune M NaN
e 12 M Mumbai M black
f 11 A Delhi M green
g 17 M Mumbai F red
# .loc is label-based. The row labels are now the strings 'a' to 'g', so an
# integer slice is rejected with a TypeError (deliberate demonstration).
data.loc[0:2] # error as labels are 'a' to 'g'
https://colab.research.google.com/drive/1r50IfA2defohJdwiqmLmTdqZ_JIP0wf2#printMode=true 3/5
3/14/25, 4:36 PM L-3 (Data Frame Part 2).ipynb - Colab
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-46-b83772436b9b> in <cell line: 1>()
----> 1 data.loc[0:2] # error as labels are 'a' to 'g'
6 frames
/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py in _maybe_cast_slice_bound(self, label, side, kind)
6621 # reject them, if index does not contain label
6622 if (is_float(label) or is_integer(label)) and label not in self:
-> 6623 raise self._invalid_indexer("slice", label)
6624
6625 return label
TypeError: cannot do slice indexing on Index with these indexers [0] of type int
#iloc()
# .iloc selects by integer position, regardless of the index labels:
# positions 0 and 2 are the rows labelled 'a' and 'c'.
# select rows with indexes
data.iloc[[0,2]]
age section city gender favourite_color
a 10 A Gurgaon M red
c 13 M Mumbai F yellow
# select rows with particular indexes and particular columns
# First list = row positions, second list = column positions
# (1 -> section, 3 -> gender).
data.iloc[[0,2],[1,3]]
section gender
a A M
c M F
# select a range of rows
# The end position is excluded: 1:3 returns positions 1 and 2 (rows 'b', 'c').
data.iloc[1:3]
age section city gender favourite_color
b 22 S Pune F NaN
c 13 M Mumbai F yellow
# select a range of rows and columns
# Rows at positions 1-2 and columns at positions 2-3 (city, gender); the end
# of each slice is excluded.
data.iloc[1:3,2:4]
city gender
b Pune F
c Mumbai F
# Changing index labels and column labels with rename()
print(marks)
# Map row label n to 'R<n+1>' for n = 0..4. Labels in the mapping that are not
# present in marks (the output shows only 0, 2 and 3) have no effect.
marks.rename(index={label: f'R{label + 1}' for label in range(5)}, inplace=True)
print(marks)
RollNo Name
0 1 Arnab
2 3 Divyam
3 4 Vivaan
RollNo Name
R1 1 Arnab
R3 3 Divyam
R4 4 Vivaan
# Rename columns through an old-name -> new-name mapping. Entries for columns
# that marks does not have ('Eco', 'Maths') have no effect.
marks.rename(
    columns={
        'RollNo': 'Roll Num',
        'Name': 'Student Name',
        'Eco': 'Economics',
        'Maths': 'Mathematics',
    },
    inplace=True,
)
print(marks)
Roll Num Student Name
R1 1 Arnab
R3 3 Divyam
R4 4 Vivaan
https://colab.research.google.com/drive/1r50IfA2defohJdwiqmLmTdqZ_JIP0wf2#printMode=true 4/5
3/14/25, 4:36 PM L-3 (Data Frame Part 2).ipynb - Colab
https://colab.research.google.com/drive/1r50IfA2defohJdwiqmLmTdqZ_JIP0wf2#printMode=true 5/5