# Before using any methods of pandas, you have to import the library
import pandas as pd
# Sample order data used throughout this walkthrough.
# Every column has 19 entries. The None values, the negative amount, the
# zero order number and the repeated order 17 are part of the sample data
# and are kept exactly as given.
Orders = {
    "Orderno": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 0, 21, 17],
    "CustomerName": [
        "Rk", "Mike", "Ben", "Veronica", "Maria", "Lata", "Judiath", "Blake",
        "George", "Duke", "Prish", "Ivan", None, "Nancy", "Sarvesh", "Jay",
        "Jayant", "Margaret", "Jay",
    ],
    "OrderAmount": [
        2400, 1432, 173, 258, 3402, 7143, 143422, 1734, 2143, 12,
        -23473, 17343, 593, 432, 943, 999, 1843, 0, 999,
    ],
    # The single None makes pandas store this column as float (NaN).
    "OrderQty": [24, 32, 17, 2, 4, 143, None, 172, 432, 21, 2, 17, 13, 12, 8, 12, 6, 3, 12],
    "OrderStatus": [
        "Open", "Closed", "InProgress", "Cancelled", None,
        "Open", "Closed", "InProgress", "Cancelled",
        "Open", "Closed", "InProgress", "Cancelled", None,
        "Open", "Closed", "InProgress", "Cancelled", "Closed",
    ],
}
# Build the data frame from the dict of equal-length columns and show it.
ordersdf = pd.DataFrame(Orders)
print(ordersdf)
Orderno CustomerName OrderAmount OrderQty OrderStatus
0 1 Rk 2400 24.0 Open
1 2 Mike 1432 32.0 Closed
2 3 Ben 173 17.0 InProgress
3 4 Veronica 258 2.0 Cancelled
4 5 Maria 3402 4.0 None
5 6 Lata 7143 143.0 Open
6 7 Judiath 143422 NaN Closed
7 8 Blake 1734 172.0 InProgress
8 9 George 2143 432.0 Cancelled
9 10 Duke 12 21.0 Open
10 11 Prish -23473 2.0 Closed
11 12 Ivan 17343 17.0 InProgress
12 13 None 593 13.0 Cancelled
13 14 Nancy 432 12.0 None
14 15 Sarvesh 943 8.0 Open
15 17 Jay 999 12.0 Closed
16 0 Jayant 1843 6.0 InProgress
17 21 Margaret 0 3.0 Cancelled
18 17 Jay 999 12.0 Closed
# Let's read order data from a CSV file instead of building it by hand.
# The file is decoded as Windows-1252 (cp1252).
eordersdf = pd.read_csv(filepath_or_buffer="eOrders.csv", encoding="cp1252")
print(eordersdf)
Order ID Customer Name Customer Segment Product
Category \
0 88522 Bonnie Potter Corporate Office Supplies
1 90193 Ronnie Proctor Home Office Furniture
2 90192 Marcus Dunlap Home Office Furniture
3 86838 Gwendolyn F Tyson Small Business Furniture
4 86838 Gwendolyn F Tyson Small Business Office Supplies
5 86838 Gwendolyn F Tyson Small Business Office Supplies
6 86838 Gwendolyn F Tyson Small Business Office Supplies
7 86837 Timothy Reese Small Business Office Supplies
8 86839 Timothy Reese Small Business Office Supplies
9 86836 Sarah Ramsey Small Business Office Supplies
10 86836 Sarah Ramsey Small Business Technology
11 90031 Laurie Hanna Small Business Furniture
12 90032 Jim Rodgers Small Business Technology
13 41793 Tony Wilkins Winters Small Business Furniture
14 42949 Tony Wilkins Winters Small Business Office Supplies
15 42949 Tony Wilkins Winters Small Business Technology
16 87651 Edna Thomas Corporate Office Supplies
17 87651 Edna Thomas Corporate Office Supplies
18 87652 Guy Gallagher Corporate Furniture
19 89199 Matthew Berman Corporate Office Supplies
20 89200 Matthew Berman Corporate Technology
21 89202 Matthew Berman Corporate Technology
22 89203 Matthew Berman Corporate Technology
23 89201 Ricky Hensley Corporate Office Supplies
24 89201 Ricky Hensley Corporate Office Supplies
25 91454 Theodore Moran Consumer Technology
26 88426 Lorraine Kelly Corporate Technology
Product Name Postal Code \
0 SANFORD Liquid Accent™ Tank-Style Highlighters 98221
1 Global Troy™ Executive Leather Low-Back Tilter 91776
2 DAX Two-Tone Rosewood/Black Document Frame, De... 7203
3 Howard Miller 12-3/4 Diameter Accuwave DS ™ Wa... 55372
4 Newell 321 55372
5 Newell 351 55372
6 OIC Colored Binder Clips, Assorted Sizes 55372
7 Grip Seal Envelopes 11787
8 Tyvek ® Top-Opening Peel & Seel ® Envelopes, Gray 11787
9 Staples Gold Paper Clips 13210
10 StarTAC 7797 13210
11 DAX Natural Wood-Tone Poster Frame 59601
12 Hewlett Packard 6S Scientific Calculator 59801
13 DAX Natural Wood-Tone Poster Frame 10012
14 Advantus Plastic Paper Clips 10012
15 Hewlett Packard 6S Scientific Calculator 10012
16 Xerox 194 92677
17 Newell 323 92677
18 Eldon Image Series Black Desk Accessories 90712
19 Hunt Boston® Vacuum Mount KS Pencil Sharpener 97526
20 Canon P1-DHIII Palm Printing Calculator 97526
21 StarTAC 3000 97526
22 TimeportP7382 97526
23 Storex DuraTech Recycled Plastic Frosted Binders 97030
24 Newell 338 97030
25 AT&T 2230 Dual Handset Phone With Caller ID/Ca... 98052
26 6160 98373
Order Date Profit Quantity ordered Sales
0 07-01-2015 4.56000 4.0 13.01
1 13-06-2015 4390.36650 12.0 6362.85
2 15-02-2015 -53.80960 22.0 211.15
3 12-05-2015 803.47050 16.0 1164.45
4 12-05-2015 -24.03000 7.0 22.23
5 12-05-2015 -37.03000 4.0 13.99
6 12-05-2015 -0.71000 4.0 14.26
7 8042015 -59.82000 7.0 33.47
8 28-05-2015 261.87570 10.0 379.53
9 12-02-2015 2.63000 6.0 18.80
10 12-02-2015 652.73310 NaN 945.99
11 15-05-2015 314.48130 17.0 455.77
12 21-05-2015 -114.63990 18.0 231.79
13 15-05-2015 384.38000 70.0 1876.69
14 21-05-2015 -17.49000 58.0 293.06
15 21-05-2015 -114.63990 71.0 914.29
16 28-01-2015 -28.29680 1.0 67.49
17 28-01-2015 -5.30720 1.0 2.25
18 02-05-2015 8.89400 12.0 54.78
19 22-02-2015 144.69000 13.0 424.68
20 27-03-2015 -35.87880 2.0 40.17
21 20-01-2015 209.99700 8.0 783.55
22 12-03-2015 3568.09600 22.0 3838.14
23 15-06-2015 -84.43760 13.0 58.68
24 15-06-2015 24.31200 18.0 53.10
25 10-05-2015 25.91382 6.0 647.07
26 10-03-2015 162.66600 6.0 627.04
# Use groupby when you need to work on the data per group, e.g. per OrderStatus.
grpordersdf = ordersdf.groupby(by=["OrderStatus"])
# Printing the GroupBy object only shows its repr, not the grouped rows.
print(grpordersdf)
# Average OrderAmount within each OrderStatus group.
grpordersdf["OrderAmount"].agg("mean")
<pandas.core.groupby.generic.DataFrameGroupBy object at
0x000002096B973730>
OrderStatus
Cancelled 748.50
Closed 24675.80
InProgress 5273.25
Open 2624.50
Name: OrderAmount, dtype: float64
# groupby sorts the group keys by default.
# Passing sort=False skips that step for a potential speedup.
ordersdf.groupby(by=["OrderStatus"], sort=False)[["OrderAmount"]].agg("mean")
# Sorting Data
# We can sort the data by the values in a column.
# By default the sort is ascending and a new data frame is returned.
# Create a new data frame from the original, sorted by the column CustomerName.
df_sorted = ordersdf.sort_values(by='CustomerName')
df_sorted
# We can sort the data using 2 or more columns:
# here CustomerName ascending, then OrderStatus descending.
df_sorted = ordersdf.sort_values(
    by=['CustomerName', 'OrderStatus'],
    ascending=[True, False],
)
df_sorted
# Let's look at the data frame attributes and summary methods we use most.
# Each result is passed to print() explicitly: a bare expression is only
# echoed when it is the last line of a notebook cell, and never in a script.
print("Print head of the df")
print(ordersdf.head(10))
print("Print Shape of the df")
print(ordersdf.shape)
print("Print info of the df")
# info() writes its report to stdout itself, so it is not wrapped in print().
ordersdf.info()
print("Print describe of the df")
print(ordersdf.describe())
print("Print dtypes of the df")
print(ordersdf.dtypes)
# Let's look at apply()
# This allows us to apply a custom function to every element of a
# data frame column.
def OrdVolume(oCount):
    """Classify a numeric value into an order-volume band.

    Args:
        oCount: The number to classify.

    Returns:
        "High" when oCount >= 150, "Medium" when 30 <= oCount < 150,
        and "Low" for everything else (including NaN, for which every
        comparison is False).
    """
    if oCount >= 150:
        return "High"
    # Reaching this point already means oCount < 150 (or NaN), so only
    # the lower bound needs checking.
    if oCount >= 30:
        return "Medium"
    return "Low"
# Add an OrderQuantum column by running OrdVolume on every OrderAmount value.
# NOTE(review): OrdVolume's parameter name (oCount) and its 30/150 cut-offs
# look like quantity thresholds - confirm whether OrderQty was the intended input.
ordersdf["OrderQuantum"] = ordersdf["OrderAmount"].map(OrdVolume)
print(ordersdf)
import pandas as pd
# concat - You need to have matching columns. If no match is found,
# the missing cells are filled with NaN.
df1 = pd.DataFrame({'A': [1, 2], 'M': [3, 4]})
df2 = pd.DataFrame({'X': [5, 6], 'K': [4, 8]})
# axis=0 is row-based: df2's rows are appended below df1's rows, which is a
# vertical stack. df1 and df2 share no column names, so every row has NaN
# in the other frame's columns.
vertical_concat = pd.concat([df1, df2], axis=0)
print("Vertical:")
# print() is used instead of display(): display() only exists inside
# IPython/Jupyter, and the rest of this file prints data frames with print().
print(vertical_concat)
# axis=1 is column-based: df2's columns are placed to the right of df1's
# columns, which is a horizontal, side-by-side layout aligned on the index.
horizontal_concat = pd.concat([df1, df2], axis=1)
print("Horizontal:")
print(horizontal_concat)
# Merging two pandas DataFrames on their index is necessary when
# working with datasets that share the same row identifiers but
# have different columns.
# Join()
# import pandas module
import pandas as pd
# Student frame: ids and names, labelled with a string index.
data1 = pd.DataFrame(
    data={
        "id": list(range(1, 5)),
        "name": ["manoj", "manoja", "manoji", "manij"],
    },
    index=["one", "two", "three", "four"],
)
# Marks frame: only 'one', 'two' and 'three' overlap with the student index.
data2 = pd.DataFrame(
    data={
        "s_id": [1, 2, 3, 6, 7],
        "marks": [98, 90, 78, 86, 78],
    },
    index=["one", "two", "three", "siz", "seven"],
)
# join() aligns the two frames on their index labels and print shows the result.
print(data1.join(other=data2))
import pandas as pd
# Here merge is used to join the two dataframes,
# how - indicates what type of merge,
# how = "inner" keeps only the rows whose value in the "on" column
# appears in both dataframes
# on - indicates on which column
# Left-hand frame: one row per student, keyed by 'Sr.no' (stored as strings).
data1 = pd.DataFrame(
    data={
        "Sr.no": list("12345"),
        "Name": ["Rashmi", "Arun", "John", "Kshitu", "Bresha"],
        "Roll No": list("12345"),
    }
)
# Right-hand frame: shares only 'Sr.no' values '2' and '4' with data1.
data2 = pd.DataFrame(
    data={
        "Sr.no": list("24678"),
        "Gender": list("FMMFF"),
        "Interest": ["Writing", "Cricket", "Dancing", "Chess", "Sleeping"],
    }
)
# Inner merge: keep only the 'Sr.no' values present in both frames.
data1.merge(right=data2, how="inner", on="Sr.no")
import pandas as pd
# Here merge is used to join the two dataframes,
# how - indicates what type of merge,
# on - indicates on which column
# Left-hand frame: one row per student, keyed by 'Sr.no' (stored as strings).
data1 = pd.DataFrame(
    data={
        "Sr.no": list("12345"),
        "Name": ["Rashmi", "Arun", "John", "Kshitu", "Bresha"],
        "Roll No": list("12345"),
    }
)
# Right-hand frame: shares only 'Sr.no' values '2' and '4' with data1.
data2 = pd.DataFrame(
    data={
        "Sr.no": list("24678"),
        "Gender": list("FMMFF"),
        "Interest": ["Writing", "Cricket", "Dancing", "Chess", "Sleeping"],
    }
)
# Left merge (like a SQL left join): every row of data1 is kept and the
# data2 columns are filled in where 'Sr.no' matches.
data1.merge(right=data2, how="left", on="Sr.no")
# Left-hand frame: one row per student, keyed by 'Sr.no' (stored as strings).
data1 = pd.DataFrame(
    data={
        "Sr.no": list("12345"),
        "Name": ["Rashmi", "Arun", "John", "Kshitu", "Bresha"],
        "Roll No": list("12345"),
    }
)
# Right-hand frame: shares only 'Sr.no' values '2' and '4' with data1.
data2 = pd.DataFrame(
    data={
        "Sr.no": list("24678"),
        "Gender": list("FMMFF"),
        "Interest": ["Writing", "Cricket", "Dancing", "Chess", "Sleeping"],
    }
)
# Right merge (like a SQL right join): every row of data2 is kept and the
# data1 columns are filled in where 'Sr.no' matches.
data1.merge(right=data2, how="right", on="Sr.no")
# Left-hand frame: one row per student, keyed by 'Sr.no' (stored as strings).
data1 = pd.DataFrame({'Sr.no': ['1', '2', '3', '4', '5'],
                      'Name': ['Rashmi', 'Arun', 'John',
                               'Kshitu', 'Bresha'],
                      'Roll No': ['1', '2', '3', '4', '5']})
# Right-hand frame: shares only 'Sr.no' values '2' and '4' with data1.
data2 = pd.DataFrame({'Sr.no': ['2', '4', '6', '7', '8'],
                      'Gender': ['F', 'M', 'M', 'F', 'F'],
                      'Interest': ['Writing', 'Cricket', 'Dancing',
                                   'Chess', 'Sleeping']})
# Merging the dataframes similar to a full outer join:
# pick the rows that match in both data1 and data2, plus the rows found
# only in data1 or only in data2 (their missing columns become NaN).
pd.merge(data1, data2, how='outer', on='Sr.no')