0% found this document useful (0 votes)

9 views8 pages

Solution

The document provides a comprehensive guide on data analysis and visualization using Python libraries such as Matplotlib, Pandas, and Seaborn. It includes various coding examples covering topics like plotting rainfall data, data manipulation with DataFrames, statistical analysis, and generating visualizations like heatmaps and boxplots. Additionally, it discusses employee salary data analysis and categorization of ages using bins.

Uploaded by

Aditya singh Rajput

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

9 views8 pages

Solution

Uploaded by

Aditya singh Rajput

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 8

# Solutions for Data Analysis and Visualization (UPC: 2343012002)

S.No. 1673

# SECTION A

# Q1 (a)

import matplotlib.pyplot as plt
import numpy as np

# Sample data: rainfall readings and the day on which each was recorded.
days = [1, 3, 5, 1, 9]
rainfall = [5, 2, 7, 8, 2]

# 'ro' draws red circular markers without a connecting line.
plt.plot(days, rainfall, 'ro', markersize=10)

# Axis labels first, then the chart title.
plt.xlabel("Days")
plt.ylabel("Rainfall")
plt.title("Rainfall over Days")

plt.show()

# Q1 (b)

import pandas as pd

company = pd.DataFrame({
    'Name': ['Sangeeta', 'Sarika', 'Sangeeta', 'Babita', 'Sarika'],
    'Age': [18, 30, 45, 32, 25],
})

# (i) Distinct names, in order of first appearance.
unique_names = company['Name'].unique()
print(unique_names)

# (ii) Mean age per name; rows sharing a name are averaged together.
mean_age_by_name = company.groupby('Name')['Age'].mean()
print(mean_age_by_name)

# Q1 (c)

section1 = pd.DataFrame({'RollNo': [1, 2, 3, 4], 'Name': ['Abhav', 'Vihaan', 'Chitra', 'Devansh']})
section2 = pd.DataFrame({'RollNo': [1, 5, 3, 2], 'Name': ['Roni', 'Kabeer', 'Ishani', 'Vihaan']})

# (i) Show the first section.
print(section1)

# (ii) Inner merge on Name only. Both frames also carry RollNo, so pandas
# suffixes the clashing column: RollNo_x comes from section2 (left) and
# RollNo_y from section1 (right).
merged = pd.merge(section2, section1, on='Name', how='inner')
print(merged)

# (iii) Rows that agree on both Name and RollNo (merge defaults to inner).
common = pd.merge(section1, section2, on=['Name', 'RollNo'])
print(common)

# Q1 (d)

# 2x3 array of float zeros.
a1 = np.zeros((2, 3))
# a1:
# [[0. 0. 0.]
#  [0. 0. 0.]]

a2 = [[3, 4, 5], [7, 8, 9]]

# Element-wise sum; the nested list a2 is converted to an array by np.add.
print(np.add(a1, a2))
# Output:
# [[3. 4. 5.]
#  [7. 8. 9.]]

# axis=0 stacks the rows of a2 underneath the rows of a1.
a1 = np.append(a1, a2, axis=0)
print(a1)
# Output:
# [[0. 0. 0.]
#  [0. 0. 0.]
#  [3. 4. 5.]
#  [7. 8. 9.]]

print('shape of array', a1.shape)
# Output: shape of array (4, 3)

# Q1 (e)

empSalary = np.array([4000, 5200, 6100, 7000, 4900, 8000, 3000, 9200, 6300, 4800])

# (i) Number of employees earning more than 5000: sum the boolean mask.
high_earner_count = int((empSalary > 5000).sum())
print("Employees earning above 5000:", high_earner_count)

# (ii) Incentive of 10% of each salary.
incentive = empSalary * 0.1
print("Incentives:", incentive)

# Q1 (f)

# np.nan is the supported spelling; the np.NaN alias is gone in NumPy 2.0.
data = pd.DataFrame([
    [2, 4, 6],
    [np.nan, 8, 10],
    [np.nan, 12, np.nan],
    [np.nan, np.nan, np.nan],
])
print(data)
# Output:
#      0     1     2
# 0  2.0   4.0   6.0
# 1  NaN   8.0  10.0
# 2  NaN  12.0   NaN
# 3  NaN   NaN   NaN

# thresh=2 keeps only the rows that have at least two non-NaN values.
rows_kept = data.dropna(thresh=2)
print(rows_kept)
# Output:
#      0    1     2
# 0  2.0  4.0   6.0
# 1  NaN  8.0  10.0

# Forward-fill down each column, filling at most two consecutive NaNs.
# DataFrame.ffill replaces the deprecated fillna(method="ffill").
forward_filled = data.ffill(limit=2)
print(forward_filled)
# Output:
#      0     1     2
# 0  2.0   4.0   6.0
# 1  2.0   8.0  10.0
# 2  2.0  12.0  10.0
# 3  NaN  12.0  10.0

# SECTION B

# Q2 (a)

# Two-level labels for the rows (region, number) and columns (city, colour).
row_levels = [['North', 'North', 'South', 'South'], [1, 2, 1, 2]]
column_levels = [['Delhi', 'Delhi', 'Chandigarh'], ['Green', 'Red', 'Green']]

df = pd.DataFrame(
    np.arange(12).reshape((4, 3)),
    index=row_levels,
    columns=column_levels,
)
df.index.names = ['key1', 'key2']
print(df)

# Exchange the two row levels so key2 becomes the outer level.
df1 = df.swaplevel(i='key1', j='key2')
print(df1)

# Sort the rows by the (new) outermost level.
df2 = df1.sort_index(level=0)
print(df2)

# Q2 (b)

# 2x3 grid of random integer marks; the upper bound 101 is exclusive,
# so values fall in 60..100.
markSheet = np.random.randint(low=60, high=101, size=(2, 3))
print(markSheet)

# Report the array's element type, shape and number of dimensions.
for label, value in (
    ("Datatype:", markSheet.dtype),
    ("Shape:", markSheet.shape),
    ("Dimension:", markSheet.ndim),
):
    print(label, value)

# Q2 (c)
itemRate = pd.DataFrame({'Item': ['Apples', 'Oranges'], 'Rate': [220, 90]})

# Double every rate.
itemRate['Rate'] = itemRate['Rate'] * 2
print(itemRate)

# Full row of the item whose rate is lowest.
cheapest_row = itemRate.loc[itemRate['Rate'].idxmin()]
print("Item with Min Rate:", cheapest_row)

# Q3

import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

# DataFrame

data = {
    'Name': ['Mohan', 'Sohan', 'Jeevan', 'Gita', 'Meenu', 'Gopal', 'Rajeev'],
    'Hours_studied': [2.5, 4.0, 6.0, 8.0, 10.0, 1.0, 5.0],
    'Marks_obtained': [40, 52, 64, 70, 90, 10, 60],
}

df_Student = pd.DataFrame(data)

# 1. Students with maximum marks (a list, so ties would all be reported)
max_marks = df_Student['Marks_obtained'].max()
top_students = df_Student[df_Student['Marks_obtained'] == max_marks]['Name'].tolist()
print("Students with maximum marks:", top_students)

# 2. Average hours studied
avg_hours = df_Student['Hours_studied'].mean()
print("Average hours studied:", avg_hours)

# 3. Correlation and Covariance between the two numeric columns
correlation = df_Student[['Hours_studied', 'Marks_obtained']].corr()
covariance = df_Student[['Hours_studied', 'Marks_obtained']].cov()
print("Correlation:\n", correlation)
print("Covariance:\n", covariance)
# 4. Heatmap
# Correlation matrix of the two numeric columns, drawn with each cell annotated.
study_marks_corr = df_Student[['Hours_studied', 'Marks_obtained']].corr()
sns.heatmap(study_marks_corr, annot=True, cmap='coolwarm')
plt.title('Heatmap: Hours Studied vs Marks Obtained')
plt.show()

[0 1 2 3 4 5]

ii.

[[1 2 3]

[4 6 8]]

iii.

[[2. 1. 0.66666667]

[0.5 0.33333333 0.25 ]]

iv.

1 [4 6 8] [[1 2 3]]

[0]

# Q4 (a)

# Q4 (b)

df = pd.DataFrame({
    'person': ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'C', 'D'],
    'sales': [1000, 300, 400, 500, 800, 1000, 500, 700, 50],
    'quarter': [1, 1, 1, 1, 1, 2, 2, 2, 2],
    # 'Japan' must be spelled identically on every row, or groupby('country')
    # splits it into two separate groups.
    'country': ['US', 'Japan', 'Brazil', 'UK', 'US', 'Brazil', 'Japan', 'Brazil', 'US'],
})

# Seaborn box plot of the sales column of this frame.
sns.boxplot(x='sales', data=df)

# Highest and lowest single sale recorded in Brazil.
brazil_sales = df[df['country'] == 'Brazil']['sales']
max_sales = brazil_sales.max()
min_sales = brazil_sales.min()
print("Brazil max sales:", max_sales)
print("Brazil min sales:", min_sales)

# Total sales per country.
print(df.groupby('country')['sales'].sum())

# Person(s) with the highest average sales. The lookup is done on the
# per-person averages, not on individual sale rows, so it stays correct even
# when no single sale happens to equal the best average.
avg_sales_by_person = df.groupby('person')['sales'].mean()
max_avg_sales = avg_sales_by_person.max()
top_persons = avg_sales_by_person[avg_sales_by_person == max_avg_sales].index.tolist()
print("Highest average sales:", max_avg_sales, "by", top_persons)

# Summary statistics of the sales column.
print(df['sales'].describe())

# pandas box plot on its own figure, so it is not drawn on top of the
# seaborn plot's axes.
plt.figure()
boxplot = df.boxplot(column='sales')

plt.show()

# Q5 (a)

c1 = np.arange(24)

# reshape returns a view here, so writes through c2 also change c1.
c2 = c1.reshape(2, 12)

# Zero out every column from index 3 onward in both rows.
c2[:, 3:] = 0

print(c1)
print(c2)

# Doubling builds a new array; c1 itself is left as is.
print(c1 * 2)

# Same 24 elements laid out as 3 rows of 8.
print(c2.reshape(3, 8))

# Q5 (b)

# Employee records that get written to, and read back from, an Excel workbook.
employee_records = {
    'Employee id': [101, 102, 103, 104, 105, 106],
    'Department': ['CS', 'CS', 'CS', 'English', 'English', 'English'],
    'Salary': [2000, 2002, 2040, 2045, 2030, 2006],
    'Age': [24, 23, 34, 39, 43, 34],
}
excel_data = pd.DataFrame(employee_records)

# index=False keeps the DataFrame's positional index out of the sheet.
excel_data.to_excel("data.xlsx", index=False)

# Reload the sheet, using the employee id column as the row index.
df1 = pd.read_excel("data.xlsx", index_col='Employee id')

# One figure with two side-by-side axes: scatter on the left, bars on the right.
fig, axes = plt.subplots(nrows=1, ncols=2)
scatter_ax, bar_ax = axes[0], axes[1]

df1.plot.scatter(x='Salary', y='Age', ax=scatter_ax, title='Salary vs Age')

# Split the salaries into 3 equal-width bins and plot how many fall in each.
df1['Salary_bins'] = pd.cut(df1['Salary'], 3)
bin_counts = df1['Salary_bins'].value_counts()
bin_counts.plot(kind='bar', ax=bar_ax)

plt.savefig("Employees.png")

# Q6 (a)

s1 = pd.Series([5, 0, -4, 8])
print(s1)

# rank() gives each value its 1-based position in ascending order.
print(s1.rank())

data1 = pd.DataFrame({'One': ['a', 'b'] * 2 + ['b'], 'Two': [21, 22, 21, 23, 24]})
print(data1)

# Among rows that repeat the same (One, Two) pair, keep only the last one.
data2 = data1.drop_duplicates(['One', 'Two'], keep='last')
print(data2)

df1 = pd.DataFrame({'A': [21, 32], 'B': [27, 30]})
df2 = pd.DataFrame({'A': [23, 41]})

# Update the cell through .loc in a single indexing step. The chained form
# df2['A'][1] = ... assigns into a temporary object and does not modify df2
# when pandas Copy-on-Write is active.
df2.loc[1, 'A'] = df2.loc[1, 'A'] + 10

print(df1)
print(df2)

# Element-wise comparison of df2 against the smallest value in df1['B'].
print(df2 > df1['B'].min())

# Q6 (b)

ages = np.array([20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32])

# Explicit bin edges with one label per interval between consecutive edges.
age_edges = [18, 25, 35, 60, 100]
age_labels = ['Youth', 'YoungAdult', 'MiddleAged', 'Senior']
categories = pd.cut(ages, bins=age_edges, labels=age_labels)
print(categories.value_counts())

# Quartile-based bins: edges are derived from the data's own quantiles.
quantile_bins = pd.qcut(ages, 4)
print(quantile_bins.value_counts())

# Q7

# Column data for the employee table, one list per column.
genders = ['Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female', 'Female', 'Male',
           'Male', 'Male']
roles = ['Data Analyst'] * 3 + ['Data Scientist'] * 3 + ['Manager'] * 3 + ['Data Analyst', 'Data Scientist',
                                                                           'Manager']
experience_years = [1, 1, 3, 5, 6, 1, 2, 3, 5, 6, 10, 11]
salaries = [48000, 42000, 51000, 62000, 71000, 73000, 82000, 87000, 91000, 45000, 56000, 66000]

empData = pd.DataFrame({
    'Gender': genders,
    'Role': roles,
    'Experience': experience_years,
    'Salary': salaries,
})

# (a) Full table
print(empData)

# (b) Total salary per role
salary_by_role = empData.groupby('Role')['Salary']
print(salary_by_role.sum())

# (c) Number of female employees in each role
female_rows = empData.loc[empData['Gender'].eq('Female')]
print(female_rows.groupby('Role').size())

# (d) Highest and lowest salary per gender
print(empData.groupby('Gender')['Salary'].agg(['max', 'min']))

# (e) Keep only employees earning at least the overall average salary;
# empData is rebound to the filtered table.
avg_salary = empData['Salary'].mean()
at_or_above_average = empData['Salary'].ge(avg_salary)
empData = empData.loc[at_or_above_average]
print(empData)

Threats in The Digital World Data Breaches and Cyber Attacks
No ratings yet
Threats in The Digital World Data Breaches and Cyber Attacks
9 pages
TB 216 Workshop Manual TB216
No ratings yet
TB 216 Workshop Manual TB216
296 pages
Ict Policies and Issues Implication To Teaching and Learning
100% (3)
Ict Policies and Issues Implication To Teaching and Learning
25 pages
Class12 IP Practical File
No ratings yet
Class12 IP Practical File
6 pages
Wallmart Project Report
79% (24)
Wallmart Project Report
26 pages
The Economist
No ratings yet
The Economist
27 pages
Kurikulim Socs
No ratings yet
Kurikulim Socs
16 pages
Dav 2024 Pyq
No ratings yet
Dav 2024 Pyq
7 pages
Revision Notes DataFrame XII IP
No ratings yet
Revision Notes DataFrame XII IP
8 pages
Declaration of Trust
83% (6)
Declaration of Trust
3 pages
Unit 3 Python B.SC IT
No ratings yet
Unit 3 Python B.SC IT
18 pages
CS6303 Computer Architecture 2
No ratings yet
CS6303 Computer Architecture 2
56 pages
Class12 IP Practical Solutions
No ratings yet
Class12 IP Practical Solutions
39 pages
Foundation (NCA) Sample PAGES 1
No ratings yet
Foundation (NCA) Sample PAGES 1
3 pages
The Effectiveness of Indian Mango (Magnifera Indica) As Fertilizer For Monggo Plants (Vigna Radiata)
No ratings yet
The Effectiveness of Indian Mango (Magnifera Indica) As Fertilizer For Monggo Plants (Vigna Radiata)
6 pages
Class12 IP Practical File With Outputs
No ratings yet
Class12 IP Practical File With Outputs
8 pages
PLC, Scada Training
100% (1)
PLC, Scada Training
47 pages
Davp Pyq 2023 Solution
No ratings yet
Davp Pyq 2023 Solution
15 pages
Practical File Questions With Answers
No ratings yet
Practical File Questions With Answers
7 pages
Informatics Practices Record Class 12
No ratings yet
Informatics Practices Record Class 12
60 pages
File Chinh Thuc - HSG 2020 - Vòng 2
No ratings yet
File Chinh Thuc - HSG 2020 - Vòng 2
17 pages
CSC - 310 Advanced Python Programming Continuous Assessment-2 Assignment:Ca2
No ratings yet
CSC - 310 Advanced Python Programming Continuous Assessment-2 Assignment:Ca2
33 pages
Ip Practical File
No ratings yet
Ip Practical File
20 pages
12 IP Practial Programs 2025-26
No ratings yet
12 IP Practial Programs 2025-26
10 pages
12 Ip Practical List With Solution Complete
No ratings yet
12 Ip Practical List With Solution Complete
5 pages
Pandas Syntax Revision For ML
No ratings yet
Pandas Syntax Revision For ML
10 pages
Gec Practicals
No ratings yet
Gec Practicals
31 pages
Data Analysis CheatSheet
No ratings yet
Data Analysis CheatSheet
2 pages
23bet10114 Naman Gupta Assignment-1
No ratings yet
23bet10114 Naman Gupta Assignment-1
17 pages
Numpy Boolean Indexing: Filter
No ratings yet
Numpy Boolean Indexing: Filter
39 pages
DA Lab Manual r22
No ratings yet
DA Lab Manual r22
31 pages
PythonFinal (8,9,10,11)
No ratings yet
PythonFinal (8,9,10,11)
12 pages
Xii Ip Practical List 2022-23-1
No ratings yet
Xii Ip Practical List 2022-23-1
23 pages
PythonFinal (8,9,10,11) Removed
No ratings yet
PythonFinal (8,9,10,11) Removed
8 pages
Lab Record IP
No ratings yet
Lab Record IP
13 pages
Vanshika Goyal Gec Practicals
No ratings yet
Vanshika Goyal Gec Practicals
31 pages
AI & Data Science Lab Record
No ratings yet
AI & Data Science Lab Record
28 pages
Pds
No ratings yet
Pds
3 pages
Pci Leasing and Finance
No ratings yet
Pci Leasing and Finance
6 pages
DSDBAAssignment2 SUMEET
No ratings yet
DSDBAAssignment2 SUMEET
8 pages
Data Analysis and Visualization Guide
No ratings yet
Data Analysis and Visualization Guide
18 pages
L-2 (Data Frame Part 1) .Ipynb - Colab
No ratings yet
L-2 (Data Frame Part 1) .Ipynb - Colab
5 pages
Python Pandas-DataFrames Complete - Jupyter Notebook
No ratings yet
Python Pandas-DataFrames Complete - Jupyter Notebook
34 pages
Practice Questions2
No ratings yet
Practice Questions2
2 pages
Marking Scheme Practical Paper
No ratings yet
Marking Scheme Practical Paper
5 pages
Answers Practical File
No ratings yet
Answers Practical File
19 pages
10) Merging Dataframes: # Detecting Duplicates
No ratings yet
10) Merging Dataframes: # Detecting Duplicates
7 pages
Ans Key Set A
No ratings yet
Ans Key Set A
6 pages
Practical File ANKIT RAJ CLASS 12-F
No ratings yet
Practical File ANKIT RAJ CLASS 12-F
48 pages
Cheat Sheet
No ratings yet
Cheat Sheet
15 pages
Programs of Python Pandas
No ratings yet
Programs of Python Pandas
15 pages
Exp 3
No ratings yet
Exp 3
10 pages
List of Practical Ip065 Xii Session 2025 CKC Academy
No ratings yet
List of Practical Ip065 Xii Session 2025 CKC Academy
19 pages
Print Print Print Print: Import As
No ratings yet
Print Print Print Print: Import As
6 pages
Data Analysis Exam for CS Majors
No ratings yet
Data Analysis Exam for CS Majors
12 pages
Data Analyzer
No ratings yet
Data Analyzer
10 pages
Mayank Chaudhary DEV Practicals
No ratings yet
Mayank Chaudhary DEV Practicals
14 pages
Python Slips
No ratings yet
Python Slips
9 pages
Cs Sem III Dav Upc 2343012002 Sl. No. Qp. 1673 Dec '23
No ratings yet
Cs Sem III Dav Upc 2343012002 Sl. No. Qp. 1673 Dec '23
12 pages
Week 3 GGG
No ratings yet
Week 3 GGG
17 pages
Practical File Programs
No ratings yet
Practical File Programs
8 pages
Middle East Real Estate Predictions - Dubai
No ratings yet
Middle East Real Estate Predictions - Dubai
28 pages
Ip Practical File
No ratings yet
Ip Practical File
20 pages
Grade 12 - IP Practicals (1 To 9)
No ratings yet
Grade 12 - IP Practicals (1 To 9)
12 pages
Pandas for Data Analysis Enthusiasts
No ratings yet
Pandas for Data Analysis Enthusiasts
9 pages
Set B
No ratings yet
Set B
8 pages
XII CBSE IP Lab Solutions (2024-25)
No ratings yet
XII CBSE IP Lab Solutions (2024-25)
15 pages
Creation of Series Using List, Dictionary & Ndarray
No ratings yet
Creation of Series Using List, Dictionary & Ndarray
65 pages
Practical File IP
No ratings yet
Practical File IP
27 pages
GE2 - Exercise 2.1 Juvine Ramos
No ratings yet
GE2 - Exercise 2.1 Juvine Ramos
4 pages
Safety Data Sheet Idlube XL: 1. Identification of The Substance/Preparation and The Company
No ratings yet
Safety Data Sheet Idlube XL: 1. Identification of The Substance/Preparation and The Company
4 pages
Analytical VaR VaR Mapping
No ratings yet
Analytical VaR VaR Mapping
13 pages
Ip Practical File
No ratings yet
Ip Practical File
20 pages
Romantic Escapade - South & North Goa
No ratings yet
Romantic Escapade - South & North Goa
15 pages
Dataframe in Pandas
No ratings yet
Dataframe in Pandas
23 pages
Pyhtonpractice Questions
No ratings yet
Pyhtonpractice Questions
5 pages
Design and Fabrication of Hoverbike
No ratings yet
Design and Fabrication of Hoverbike
11 pages
Classic Cars Script
No ratings yet
Classic Cars Script
4 pages
Ilovepdf Merged
No ratings yet
Ilovepdf Merged
23 pages
Ip Project
No ratings yet
Ip Project
27 pages
LTE End To End Call Flow: With Logs Using Common Troubleshooting Tools
100% (1)
LTE End To End Call Flow: With Logs Using Common Troubleshooting Tools
132 pages
Women's Day - Famous Space Women
No ratings yet
Women's Day - Famous Space Women
2 pages
Rajneeti: Council of Ministers S. No. Name Department Office
No ratings yet
Rajneeti: Council of Ministers S. No. Name Department Office
20 pages
Cyber Security 14SL
No ratings yet
Cyber Security 14SL
14 pages
Successive Differentiation Guide
No ratings yet
Successive Differentiation Guide
10 pages
Essential of Financial Accounting
No ratings yet
Essential of Financial Accounting
8 pages
The Evolving Landscape of Cyber Security and Cyberspace
No ratings yet
The Evolving Landscape of Cyber Security and Cyberspace
8 pages
Personal Details:: A Study On "Recruitment and Selection Practices On Sterling Resorts Private Limited, Kodaikanal
No ratings yet
Personal Details:: A Study On "Recruitment and Selection Practices On Sterling Resorts Private Limited, Kodaikanal
6 pages
4 Startup Roles To Hire
No ratings yet
4 Startup Roles To Hire
8 pages
TR Bro Updated Erl221
No ratings yet
TR Bro Updated Erl221
4 pages
Automatic Night Lamp With
No ratings yet
Automatic Night Lamp With
3 pages
Name: Booking ID:: Sumit Kumar +14 D/AU/220225/1153567
No ratings yet
Name: Booking ID:: Sumit Kumar +14 D/AU/220225/1153567
1 page
Behavior Aspect of Public Sector Planning and Budg
No ratings yet
Behavior Aspect of Public Sector Planning and Budg
3 pages
How To Earn Online Webinar
No ratings yet
How To Earn Online Webinar
29 pages

Solution

Uploaded by

Solution

Uploaded by

# Solutions for Data Analysis and Visualization (UPC: 2343012002)

import matplotlib.pyplot as plt

plt.plot(days, rainfall, 'ro', markersize=10)

plt.title("Rainfall over Days")

section1 = pd.DataFrame({'RollNo': [1,2,3, 4], 'Name': ['Abhav', 'Vihaan','Chitra','Devansh']})

section2 = pd.DataFrame({'RollNo': [1,5,3, 2], 'Name': ['Roni', 'Kabeer','Ishani','Vihaan']})

merged = pd.merge(section2, section1, on='Name', how='inner')

a2 = [[3, 4, 5], [7, 8, 9]]

incentive = empSalary * 0.1

0 2.0 4.0 6.0

1 NaN 8.0 10.0

2 NaN 12.0 NaN

3 NaN NaN NaN

0 2.0 4.0 6.0

1 NaN 8.0 10.0

0 2.0 4.0 6.0

1 2.0 8.0 10.0

2 2.0 12.0 10.0

3 NaN 12.0 10.0

df = pd.DataFrame(np.arange(12).reshape(4, 3), index=[['North', 'North', 'South', 'South'], [1, 2, 1,

df.index.names = ['key1', 'key2']

df1 = df.swaplevel('key1', 'key2')

markSheet = np.random.randint(60, 101, size=(2, 3))

print("Item with Min Rate:", itemRate.loc[itemRate['Rate'].idxmin()])

import seaborn as sns

import matplotlib.pyplot as plt

'Name': ['Mohan', 'Sohan', 'Jeevan', 'Gita', 'Meenu', 'Gopal', 'Rajeev'],

'Hours_studied': [2.5, 4.0, 6.0, 8.0, 10.0, 1.0, 5.0],

'Marks_obtained': [40, 52, 64, 70, 90, 10, 60]

# 1. Students with maximum marks

top_students = df_Student[df_Student['Marks_obtained'] == max_marks]['Name'].tolist()

print("Students with maximum marks:", top_students)

# 2. Average hours studied

print("Average hours studied:", avg_hours)

# 3. Correlation and Covariance

correlation = df_Student[['Hours_studied', 'Marks_obtained']].corr()

covariance = df_Student[['Hours_studied', 'Marks_obtained']].cov()

sns.heatmap(df_Student[['Hours_studied', 'Marks_obtained']].corr(), annot=True, cmap='coolwarm')

plt.title('Heatmap: Hours Studied vs Marks Obtained')

[0.5 0.33333333 0.25 ]]

'Employee id': [101, 102, 103, 104, 105, 106],

'Department': ['CS', 'CS', 'CS', 'English', 'English', 'English'],

'Salary': [2000, 2002, 2040, 2045, 2030, 2006],

'Age': [24, 23, 34, 39, 43, 34]

df1 = pd.read_excel("data.xlsx", index_col='Employee id')

fig, axes = plt.subplots(1, 2)

df1.plot.scatter(x='Salary', y='Age', ax=axes[0], title='Salary vs Age')

s1 = pd.Series([5, 0, -4, 8])

data2 = data1.drop_duplicates(['One', 'Two'], keep='last')

df1 = pd.DataFrame({'A': [21, 32], 'B': [27, 30]})

df2 = pd.DataFrame({'A': [23, 41]})

print(df2 > df1['B'].min())

quantile_bins = pd.qcut(ages, q=4)

'Experience': [1, 1, 3, 5, 6, 1, 2, 3, 5, 6, 10, 11],

empData = empData[empData['Salary'] >= avg_salary]

You might also like