0% found this document useful (0 votes)

37 views20 pages

Stats Assignment

Uploaded by

mhuzaifahanif02

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

37 views20 pages

Stats Assignment

Uploaded by

mhuzaifahanif02

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 20

Probability and Statistics Assignment

Statistical Analysis of various

Cases

Prepared by:
Muhammad Huzaifa Hanif
3rd Semester
BSSE Group “B”
Question 1

Chips Frequency

0-2 34

3-5 39

6-8 7

=80

MEAN
X=∑X/n= 246 / 80 = 3.075

MEDIAN
Median = Value of the ((n+1)/2)th term
Median of the given data is 3

Mode
Mode of the given data is 2
PIE AND BAR GRAPHS

PYTHON CODE FOR PIE AND BAR GRAPH

import matplotlib.pyplot as plt

# NOTE(review): `freq_table` and `defective_chips` are not defined in this
# snippet; presumably they are built earlier in the script -- confirm.
chip_labels = freq_table['Defective Chips']
chip_freqs = freq_table['Frequency']

# Bar chart: one bar per entry of the 'Defective Chips' column.
plt.bar(chip_labels, chip_freqs, color='blue')
plt.title('Frequency Distribution of Defective Chips')
plt.xlabel('Number of Defective Chips')
plt.ylabel('Frequency')
plt.show()

# Pie chart: each slice is labelled with its percentage share (one decimal).
plt.pie(chip_freqs, labels=chip_labels, autopct='%1.1f%%')
plt.title('Defective Chips Distribution')
plt.show()

# Histogram of the raw observations, using bin edges 0, 1, ..., 9.
unit_bins = range(10)
plt.hist(defective_chips, bins=unit_bins, edgecolor='black', alpha=0.7)
plt.title('Histogram of Defective Chips')
plt.xlabel('Number of Defective Chips')
plt.ylabel('Frequency')
plt.show()

POLYGON GRAPH

PYTHON CODE FOR THE POLYGON GRAPH

import matplotlib.pyplot as plt

from collections import Counter

import numpy as np
# The given data (80 observations)
data = [
    1, 3, 4, 7, 2, 7, 5, 5, 2, 2, 4, 2, 4, 3, 2, 2, 7, 1, 3, 3,
    2, 5, 0, 0, 1, 2, 5, 5, 4, 1, 3, 2, 6, 3, 8, 2, 2, 3, 1, 6,
    3, 4, 1, 2, 5, 1, 3, 3, 3, 2, 1, 2, 5, 5, 4, 1, 4, 3, 1, 0,
    2, 1, 2, 4, 4, 5, 3, 3, 4, 0, 5, 2, 5, 6, 2, 5, 3, 3, 3, 1,
]

# Count the frequency of each number in the data
frequency = Counter(data)

# Use every integer between the smallest and largest observation as an
# x-value. Counter returns 0 for a value that never occurs, so such a value
# is drawn as a zero-frequency point instead of being skipped (which would
# make the polygon jump straight across the gap).
labels = range(min(data), max(data) + 1)
values = [frequency[label] for label in labels]

# The data are single integers rather than class intervals, so the values
# themselves serve as the x-coordinates of the polygon.
midpoints = np.array(labels)

# Create a frequency polygon
plt.figure(figsize=(10, 6))
plt.plot(midpoints, values, marker='o', linestyle='-', color='blue')
plt.title("Frequency Polygon of Data")
plt.xlabel("Numbers")
plt.ylabel("Frequency")
plt.xticks(midpoints)  # Set x-ticks to be the same as midpoints
plt.grid()
plt.show()

HISTOGRAM
THE PYTHON CODE FOR THE HISTOGRAM IS AS FOLLOWING
import matplotlib.pyplot as plt

# The given data (80 observations)
data = [
    1, 3, 4, 7, 2, 7, 5, 5, 2, 2, 4, 2, 4, 3, 2, 2, 7, 1, 3, 3,
    2, 5, 0, 0, 1, 2, 5, 5, 4, 1, 3, 2, 6, 3, 8, 2, 2, 3, 1, 6,
    3, 4, 1, 2, 5, 1, 3, 3, 3, 2, 1, 2, 5, 5, 4, 1, 4, 3, 1, 0,
    2, 1, 2, 4, 4, 5, 3, 3, 4, 0, 5, 2, 5, 6, 2, 5, 3, 3, 3, 1,
]

# Create a histogram. Bin edges 0, 1, ..., 9 give nine unit-wide bins, so
# each integer value 0-8 in the data falls into a bin of its own.
plt.figure(figsize=(10, 6))
plt.hist(data, bins=range(0, 10), edgecolor='black', alpha=0.7)
plt.title("Histogram of Data")
plt.xlabel("Numbers")
plt.ylabel("Frequency")
plt.xticks(range(0, 10))  # Set x-ticks to show numbers from 0 to 9
plt.grid(axis='y', alpha=0.75)  # Add grid lines for the y-axis
plt.show()
Question 2

Devices Frequency

634-643 3

644-653 7

654-663 16

664-673 13

674-683 23

684-693 11

694-703 17

704-713 8

714-723 13

724-734 9

=120

MEAN
Calculation of the mean is as follows
X = ∑X / n = 82413 / 120 = 686.775

MEDIAN
Median= Value of the (n+1/2)th term
Median of the data is 683

Mode
Mode of the data is 660

STANDARD DEVIATION
Standard deviation σ = 25.560862824508

No of devices exceeding 700mhz is: 33

Percentage = (Number of devices exceeding 700 MHz / Total number of devices) × 100

Percentage = (33 / 128) × 100 = 25.78

Percentage of the devices exceeding 700 Mhz is:25.78%

FREQUENCY DISTRIBUTION

634: 1 670: 2 698: 2

637: 1 672: 2 700: 1
642: 1 675: 3 701: 2
644: 1 676: 1 702: 2
648: 1 677: 1 703: 1
649: 2 678: 1 704: 3
652: 2 679: 2 705: 2
680: 3 706: 1
681: 4 710: 2
682: 1 715: 1
683: 4 717: 1
684: 1 718: 2
719: 1
653: 1
655: 1
656: 2
658: 1
659: 2
660: 5
661: 1
662: 1
663: 2
664: 1
665: 2
667: 1
668: 3
669: 2

PYTHON CODE FOR STEM AND LEAF

from collections import defaultdict

# The given data (120 observations). Written one literal per list element so
# that no number is split across a line break.
data = [
    680, 669, 719, 699, 670, 710, 722, 663, 658, 634, 720, 690,
    677, 669, 700, 718, 690, 681, 702, 696, 692, 690, 694, 660,
    649, 675, 701, 721, 683, 735, 688, 763, 672, 698, 659, 704,
    681, 679, 691, 683, 705, 746, 706, 649, 668, 672, 690, 724,
    652, 720, 660, 695, 701, 724, 668, 698, 668, 660, 680, 739,
    717, 727, 653, 637, 660, 693, 679, 682, 724, 642, 704, 695,
    704, 652, 664, 702, 661, 720, 695, 670, 656, 718, 660, 648,
    683, 723, 710, 680, 684, 705, 681, 748, 697, 703, 660, 722,
    662, 644, 683, 695, 678, 674, 656, 667, 683, 691, 680, 685,
    681, 715, 665, 676, 665, 675, 655, 659, 720, 675, 697, 663,
]

# Function to generate the stem-and-leaf plot

def stem_and_leaf_plot(data):
    """Print a stem-and-leaf plot of integer data to standard output.

    Each value is split into a stem (the value floor-divided by 10, i.e.
    everything but the last digit) and a leaf (the value modulo 10, i.e. the
    last digit). A header is printed first, then one row per stem in
    ascending order with that stem's leaves in ascending order. Stems that
    lie inside the observed range but have no values are printed with an
    empty leaf list, so gaps in the distribution remain visible.

    Args:
        data: Iterable of integers. Intended for non-negative values; negative
            values are split with floor division and modulo as-is. May be
            empty, in which case only the header is printed.

    Returns:
        None. The plot is written with ``print``.
    """
    stem_leaf = defaultdict(list)

    # Walking the values in sorted order appends each stem's leaves already
    # sorted, so the rows need no further sorting when printed.
    for num in sorted(data):
        stem, leaf = divmod(num, 10)
        stem_leaf[stem].append(leaf)

    # Display the stem-and-leaf plot
    print("Stem | Leaf")
    print("-------------")

    if not stem_leaf:
        # No data: min()/max() below would fail on an empty mapping.
        return

    for stem in range(min(stem_leaf), max(stem_leaf) + 1):
        # .get() reads a missing stem without inserting it into the defaultdict.
        leaves_str = ' '.join(str(leaf) for leaf in stem_leaf.get(stem, ()))
        print(f"{stem} | {leaves_str}")

# Create the stem-and-leaf plot: prints the table for the `data` list to stdout

stem_and_leaf_plot(data)

STEM AND LEAF

PROBLEM 3
The statistical data is in the attached .csv file

PYTHON CODE FOR THE DATA SET IN EXCEL IS

import pandas as pd
import matplotlib.pyplot as plt

# Load data from CSV. The path is a raw string so the backslashes are taken
# literally; it must stay on one line because a string literal cannot contain
# a line break.
df = pd.read_csv(
    r'D:\UNI\LECTURES RELATED TEACHERS\SEMESTER 3\SIR NOOR BADSHAH(probability)\ASSIGNMENT 2\Real_Estate.csv'
)

# Alternative location of the same file:
#r'C:\Users\HP\Documents\Real_Estate.csv'

# Plot each column of `df` three ways, each in its own 10x6 figure:
# a line plot, a bar plot and a 10-bin histogram.
for column in df.columns:
    series = df[column]

    # Line plot of the column's values against the row index
    plt.figure(figsize=(10, 6))
    plt.plot(series)
    plt.title(f'Line Plot of {column}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()

    # Bar plot: one bar per row, positioned at the row index
    plt.figure(figsize=(10, 6))
    plt.bar(df.index, series)
    plt.title(f'Bar Plot of {column}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()

    # Histogram of the column's values
    plt.figure(figsize=(10, 6))
    plt.hist(series, bins=10)
    plt.title(f'Histogram of {column}')
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.show()
# Reload data with first column as index
data = pd.read_csv('Real_Estate.csv', index_col=0)

# Display data types
print("Data Types:\n", data.dtypes)

# Find non-numeric columns
non_numeric_columns = data.select_dtypes(exclude=['number']).columns
print("\nNon-Numeric Columns:\n", non_numeric_columns)

# Convert non-numeric columns to numeric if possible
for column in non_numeric_columns:
    if data[column].dtype == 'object':
        # errors='coerce' turns unparsable entries into NaN instead of
        # raising, so failure has to be detected from the result: a column
        # that had values but converts to nothing but NaN is not numeric.
        converted = pd.to_numeric(data[column], errors='coerce')
        if converted.notna().any() or data[column].isna().all():
            data[column] = converted  # Invalid values become NaN
        else:
            # Leave the original text in place rather than wiping it out.
            print(f"Cannot convert {column} to numeric.")
    elif data[column].dtype == 'datetime64[ns]':
        # Represent each datetime by the float returned from .timestamp()
        data[column] = pd.to_datetime(data[column])
        data[column] = data[column].apply(lambda x: x.timestamp())

# Display statistics for numeric columns
print("\nNumeric Columns Statistics:")

numeric_data = data.select_dtypes(include=['int64', 'float64'])

for column in numeric_data.columns:
    print(f"\nColumn: {column}")

    # describe() summary extended with the deciles (10th ... 90th percentile)
    stats = data[column].describe(percentiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    print(stats.to_string())  # print statistics without index name

    # Compute each quartile once and reuse it for the IQR below.
    q1 = data[column].quantile(0.25)
    q2 = data[column].quantile(0.5)
    q3 = data[column].quantile(0.75)

    print(f"Q1 (25%): {q1}")
    print(f"Q2 (50%): {q2}")
    print(f"Q3 (75%): {q3}")
    print(f"Interquartile Range (IQR): {q3 - q1}")
    print(f"Skewness: {data[column].skew()}")
    print(f"Kurtosis: {data[column].kurtosis()}")

THE GRAPHS FOR THE DATA SET IS

Python Cheat Sheet: Pandas - Numpy - Sklearn Matplotlib - Seaborn BS4 - Selenium - Scrapy
100% (3)
Python Cheat Sheet: Pandas - Numpy - Sklearn Matplotlib - Seaborn BS4 - Selenium - Scrapy
9 pages
Synthetic Indices Trading Guide
100% (12)
Synthetic Indices Trading Guide
25 pages
Data Analytics Lab Manual Final1
No ratings yet
Data Analytics Lab Manual Final1
32 pages
How To Crack GATE - IES - BARC - Electronic Devices and Circuits (EDC)
No ratings yet
How To Crack GATE - IES - BARC - Electronic Devices and Circuits (EDC)
4 pages
Plotting Graph10072019
No ratings yet
Plotting Graph10072019
30 pages
Data Science Research Paper
No ratings yet
Data Science Research Paper
51 pages
Grade 10 AI Practicals DATA SCIENCE-Solution
No ratings yet
Grade 10 AI Practicals DATA SCIENCE-Solution
6 pages
Dsa Lab Record (Ai&Ds)
No ratings yet
Dsa Lab Record (Ai&Ds)
34 pages
Smart Care
No ratings yet
Smart Care
47 pages
AOPA - GPS Technology
100% (1)
AOPA - GPS Technology
16 pages
23bet10114 Naman Gupta Assignment-1
No ratings yet
23bet10114 Naman Gupta Assignment-1
17 pages
Solutions Modernstatistics
No ratings yet
Solutions Modernstatistics
140 pages
UNIT 3 Data Science LM 2023
No ratings yet
UNIT 3 Data Science LM 2023
20 pages
Data Science Experiments
No ratings yet
Data Science Experiments
31 pages
Exp 2 SDK Ok
No ratings yet
Exp 2 SDK Ok
18 pages
Dav Lab Manual Final
No ratings yet
Dav Lab Manual Final
16 pages
FDS Lab 1 Manuel .1..1new
No ratings yet
FDS Lab 1 Manuel .1..1new
38 pages
Graphs Using Matplotlib
No ratings yet
Graphs Using Matplotlib
23 pages
FDS Lab 1 Manuel .1..1new
No ratings yet
FDS Lab 1 Manuel .1..1new
34 pages
Solutions Modernstatistics
No ratings yet
Solutions Modernstatistics
144 pages
CS1010S Lecture 11 - Visualising Data
No ratings yet
CS1010S Lecture 11 - Visualising Data
68 pages
AD3411
No ratings yet
AD3411
28 pages
Batch2 FDS Printout
No ratings yet
Batch2 FDS Printout
38 pages
Ip - Report - Kuti Page
No ratings yet
Ip - Report - Kuti Page
37 pages
Data Science Practicals - Ipynb
No ratings yet
Data Science Practicals - Ipynb
54 pages
PP Manual Exp No.08
No ratings yet
PP Manual Exp No.08
9 pages
Unit V Notes
No ratings yet
Unit V Notes
11 pages
Lab 9
No ratings yet
Lab 9
2 pages
Data Science Python Cheat Sheet
No ratings yet
Data Science Python Cheat Sheet
25 pages
Fundamentals of Data Science Students
No ratings yet
Fundamentals of Data Science Students
52 pages
Python Code - Summary Statistics
No ratings yet
Python Code - Summary Statistics
6 pages
Data Visualization in Python
No ratings yet
Data Visualization in Python
11 pages
Course - Introduction To Data Science (SD211105)
No ratings yet
Course - Introduction To Data Science (SD211105)
10 pages
Reworded Assignment With Content
No ratings yet
Reworded Assignment With Content
2 pages
Preksha Ai Practical Class 10th - 070428
No ratings yet
Preksha Ai Practical Class 10th - 070428
13 pages
Ai Tools and Applications-Lab
No ratings yet
Ai Tools and Applications-Lab
33 pages
Math 189 HW-1: Data Analysis with Pandas
No ratings yet
Math 189 HW-1: Data Analysis with Pandas
11 pages
End Semester Answer Key Format-Fods
No ratings yet
End Semester Answer Key Format-Fods
8 pages
Experiment 11
No ratings yet
Experiment 11
5 pages
10 Exp Python
No ratings yet
10 Exp Python
3 pages
Exploratory Data Analysis with Graphs
No ratings yet
Exploratory Data Analysis with Graphs
41 pages
BDA File
No ratings yet
BDA File
26 pages
Fds QB
No ratings yet
Fds QB
6 pages
Pharma Code Printing Guide
No ratings yet
Pharma Code Printing Guide
12 pages
Unit 5
No ratings yet
Unit 5
10 pages
Data Sci
No ratings yet
Data Sci
10 pages
Lab Programs 1 To 5
No ratings yet
Lab Programs 1 To 5
12 pages
Data Science and Analtics Laboratory
No ratings yet
Data Science and Analtics Laboratory
21 pages
MLRecord
No ratings yet
MLRecord
24 pages
Print Print Print Print: Import As
No ratings yet
Print Print Print Print: Import As
6 pages
DXV Guidelines
No ratings yet
DXV Guidelines
3 pages
Python Data Analysis Guide
No ratings yet
Python Data Analysis Guide
15 pages
Lab 3
No ratings yet
Lab 3
14 pages
Chapter 3
No ratings yet
Chapter 3
28 pages
Answers 1
No ratings yet
Answers 1
17 pages
Week1-SPT2 Descriptive Statistics
No ratings yet
Week1-SPT2 Descriptive Statistics
8 pages
Study Material For XII Computer Science On: Data Visualization Using Pyplot
No ratings yet
Study Material For XII Computer Science On: Data Visualization Using Pyplot
22 pages
AstroPyRefCard Ferrari PDF
No ratings yet
AstroPyRefCard Ferrari PDF
7 pages
12th Practical
No ratings yet
12th Practical
21 pages
19 B9 IELTS T2 Essays 240 T2 Questions
100% (1)
19 B9 IELTS T2 Essays 240 T2 Questions
116 pages
Thesis Statement About Gadgets
100% (2)
Thesis Statement About Gadgets
7 pages
Mayank Chaudhary DEV Practicals
No ratings yet
Mayank Chaudhary DEV Practicals
14 pages
Lesson 3 Transportation Problem
No ratings yet
Lesson 3 Transportation Problem
41 pages
Wireless Printer Manual
No ratings yet
Wireless Printer Manual
16 pages
Final - Emt 11 - 12 Q2 0802 PS
No ratings yet
Final - Emt 11 - 12 Q2 0802 PS
53 pages
Worksheet-1 (Python)
No ratings yet
Worksheet-1 (Python)
9 pages
BPM Strategies for Enterprises
No ratings yet
BPM Strategies for Enterprises
10 pages
Informatics Practices Class 12 Cbse Notes Data Handling
0% (1)
Informatics Practices Class 12 Cbse Notes Data Handling
17 pages
Math Homework Sheets For 6th Graders
No ratings yet
Math Homework Sheets For 6th Graders
7 pages
Weak-Measurement Elements of Reality: Lev Vaidman
No ratings yet
Weak-Measurement Elements of Reality: Lev Vaidman
11 pages
Content From Jose Portilla's Udemy Course Learning Python For Data Analysis and Visualization Notes by Michael Brothers, Available On
No ratings yet
Content From Jose Portilla's Udemy Course Learning Python For Data Analysis and Visualization Notes by Michael Brothers, Available On
13 pages
Time Series Analysis Group 9
No ratings yet
Time Series Analysis Group 9
16 pages
Prasana Kumar.S: Educational Qualification
No ratings yet
Prasana Kumar.S: Educational Qualification
2 pages
Object Oriented Programming - ABAP Oops-Abap - 1
No ratings yet
Object Oriented Programming - ABAP Oops-Abap - 1
8 pages
ABAP Web Service Client Proxy Guide
No ratings yet
ABAP Web Service Client Proxy Guide
20 pages
Transistor Amplifier Design FINAL
No ratings yet
Transistor Amplifier Design FINAL
12 pages
Quiz CH10&11 - Time Series Analysis and Forecasting & Predictive Data Mining - Preethi Chowdary Narra
No ratings yet
Quiz CH10&11 - Time Series Analysis and Forecasting & Predictive Data Mining - Preethi Chowdary Narra
4 pages
Data Acquisition in MATLAB
No ratings yet
Data Acquisition in MATLAB
27 pages
Exam Paper 2020 Oct
100% (1)
Exam Paper 2020 Oct
7 pages
Mandarine Log
No ratings yet
Mandarine Log
37 pages
A VLSI Analog Computer - Math Co-Processor For A Digital Computer
No ratings yet
A VLSI Analog Computer - Math Co-Processor For A Digital Computer
3 pages
Huawei RTN 905e Brochure
No ratings yet
Huawei RTN 905e Brochure
2 pages
1 - Introduction To BI
No ratings yet
1 - Introduction To BI
16 pages
Bus Naming On Xilinx Schematics PDF
No ratings yet
Bus Naming On Xilinx Schematics PDF
3 pages
Log
No ratings yet
Log
4 pages
Sales Performance Report
No ratings yet
Sales Performance Report
4 pages
DLL - Math6 - Week 1
No ratings yet
DLL - Math6 - Week 1
12 pages
V6 SuperCharger For Android-Update9 RC12-BlackDog-63457 Fix - SH
No ratings yet
V6 SuperCharger For Android-Update9 RC12-BlackDog-63457 Fix - SH
218 pages
DIPS v7 Rosette Plot Manual
No ratings yet
DIPS v7 Rosette Plot Manual
20 pages

Stats Assignment

Uploaded by

Stats Assignment

Uploaded by

Probability and Statistics Assignment

Statistical Analysis of various

PYTHON CODE FOR PIE AND BAR GRAPH

import matplotlib.pyplot as plt

plt.bar(freq_table['Defective Chips'], freq_table['Frequency'], color='blue')

plt.xlabel('Number of Defective Chips')

plt.title('Frequency Distribution of Defective Chips')

plt.pie(freq_table['Frequency'], labels=freq_table['Defective Chips'], autopct='%1.1f%%')

plt.title('Defective Chips Distribution')

plt.hist(defective_chips, bins=range(10), edgecolor='black', alpha=0.7)

plt.xlabel('Number of Defective Chips')

plt.title('Histogram of Defective Chips')

PYTHON CODE FOR THE POLYGON GRAPH

from collections import Counter

# Count the frequency of each number in the data

# Separate the data into labels and values

labels, values = zip(*sorted(frequency.items()))

# Calculate midpoints for the frequency polygon

# Create a frequency polygon

plt.plot(midpoints, values, marker='o', linestyle='-', color='blue')

plt.title("Frequency Polygon of Data")

plt.xticks(midpoints) # Set x-ticks to be the same as midpoints

# The given data

plt.hist(data, bins=range(0, 10), edgecolor='black', alpha=0.7)

plt.xticks(range(0, 10)) # Set x-ticks to show numbers from 0 to 9

plt.grid(axis='y', alpha=0.75) # Add grid lines for the y-axis

No of devices exceeding 700mhz is: 33

Percentage= (Number of devices exceeding 700 MHz / Total number of devices)

Percentage= (33 / 128) × 10 0= 25.78

Percentage of the devices exceeding 700 Mhz is:25.78%

634: 1 670: 2 698: 2

PYTHON CODE FOR STEM AND LEAF

from collections import defaultdict

# The given data

# Function to generate the stem-and-leaf plot

# Separate each number into stem and leaf

for num in sorted(data):

stem = num // 10 # The stem is everything but the last digit

leaf = num % 10 # The leaf is the last digit

# Display the stem-and-leaf plot

for stem, leaves in sorted(stem_leaf.items()):

leaves_str = ' '.join(str(leaf) for leaf in sorted(leaves))

# Create the stem-and-leaf plot

STEM AND LEAF

PYTHON CODE FOR THE DATA SET IN EXCEL IS

# Load data from CSV

df = pd.read_csv(r'D:\UNI\LECTURES RELATED TEACHERS\SEMESTER 3\SIR NOOR BADSHAH(probability)\

# Plot each column

for column in df.columns:

plt.title(f'Line Plot of {column}')

plt.title(f'Bar Plot of {column}')

data = pd.read_csv('Real_Estate.csv', index_col=0)

# Display data types

print("Data Types:\n", data.dtypes)

# Find non-numeric columns

print("\nNon-Numeric Columns:\n", non_numeric_columns)

# Convert non-numeric columns to numeric if possible

for column in non_numeric_columns:

data[column] = pd.to_numeric(data[column], errors='coerce') # Convert invalid values

print(f"Cannot convert {column} to numeric.")

elif data[column].dtype == 'datetime64[ns]':

data[column] = data[column].apply(lambda x: x.timestamp())

# Display statistics for numeric columns

print("\nNumeric Columns Statistics:")

numeric_data = data.select_dtypes(include=['int64', 'float64'])

for column in numeric_data.columns:

print(stats.to_string()) # print statistics without index name

print(f"Q1 (25%): {data[column].quantile(0.25)}")

print(f"Q2 (50%): {data[column].quantile(0.5)}")

print(f"Q3 (75%): {data[column].quantile(0.75)}")

THE GRAPHS FOR THE DATA SET IS

You might also like