Thanks to visit codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
37 views20 pages

Stats Assignment

Uploaded by

mhuzaifahanif02
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
37 views20 pages

Stats Assignment

Uploaded by

mhuzaifahanif02
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 20

Probability and Statistics Assignment

Statistical Analysis of various


Cases

Prepared by:
Muhammad Huzaifa Hanif
3rd Semester
BSSE Group “B”
Question 1

Chips Frequency

0-2 34

3-5 39

6-8 7

=80

MEAN
$\bar{X} = \dfrac{\sum X}{n} = \dfrac{246}{80} = 3.075$

MEDIAN
Median = value of the ((n + 1)/2)th term
Median of the given data is 3

Mode
Mode of the given data is 2
PIE AND BAR GRAPHS

PYTHON CODE FOR PIE AND BAR GRAPH

import matplotlib.pyplot as plt

# Draw the defective-chip distribution three ways: bar chart, pie chart and
# histogram. Each chart is shown in its own window via plt.show().
# NOTE(review): `freq_table` and `defective_chips` are not defined in this
# snippet -- presumably they are built earlier; confirm before running.
chip_counts = freq_table['Defective Chips']
frequencies = freq_table['Frequency']

# Bar chart: one bar per "Defective Chips" entry, bar height = its frequency.
plt.bar(chip_counts, frequencies, color='blue')
plt.title('Frequency Distribution of Defective Chips')
plt.xlabel('Number of Defective Chips')
plt.ylabel('Frequency')
plt.show()

# Pie chart: each slice is labelled with its chip count and its percentage
# share (one decimal place).
plt.pie(frequencies, labels=chip_counts, autopct='%1.1f%%')
plt.title('Defective Chips Distribution')
plt.show()

# Histogram of the raw observations using unit-wide bins with edges 0..9.
plt.hist(defective_chips, bins=range(10), edgecolor='black', alpha=0.7)
plt.title('Histogram of Defective Chips')
plt.xlabel('Number of Defective Chips')
plt.ylabel('Frequency')
plt.show()

POLYGON GRAPH

PYTHON CODE FOR THE POLYGON GRAPH


import matplotlib.pyplot as plt

from collections import Counter

import numpy as np
# The given data (80 observations). The list literal must start on the same
# line as the assignment; a bare `data =` followed by a newline is a
# SyntaxError.
data = [
    1, 3, 4, 7, 2, 7, 5, 5, 2, 2, 4, 2, 4, 3, 2, 2, 7, 1, 3, 3,
    2, 5, 0, 0, 1, 2, 5, 5, 4, 1, 3, 2, 6, 3, 8, 2, 2, 3, 1, 6,
    3, 4, 1, 2, 5, 1, 3, 3, 3, 2, 1, 2, 5, 5, 4, 1, 4, 3, 1, 0,
    2, 1, 2, 4, 4, 5, 3, 3, 4, 0, 5, 2, 5, 6, 2, 5, 3, 3, 3, 1,
]

# Count the frequency of each number in the data
frequency = Counter(data)

# Separate the data into labels (distinct values, ascending) and values
# (their counts); sorting the items keeps the polygon's x-axis in order.
labels, values = zip(*sorted(frequency.items()))

# The data is ungrouped, so every distinct value is used directly as the
# x-position ("midpoint") of its point on the polygon.
midpoints = np.array(labels)

# Create a frequency polygon: counts joined by straight line segments.
plt.figure(figsize=(10, 6))
plt.plot(midpoints, values, marker='o', linestyle='-', color='blue')
plt.title("Frequency Polygon of Data")
plt.xlabel("Numbers")
plt.ylabel("Frequency")
plt.xticks(midpoints)  # Set x-ticks to be the same as midpoints
plt.grid()
plt.show()

HISTOGRAM
THE PYTHON CODE FOR THE HISTOGRAM IS AS FOLLOWS
import matplotlib.pyplot as plt

# The given data (80 observations). The list literal must start on the same
# line as the assignment; a bare `data =` followed by a newline is a
# SyntaxError.
data = [
    1, 3, 4, 7, 2, 7, 5, 5, 2, 2, 4, 2, 4, 3, 2, 2, 7, 1, 3, 3,
    2, 5, 0, 0, 1, 2, 5, 5, 4, 1, 3, 2, 6, 3, 8, 2, 2, 3, 1, 6,
    3, 4, 1, 2, 5, 1, 3, 3, 3, 2, 1, 2, 5, 5, 4, 1, 4, 3, 1, 0,
    2, 1, 2, 4, 4, 5, 3, 3, 4, 0, 5, 2, 5, 6, 2, 5, 3, 3, 3, 1,
]

# Create a histogram with unit-wide bins whose edges are 0, 1, ..., 9, so
# each integer value in the data gets its own bar.
plt.figure(figsize=(10, 6))
plt.hist(data, bins=range(0, 10), edgecolor='black', alpha=0.7)
plt.title("Histogram of Data")
plt.xlabel("Numbers")
plt.ylabel("Frequency")
plt.xticks(range(0, 10))  # Set x-ticks to show numbers from 0 to 9
plt.grid(axis='y', alpha=0.75)  # Add grid lines for the y-axis
plt.show()
Question 2

Devices Frequency

634-643 3

644-653 7

654-663 16

664-673 13

674-683 23

684-693 11

694-703 17

704-713 8

714-723 13

724-734 9

=120

MEAN
Calculation of the mean is as follows
$\bar{X} = \dfrac{\sum X}{n} = \dfrac{82413}{120} = 686.775$

MEDIAN
Median = value of the ((n + 1)/2)th term
Median of the data is 683

Mode
Mode of the data is 660

STANDARD DEVIATION
Standard deviation σ = 25.560862824508

Number of devices exceeding 700 MHz: 33

Percentage = (Number of devices exceeding 700 MHz / Total number of devices) × 100

Percentage = (33 / 128) × 100 = 25.78

Percentage of the devices exceeding 700 MHz is: 25.78%

FREQUENCY DISTRIBUTION

634: 1 670: 2 698: 2


637: 1 672: 2 700: 1
642: 1 675: 3 701: 2
644: 1 676: 1 702: 2
648: 1 677: 1 703: 1
649: 2 678: 1 704: 3
652: 2 679: 2 705: 2
680: 3 706: 1
681: 4 710: 2
682: 1 715: 1
683: 4 717: 1
684: 1 718: 2
719: 1
653: 1
655: 1
656: 2
658: 1
659: 2
660: 5
661: 1
662: 1
663: 2
664: 1
665: 2
667: 1
668: 3
669: 2

PYTHON CODE FOR STEM AND LEAF

from collections import defaultdict

# The given data (120 observations). The literal is re-joined from a
# hard-wrapped listing in which the wrap split the assignment from the list
# and cut through individual numbers (e.g. "6" / "52" -> 652, "70" / "4" -> 704).
data = [
    680, 669, 719, 699, 670, 710, 722, 663, 658, 634, 720, 690,
    677, 669, 700, 718, 690, 681, 702, 696, 692, 690, 694, 660,
    649, 675, 701, 721, 683, 735, 688, 763, 672, 698, 659, 704,
    681, 679, 691, 683, 705, 746, 706, 649, 668, 672, 690, 724,
    652, 720, 660, 695, 701, 724, 668, 698, 668, 660, 680, 739,
    717, 727, 653, 637, 660, 693, 679, 682, 724, 642, 704, 695,
    704, 652, 664, 702, 661, 720, 695, 670, 656, 718, 660, 648,
    683, 723, 710, 680, 684, 705, 681, 748, 697, 703, 660, 722,
    662, 644, 683, 695, 678, 674, 656, 667, 683, 691, 680, 685,
    681, 715, 665, 676, 665, 675, 655, 659, 720, 675, 697, 663,
]

# Function to generate the stem-and-leaf plot

def stem_and_leaf_plot(data):
    """Print a stem-and-leaf plot of ``data`` to standard output.

    Each value is split by integer division by 10: the quotient (everything
    but the last digit) is the stem and the remainder (the last digit) is the
    leaf. A two-line header is printed first, followed by one line per stem
    in ascending order, with that stem's leaves in ascending order separated
    by single spaces. Stems that have no values are not printed.

    Args:
        data: Iterable of integers. An empty iterable prints only the header.

    Returns:
        None. The plot is written with ``print``.
    """
    stem_leaf = defaultdict(list)

    # Walking the values in ascending order means each stem's leaves are
    # appended already sorted, so no second sort is needed when printing.
    for num in sorted(data):
        stem, leaf = divmod(num, 10)
        stem_leaf[stem].append(leaf)

    # Display the stem-and-leaf plot
    print("Stem | Leaf")
    print("-------------")
    for stem, leaves in sorted(stem_leaf.items()):
        leaves_str = ' '.join(str(leaf) for leaf in leaves)
        print(f"{stem} | {leaves_str}")

# Create the stem-and-leaf plot: prints the table for `data` to standard output.

stem_and_leaf_plot(data)

STEM AND LEAF

PROBLEM 3
The statistical data is in the attached .csv file

PYTHON CODE FOR THE DATA SET IN EXCEL IS

import pandas as pd
import matplotlib.pyplot as plt

# Load data from CSV.
# The path is written as two adjacent raw-string literals, which Python
# concatenates into one; a string literal cannot itself span physical lines.
# The split is placed before a backslash because a raw string may not end
# with one.
# NOTE(review): this is an absolute, machine-specific path -- adjust it to
# wherever Real_Estate.csv lives before running.
df = pd.read_csv(
    r'D:\UNI\LECTURES RELATED TEACHERS\SEMESTER 3\SIR NOOR BADSHAH(probability)'
    r'\ASSIGNMENT 2\Real_Estate.csv'
)

# Alternative path (kept for reference): r'C:\Users\HP\Documents\Real_Estate.csv'

# Plot each column of `df` three ways. Every chart gets its own 10x6 figure
# and is displayed with plt.show() before the next one is created.
for column in df.columns:
    # Line plot: values in row order against the DataFrame index.
    plt.figure(figsize=(10, 6))
    plt.plot(df[column])
    plt.title(f'Line Plot of {column}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()

    # Bar plot: one bar per row, positioned at the row's index.
    plt.figure(figsize=(10, 6))
    plt.bar(df.index, df[column])
    plt.title(f'Bar Plot of {column}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()

    # Histogram: distribution of the column's values in 10 bins.
    plt.figure(figsize=(10, 6))
    plt.hist(df[column], bins=10)
    plt.title(f'Histogram of {column}')
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.show()
# Reload data with first column as index
data = pd.read_csv('Real_Estate.csv', index_col=0)

# Display data types
print("Data Types:\n", data.dtypes)

# Find non-numeric columns
non_numeric_columns = data.select_dtypes(exclude=['number']).columns
print("\nNon-Numeric Columns:\n", non_numeric_columns)

# Convert non-numeric columns to numeric if possible
for column in non_numeric_columns:
    if data[column].dtype == 'object':
        try:
            # errors='coerce' converts invalid values to NaN instead of
            # raising.
            # NOTE(review): a purely textual column therefore ends up all-NaN
            # and the ValueError branch below is only a safety net -- confirm
            # this is the intended handling.
            data[column] = pd.to_numeric(data[column], errors='coerce')
        except ValueError:
            print(f"Cannot convert {column} to numeric.")
    elif data[column].dtype == 'datetime64[ns]':
        # Datetime columns are replaced by the value returned from each
        # element's .timestamp() call.
        data[column] = pd.to_datetime(data[column])
        data[column] = data[column].apply(lambda x: x.timestamp())

# Display statistics for numeric columns
print("\nNumeric Columns Statistics:")

# Only int64/float64 columns are summarised.
numeric_data = data.select_dtypes(include=['int64', 'float64'])

for column in numeric_data.columns:
    print(f"\nColumn: {column}")

    # describe() output extended with every decile from 10% to 90%.
    stats = data[column].describe(
        percentiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    )
    print(stats.to_string())  # print statistics without index name

    # Compute each quartile once and reuse it for the IQR line.
    q1 = data[column].quantile(0.25)
    q2 = data[column].quantile(0.5)
    q3 = data[column].quantile(0.75)
    print(f"Q1 (25%): {q1}")
    print(f"Q2 (50%): {q2}")
    print(f"Q3 (75%): {q3}")
    print(f"Interquartile Range (IQR): {q3 - q1}")

    print(f"Skewness: {data[column].skew()}")
    print(f"Kurtosis: {data[column].kurtosis()}")

THE GRAPHS FOR THE DATA SET ARE

You might also like