Probability and Statistics Assignment
Statistical Analysis of various
Cases
Prepared by:
Muhammad Huzaifa Hanif
3rd Semester
BSSE Group “B”
Question 1
Chips Frequency
0-2 34
3-5 39
6-8 7
=80
MEAN
$\bar{X} = \dfrac{\sum X}{n} = \dfrac{246}{80} = 3.075$
MEDIAN
Median = value of the ((n+1)/2)th term = 40.5th term (the average of the 40th and 41st ordered values)
Median of the given data is 3
Mode
Mode of the given data is 2
PIE AND BAR GRAPHS
PYTHON CODE FOR PIE AND BAR GRAPH
import matplotlib.pyplot as plt
# NOTE(review): freq_table and defective_chips are not defined in this
# snippet; they must already exist when it runs -- confirm where they are built.

# Bar chart: frequency drawn against the 'Defective Chips' labels
plt.bar(
    freq_table['Defective Chips'],
    freq_table['Frequency'],
    color='blue',
)
plt.title('Frequency Distribution of Defective Chips')
plt.xlabel('Number of Defective Chips')
plt.ylabel('Frequency')
plt.show()

# Pie chart: each slice is annotated with its share to one decimal place
plt.pie(
    freq_table['Frequency'],
    labels=freq_table['Defective Chips'],
    autopct='%1.1f%%',
)
plt.title('Defective Chips Distribution')
plt.show()

# Histogram of the raw observations, with bin edges at 0, 1, ..., 9
plt.hist(defective_chips, bins=range(0, 10), alpha=0.7, edgecolor='black')
plt.title('Histogram of Defective Chips')
plt.xlabel('Number of Defective Chips')
plt.ylabel('Frequency')
plt.show()
POLYGON GRAPH
PYTHON CODE FOR THE POLYGON GRAPH
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
# The given data (80 observations).  The list literal starts on the same line
# as the assignment so the statement parses; a bare ``data =`` followed by a
# newline is a syntax error.
data = [
    1, 3, 4, 7, 2, 7, 5, 5, 2, 2, 4, 2, 4, 3, 2, 2,
    7, 1, 3, 3, 2, 5, 0, 0, 1, 2, 5, 5, 4, 1, 3, 2,
    6, 3, 8, 2, 2, 3, 1, 6, 3, 4, 1, 2, 5, 1, 3, 3,
    3, 2, 1, 2, 5, 5, 4, 1, 4, 3, 1, 0, 2, 1, 2, 4,
    4, 5, 3, 3, 4, 0, 5, 2, 5, 6, 2, 5, 3, 3, 3, 1,
]
# Tally how many times each distinct value occurs
frequency = Counter(data)
# Distinct values in ascending order, and their counts in the same order
labels = tuple(sorted(frequency))
values = tuple(frequency[label] for label in labels)
# The x-positions are the observed values themselves (no class midpoints are
# computed here); they are only converted to an array for plotting.
midpoints = np.asarray(labels)
# Draw the frequency polygon: one marked point per value, joined by lines
plt.figure(figsize=(10, 6))
plt.plot(midpoints, values, color='blue', linestyle='-', marker='o')
plt.xticks(midpoints)  # one tick per observed value
plt.xlabel("Numbers")
plt.ylabel("Frequency")
plt.title("Frequency Polygon of Data")
plt.grid()
plt.show()
HISTOGRAM
THE PYTHON CODE FOR THE HISTOGRAM IS AS FOLLOWS
import matplotlib.pyplot as plt
# The given data (80 observations).  The list literal starts on the same line
# as the assignment so the statement parses; a bare ``data =`` followed by a
# newline is a syntax error.
data = [
    1, 3, 4, 7, 2, 7, 5, 5, 2, 2, 4, 2, 4, 3, 2, 2,
    7, 1, 3, 3, 2, 5, 0, 0, 1, 2, 5, 5, 4, 1, 3, 2,
    6, 3, 8, 2, 2, 3, 1, 6, 3, 4, 1, 2, 5, 1, 3, 3,
    3, 2, 1, 2, 5, 5, 4, 1, 4, 3, 1, 0, 2, 1, 2, 4,
    4, 5, 3, 3, 4, 0, 5, 2, 5, 6, 2, 5, 3, 3, 3, 1,
]
# Draw the histogram with bin edges at the integers 0, 1, ..., 9
plt.figure(figsize=(10, 6))
plt.hist(data, bins=range(10), alpha=0.7, edgecolor='black')
plt.xticks(range(10))  # ticks at every integer from 0 to 9
plt.xlabel("Numbers")
plt.ylabel("Frequency")
plt.title("Histogram of Data")
plt.grid(alpha=0.75, axis='y')  # horizontal grid lines only
plt.show()
Question 2
Devices Frequency
634-643 3
644-653 7
654-663 16
664-673 13
674-683 23
684-693 11
694-703 17
704-713 8
714-723 13
724-734 9
=120
MEAN
Calculation of the mean is as follows
$\bar{X} = \dfrac{\sum X}{n} = \dfrac{82413}{120} = 686.775$
MEDIAN
Median = value of the ((n+1)/2)th term
Median of the data is 683
Mode
Mode of the data is 660
STANDARD DEVIATION
Standard deviation σ = 25.560862824508
No of devices exceeding 700 MHz is: 35
Percentage = (Number of devices exceeding 700 MHz / Total number of devices) × 100
Percentage = (35 / 120) × 100 = 29.17
Percentage of the devices exceeding 700 MHz is: 29.17%
FREQUENCY DISTRIBUTION
634: 1 670: 2 698: 2
637: 1 672: 2 700: 1
642: 1 675: 3 701: 2
644: 1 676: 1 702: 2
648: 1 677: 1 703: 1
649: 2 678: 1 704: 3
652: 2 679: 2 705: 2
680: 3 706: 1
681: 4 710: 2
682: 1 715: 1
683: 4 717: 1
684: 1 718: 2
719: 1
653: 1
655: 1
656: 2
658: 1
659: 2
660: 5
661: 1
662: 1
663: 2
664: 1
665: 2
667: 1
668: 3
669: 2
PYTHON CODE FOR STEM AND LEAF
from collections import defaultdict
# The given data (120 observations).  The literal is written as one
# well-formed list: the assignment and the opening bracket share a line, and
# no number is split across a line break.
data = [
    680, 669, 719, 699, 670, 710, 722, 663, 658, 634, 720, 690,
    677, 669, 700, 718, 690, 681, 702, 696, 692, 690, 694, 660,
    649, 675, 701, 721, 683, 735, 688, 763, 672, 698, 659, 704,
    681, 679, 691, 683, 705, 746, 706, 649, 668, 672, 690, 724,
    652, 720, 660, 695, 701, 724, 668, 698, 668, 660, 680, 739,
    717, 727, 653, 637, 660, 693, 679, 682, 724, 642, 704, 695,
    704, 652, 664, 702, 661, 720, 695, 670, 656, 718, 660, 648,
    683, 723, 710, 680, 684, 705, 681, 748, 697, 703, 660, 722,
    662, 644, 683, 695, 678, 674, 656, 667, 683, 691, 680, 685,
    681, 715, 665, 676, 665, 675, 655, 659, 720, 675, 697, 663,
]
# Function to generate the stem-and-leaf plot
def stem_and_leaf_plot(data):
    """Print a stem-and-leaf display of the numbers in ``data``.

    Each number is split into a stem (the number floor-divided by 10) and a
    leaf (the remainder modulo 10).  A header is printed first, followed by
    one line per stem in ascending order with that stem's leaves in
    ascending order, separated by single spaces.

    Args:
        data: Iterable of numbers to display.

    Returns:
        None. The display is written to standard output.
    """
    stem_leaf = defaultdict(list)
    # Walking the values in ascending order appends each stem's leaves in
    # ascending order already, so no per-stem re-sort is needed afterwards.
    for num in sorted(data):
        stem, leaf = divmod(num, 10)
        stem_leaf[stem].append(leaf)

    # Display the stem-and-leaf plot
    print("Stem | Leaf")
    print("-------------")
    for stem, leaves in sorted(stem_leaf.items()):
        leaves_str = ' '.join(str(leaf) for leaf in leaves)
        print(f"{stem} | {leaves_str}")
# Print the stem-and-leaf display for the data list defined above
stem_and_leaf_plot(data)
STEM AND LEAF
PROBLEM 3
The statistical data is in the attached .csv file
PYTHON CODE FOR THE DATA SET IN EXCEL IS
import pandas as pd
import matplotlib.pyplot as plt
# Load data from CSV.  The path is a raw string so the Windows backslashes are
# taken literally; it is written as two adjacent literals because a quoted
# string cannot continue across a line break.
df = pd.read_csv(
    r'D:\UNI\LECTURES RELATED TEACHERS\SEMESTER 3\SIR NOOR BADSHAH(probability)'
    r'\ASSIGNMENT 2\Real_Estate.csv'
)
# Alternative location (disabled): r'C:\Users\HP\Documents\Real_Estate.csv'
# Plot each column: three separate figures (line, bar, histogram) per column.
# All three stanzas belong to the loop body because each one uses `column`.
for column in df.columns:
    # Line plot of the column's values in row order
    plt.figure(figsize=(10, 6))
    plt.plot(df[column])
    plt.title(f'Line Plot of {column}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()

    # Bar plot of the column's values against the row index
    plt.figure(figsize=(10, 6))
    plt.bar(df.index, df[column])
    plt.title(f'Bar Plot of {column}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()

    # Histogram of the column's values using 10 bins
    plt.figure(figsize=(10, 6))
    plt.hist(df[column], bins=10)
    plt.title(f'Histogram of {column}')
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.show()
# Rebuild the table with the first column as the index.  The frame loaded
# earlier is reused instead of reading 'Real_Estate.csv' again from a relative
# path, which only resolved when the working directory happened to contain
# the file and was inconsistent with the absolute path of the first load.
data = df.set_index(df.columns[0])
# Display data types
print("Data Types:\n", data.dtypes)
# Find non-numeric columns
non_numeric_columns = data.select_dtypes(exclude=['number']).columns
print("\nNon-Numeric Columns:\n", non_numeric_columns)
# Convert non-numeric columns to numeric if possible
for column in non_numeric_columns:
    if data[column].dtype == 'object':
        # errors='coerce' does not raise for unparseable entries; it replaces
        # them with NaN.  A column is therefore treated as convertible only
        # when at least one entry parsed; otherwise it is reported and left
        # unchanged instead of being silently replaced by an all-NaN column.
        converted = pd.to_numeric(data[column], errors='coerce')
        if converted.notna().any():
            data[column] = converted
        else:
            print(f"Cannot convert {column} to numeric.")
    elif data[column].dtype == 'datetime64[ns]':
        # Represent each timestamp by the float returned from .timestamp()
        data[column] = pd.to_datetime(data[column])
        data[column] = data[column].apply(lambda x: x.timestamp())
# Display statistics for numeric columns
print("\nNumeric Columns Statistics:")
numeric_data = data.select_dtypes(include=['int64', 'float64'])
for column in numeric_data.columns:
    print(f"\nColumn: {column}")
    # describe() with the deciles requested explicitly
    stats = data[column].describe(percentiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    print(stats.to_string())  # to_string() leaves out the Name/dtype footer
    # Compute each quartile once and reuse it for the IQR
    q1 = data[column].quantile(0.25)
    q2 = data[column].quantile(0.5)
    q3 = data[column].quantile(0.75)
    print(f"Q1 (25%): {q1}")
    print(f"Q2 (50%): {q2}")
    print(f"Q3 (75%): {q3}")
    print(f"Interquartile Range (IQR): {q3 - q1}")
    print(f"Skewness: {data[column].skew()}")
    print(f"Kurtosis: {data[column].kurtosis()}")
THE GRAPHS FOR THE DATA SET ARE