import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the tumor measurements from the local CSV export and preview the
# first rows (columns: 'Subject number', 'Data-1 (LG)', 'Data2 (HG)').
csv_path = r"C:\Users\TANUR\Downloads\ml_1 - ml_1.csv"
df = pd.read_csv(csv_path)
df.head()
Subject number Data-1 (LG) Data2 (HG)
0 1 2.63 8.88
1 2 1.87 8.99
2 3 2.46 3.95
3 4 1.82 4.36
4 5 2.14 7.00
#QUES 1 (A)-> Draw and compare histograms of these two groups of tumor data (Data1(LG) and Data2(HG)).
# Column selectors for the two tumor groups. These are lists of column
# names (not the data itself) so they can be passed directly to pandas
# plotting helpers such as df.hist / df.boxplot.
data1_lg = ['Data-1 (LG)']
data2_hg = ['Data2 (HG)']

# Pull the actual values, dropping missing entries so binning is well defined.
lg_values = df[data1_lg[0]].dropna()
hg_values = df[data2_hg[0]].dropna()

# Use one set of bin edges for both groups; independent binning on separate
# figures makes the two histograms impossible to compare bar-for-bar.
shared_bins = np.histogram_bin_edges(pd.concat([lg_values, hg_values]), bins=10)

# Overlay both histograms on a single axes so the labels appear in a legend.
fig, ax = plt.subplots()
# Histogram for Data-1 (LG)
ax.hist(lg_values, bins=shared_bins, color='blue', alpha=0.7, label='LG (Low Grade)')
# Histogram for Data-2 (HG)
ax.hist(hg_values, bins=shared_bins, color='red', alpha=0.7, label='HG (High Grade)')
ax.set_xlabel('Measured value')
ax.set_ylabel('Frequency')
ax.set_title('Data-1 (LG) vs Data2 (HG)')
ax.legend()
plt.show()
array([[<Axes: title={'center': 'Data2 (HG)'}>]], dtype=object)
## Low Grade (LG) tumors (Data1) tend to have a distribution concentrated at smaller sizes.
## High Grade (HG) tumors (Data2) show a broader distribution, with sizes generally larger and more spread out compared to LG tumors.
#QUES 1 (B)-> Compare box and whisker plots of these two groups of data.
# Draw both groups side by side on one axes. Calling df.boxplot twice without
# an explicit `ax` draws both boxes at the same x position on the current
# axes, so they overlap and only the last tick label survives.
fig, ax = plt.subplots()
box_artists = df.boxplot(
    column=data1_lg + data2_hg,
    ax=ax,
    patch_artist=True,  # boxes become filled patches so they can be coloured
    medianprops=dict(color='black'),
    return_type='dict',  # gives access to the individual box artists
)
# Colour each box to match its histogram: LG in blue, HG in red.
for box, shade in zip(box_artists['boxes'], ('blue', 'red')):
    box.set(facecolor=shade, edgecolor=shade)
ax.set_title('Data-1 (LG) vs Data2 (HG)')
plt.show()
<Axes: >
# The box plot for LG tumors shows a smaller range with fewer outliers, indicating lower variability in tumor sizes.
# The box plot for HG tumors, on the other hand, displays a wider range, highlighting greater variability in the sizes of HG tumors.
#Ques-2 -> A)Take input from user (list of "ANY" 10 elements)
#B)Find the size of the list
#C) Sort the given list in ascending or descending form
#D) Calculate the following :-(i) Mean (ii) Median (iii) Mode (iv) Variance (v) 90 percentile value
import statistics as stats
def _parse_number(token):
    """Convert one token to int, falling back to float for values like '2.5'.

    Raises ValueError if the token is not numeric at all.
    """
    try:
        return int(token)
    except ValueError:
        return float(token)


# A. Keep prompting until the user supplies exactly 10 numeric values, so the
# statistics computed further down never run on a malformed list.
while True:
    user_input = input("Enter 10 elements separated by spaces: ")
    try:
        user_list = [_parse_number(token) for token in user_input.split()]
    except ValueError:
        print("Please enter numeric values only!")
        continue
    # Check if the user has entered exactly 10 elements
    if len(user_list) == 10:
        break
    print("Please enter exactly 10 elements!")

# B. Find the size of the list
size_of_list = len(user_list)
print("Size of the list:", size_of_list)
Size of the list: 10
# C. Sort the list (ask user for ascending or descending)
# Normalise the answer so inputs such as "ASC" or " desc " are accepted.
sort_order = input("Enter 'asc' for ascending or 'desc' for descending sort: ").strip().lower()
if sort_order == 'asc':
    sorted_list = sorted(user_list)
    print("Sorted list in ascending order:", sorted_list)
elif sort_order == 'desc':
    sorted_list = sorted(user_list, reverse=True)
    print("Sorted list in descending order:", sorted_list)
else:
    # sorted_list is intentionally left unset when the choice is invalid.
    print("Invalid sort order input. Please enter 'asc' or 'desc'.")
Sorted list in ascending order: [2, 2, 15, 17, 18, 19, 45, 67, 97, 105]
# D. Calculate statistical metrics
# (i) Mean: arithmetic average of the entered values.
values_for_mean = np.asarray(user_list)
mean_value = values_for_mean.mean()
print("Mean:", mean_value)
Mean: 38.7
# (ii) Median: middle value (average of the two middle values for an
# even-length list).
values_for_median = np.asarray(user_list)
median_value = np.median(values_for_median)
print("Median:", median_value)
Median: 18.5
# (iii) Mode
# statistics.mode() stopped raising StatisticsError for multimodal or
# all-unique data in Python 3.8 (it returns the first value encountered), so
# a try/except around it can no longer detect "no mode". multimode() returns
# every most-frequent value, which lets us tell the cases apart.
modes = stats.multimode(user_list)
if not modes or len(modes) == len(user_list):
    # Every value is listed as a mode => each occurs exactly once.
    mode_value = None
    print("No mode found, all values appear only once.")
elif len(modes) == 1:
    mode_value = modes[0]
    print("Mode:", mode_value)
else:
    # Several values tie for the highest frequency; report all of them.
    mode_value = modes
    print("Modes:", mode_value)
Mode: 2.0
# (iv) Variance: population variance (divides by N; ddof=0 is NumPy's
# default and is spelled out here to make the choice explicit).
values_for_variance = np.asarray(user_list)
variance_value = np.var(values_for_variance, ddof=0)
print("Variance:", variance_value)
Variance: 1317.81
# (v) 90th percentile: value below which 90% of the entries fall, using
# NumPy's default interpolation between neighbouring data points.
values_for_percentile = np.asarray(user_list)
percentile_90 = np.percentile(values_for_percentile, q=90)
print("90th Percentile:", percentile_90)
90th Percentile: 97.8
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js