Course: 2206
Order Statistics and Non-Parametric Statistics
Practical Problem: 02
Order Statistics
Problem 1: Uniform Distribution
• Problem:
A company measures delivery time in minutes and
models it as a Uniform(20, 40) distribution. Simulate
50 samples of delivery times.
1. Find the minimum and maximum delivery time (i.e.,
1st and 50th order statistics).
2. Estimate the 25th percentile (i.e., 13th order
statistic).
3. Compare the sample 25th percentile with the
theoretical quantile.
R-Code
# Set seed for reproducibility
set.seed(123)

# Draw the delivery times from Uniform(20, 40)
n_uniform <- 50
sample_uniform <- runif(n_uniform, min = 20, max = 40)

# Sort the sample: sorted_uniform[k] is the k-th order statistic
sorted_uniform <- sort(sample_uniform)

# 1. Minimum and maximum (1st and n-th order statistics; n = 50 here)
min_time <- sorted_uniform[1]
max_time <- sorted_uniform[n_uniform]

# 2. Sample 25th percentile: the order statistic of rank ceiling(0.25 * n),
#    i.e. the 13th order statistic for n = 50
k_25 <- ceiling(0.25 * n_uniform)
percentile_25_sample <- sorted_uniform[k_25]
Cont…
# 3. Theoretical 25th percentile of Uniform(20, 40)
percentile_25_theoretical <- qunif(0.25, min = 20, max = 40)

# Print the sample order statistics next to the theoretical quantile
cat("Uniform(20,40) Sample - Order Statistics\n")
cat("Minimum delivery time (1st OS):", min_time, "\n")
cat("Maximum delivery time (50th OS):", max_time, "\n")
cat("Sample 25th percentile (13th OS):", percentile_25_sample, "\n")
cat("Theoretical 25th percentile:", percentile_25_theoretical, "\n")
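CDF, PDF, Mean and Variance
For the k-th order statistic $X_{(k)}$ of $n$ i.i.d. Uniform$(a, b)$ observations:
PDF: $f_{(k)}(x) = \frac{n!}{(k-1)!\,(n-k)!}\,\frac{(x-a)^{k-1}(b-x)^{n-k}}{(b-a)^{n}}$ for $a \le x \le b$
CDF: $F_{(k)}(x) = \int_a^x f_{(k)}(t)\,dt$
Mean: $E[X_{(k)}] = a + \frac{k}{n+1}(b-a)$
Variance: $\operatorname{Var}[X_{(k)}] = \frac{k(n-k+1)}{(n+1)^2(n+2)}(b-a)^2$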
R-Code
# Set parameters: support [a, b] of the uniform distribution and sample size n
# NOTE(review): the simulation above draws 50 values while n is 100 here --
# confirm which sample size is intended for the theoretical results.
a <- 20
b <- 40
n <- 100

# Evaluation grid over [a, b] used when plotting the densities
x_vals <- seq(a, b, length.out = 1000)
# Functions for order statistics of Uniform(a, b)

# PDF of the k-th order statistic, f_(k)(x), for a sample of size n.
#
# (X_(k) - a) / (b - a) follows a Beta(k, n - k + 1) distribution, so the
# density n! / ((k-1)! (n-k)!) * (x-a)^(k-1) * (b-x)^(n-k) / (b-a)^n is
# evaluated through dbeta(). Unlike the explicit factorial formula this does
# not overflow for large n (factorial(n) is Inf for n > 170) and it returns 0
# outside [a, b].
#
# x    : numeric vector of evaluation points
# k    : rank of the order statistic (1 <= k <= n)
# n    : sample size
# a, b : lower and upper limits of the uniform distribution (a < b)
# Returns a numeric vector of densities, one per element of x.
pdf_order_stat <- function(x, k, n, a, b) {
  dbeta((x - a) / (b - a), shape1 = k, shape2 = n - k + 1) / (b - a)
}
# CDF of the k-th order statistic, F_(k)(x) = P(X_(k) <= x).
#
# Because (X_(k) - a) / (b - a) ~ Beta(k, n - k + 1), the integral of the
# order-statistic density from a to x equals pbeta() at the rescaled point.
# This gives a plain numeric result (integrate() returns a list), works on a
# whole vector of x values, and needs no numerical integration.
#
# x    : numeric vector of evaluation points
# k    : rank of the order statistic (1 <= k <= n)
# n    : sample size
# a, b : lower and upper limits of the uniform distribution (a < b)
# Returns a numeric vector of probabilities, one per element of x.
cdf_order_stat <- function(x, k, n, a, b) {
  pbeta((x - a) / (b - a), shape1 = k, shape2 = n - k + 1)
}
Cont…
# Expected value of the k-th order statistic of n Uniform(a, b) draws:
#   E[X_(k)] = a + k / (n + 1) * (b - a)
#
# k    : rank of the order statistic (1 <= k <= n)
# n    : sample size
# a, b : lower and upper limits of the uniform distribution (a < b)
# Stops with an error when the rank or the limits are invalid.
expected_order_stat <- function(k, n, a, b) {
  stopifnot(k >= 1, k <= n, a < b)
  a + (k / (n + 1)) * (b - a)
}
# Variance of the k-th order statistic of n Uniform(a, b) draws:
#   Var[X_(k)] = k (n - k + 1) / ((n + 1)^2 (n + 2)) * (b - a)^2
#
# k    : rank of the order statistic (1 <= k <= n)
# n    : sample size
# a, b : lower and upper limits of the uniform distribution (a < b)
# Stops with an error when the rank or the limits are invalid; without the
# check a rank above n + 1 would silently yield a negative "variance".
variance_order_stat <- function(k, n, a, b) {
  stopifnot(k >= 1, k <= n, a < b)
  ((k * (n - k + 1)) / ((n + 1)^2 * (n + 2))) * (b - a)^2
}
# Mean and variance for min, max, median

# Ranks of the three order statistics of interest
k_min <- 1
k_max <- n
# Lower-median rank. ceiling() is used because round() rounds halves to the
# even integer, e.g. round(5 / 2) is 2 although the median of 5 values is the
# 3rd order statistic. For n = 100 the rank is 50 either way.
k_med <- ceiling(n / 2)

# Expected values
mean_min <- expected_order_stat(k_min, n, a, b)
mean_max <- expected_order_stat(k_max, n, a, b)
mean_med <- expected_order_stat(k_med, n, a, b)

# Variances
var_min <- variance_order_stat(k_min, n, a, b)
var_max <- variance_order_stat(k_max, n, a, b)
var_med <- variance_order_stat(k_med, n, a, b)
Cont…
# Range mean and variance
mean_range <- mean_max - mean_min
# The minimum and maximum of the same sample are positively correlated, so
# adding their variances overstates Var(range). For Uniform(a, b) the scaled
# range (X_(n) - X_(1)) / (b - a) follows Beta(n - 1, 2), which gives the
# exact variance below.
var_range <- 2 * (n - 1) * (b - a)^2 / ((n + 1)^2 * (n + 2))

# Print results
cat("Expected Min (X_(1)):", mean_min, "\n")
cat("Variance Min:", var_min, "\n\n")
cat("Expected Max (X_(n)):", mean_max, "\n")
cat("Variance Max:", var_max, "\n\n")
# The median label follows k_med instead of a hard-coded rank
cat(paste0("Expected Median (X_(", k_med, ")):"), mean_med, "\n")
cat("Variance Median:", var_med, "\n\n")
cat("Expected Range:", mean_range, "\n")
cat("Variance Range:", var_range, "\n")
Cont…
# Optional: plot the PDFs of the minimum, maximum and median order statistics

# pdf_order_stat() works on a whole vector of x values, so the grid is passed
# directly instead of looping over it element by element
dens_min <- pdf_order_stat(x_vals, k_min, n, a, b)
dens_max <- pdf_order_stat(x_vals, k_max, n, a, b)
dens_med <- pdf_order_stat(x_vals, k_med, n, a, b)

# ylim spans all three curves so none of them is clipped by the axis limits
# that plot() would otherwise derive from the first curve only
plot(x_vals, dens_min,
  type = "l", col = "blue", lwd = 2, ylab = "Density", xlab = "x",
  main = "PDFs of Order Statistics",
  ylim = range(0, dens_min, dens_max, dens_med)
)
lines(x_vals, dens_max, col = "red", lwd = 2)
lines(x_vals, dens_med, col = "darkgreen", lwd = 2)

# Placed at the top centre: the maximum's density peaks at the right edge
# and the minimum's at the left edge
legend("top",
  legend = c("Min", "Max", "Median"),
  col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2
)
Problem 2: Exponential Distribution
• Problem:
• Suppose the waiting time (in minutes) at a clinic
follows an Exponential(λ = 0.2) distribution (i.e.,
mean = 5 minutes). Simulate 50 patient wait times.
1. Find the median waiting time (25th order statistic).
2. Construct a 95% non-parametric confidence interval
for the median using order statistics.
3. Compare the sample median with the theoretical
median.
R-Code
# Set seed
set.seed(456)

# Generate 50 samples from Exponential(lambda = 0.2)
n_exp <- 50
sample_exp <- rexp(n_exp, rate = 0.2)

# Sort the sample to get order statistics
sorted_exp <- sort(sample_exp)

# 1. Median (25th order statistic for n = 50)
median_sample <- sorted_exp[ceiling(n_exp / 2)]

# 2. 95% non-parametric confidence interval for the median.
# The number of observations below the true median is Binomial(n, 0.5), so
# [X_(j), X_(n - j + 1)] covers the median with probability
# 1 - 2 * P(B <= j - 1). Taking j = qbinom(alpha / 2, n, 0.5) keeps
# P(B <= j - 1) below alpha / 2. For n = 50 this gives the 18th and 33rd
# order statistics (coverage about 96.7%); the 20th and 31st would cover the
# median only about 88% of the time.
conf_level <- 0.95
lower_rank <- max(1, qbinom((1 - conf_level) / 2, size = n_exp, prob = 0.5))
upper_rank <- n_exp - lower_rank + 1
lower_ci <- sorted_exp[lower_rank]
upper_ci <- sorted_exp[upper_rank]
Cont…
# 3. Theoretical median of Exponential(0.2), i.e. log(2) / 0.2
median_theoretical <- qexp(0.5, rate = 0.2)

# Print results (the "\n" escapes are kept on one line; splitting an escape
# across lines puts a stray "n" into the output)
cat("Exponential(0.2) Sample - Order Statistics\n")
cat("Sample Median (25th OS):", median_sample, "\n")
cat("95% Non-parametric CI for Median: [", lower_ci, ",", upper_ci, "]\n")
cat("Theoretical Median:", median_theoretical, "\n")
Example 1: Uniform(20, 40)
set.seed(123)

# Generate data
data_unif <- runif(50, min = 20, max = 40)
sorted_unif <- sort(data_unif)

# Define desired percentiles (10%, 25%, 50%, 75%, 90%)
probs <- c(0.1, 0.25, 0.5, 0.75, 0.9)

# Theoretical quantiles
theoretical_q_unif <- qunif(probs, min = 20, max = 40)

# Empirical quantiles using type = 1 (inverse of the empirical distribution
# function), so each quantile is an actual order statistic of the sample
empirical_q_unif <- quantile(data_unif, probs = probs, type = 1)

# Side-by-side comparison; the difference is rounded after subtracting the
# unrounded values
comparison_unif <- data.frame(
  Percentile = probs * 100,
  Empirical = round(empirical_q_unif, 2),
  Theoretical = round(theoretical_q_unif, 2),
  Difference = round(empirical_q_unif - theoretical_q_unif, 2)
)
print(comparison_unif)
Example 2: Exponential(λ = 0.2)
set.seed(456)

# Generate data
data_exp <- rexp(50, rate = 0.2)
sorted_exp <- sort(data_exp)

# Define desired percentiles
probs <- c(0.1, 0.25, 0.5, 0.75, 0.9)

# Theoretical quantiles
theoretical_q_exp <- qexp(probs, rate = 0.2)

# Empirical quantiles (again type = 1, so each one is an order statistic)
empirical_q_exp <- quantile(data_exp, probs = probs, type = 1)

# Side-by-side comparison; the difference is rounded after subtracting the
# unrounded values
comparison_exp <- data.frame(
  Percentile = probs * 100,
  Empirical = round(empirical_q_exp, 2),
  Theoretical = round(theoretical_q_exp, 2),
  Difference = round(empirical_q_exp - theoretical_q_exp, 2)
)
print(comparison_exp)
# Uniform distribution visual check: empirical against theoretical quantiles.
# The title is a single-line string; wrapping it inside the quotes would put
# a line break into the plot title.
qqplot(theoretical_q_unif, empirical_q_unif,
  main = "Empirical vs Theoretical Quantiles (Uniform)",
  xlab = "Theoretical Quantiles", ylab = "Empirical Quantiles",
  pch = 19, col = "blue"
)
# Reference line y = x: points on it mean the two sets of quantiles agree
abline(0, 1, col = "red", lwd = 2)