---
title: "Linear Regression Code"
output: html_document
---

# Load and Format data

Here, we read and format data.

```{r}
library(readr)

avocado_df <- read_csv("avocado_df.csv")

# The response and the continuous predictor must be numeric for lm().
numeric_cols <- c("AveragePrice", "TotalVolume")
avocado_df[numeric_cols] <- lapply(avocado_df[numeric_cols], as.numeric)

# Categorical predictors are converted to factors so lm() dummy-codes them.
factor_cols <- c("Month", "Type", "Region", "Year")
avocado_df[factor_cols] <- lapply(avocado_df[factor_cols], as.factor)
```

# Summary of Data

```{r}
head(avocado_df)
```

```{r}
summary(avocado_df)
```

# Price vs. Demand

```{r}
plot(
  avocado_df$TotalVolume, avocado_df$AveragePrice,
  xlab = "Total Volume", ylab = "Average Price"
)
```

# Linear Modeling in R

## Constant Term

### Model

```{r}
# Intercept-only model: a single constant is fitted to AveragePrice.
model_constant <- lm(AveragePrice ~ 1, data = avocado_df)
```

### Model Summary

```{r}
summary(model_constant)
```

```{r}
# For comparison with the intercept above. na.rm = TRUE mirrors lm(), which
# drops rows with a missing response instead of returning NA.
mean(avocado_df$AveragePrice, na.rm = TRUE)
```

## TotalVolume + Constant Term

### Model

```{r}
model_volume <- lm(AveragePrice ~ 1 + TotalVolume, data = avocado_df)
```

### Model Summary

```{r}
summary(model_volume)
```

### Plot of model

We can plot the model using the fitted model points.

```{r}
fitted_values <- model_volume$fitted.values
print(fitted_values)
```

```{r}
plot(
  avocado_df$TotalVolume, avocado_df$AveragePrice,
  xlab = "Total Volume", ylab = "Average Price"
)
# lm() drops incomplete rows, so fitted_values can be shorter than the raw
# column. Take x from the model frame so x and y always have equal length.
lines(model.frame(model_volume)$TotalVolume, fitted_values, col = "blue")
```

We can plot the model using the coefficients of the model.
```{r}
coefs <- coef(model_volume)
print(coefs)
```

```{r}
plot(
  avocado_df$TotalVolume, avocado_df$AveragePrice,
  xlab = "Total Volume", ylab = "Average Price"
)
# abline(a, b) draws y = a + b * x: intercept first, then slope.
abline(coefs[["(Intercept)"]], coefs[["TotalVolume"]], col = "blue")
```

### Model Predictions

```{r}
unknown_df <- data.frame(TotalVolume = c(15.0, 5.0))
predict(model_volume, unknown_df)
```

### Model Comparison

```{r}
anova(model_constant, model_volume)
```

## Model with Total Volume and Type

### Model

```{r}
model_volume_type <- lm(
  AveragePrice ~ 1 + TotalVolume + Type,
  data = avocado_df
)
```

### Model Summary

```{r}
summary(model_volume_type)
```

### Plot Model

```{r}
coefs <- coef(model_volume_type)
print(coefs)
```

```{r}
# Row masks and a shared y-range, reused by the interaction plot further down.
is_organic <- avocado_df$Type == "organic"
is_conventional <- avocado_df$Type == "conventional"
# Use the price range of ALL rows so conventional points are not clipped to
# the range of the organic points that are drawn first.
price_range <- range(avocado_df$AveragePrice, finite = TRUE)

# create scatter plots
plot(
  avocado_df$TotalVolume[is_organic], avocado_df$AveragePrice[is_organic],
  xlab = "Total Volume", ylab = "Average Price",
  col = "black", xlim = c(4, 19), ylim = price_range
)
# points() adds to the existing plot; axis labels are not valid arguments here.
points(
  avocado_df$TotalVolume[is_conventional],
  avocado_df$AveragePrice[is_conventional],
  col = "grey"
)

# create lines
# Coefficients are selected by name; this assumes `conventional` is the
# reference level of Type, so `Typeorganic` is the organic intercept shift.
abline(
  coefs[["(Intercept)"]] + coefs[["Typeorganic"]], coefs[["TotalVolume"]],
  col = "red"
)
abline(coefs[["(Intercept)"]], coefs[["TotalVolume"]], col = "blue")
legend(
  x = 15, y = 3,
  legend = c(
    "Organic", "Conventional", "Predicted Organic", "Predicted Conventional"
  ),
  col = c("black", "grey", "red", "blue"),
  pch = c(1, 1, NA, NA), lty = c(NA, NA, 1, 1), cex = 0.7
)
```

### Model Predictions

```{r}
unknown_df <- data.frame(
  TotalVolume = c(15.0, 5.0),
  Type = c("conventional", "organic")
)
predict(model_volume_type, unknown_df)
```

### Model Comparison

```{r}
anova(model_volume, model_volume_type)
```

## Model with Total Volume and Type Interaction

### Model

```{r}
model_volume_type_interaction <- lm(
  AveragePrice ~ 1 + TotalVolume + Type + TotalVolume:Type,
  data = avocado_df
)
```

### Model Summary

```{r}
summary(model_volume_type_interaction)
```

### Model Alternative

```{r}
# `TotalVolume * Type` is formula shorthand for
# `TotalVolume + Type + TotalVolume:Type`, so this refits the same model.
model_volume_type_interaction <- lm(
  AveragePrice ~ 1 + TotalVolume * Type,
  data = avocado_df
)
```

### Model Summary

```{r}
summary(model_volume_type_interaction)
```

### Plot of Model

```{r}
coefs <- coef(model_volume_type_interaction)
print(coefs)
```

```{r}
# is_organic, is_conventional and price_range come from the earlier plot chunk.
plot(
  avocado_df$TotalVolume[is_organic], avocado_df$AveragePrice[is_organic],
  xlab = "Total Volume", ylab = "Average Price",
  col = "black", xlim = c(4, 19), ylim = price_range
)
points(
  avocado_df$TotalVolume[is_conventional],
  avocado_df$AveragePrice[is_conventional],
  col = "grey"
)

# Conventional line: baseline intercept and slope.
abline(coefs[["(Intercept)"]], coefs[["TotalVolume"]], col = "blue")
# Organic line: both the intercept and the slope are shifted by the Type terms
# (assumes `conventional` is the reference level of Type).
abline(
  coefs[["(Intercept)"]] + coefs[["Typeorganic"]],
  coefs[["TotalVolume"]] + coefs[["TotalVolume:Typeorganic"]],
  col = "red"
)
# Draw the legend last so the fitted lines do not cross the legend box.
legend(
  x = 15, y = 3,
  legend = c(
    "Organic", "Conventional", "Predicted Organic", "Predicted Conventional"
  ),
  col = c("black", "grey", "red", "blue"),
  pch = c(1, 1, NA, NA), lty = c(NA, NA, 1, 1), cex = 0.7
)
```

### Model Predictions

```{r}
unknown_df <- data.frame(
  TotalVolume = c(15.0, 5.0),
  Type = c("conventional", "organic")
)
predict(model_volume_type_interaction, unknown_df)
```

### Model Comparison

```{r}
anova(model_constant, model_volume_type_interaction)
```

```{r}
anova(model_volume_type, model_volume_type_interaction)
```

## Model with Total Volume and Year

```{r}
model_volume_year <- lm(
  AveragePrice ~ 1 + TotalVolume + Year,
  data = avocado_df
)
summary(model_volume_year)
```