---
title: "Linear Regression Code"
output: html_document
---
# Load and Format data
Here, we read and format data.
```{r}
library(readr)

# Read the avocado data set from the working directory.
avocado_df <- read_csv("avocado_df.csv")

# Continuous measurements are coerced to numeric.
numeric_columns <- c("AveragePrice", "TotalVolume")
avocado_df[numeric_columns] <- lapply(avocado_df[numeric_columns], as.numeric)

# Grouping variables are coerced to factors so lm() treats them as categorical.
factor_columns <- c("Month", "Type", "Region", "Year")
avocado_df[factor_columns] <- lapply(avocado_df[factor_columns], as.factor)
```
# Summary of Data
```{r}
# Preview the first rows of the data set.
head(avocado_df)
```
```{r}
# Column-by-column summary of the data set.
summary(avocado_df)
```
# Price vs. Demand
```{r}
# Scatter plot of average price against total volume for every observation.
with(
  avocado_df,
  plot(TotalVolume, AveragePrice, xlab = "Total Volume", ylab = "Average Price")
)
```
# Linear Modeling in R
## Constant Term
### Model
```{r}
# Intercept-only model: AveragePrice is described by a single constant.
model_constant <- lm(
  AveragePrice ~ 1,
  data = avocado_df
)
```
### Model Summary
```{r}
# Coefficient table and fit statistics for the intercept-only model.
summary(model_constant)
```
```{r}
# Sample mean of the response, for comparison with the fitted intercept.
mean(avocado_df[["AveragePrice"]])
```
## TotalVolume + Constant Term
### Model
```{r}
# Simple linear regression of AveragePrice on TotalVolume plus an intercept.
model_volume <- lm(
  AveragePrice ~ 1 + TotalVolume,
  data = avocado_df
)
```
### Model Summary
```{r}
# Coefficient table and fit statistics for the TotalVolume model.
summary(model_volume)
```
### Plot of model
We can plot the model using its fitted values.
```{r}
# Fitted values: the model's predicted AveragePrice for each row used in the
# fit.
fitted_values <- model_volume$fitted.values
# Show only the first few values; printing the whole vector would write one
# entry per observation into the rendered report.
print(head(fitted_values))
```
```{r}
# Scatter plot of the observed data.
plot(
  avocado_df$TotalVolume, avocado_df$AveragePrice,
  xlab = "Total Volume", ylab = "Average Price"
)
# Overlay the fitted line. The x values come from the model frame (the rows
# lm() actually used), so they always pair one-to-one with fitted_values even
# if rows with missing values were dropped during fitting.
lines(model_volume$model$TotalVolume, fitted_values, col = "blue")
```
We can plot the model using the coefficients of the model.
```{r}
# Extract the estimated intercept and slope of the TotalVolume model.
coefs <- coefficients(model_volume)
print(coefs)
```
```{r}
# Scatter plot of the observed data.
with(
  avocado_df,
  plot(TotalVolume, AveragePrice, xlab = "Total Volume", ylab = "Average Price")
)
# Draw the regression line from its intercept (a) and slope (b).
abline(a = coefs[1], b = coefs[2], col = "blue")
```
### Model Predictions
```{r}
# New observations to predict for: one row per TotalVolume value.
unknown_df <- data.frame(
  TotalVolume = c(15.0, 5.0)
)
predict(model_volume, newdata = unknown_df)
```
### Model Comparison
```{r}
# Compare the intercept-only model against the model that adds TotalVolume.
anova(model_constant, model_volume)
```
## Model with Total Volume and Type
### Model
```{r}
# Additive model: a common TotalVolume slope with a Type-specific shift.
model_volume_type <- lm(
  AveragePrice ~ 1 + TotalVolume + Type,
  data = avocado_df
)
```
### Model Summary
```{r}
# Coefficient table and fit statistics for the TotalVolume + Type model.
summary(model_volume_type)
```
### Plot Model
```{r}
# Extract the estimated coefficients of the TotalVolume + Type model.
coefs <- coefficients(model_volume_type)
print(coefs)
```
```{r}
# Row masks for the two avocado types.
is_organic <- avocado_df$Type == "organic"
is_conventional <- avocado_df$Type == "conventional"

# Create scatter plots. The y-axis range is taken from all observations so the
# conventional points added afterwards are not clipped to the organic range.
plot(
  avocado_df$TotalVolume[is_organic], avocado_df$AveragePrice[is_organic],
  xlab = "Total Volume", ylab = "Average Price", col = "black",
  xlim = c(4, 19), ylim = range(avocado_df$AveragePrice, na.rm = TRUE)
)
# points() only adds to the existing plot; axis labels are not valid here and
# would only produce "not a graphical parameter" warnings.
points(
  avocado_df$TotalVolume[is_conventional],
  avocado_df$AveragePrice[is_conventional],
  col = "grey"
)

# Create lines. coefs holds, in order, the intercept, the TotalVolume slope and
# the Type shift. NOTE(review): this assumes "conventional" is the baseline
# level of Type, so the third coefficient is the organic offset -- confirm
# against the printed coefficient names.
abline(a = coefs[1] + coefs[3], b = coefs[2], col = "red")
abline(a = coefs[1], b = coefs[2], col = "blue")
legend(
  x = 15, y = 3,
  legend = c(
    "Organic", "Conventional", "Predicted Organic", "Predicted Conventional"
  ),
  col = c("black", "grey", "red", "blue"),
  pch = c(1, 1, NA, NA), lty = c(NA, NA, 1, 1), cex = 0.7
)
```
### Model Predictions
```{r}
# New observations to predict for: one conventional and one organic case.
unknown_df <- data.frame(
  TotalVolume = c(15.0, 5.0),
  Type = c("conventional", "organic")
)
predict(model_volume_type, newdata = unknown_df)
```
### Model Comparison
```{r}
# Compare the TotalVolume model against the model that also includes Type.
anova(model_volume,model_volume_type)
```
## Model with Total Volume and Type Interaction
### Model
```{r}
# Interaction model written out term by term: main effects of TotalVolume and
# Type plus their interaction.
model_volume_type_interaction <- lm(
  AveragePrice ~ 1 + TotalVolume + Type + TotalVolume:Type,
  data = avocado_df
)
```
### Model Summary
```{r}
# Coefficient table and fit statistics for the interaction model.
summary(model_volume_type_interaction)
```
### Model Alternative
```{r}
# Same model using the `*` shorthand, which expands to both main effects plus
# their interaction. This overwrites the previous fit of the same name.
model_volume_type_interaction <- lm(
  AveragePrice ~ 1 + TotalVolume*Type,
  data = avocado_df
)
```
### Model Summary
```{r}
# Coefficient table and fit statistics for the refitted interaction model.
summary(model_volume_type_interaction)
```
### Plot of Model
```{r}
# Extract the estimated coefficients of the interaction model.
coefs <- coefficients(model_volume_type_interaction)
print(coefs)
```
```{r}
# Row masks for the two avocado types.
is_organic <- avocado_df$Type == "organic"
is_conventional <- avocado_df$Type == "conventional"

# Scatter plot of both types. The y-axis range is taken from all observations
# so the conventional points added afterwards are not clipped to the organic
# range.
plot(
  avocado_df$TotalVolume[is_organic], avocado_df$AveragePrice[is_organic],
  xlab = "Total Volume", ylab = "Average Price", col = "black",
  xlim = c(4, 19), ylim = range(avocado_df$AveragePrice, na.rm = TRUE)
)
# points() only adds to the existing plot; axis labels are not valid here and
# would only produce "not a graphical parameter" warnings.
points(
  avocado_df$TotalVolume[is_conventional],
  avocado_df$AveragePrice[is_conventional],
  col = "grey"
)

# Fitted lines. coefs holds, in order, the intercept, the TotalVolume slope,
# the Type shift and the TotalVolume:Type slope adjustment.
# NOTE(review): this assumes "conventional" is the baseline level of Type, so
# the third and fourth coefficients are the organic adjustments -- confirm
# against the printed coefficient names.
abline(a = coefs[1], b = coefs[2], col = "blue")
abline(a = coefs[1] + coefs[3], b = coefs[2] + coefs[4], col = "red")

# The legend identifies both the points and the fitted lines.
legend(
  x = 15, y = 3,
  legend = c(
    "Organic", "Conventional", "Predicted Organic", "Predicted Conventional"
  ),
  col = c("black", "grey", "red", "blue"),
  pch = c(1, 1, NA, NA), lty = c(NA, NA, 1, 1), cex = 0.7
)
```
### Model Predictions
```{r}
# New observations to predict for: one conventional and one organic case.
unknown_df <- data.frame(
  TotalVolume = c(15.0, 5.0),
  Type = c("conventional", "organic")
)
predict(model_volume_type_interaction, newdata = unknown_df)
```
### Model Comparison
```{r}
# Compare the intercept-only model against the full interaction model.
anova(model_constant, model_volume_type_interaction)
```
```{r}
# Compare the additive TotalVolume + Type model against the interaction model.
anova(model_volume_type, model_volume_type_interaction)
```
## Model with Total Volume and Year
```{r}
# Additive model with TotalVolume and Year as predictors, followed by its
# summary.
model_volume_year <- lm(
  AveragePrice ~ 1 + TotalVolume + Year,
  data = avocado_df
)
summary(model_volume_year)
```