---
title: "Linear Regression -- Anscombe Wrap-Up"
author: "Evan L. Ray"
date: "September 29, 2017"
output: ioslides_presentation
---
```{r setup, include=FALSE}
# Hide source code in the rendered slides by default; chunks show output only.
knitr::opts_chunk$set(echo = FALSE)

# Attach packages with library() so a missing package stops the render
# immediately; require() would only return FALSE and let a later chunk fail
# with a less obvious "could not find function" error.
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
```
## Wrap Up for Anscombe Lab
```{r, message=FALSE, echo = FALSE}
# Download the Anscombe data set (columns x1..x4 and y1..y4 are used below).
anscombe <- read_csv(
  file = "https://mhc-stat140-2017.github.io/data/base_r/anscombe.csv"
)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 1: scatterplot with the fitted linear-model line (no error band).
# Data and aesthetics are declared once and inherited by both layers.
ggplot(data = anscombe, mapping = aes(x = x1, y = y1)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 2: scatterplot with the fitted linear-model line (no error band).
# Data and aesthetics are declared once and inherited by both layers.
ggplot(data = anscombe, mapping = aes(x = x2, y = y2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 3: scatterplot with the fitted linear-model line (no error band).
# Data and aesthetics are declared once and inherited by both layers.
ggplot(data = anscombe, mapping = aes(x = x3, y = y3)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 4: scatterplot with the fitted linear-model line (no error band).
# Data and aesthetics are declared once and inherited by both layers.
ggplot(data = anscombe, mapping = aes(x = x4, y = y4)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
## High Leverage Observations
* Observation $i$ has **high leverage** if $x_i$ is far from $\bar{x}$ -- pulls the line close to $(x_i, y_i)$
```{r}
# Create two copies of data set 4 (x5/y5 and x6/y6) that differ from the
# original only in the response value of row 8.
# NOTE(review): row 8 is presumably the point with the outlying x4 value --
# confirm against the data.
anscombe[["x6"]] <- anscombe[["x4"]]
anscombe[["x5"]] <- anscombe[["x6"]]
anscombe[["y6"]] <- anscombe[["y4"]]
anscombe[["y5"]] <- anscombe[["y6"]]

# Variant 5 sets the response of row 8 to 7; variant 6 sets it to 0.
anscombe[["y5"]][8] <- 7
anscombe[["y6"]][8] <- 0
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Original data set 4 with its fitted linear-model line, for comparison with
# the modified x5/y5 and x6/y6 versions.
ggplot(data = anscombe, mapping = aes(x = x4, y = y4)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Modified copy of data set 4 (columns x5/y5) with its fitted linear-model
# line, showing how the line responds to the changed response value.
ggplot(data = anscombe, mapping = aes(x = x5, y = y5)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Second modified copy of data set 4 (columns x6/y6) with its fitted
# linear-model line, showing how the line responds to the changed response.
ggplot(data = anscombe, mapping = aes(x = x6, y = y6)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
## Plot of Residuals vs. Explanatory
```{r, message=FALSE, echo = FALSE}
# Fit one simple linear regression per data set (y_k on x_k, k = 1..4).
fit1 <- lm(formula = y1 ~ x1, data = anscombe)
fit2 <- lm(formula = y2 ~ x2, data = anscombe)
fit3 <- lm(formula = y3 ~ x3, data = anscombe)
fit4 <- lm(formula = y4 ~ x4, data = anscombe)

# Attach each model's residuals as a column so the plotting chunks can map
# them directly against the explanatory variables.
anscombe <- mutate(
  anscombe,
  residual1 = residuals(fit1),
  residual2 = residuals(fit2),
  residual3 = residuals(fit3),
  residual4 = residuals(fit4)
)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y1 ~ x1 fit plotted against the explanatory variable x1.
ggplot(data = anscombe, mapping = aes(x = x1, y = residual1)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y2 ~ x2 fit plotted against the explanatory variable x2.
ggplot(data = anscombe, mapping = aes(x = x2, y = residual2)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y3 ~ x3 fit plotted against the explanatory variable x3.
ggplot(data = anscombe, mapping = aes(x = x3, y = residual3)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y4 ~ x4 fit plotted against the explanatory variable x4.
ggplot(data = anscombe, mapping = aes(x = x4, y = residual4)) +
  geom_point()
```
* Look for even scatter around 0, no patterns
## Plot of Residuals vs. Predicted/Fitted
```{r, message=FALSE, echo = FALSE}
# Fit one simple linear regression per data set (y_k on x_k, k = 1..4).
fit1 <- lm(formula = y1 ~ x1, data = anscombe)
fit2 <- lm(formula = y2 ~ x2, data = anscombe)
fit3 <- lm(formula = y3 ~ x3, data = anscombe)
fit4 <- lm(formula = y4 ~ x4, data = anscombe)

# Attach each model's predictions (predict() with no new data) and residuals
# as columns so the plotting chunks can map one against the other.
anscombe <- mutate(
  anscombe,
  predicted1 = predict(fit1),
  predicted2 = predict(fit2),
  predicted3 = predict(fit3),
  predicted4 = predict(fit4),
  residual1 = residuals(fit1),
  residual2 = residuals(fit2),
  residual3 = residuals(fit3),
  residual4 = residuals(fit4)
)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y1 ~ x1 fit plotted against its predicted values.
ggplot(data = anscombe, mapping = aes(x = predicted1, y = residual1)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y2 ~ x2 fit plotted against its predicted values.
ggplot(data = anscombe, mapping = aes(x = predicted2, y = residual2)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y3 ~ x3 fit plotted against its predicted values.
ggplot(data = anscombe, mapping = aes(x = predicted3, y = residual3)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the y4 ~ x4 fit plotted against its predicted values.
ggplot(data = anscombe, mapping = aes(x = predicted4, y = residual4)) +
  geom_point()
```
* Look for even scatter around 0, no patterns