---
title: "Linear Regression -- Anscombe Wrap-Up"
author: "Evan L. Ray"
date: "September 29, 2017"
output: ioslides_presentation
---

```{r setup, include=FALSE}
# Hide code in the rendered slides unless a chunk overrides `echo`.
knitr::opts_chunk$set(echo = FALSE)

# Attach packages with library() rather than require(): library() stops with
# an error when a package is missing, whereas require() only returns FALSE and
# lets the document fail later with a less helpful message.
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
```

## Wrap Up for Anscombe Lab

```{r, message=FALSE, echo = FALSE}
# Read the Anscombe data from the course website; later chunks use the columns
# x1..x4 and y1..y4 of this data frame.
anscombe <- read_csv("https://mhc-stat140-2017.github.io/data/base_r/anscombe.csv")
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 1: scatterplot with the fitted least-squares line (no error band).
ggplot(anscombe, aes(x = x1, y = y1)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 2: same display.
ggplot(anscombe, aes(x = x2, y = y2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 3: same display.
ggplot(anscombe, aes(x = x3, y = y3)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2.5}
# Data set 4: same display.
ggplot(anscombe, aes(x = x4, y = y4)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
## High Leverage Observations

* Observation $i$ has **high leverage** if $x_i$ is far from $\bar{x}$ -- pulls the line close to $(x_i, y_i)$

```{r}
# Create two variants (5 and 6) of data set 4 by copying its x and y columns.
# The "6" columns are created before the "5" columns, matching the order in
# which the new columns are appended to the data frame.
anscombe$x6 <- anscombe$x4
anscombe$x5 <- anscombe$x4
anscombe$y6 <- anscombe$y4
anscombe$y5 <- anscombe$y4

# Change only the response of observation 8 in each variant.
# NOTE(review): observation 8 is presumably the high-leverage point of data
# set 4 -- confirm against the data file.
anscombe$y5[8] <- 7
anscombe$y6[8] <- 0
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Original data set 4 with its least-squares line.
ggplot(anscombe, aes(x = x4, y = y4)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Variant 5: y value of observation 8 set to 7.
ggplot(anscombe, aes(x = x5, y = y5)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Variant 6: y value of observation 8 set to 0.
ggplot(anscombe, aes(x = x6, y = y6)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```
## Plot of Residuals vs. Explanatory

```{r, message=FALSE, echo = FALSE}
# Fit a simple linear regression of y on x for each of the four data sets.
fit1 <- lm(y1 ~ x1, data = anscombe)
fit2 <- lm(y2 ~ x2, data = anscombe)
fit3 <- lm(y3 ~ x3, data = anscombe)
fit4 <- lm(y4 ~ x4, data = anscombe)

# Store each model's residuals as a column so they can be plotted against x.
anscombe <- anscombe %>%
  mutate(
    residual1 = residuals(fit1),
    residual2 = residuals(fit2),
    residual3 = residuals(fit3),
    residual4 = residuals(fit4)
  )
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 1 fit against its explanatory variable.
ggplot(anscombe, aes(x = x1, y = residual1)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 2 fit against its explanatory variable.
ggplot(anscombe, aes(x = x2, y = residual2)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 3 fit against its explanatory variable.
ggplot(anscombe, aes(x = x3, y = residual3)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 4 fit against its explanatory variable.
ggplot(anscombe, aes(x = x4, y = residual4)) +
  geom_point()
```
* Look for even scatter around 0, no patterns

## Plot of Residuals vs. Predicted/Fitted

```{r, message=FALSE, echo = FALSE}
# Fit a simple linear regression of y on x for each of the four data sets.
# These are the same four models as on the residuals-vs-explanatory slide;
# refitting them here keeps this chunk self-contained.
fit1 <- lm(y1 ~ x1, data = anscombe)
fit2 <- lm(y2 ~ x2, data = anscombe)
fit3 <- lm(y3 ~ x3, data = anscombe)
fit4 <- lm(y4 ~ x4, data = anscombe)

# Store each model's predicted (fitted) values and residuals as columns so the
# residuals can be plotted against the predictions.
anscombe <- mutate(
  anscombe,
  predicted1 = predict(fit1),
  predicted2 = predict(fit2),
  predicted3 = predict(fit3),
  predicted4 = predict(fit4),
  residual1 = residuals(fit1),
  residual2 = residuals(fit2),
  residual3 = residuals(fit3),
  residual4 = residuals(fit4)
)
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 1 fit against its predicted values.
ggplot(anscombe, aes(x = predicted1, y = residual1)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 2 fit against its predicted values.
ggplot(anscombe, aes(x = predicted2, y = residual2)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 3 fit against its predicted values.
ggplot(anscombe, aes(x = predicted3, y = residual3)) +
  geom_point()
```
```{r, echo = FALSE, warning=FALSE, message=FALSE, fig.width=3.5, fig.height=2}
# Residuals of the data set 4 fit against its predicted values.
ggplot(anscombe, aes(x = predicted4, y = residual4)) +
  geom_point()
```
* Look for even scatter around 0, no patterns