Lab 5A: Statistical Modeling (Easy)
- Load the Iris data set.
# Load the built-in iris data frame into the workspace.
data("iris")
- Peek at the data.
# Show the first six rows (head()'s default) of the data set.
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
- Look at unique species.
# List the distinct species labels (a factor with three levels).
unique(iris[["Species"]])
## [1] setosa versicolor virginica
## Levels: setosa versicolor virginica
Create a Gaussian Distribution Model
- Create a density plot of sepal width
# Draw the kernel density estimate of the observed sepal widths.
plot(x = density(x = iris$Sepal.Width))
- Get the mean
# Sample mean of sepal width; used as the Gaussian model's location.
irisMean <- mean(x = iris[["Sepal.Width"]])
- Print the mean
# Display the fitted mean.
print(x = irisMean)
## [1] 3.057333
- Get the standard deviation
# Sample standard deviation of sepal width; the Gaussian model's spread.
irisStdDev <- sd(x = iris[["Sepal.Width"]])
- Print the standard deviation
# Display the fitted standard deviation.
print(x = irisStdDev)
## [1] 0.4358663
- Create points along x-axis of the distribution
# Build 100 evenly spaced x positions spanning the observed sepal-width
# range. The argument is spelled `length.out` in full so the call does not
# depend on partial argument matching.
distributionX <- seq(
  from = min(iris$Sepal.Width),
  to = max(iris$Sepal.Width),
  length.out = 100)
- Compute the y-axis height of each point
# Evaluate the fitted normal density at every x position.
distributionY <- dnorm(distributionX, mean = irisMean, sd = irisStdDev)
- Add the distribution to the plot
# Base layer: kernel density estimate of the observed sepal widths.
plot(x = density(x = iris$Sepal.Width))
# Overlay the fitted Gaussian curve in red.
lines(distributionX, distributionY, col = "red")
- Generate (simulate) new values from the model
# Simulate 10,000 draws from a normal distribution parameterised by the
# sample mean and standard deviation of sepal width. No seed is set, so the
# draws (and any statistics computed from them) differ from run to run.
values <- rnorm(
  10000,
  mean = mean(iris$Sepal.Width),
  sd = sd(iris$Sepal.Width))
- Add plot of distribution of generated values
# Base layer: kernel density estimate of the observed sepal widths.
# It is drawn exactly once; repeating the identical plot() call would only
# redraw the same figure.
plot(density(iris$Sepal.Width))
# Red: the fitted Gaussian density curve.
lines(
  x = distributionX,
  y = distributionY,
  col = "red")
# Blue: kernel density estimate of the values simulated from the model.
lines(
  x = density(values),
  col = "blue")
- Get mean of generated values
# Mean of the simulated values; should be close to the sample mean.
mean(x = values)
## [1] 3.059143
- Get standard deviation of generated values
# Standard deviation of the simulated values; should be close to the
# sample standard deviation.
sd(x = values)
## [1] 0.4397619
Create a Simple Linear Regression Model
- Create a scatterplot matrix.
# Pairwise scatterplots of the four numeric measurement columns.
plot(x = iris[, 1:4])
- Create a scatterplot of petal length vs width.
# Petal length on the x-axis against petal width on the y-axis.
plot(iris$Petal.Length, iris$Petal.Width)
- Create a linear regression model.
# Fit petal width as a linear function of petal length by least squares.
model <- lm(Petal.Width ~ Petal.Length, data = iris)
- Draw linear regression model on the scatterplot.
# Scatterplot of petal length against petal width.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width)
# Overlay the model's fitted values as a thick red line. fitted() is the
# accessor for the fitted values; `model$fitted` only reached the
# `fitted.values` component through `$` partial matching.
lines(
  x = iris$Petal.Length,
  y = fitted(model),
  col = "red",
  lwd = 3)
- Get the correlation coefficient.
# Pearson correlation (cor()'s default method) between petal length and width.
cor(iris$Petal.Length, iris$Petal.Width)
## [1] 0.9628654
- Summarize the model.
# Print residual quantiles, coefficient table and fit statistics.
summary(object = model)
##
## Call:
## lm(formula = Petal.Width ~ Petal.Length, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56515 -0.12358 -0.01898 0.13288 0.64272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.363076 0.039762 -9.131 4.7e-16 ***
## Petal.Length 0.415755 0.009582 43.387 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2065 on 148 degrees of freedom
## Multiple R-squared: 0.9271, Adjusted R-squared: 0.9266
## F-statistic: 1882 on 1 and 148 DF, p-value: < 2.2e-16
- Create new petal lengths to predict.
# New petal lengths to score; the column name must match the model's
# predictor (Petal.Length) so predict() can find it.
unknownLengths <- data.frame(Petal.Length = c(2, 5, 7))
- Predict new unknown values from the model.
# Predicted petal widths for the new petal lengths.
predict(model, newdata = unknownLengths)
## 1 2 3
## 0.4684353 1.7157016 2.5472124