# This file includes some R commands to input and transform some data, fit a linear model and describe the results in text and using plots # read in data using the c() function to create two vectors, x and y # note that we will often read in data from a csv or other text file x <- c(14.36, 14.48, 14.53, 14.52, 14.35, 14.31, 14.44, 14.23, 14.32, 14.57, 14.28, 14.36, 14.50, 14.52, 14.28, 14.13, 14.54, 14.60, 14.86, 14.28, 14.09, 14.20, 14.50, 14.02, 14.45) y <- c(13.84, 14.41, 14.22, 14.63, 13.95, 14.37, 14.41, 13.99, 13.89, 14.59, 14.32, 14.31, 14.43, 14.44, 14.14, 13.90, 14.37, 14.34, 14.78, 13.76, 13.85, 13.89, 14.22, 13.80, 14.67) plot(x,y) # draw a simple scatter plot # the same plot, using some formatting options in R. Type help(plot) or ?plot at the R prompt for more details of the plot() command, or search the R help files at http://www.rseek.org/ plot(y ~ x, main = "Moisture level data", xlab = "In process level", ylab = "Final level", pch = 16) # print out some descriptive statistics mean(x) mean(y) cor(x,y) # correlation of x,y sd(x) # standard deviations of x and y sd(y) cov(x,y) # covariance of x and y #center the x variable by subtracting its mean x.c <- scale(x, scale = FALSE) x.log <- log(x) #run the model using centered x variable model1 <- lm(y~x) model2 <- lm(y~x.log) #print important details from the model summary(model1) #or an anova results table anova(model1) coef(model1) # print only the model coefficients confint(model1, level = 0.95) # print confidence intervals for the coefficients plot(residuals(model1), fitted(model1)) # extract residuals and fitted values from the model and include in a plot # plot histogram of the residuals hist(residuals(model1), freq = FALSE, main = "Histogram of residuals", xlab = "Residuals", col = "pink") # and overlay a normal distribution sigma2hat <- sum((fitted(model1)-y)^2)/model1$df curve(dnorm(x, 0, sqrt(sigma2hat)), add = TRUE, col = "red") # a qqplot is another way of checking the residuals are distributed normally qqnorm(residuals(model1), pch = 16) qqline(residuals(model1), col = "red") par(mfrow = c(2,2)) # set the chart window to have 2 rows and 2 columns plot(model1) # a simple way to plot useful model diagnostics in one go