# This exercise uses the data set C0206DT from the book's CD.
# Load the data: choose the C0206DT file interactively. header = TRUE means
# the first row of the file supplies the column names.
d <- read.table(file = file.choose(), header = TRUE)
# Fail early with a clear message if the wrong file was picked: everything
# below needs the TIME and PUBS columns.
stopifnot(all(c("TIME", "PUBS") %in% names(d)))
# NOTE: attach(d) is deliberately not used. Columns are reached through
# with(d, ...), d[["..."]] or the data= argument, so a workspace object that
# happens to be called TIME or PUBS can never be picked up by mistake.
d

# Let's first get the regression coefficients "by hand" using formula 2.4.3.
# rxy holds the correlation between TIME and PUBS.
rxy <- with(d, cor(TIME, PUBS))
rxy
# (the original notes for this exercise report a correlation of about .656)

# sdy and sdx are the standard deviations of PUBS (Y) and TIME (X).
sdy <- sd(d[["PUBS"]])
sdx <- sd(d[["TIME"]])

# Byx is the raw-score regression coefficient (slope): rxy * (sdy / sdx).
Byx <- rxy * (sdy / sdx)
Byx
# Byx is the expected change in PUBS for every one-unit change in TIME
# (the original notes report about 1.98 for this data set).

# We can compute the intercept by using equation 2.4.4:
# mean of Y minus the slope times the mean of X.
Bo <- mean(d[["PUBS"]]) - Byx * mean(d[["TIME"]])
Bo
# This is our intercept: the point at which the regression line crosses the
# Y axis.
# Notice that a stored result (e.g. rxy) and the call that produced it
# (e.g. cor(TIME, PUBS) evaluated on d) can be used interchangeably.

# Now, let's use some of R's functions for computing regression coefficients.
# The linear model function lm() is used for most regression equations we will
# cover in this book. reg is the fitted model object; data = d tells lm()
# where to find PUBS and TIME.
reg <- lm(PUBS ~ TIME, data = d)
# After fitting, view the model parameters with summary().
summary(reg)
# The Estimate column shows the same values computed by hand above: the
# intercept (Bo) and the coefficient for the independent variable TIME (Byx).
# summary() also gives the standard error, t value, and p value for each,
# and at the bottom the Multiple R-squared, Adjusted R-squared and the
# F-statistic for the model.

# We can make a plot to view our regression line as well.
# This gives a basic scatter plot with TIME on the x axis and PUBS on the
# y axis.
with(d, plot(TIME, PUBS))
# To put our regression line on the plot, pass the fitted model to abline().
abline(reg)
# abline() takes the intercept and slope from the fitted model object (reg) and draws that line on the existing plot