# Regression exercises:
#   1. simple linear regression on the Galton height data
#   2. dummy (indicator) variables on the STAR class-type data

library(tinyplot)
library(dplyr)

# Galton heights ----

# load the data
d <- read.csv("galton_students_110.csv")
head(d)

# Scatter plot with the predictor (mid-parent height) on the x axis and the
# outcome (child height) on the y axis, consistent with the model fitted below.
plot(x = d$mid_parent_height_cm, y = d$child_height_cm)

# same scatter with tinyplot, then overlay the fitted regression line
plt(d$mid_parent_height_cm, d$child_height_cm, grid = TRUE)
plt_add(type = "lm")

# 2. run the model and save as m
m <- lm(
  formula = child_height_cm ~ mid_parent_height_cm,
  data = d
)

# where is this R2 thing? It is printed as "Multiple R-squared" in the
# summary, and is also available as a component of the summary object.
m_summary <- summary(m)
m_summary
m_summary$r.squared # R2 = 0.26

# predict 160 and 180 cm tall parents
predictions <- data.frame(mid_parent_height_cm = c(160, 180))
predictions$pred_child <- predict(m, newdata = predictions)
predictions

# get residual standard error
m_summary$sigma # 4.5

# how to compute b1 from correlation?
r <- cor(d$mid_parent_height_cm, d$child_height_cm) # 0.51
sy <- sd(d$child_height_cm)
sx <- sd(d$mid_parent_height_cm)

# standard way: cov(x,y) / var(x); the line below is the equivalent
# correlation form, b1 = r * sy / sx
b1 <- r * sy / sx
b1

# compare to coef(m)[2]
coef(m)[2]
# floating-point safe comparison (names dropped so only the value is compared)
all.equal(b1, unname(coef(m)[2]))

# STAR class types ----

# load STAR data and keep only rows without any missing value
star_df <- read.csv("star_data.csv")
star_df <- star_df[complete.cases(star_df), ]

# create 3 indicator variables TRUE/FALSE for each level of the `star` variable
star_dummies <- star_df %>%
  mutate(
    is_regular = (star == "regular"),
    is_regular_plus = (star == "regular+aide"),
    is_small = (star == "small")
  )

# quick check: the mean of a TRUE/FALSE indicator is the share of rows in that
# class type, so each of those needs to have an average in (0,1)
star_dummies %>%
  summarise(across(starts_with("is_"), mean))

# create sum of those 3
star_dummies <- star_dummies %>%
  mutate(
    is_sum_of_all = is_regular + is_regular_plus + is_small
  )

# if `star` only takes the three values above, every row belongs to exactly
# one class type and the mean of is_sum_of_all is exactly 1
star_dummies %>%
  summarise(across(starts_with("is_"), mean))

# regress math on is_regular_plus
m_plus <- lm(math ~ is_regular_plus, data = star_dummies)
m_plus

# predicted score in regular plus class: intercept + slope * 1.
# Taken from the fitted model rather than typed in by hand, so it cannot go
# stale when the data changes (an earlier run gave 555.4726 + (-0.5487) * 1).
unname(coef(m_plus)[1] + coef(m_plus)[2] * 1)

# regress math on all three dummies at once
# wrong: when the three dummies add up to 1 in every row they are perfectly
# collinear with the intercept (dummy variable trap), so one coefficient
# cannot be estimated and lm reports it as NA
lm(math ~ is_regular + is_regular_plus + is_small, data = star_dummies)

# small is the reference category
lm(math ~ is_regular + is_regular_plus, data = star_dummies)

# regular is the reference category
lm(math ~ is_small + is_regular_plus, data = star_dummies)

# full setup
# regress math on star
lm(math ~ star, data = star_dummies)
# regressing a number on a character variable!
# R is smart enough to convert `star` into dummies automatically: with an
# intercept it creates one dummy fewer than there are different levels in
# `star`; the left-out level (the first one in sorted order) is the reference
# category.