B R Code
Chapter 1: Introduction
# Este es un comentario en un bloque de códigos de R. Los comentarios
# comienzan con un símbolo de hashtag y R los ignora. El código
# debajo de ese símbolo será interpretado por R una vez que usted lo pegue o #que lo digite en la consola.
x <- c(4, 8, 15, 16, 23, 42)
mean(x) # El código que aparece después del hashtag (#) se ignora.
sd(x)
# Calcular la media aritmética de x
mx <- mean(x)
# Mostrar x y su media aritmética
x
mx
print(mx)
# Algunos ejemplos requieren datos y funciones de otros paquetes de R.
# Para instalar los paquetes que se requieren (solo se debe hacer una vez).
install.packages("devtools")
install.packages("knitr")
install.packages("ggplot2")
# Para instalar epmr desde github
devtools::install_github("talbano/epmr")
# Cargar los paquetes requeridos (se debe hacer cada vez que se reinicie R).
library("epmr")
library("ggplot2")
# Crear una variable por factores
classroom <- c("A", "B", "C", "A", "B", "C")
classroom <- factor(classroom)
classroom[c(1, 4)]
# Combinar variables como columnas en un marco de datos.
mydata <- data.frame(scores = x, classroom)
mydata[1:4, ]
# Cargar el conjunto de datos de PISA y mostrar algunos de sus subconjuntos
data(PISA09)
PISA09[c(1, 10000, 40000), c(1, 6, 7, 38)]
PISA09$age[1:10]
# Obtener estadísticas descriptivas para tres variables de PISA09.
dstudy(PISA09[, c("elab", "cstrat", "memor")])
# Revisar la clase de la variable país y crear una tabla de frecuencias.
class(PISA09$cnt)
table(PISA09$cnt)
# Convierta una tabla de frecuencias por país en porcentajes y luego redondee el resultado a 2 decimales.
cntpct <- table(PISA09$cnt) / nrow(PISA09) * 100
round(cntpct, 2)
# Crea un gráfico de barras para la cantidad de estudiantes por país.
ggplot(PISA09, aes(cnt)) + geom_bar(stat = "count")
# Los histogramas son más apropiados para variables o puntuaciones continuas.
qplot(PISA09$memor, binwidth = .5)
# Crea una serie de gráficos de caja para las puntuaciones en la variable memor por país.
ggplot(PISA09, aes(cnt, memor)) + geom_boxplot()
# Verifique el mínimo, el máximo y el rango para la variable age.
min(PISA09$age)
max(PISA09$age)
range(PISA09$age)
# Verifique las desviaciones estándar en un ítem para Hong Kong (HKG) y # Alemania (DEU)
sd(PISA09$st33q01[PISA09$cnt == "HKG"], na.rm = TRUE)
sd(PISA09$st33q01[PISA09$cnt == "DEU"], na.rm = TRUE)
# Obtener la covarianza y la correlación para variables diferentes.
# El argumento use especifica cómo proceder con los datos faltantes.
# Vea ?cor para más detalles.
cov(PISA09$age, PISA09$grade, use = "complete")
cor(PISA09[, c("elab", "cstrat", "memor")],
use = "complete")
# Diagrama de dispersión para dos variables de estrategias de aprendizaje
# geom_point() agrega los puntos al gráfico.
# position_jitter() los mueve un poco para dejar al descubierto las
# densidades de puntos que de otro modo se superpondrían.
ggplot(PISA09[PISA09$cnt == "USA", ], aes(elab, cstrat)) +
geom_point(position = position_jitter(w = 0.1, h = 0.1))
# Convierta la variable age en puntuaciones-z, luego haga un reescalamiento para obtener un nuevo promedio y una nueva desviación estándar.
# Usted debería verificar la media aritmética y la desviación estándar tanto para zage como para newage.
# También vea setmean() y setsd() del paquete epmr, y scale()
# de la base de R.
zage <- (PISA09$age - mean(PISA09$age)) / sd(PISA09$age)
newage <- (zage * 150) + 500
# Histogramas de la variable age y la versión reescalada de la variable age
ggplot(PISA09, aes(factor(round(zage, 2)))) + geom_bar()
ggplot(PISA09, aes(factor(round(newage, 2)))) + geom_bar()
Chapter 2: Measurement, Scales, and Scoring
# R setup for this chapter
# Required packages are assumed to be installed,
# see chapter 1
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# data(), class(), factor(), c(), from chapter 1
# head() to print the first six rows or values in an
# object
# paste0() for pasting together text and using it to index
# a data set
# apply() for applying a function over rows or columns of
# a data set
# tapply() for applying a function over groups
# dstudy() from the epmr package for getting descriptives
# ggplot(), aes(), and geom_boxplot() for plotting
# round(), nrow(), and with() for examining data
# We'll use a data set included in the base R packages
# called state.x77
# Load state data
data(state)
# Print first 6 rows, all columns
head(state.x77)
# Comparing two classifications of nominal variables in R
# Nominal classroom labels as character
roomnumber <- c("1", "2", "3", "1", "2", "3")
class(roomnumber)
# Nominal classroom labels as factor
roomnumber <- factor(roomnumber)
class(roomnumber)
# Names of all reading items, to be used as an indexing
# object
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
"r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
"r458q07", "r458q04")
# Paste an "s"" to the end of each name, for scored items
rsitems <- paste0(ritems, "s")
# apply() applies to a data set (the first argument)
# across either
# rows or columns (the second argument) the function named
# (in the third argument). See also rowSums(). Here, we
# treat missings as 0s, by excluding them from the sum.
PISA09$rtotal <- apply(PISA09[, rsitems], 1, sum,
na.rm = TRUE)
dstudy(PISA09$rtotal)
# Bar plot of reading totals, which are, ironically,
# converted to a factor before plotting, so ggplot treats
# them as a discrete
# scale rather than continuous. Continuous defaults to a
# histogram which isn't as useful.
ggplot(PISA09, aes(factor(rtotal))) + geom_bar()
# The apply() function again, used to iterate through
# reading items, and for each column (hence the 2), run a
# frequency table.
# Then, divide the result by the number of students, and
# round to 2 decimal places.
rtab <- apply(PISA09[, rsitems], 2, table)
round(rtab / nrow(PISA09), 2)
# Reverse code two variables
PISA09$st33q01r <- recode(PISA09$st33q01)
PISA09$st33q02r <- recode(PISA09$st33q02)
# Names of all attitude toward school items, to be used as
# an indexing object
# Note the added "r" in the names for the first two items,
# which were recoded
atsitems <- c("st33q01r", "st33q02r", "st33q03",
"st33q04")
# Calculate total scores again using apply(), and look at
# descriptives
PISA09$atotal <- apply(PISA09[, atsitems], 1, sum,
na.rm = TRUE)
dstudy(PISA09$atotal)
# tapply() is like apply() but instead of specifying rows
# or columns of a matrix, we provide an index variable.
# Here, median() will be applied over subsets of rtotal by
# grade. with() is used to subset only German students.
with(PISA09[PISA09$cnt == "DEU", ],
tapply(rtotal, grade, median, na.rm = TRUE))
# Most German students are in 9th grade. Medians aren't as
# useful for grades 7, 11, and 12.
table(PISA09$grade[PISA09$cnt == "DEU"])
# Get box plots of reading total scores
ggplot(PISA09[PISA09$cnt == "DEU" & PISA09$grade %in%
8:10, ], aes(x = factor(grade), y = rtotal)) +
geom_boxplot()
Chapter 5: Reliability
# R setup for this chapter
# Required packages are assumed to be installed,
# see chapter 1
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# set.seed(), rnorm(), sample(), and runif() to simulate
# data
# rowSums() for getting totals by row
# rsim() and setrange() from epmr to simulate and modify
# scores
# rstudy(), alpha(), and sem() from epmr to get
# reliability and SEM
# geom_point(), geom_abline(), and geom_errorbar() for
# plotting
# diag() for getting the diagonal elements from a matrix
# astudy() from epmr to get interrater agreement
# gstudy() from epmr to run a generalizability study
# Simulate a constant true score, and randomly varying
# error scores from
# a normal population with mean 0 and SD 1
# set.seed() gives R a starting point for generating
# random numbers
# so we can get the same results on different computers
# You should check the mean and SD of E and X
# Creating a histogram of X might be interesting too
set.seed(160416)
myt <- 20
mye <- rnorm(1000, mean = 0, sd = 1)
myx <- myt + mye
# Calculate total reading scores, as in Chapter 2
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
"r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
"r458q07", "r458q04")
rsitems <- paste0(ritems, "s")
xscores <- rowSums(PISA09[PISA09$cnt == "BEL", rsitems],
na.rm = TRUE)
# Simulate error scores based on known SEM of 1.4, which
# we'll calculate later, then create true scores
# True scores are truncated to fall bewteen 0 and 11 using
# setrange()
escores <- rnorm(length(xscores), 0, 1.4)
tscores <- setrange(xscores - escores, y = xscores)
# Combine in a data frame and create a scatterplot
scores <- data.frame(x1 = xscores, t = tscores,
e = escores)
ggplot(scores, aes(x1, t)) +
geom_point(position = position_jitter(w = .3)) +
geom_abline(col = "blue")
# Simulate scores for a new form of the reading test
# called y
# rho is the made up reliability, which is set to 0.80
# x is the original reading total scores
# Form y is slightly easier than x with mean 6 and SD 3
xysim <- rsim(rho = .8, x = scores$x1, meany = 6, sdy = 3)
scores$x2 <- round(setrange(xysim$y, scores$x1))
ggplot(scores, aes(x1, x2)) +
geom_point(position = position_jitter(w = .3, h = .3)) +
geom_abline(col = "blue")
# Split half correlation, assuming we only had scores on
# one test form
# With an odd number of reading items, one half has 5
# items and the other has 6
xsplit1 <- rowSums(PISA09[PISA09$cnt == "BEL",
rsitems[1:5]])
xsplit2 <- rowSums(PISA09[PISA09$cnt == "BEL",
rsitems[6:11]])
cor(xsplit1, xsplit2, use = "complete")
# sbr() in the epmr package uses the Spearman-Brown
# formula to estimate how reliability would change when
# test length changes by a factor k
# If test length were doubled, k would be 2
sbr(r = cor(xsplit1, xsplit2, use = "complete"), k = 2)
# epmr includes rstudy() which estimates alpha and a
# related form of reliability called omega, along with
# corresponding SEM
# You can also use epmr::alpha() to obtain coefficient
# alpha directly
rstudy(PISA09[, rsitems])
# Get alpha and SEM for students in Belgium
# ggplot2 also has a function called alpha, so we have to
# specify the package name
bela <- epmr::alpha(PISA09[PISA09$cnt == "BEL", rsitems])
# The sem function from epmr also overlaps with sem from
# another package so we're spelling it out here in long
# form
belsem <- epmr::sem(r = bela, sd = sd(scores$x1,
na.rm = T))
# Plot the 11 possible total scores against themselves
# Error bars are shown for 1 SEM, giving a 68% confidence
# interval and 2 SEM, giving the 95% confidence interval
# x is converted to factor to show discrete values on the
# x-axis
beldat <- data.frame(x = 1:11, sem = belsem)
ggplot(beldat, aes(factor(x), x)) +
geom_errorbar(aes(ymin = x - sem * 2,
ymax = x + sem * 2), col = "violet") +
geom_errorbar(aes(ymin = x - sem, ymax = x + sem),
col = "yellow") +
geom_point()
# Simulate random coin flips for two raters
# runif() generates random numbers from a uniform
# distribution
flip1 <- round(runif(30))
flip2 <- round(runif(30))
table(flip1, flip2)
# Simulate essay scores from two raters with a population
# correlation of 0.90, and slightly different mean scores,
# with score range 0 to 6
# Note the capital T is an abbreviation for TRUE
essays <- rsim(100, rho = .9, meanx = 4, meany = 3,
sdx = 1.5, sdy = 1.5, to.data.frame = T)
colnames(essays) <- c("r1", "r2")
essays <- round(setrange(essays, to = c(0, 6)))
# Use a cut off of greater than or equal to 3 to determine
# pass versus fail scores
# ifelse() takes a vector of TRUEs and FALSEs as its first
# argument, and returns here "Pass" for TRUE and "Fail"
# for FALSE
essays$f1 <- factor(ifelse(essays$r1 >= 3, "Pass",
"Fail"))
essays$f2 <- factor(ifelse(essays$r2 >= 3, "Pass",
"Fail"))
table(essays$f1, essays$f2)
# Pull the diagonal elements out of the crosstab with
# diag(), sum them, and divide by the number of people
sum(diag(table(essays$r1, essays$r2))) / nrow(essays)
# Randomly sample from the vector c("Pass", "Fail"),
# nrow(essays) times, with replacement
# Without replacement, we'd only have 2 values to sample
# from
monkey <- sample(c("Pass", "Fail"), nrow(essays),
replace = TRUE)
table(essays$f1, monkey)
# Use the astudy() function from epmr to measure agreement
astudy(essays[, 1:2])
# Certain changes in descriptive statistics, like adding
# constants won't impact correlations
cor(essays$r1, essays$r2)
dstudy(essays[, 1:2])
cor(essays$r1, essays$r2 + 1)
# Comparing scatter plots
ggplot(essays, aes(r1, r2)) +
geom_point(position = position_jitter(w = .1, h = .1)) +
geom_abline(col = "blue")
ggplot(essays, aes(r1, r2 + 1)) +
geom_point(position = position_jitter(w = .1, h = .1)) +
geom_abline(col = "blue")
# Run a g study on simulated essay scores
essays$r3 <- rsim(100, rho = .9, x = essays$r2,
meany = 3.5, sdy = 1.25)$y
essays$r3 <- round(setrange(essays$r3, to = c(0, 6)))
gstudy(essays[, c("r1", "r2", "r3")])
Chapter 6: Item Analysis
# R setup for this chapter
# Required packages are assumed to be installed,
# see chapter 1
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# str() for checking the structure of an object
# recode() for recoding variables
# colMeans() for getting means by column
# istudy() from epmr for running an item analysis
# ostudy() from epmr for running an option analysis
# Recreate the item name index and use it to check the
# structure of the unscored reading items
# The strict.width argument is optional, making sure the
# results fit in the console window
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
"r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
"r458q07", "r458q04")
str(PISA09[, ritems], strict.width = "cut")
# Subsets of the reading item index for constructed and
# selected items
# Check frequency tables by item (hence the 2 in apply)
# for CR items
critems <- ritems[c(3, 5, 7, 10)]
sritems <- ritems[c(1:2, 4, 6, 8:9, 11)]
apply(PISA09[, critems], 2, table, exclude = NULL)
# Indices for scored reading items
rsitems <- paste0(ritems, "s")
crsitems <- paste0(critems, "s")
srsitems <- paste0(sritems, "s")
# Tabulate unscored and scored responses for the first CR
# item
# exclude = NULL shows us NAs as well
# raw and scored are not arguments to table, but are used
# simply to give labels to the printed output
table(raw = PISA09[, critems[1]],
scored = PISA09[, crsitems[1]],
exclude = NULL)
# Create the same type of table for the first SR item
# Check the structure of raw attitude items
str(PISA09[, c("st33q01", "st33q02", "st33q03",
"st33q04")])
# Rescore two items
PISA09$st33q01r <- recode(PISA09$st33q01)
PISA09$st33q02r <- recode(PISA09$st33q02)
# Get p-values for reading items by type
round(colMeans(PISA09[, crsitems], na.rm = T), 2)
round(colMeans(PISA09[, srsitems], na.rm = T), 2)
# Index for attitude toward school items, with the first
# two items recoded
atsitems <- c("st33q01r", "st33q02r", "st33q03",
"st33q04")
# Check mean scores
round(colMeans(PISA09[, atsitems], na.rm = T), 2)
# Convert polytomous to dichotomous, with any disagreement
# coded as 0 and any agreement coded as 1
ats <- apply(PISA09[, atsitems], 2, recode,
list("0" = 1:2, "1" = 3:4))
round(colMeans(ats, na.rm = T), 2)
# Get total reading scores and check descriptives
PISA09$rtotal <- rowSums(PISA09[, rsitems])
dstudy(PISA09$rtotal)
# Compare CR item p-values for students below vs above the
# median total score
round(colMeans(PISA09[PISA09$rtotal <= 6, crsitems],
na.rm = T), 2)
round(colMeans(PISA09[PISA09$rtotal > 6, crsitems],
na.rm = T), 2)
# Create subset of data for German students, then reduce
# to complete data
pisadeu <- PISA09[PISA09$cnt == "DEU", c(crsitems,
"rtotal")]
pisadeu <- pisadeu[complete.cases(pisadeu), ]
round(cor(pisadeu), 2)
# Scatter plots for visualizing item discrimination
ggplot(pisadeu, aes(rtotal, factor(r414q06s))) +
geom_point(position = position_jitter(w = 0.2, h = 0.2))
ggplot(pisadeu, aes(rtotal, factor(r452q03s))) +
geom_point(position = position_jitter(w = 0.2, h = 0.2))
# Caculate ITC and CITC by hand for one of the attitude
# toward school items
PISA09$atstotal <- rowSums(PISA09[, atsitems])
cor(PISA09$atstotal, PISA09$st33q01r, use = "c")
cor(PISA09$atstotal - PISA09$st33q01r,
PISA09$st33q01r, use = "c")
# Estimate item analysis statistics, including alpha if
# item deleted
istudy(PISA09[, rsitems])
# Subset of data for Hong Kong, scored reading items
pisahkg <- PISA09[PISA09$cnt == "HKG", rsitems]
# Spearman-Brown based on original alpha and new test
# length of 8 items
sbr(r = epmr::alpha(pisahkg), k = 8/11)
# Item analysis for Hong Kong
istudy(pisahkg)
# Item analysis for a subset of items
istudy(pisahkg[, rsitems[-c(2, 5)]])
# Item option study on all SR reading items
pisao <- ostudy(PISA09[, sritems], scores = PISA09$rtotal)
# Print frequency results for one item
pisao$counts$r458q04
# Print option analysis percentages by column
# This item discriminates relatively well
pisao$colpct$r458q04
# Print option analysis percentages by column
# This item discriminates relatively less well
pisao$colpct$r414q11
Chapter 7: Item Response Theory
# R setup for this chapter
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# rirf() for getting an item response function
# scale_y_continuous() for plotting a continuous scale
# irtstudy() from epmr for running an IRT study
# rtef() from epmr for getting a test error function
# rtif() from epmr for getting a test information function
# Prepping data for examples
# Create subset of data for Great Britain, then reduce to
# complete data
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
"r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
"r458q07", "r458q04")
rsitems <- paste0(ritems, "s")
PISA09$rtotal <- rowSums(PISA09[, rsitems])
pisagbr <- PISA09[PISA09$cnt == "GBR",
c(rsitems, "rtotal")]
pisagbr <- pisagbr[complete.cases(pisagbr), ]
# Get p-values conditional on rtotal
# tapply() applies a function to the first argument over
# subsets of data defined by the second argument
pvalues <- data.frame(rtotal = 0:11,
p = tapply(pisagbr$r452q06s, pisagbr$rtotal, mean))
# Plot CTT discrimination over scatter plot of scored item
# responses
ggplot(pisagbr, aes(rtotal, r414q06s)) +
geom_point(position = position_jitter(w = 0.1,
h = 0.1)) +
geom_smooth(method = "lm", fill = NA) +
geom_point(aes(rtotal, p), data = pvalues,
col = "green", size = 3)
# Make up a, b, and c parameters for five items
# Get IRF using the rirf() function from epmr and plot
# rirf() will be demonstrated again later
ipar <- data.frame(a = c(2, 1, .5, 1, 1.5),
b = c(3, 2, -.5, 0, -1),
c = c(0, .2, .25, .1, .28),
row.names = paste0("item", 1:5))
ggplot(rirf(ipar), aes(theta)) + scale_y_continuous("Pr(X)") +
geom_line(aes(y = item1), col = 1) +
geom_line(aes(y = item2), col = 2) +
geom_line(aes(y = item3), col = 3) +
geom_line(aes(y = item4), col = 4) +
geom_line(aes(y = item5), col = 5)
# The irtstudy() function estimates theta and b parameters
# for a set of scored item responses
irtgbr <- irtstudy(pisagbr[, rsitems])
irtgbr
head(irtgbr$ip)
# Get IRF for the set of GBR reading item parameters and a
# vector of thetas
# Note the default thetas of seq(-4, 4, length = 100)
# could also be used
irfgbr <- rirf(irtgbr$ip, seq(-6, 6, length = 100))
# Plot IRF for items r452q03s, r452q04s, r452q06s, and
# r452q07s
ggplot(irfgbr, aes(theta)) + scale_y_continuous("Pr(X)") +
geom_line(aes(y = irfgbr$r452q03s, col = "r452q03")) +
geom_line(aes(y = irfgbr$r452q04s, col = "r452q04")) +
geom_line(aes(y = irfgbr$r452q06s, col = "r452q06")) +
geom_line(aes(y = irfgbr$r452q07s, col = "r452q07")) +
scale_colour_discrete(name = "item")
# Plot SEM curve conditional on theta for full items
# Then add SEM for the subset of items 1:8 and 1:4
ggplot(rtef(irtgbr$ip), aes(theta, se)) + geom_line() +
geom_line(aes(theta, se), data = rtef(irtgbr$ip[1:8, ]),
col = "blue") +
geom_line(aes(theta, se), data = rtef(irtgbr$ip[1:4, ]),
col = "green")
# SEM for theta 3 based on the four easiest and the four
# most difficult items
rtef(irtgbr$ip[c(4, 6, 9, 11), ], theta = 3)
rtef(irtgbr$ip[c(2, 7, 8, 10), ], theta = 3)
# Plot the test information function over theta
# Information is highest at the center of the theta scale
ggplot(rtif(irtgbr$ip), aes(theta, i)) + geom_line()
# Plot the test response function over theta
ggplot(rtrf(irtgbr$ip), aes(theta, p)) + geom_line()
Chapter 8: Factor Analysis
# R setup for this chapter
library("epmr")
# We're using a new package for CFA called lavaan
install.packages("lavaan")
library("lavaan")
# Functions we'll use
# fastudy() and plot() from epmr
# lavaanify() and cfa() from lavaan
# Subset of correlations from BDI data set in the epmr
# package
BDI$R[1:4, 1:4]
# Prepping PISA approaches to learning data for EFA
# Vectors of item names for memorization, elaboration, and
# control strategies
mitems <- c("st27q01", "st27q03", "st27q05", "st27q07")
eitems <- c("st27q04", "st27q08", "st27q10", "st27q12")
citems <- c("st27q02", "st27q06", "st27q09", "st27q11",
"st27q13")
alitems <- c(mitems, eitems, citems)
# Reduce to complete data for Great Britain
pisagbr <- PISA09[PISA09$cnt == "GBR", ]
pisagbr <- pisagbr[complete.cases(pisagbr[, c(mitems,
eitems, citems)]), ]
# Fit EFA with three factors
alefa <- fastudy(pisagbr[, alitems], factors = 3)
# Print approaches to learning EFA results
print(alefa, digits = 2)
# Print results again, rounding and filtering loadings
print(alefa, digits = 2, cutoff = 0.3)
# Print uniquenesses, and check sum of squared loadings
round(alefa$uniquenesses, 2)
round(rowSums(alefa$loadings^2) + alefa$uniquenesses, 2)
# Correlations between PISA approaches to learning scores
# for Great Britain
round(cor(PISA09[PISA09$cnt == "GBR", c("memor", "elab",
"cstrat")], use = "c"), 2)
# Plot of approaches to learning eigenvalues
plot(alefa, ylim = c(0, 3))
# Plot of eigenvalues for BDI
bdiefa <- fastudy(covmat = BDI$R, factors = 12,
n.obs = 576)
plot(bdiefa, ylim = c(0, 3))
# CFA of BDI using the lavaan package
# Specify the factor structure
# Comments within the model statement are ignored
bdimod <- lavaanify(model = "
# Latent variable definitions
cog =~ sadness + crying + failure + guilt + punish +
dislike + critical + pessimism + nopleasure +
nointerest + noworth + suicide + indecisive +
irritable + agitated + nosex
som =~ tired + noenergy + noconcentrate + appetite +
sleep
# Covariances
sadness ~~ crying
dislike ~~ critical
nopleasure ~~ nointerest",
auto.var = TRUE, auto.cov.lv.x = TRUE, std.lv = TRUE)
# Fit the model
bdicfa <- cfa(bdimod, sample.cov = BDI$S,
sample.nobs = 576)
# Print fit indices, loadings, and other output
summary(bdicfa, fit = TRUE, standardized = TRUE)
# Specify the factor structure
# Comments within the model statement are ignored as
# comments
bdimod2 <- lavaanify(model = "
depression =~ sadness + crying + failure + guilt +
punish + dislike + critical + pessimism + nopleasure +
nointerest + noworth + suicide + indecisive +
irritable + agitated + nosex + tired + noenergy +
noconcentrate + appetite + sleep",
auto.var = TRUE, std.lv = TRUE)
# Fit the model
bdicfa2 <- cfa(bdimod2, sample.cov = BDI$S,
sample.nobs = 576)
# Print output
summary(bdicfa2, fit = TRUE, standardized = TRUE)
# Compare fit for BDI CFA models
anova(bdicfa2, bdicfa)
# CFA with PISA approaches to learning scale
# Specify the factor structure
almod <- lavaanify(model = "
# Three factors
memor =~ st27q01 + st27q03 + st27q05 + st27q07
elab =~ st27q04 + st27q08 + st27q10 + st27q12
cstrat =~ st27q02 + st27q06 + st27q09 + st27q11 +
st27q13",
auto.var = TRUE, auto.cov.lv.x = TRUE, std.lv = TRUE)
# Fit the model
alcfa <- cfa(almod, sample.cov = cov(pisagbr[, alitems]),
sample.nobs = 3514)
# Print output
summary(alcfa, fit = TRUE, standardized = TRUE)
Chapter 9: Validity
# R setup for this chapter
library("epmr")
# Functions we'll use
# cor() from the base package
# rsim() from epmr to simulate scores
# Get the vector of reading items names
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
"r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
"r458q07", "r458q04")
rsitems <- paste0(ritems, "s")
# Calculate total reading scores
pisausa <- PISA09[PISA09$cnt == "USA", rsitems]
rtotal <- rowSums(pisausa, na.rm = TRUE)
# Simulate a criterion - see Chapter 5 for another example
# using rsim
criterion <- rsim(rho = .8, x = rtotal, meany = 24,
sdy = 6)
# Check the correlation
cor(rtotal, criterion$y)
# Internal consistency for the PISA items
epmr::alpha(pisausa)
# Correction for attenuation
cor(rtotal, criterion$y)/sqrt(.77 * .86)