qqnorm and qqline in ggplot2
Solution 1:
The following code will give you the plot you want. The ggplot package doesn't seem to contain code for calculating the parameters of the qqline, so I don't know if it's possible to achieve such a plot in a (comprehensible) one-liner.
qqplot.data <- function (vec) # argument: vector of numbers
{
# following four lines from base R's qqline()
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1L] - slope * x[1L]
d <- data.frame(resids = vec)
ggplot(d, aes(sample = resids)) + stat_qq() + geom_abline(slope = slope, intercept = int)
}
Solution 2:
You can also add confidence Intervals/confidence bands with this function (Parts of the code copied from car:::qqPlot
)
gg_qq <- function(x, distribution = "norm", ..., line.estimate = NULL, conf = 0.95,
labels = names(x)){
q.function <- eval(parse(text = paste0("q", distribution)))
d.function <- eval(parse(text = paste0("d", distribution)))
x <- na.omit(x)
ord <- order(x)
n <- length(x)
P <- ppoints(length(x))
df <- data.frame(ord.x = x[ord], z = q.function(P, ...))
if(is.null(line.estimate)){
Q.x <- quantile(df$ord.x, c(0.25, 0.75))
Q.z <- q.function(c(0.25, 0.75), ...)
b <- diff(Q.x)/diff(Q.z)
coef <- c(Q.x[1] - b * Q.z[1], b)
} else {
coef <- coef(line.estimate(ord.x ~ z))
}
zz <- qnorm(1 - (1 - conf)/2)
SE <- (coef[2]/d.function(df$z)) * sqrt(P * (1 - P)/n)
fit.value <- coef[1] + coef[2] * df$z
df$upper <- fit.value + zz * SE
df$lower <- fit.value - zz * SE
if(!is.null(labels)){
df$label <- ifelse(df$ord.x > df$upper | df$ord.x < df$lower, labels[ord],"")
}
p <- ggplot(df, aes(x=z, y=ord.x)) +
geom_point() +
geom_abline(intercept = coef[1], slope = coef[2]) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha=0.2)
if(!is.null(labels)) p <- p + geom_text( aes(label = label))
print(p)
coef
}
Example:
Animals2 <- data(Animals2, package = "robustbase")
mod.lm <- lm(log(Animals2$brain) ~ log(Animals2$body))
x <- rstudent(mod.lm)
gg_qq(x)