###################################################################
### Hemant Ishwaran
###################################################################
### Plot ROC curve for ordinal categorical data (works for
### arbitrary number of categores, thus applies to "continuous"
### ordinal categorical data)
###
### Function does:
### -------------
### -- Plot empirical ROC curve and compute area under the
### curve using trapezoidal rule
### -- Plot CDF for diseased and non-diseased groups using the
### same axis
### -- Plot back to back probability histograms for
### diseased and non-diseased groups
###
###
### Example 1 Degree of suspicion of stage C/D prostate
### --------- cancer on ordinal scale {1,2,...100}
###
### -- true disease status (gold standard information)
### -- value of 1 corresponds to a diseased patient:
###
### group <- c(
### 1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,
### 0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,
### 1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,1,1)
###
### -- ordinal value for degree of suspicion:
###
### ordinal <- c(
### 99,5,40,20,10,30,5,50,5,25,50,5,60,10,5,5,15,15,5,5,40,60,5,5,20,5,60,
### 5,15,5,5,30,25,5,5,5,5,60,40,5,5,10,10,5,0,40,5,50,5,5,5,70,5,50,0,5,
### 30,10,20,60,5,10,5,10,5,70,10,15,40,10,80,40,5,60,5,60,5,10,10,50,15,
### 5,60,20,70,10,5,30,10,50,10,60,5,40,50,80,5,15,25)
###
### -- plot roc curves (use mtxt for custom titles):
###
### rocPlot(group,ordinal,mtxt="ROC plot for prostate data")
###
###
### Example 2 Randomly generated continuous data
### ---------
###
### rocPlot(rep(c(0,1),c(50,50)),rnorm(100,rep(c(90,100),c(50,50)),10))
###
### -------------------------------------------------------------
### Hemant Ishwaran ishwaran@bio.ri.ccf.org
### Dept of Biostatistics/Wb4 phone: 216-444-9932
### Cleveland Clinic Foundation fax: 216-445-2781
### 9500 Euclid Avenue
### Cleveland, OH 44195
###
### http://www.bio.ri.ccf.org/Resume/Pages/Ishwaran
###################################################################
rocPlot <- function(group,ordinal,
nbreaks=15,minAtom.plot=25,
mtxt="ROC Plots",...)
{
group.uniq <- sort(unique(group))
neg <- ordinal[group==group.uniq[1]]
pos <- ordinal[group==group.uniq[2]]
n.neg <- length(neg)
n.pos <- length(pos)
range.data <- range(neg,pos)
xtxt <- 'Test Results'
### Find all distinct atoms generated by pos and neg
### test values.
atoms <- unique(sort(c(neg,pos)))
atoms.neg <- unique(sort(neg))
atoms.pos <- unique(sort(pos))
n.atoms <- length(atoms)
tp <- rep(0,n.atoms)
fp <- rep(0,n.atoms)
cdf.neg <- rep(0,length(atoms.neg))
cdf.pos <- rep(0,length(atoms.pos))
### (1) Compute tp and fp values for each distinct atom
### (2) Compute CDF's
for (i in 1:n.atoms){
tp[i] <- sum(pos >= atoms[i])/n.pos
fp[i] <- sum(neg >= atoms[i])/n.neg
}
for (i in 1:length(atoms.neg)){
cdf.neg[i] <- sum(neg <= atoms.neg[i])/n.neg
}
for (i in 1:length(atoms.pos)){
cdf.pos[i] <- sum(pos <= atoms.pos[i])/n.pos
}
### Nice touch is to add end values for tp,fp values and cdf's
tp <- c(1,tp,0)
fp <- c(1,fp,0)
delta <- (range.data[2]-range.data[1])/20
atoms.neg <- c(atoms[1]-delta,atoms.neg,atoms[n.atoms]+delta)
atoms.pos <- c(atoms[1]-delta,atoms.pos,atoms[n.atoms]+delta)
cdf.neg <- c(0,cdf.neg,1)
cdf.pos <- c(0,cdf.pos,1)
### (1) Calculate area under curve using Wilcoxon U-statistic
### (2) Estimate its standard error
tp.mean <- (tp[1:(n.atoms+1)] + tp[2:(n.atoms+2)])/2
fp.diff <- -diff(fp)
area.U <- sum(fp.diff*tp.mean)
area.S <- sqrt((area.U*(1 - area.U)+(n.pos - 1)*(area.U/(2 - area.U)
- area.U^2) + (n.neg - 1)*((2*area.U^2)/(1 + area.U)
- area.U^2))/(n.pos*n.neg))
### (1) Plot Empirical ROC. Use points and segments if there are
### fewer than minAtom.plot distinct values
### (2) Plot probability histograms for positive and negative
### test values on same axis for comparison. Use points and
### segments if there are fewer than minAtom.plot values
### (3) Plot CDF for test values
par(pty="s",mfrow=c(1,3))
if(n.atoms>=minAtom.plot){
plot(fp,tp,type="l",xlim=c(0.0,1.0),
ylim=c(0.0,1.0),xlab="False Positive Ratio",
ylab="Sensitivity",main="Empirical ROC")
text(0.7,0.1,paste('Area=',format(round(area.U,3)),
'+/-',format(round(area.S,3))))
}
else{
plot(fp,tp,type="b",xlim=c(0.0,1.0),
ylim=c(0.0,1.0),xlab="False Positive Ratio",
ylab="Sensitivity",main="Empirical ROC")
text(0.7,0.1,paste('Area=',format(round(area.U,3)),
'+/-',format(round(area.S,3))))
}
if(n.atoms>=minAtom.plot){
plot(atoms.neg,cdf.neg,type="s",lty=1,
xlim=range(atoms.neg,atoms.pos),
ylim=range(cdf.neg,cdf.pos),
xlab=xtxt,ylab="Cumulative Probabilities",
main="CDF Plot")
par(new=T)
plot(atoms.pos,cdf.pos,type="s",lty=2,
xlab="",ylab="")
}
else{
plot(atoms.neg,cdf.neg,type="s",lty=1,
xlim=range(atoms.neg,atoms.pos),
ylim=range(cdf.neg,cdf.pos),
xlab=xtxt,ylab="Cumulative Probabilities",
main="CDF Plot")
rug(atoms)
par(new=T)
points(atoms.neg,cdf.neg,pch="x")
par(new=T)
plot(atoms.pos,cdf.pos,type="s",lty=2,
xlab="",ylab="")
par(new=T)
points(atoms.pos,cdf.pos,type="p",pch="o")
}
# Mimic a probability histogram, using barplot
# Back to back plots.
nbreaks <- min(nbreaks,max(n.atoms,range.data[2]-range.data[1]))
data.bks <- seq(range.data[1]-.001,range.data[2]+.001,
length=nbreaks+2)
temp.neg <- hist(neg,breaks=data.bks,plot=F)
pct.neg <- temp.neg$count
temp.pos <- hist(pos,breaks=data.bks,plot=F)
pct.pos <- temp.pos$count
barplot(rbind(pct.pos,-pct.neg),
yaxt="n",srt=90,
space=0,angle=c(5,45),density=c(15,30),
xlab=xtxt,
ylab="Counts",main="Histogram")
junk.fit <- lm(seq(0.5, length(data.bks), by=1) ~ data.bks)
pretty.x <- coef(junk.fit) %*% t(cbind(1,pretty(data.bks,10)))
nice.probs <- pretty(range(c(pct.pos,-pct.neg)))
axis(1,at=pretty.x, labels=pretty(data.bks,10),ticks=0)
axis(2,at=nice.probs,lab=abs(nice.probs))
# Overall title
mtext(mtxt,side=3,line=0,cex=2,outer=TRUE)
}