On 25 Apr 2001, at 7:39, Terry Therneau wrote:
> Chris,
> Because of a very narrow window (is suspect you did cut-and-paste) the
> code lines of your Splus posting got wrapped. If someone tries to cut
> and paste out of the posting the functions won't work, e.g., comments that
> got wrapped onto 2 lines don't have a second #.
Aargh -- of course. So near and yet ...
Here as an attachment, in case anyone really needs it (some people
have been generous in thanking me so I guess I'm not completely
barking!)
Chris
Chris Evans <chris@psyctc.org>
Consultant Psychiatrist in Psychotherapy,
Rampton Hospital; Associate R&D Director,
Tavistock & Portman NHS Trust;
Hon. SL Institute of Psychiatry
*** My views are my own and not representative
of those institutions ***
## This arose from a request I put the s-news list. I was asking for pointers
to
## the exact confidence interval for a proportion. I had written the following
## crude function that uses the Gaussian approximation without a continuity
correction
ci.prop <- function(p,n,int=.95) {
# Confidence interval for an observed proportion p in sample size n
# Method from Gardner, M. J. and D. G. Altman (1989).
# "Statistics with confidence. Confidence intervals and statistical
guidelines"
# London, British Medical Journal., p.29
# Uses crude normal approximation method. Must replace with accurate
method.
if (n != as.integer(n)) stop("n passed to function ci.prop must be an
integer")
if (n < 2) stop("n passed to function ci.prop must be at least 2")
if (p < 0 || p > 1) stop("p passed to function ci.prop must be between
0 and 1 incl.")
se.p <- sqrt(p*(1-p)/n)
int <- .5 + int/2
qn <- qnorm(int)
half <- qn*se.p
if (p == 0) lwr <- 0
else lwr <- p - half
if (lwr < 0) lwr <- 0
if (p == 1) upr <- 1
else upr <- p + half
if (upr > 1) upr <- 1
c(lwr,upr)
}
## The first response I got made this
exact.binom.CI <- function(P, N, k)
{
# Date sent: Fri, 20 Apr 2001 12:20:38 -0700
# To: "Chris Evans" <chris1@psyctc.org>
# From: "Timothy J. Wade" <tjwade@uclink4.berkeley.edu>
# Subject: Re: [S] confidence intervals for proportions
# I didn't write this, I asked the same question a while back and was kindly
# given the following function. Unfortunately I forget who sent this one to
# me, but I received several different user-written functions.
# Exact Binomial Condfidence limits. ;
# To get 95% limits, enter (0.95, trials, successes);
# Ref: Clopper & Pearson, Biometrika (1934), v26, p.404 ;
if(N > k & k >= 1)return(c(pl = qbeta((1 - P)/2, k, N - k + 1), pu =
qbeta(1 - (1 - P)/2, k + 1, N - k)))
if(N > 0 & N == k)return(c(pl = P^(1/n), pu = 1))
if(N > 0 & k == 0)return(c(pl = 0, pu = (1 - P^(1/N))))
}
## Then David Parkhurst pointed me straight!
#From: "David Parkhurst" <parkhurs@indiana.edu>
#To: "Chris Evans" <chris1@psyctc.org>
#Subject: Re: [S] confidence intervals for proportions
#Date sent: Fri, 20 Apr 2001 14:38:49 -0500
#Other references that may be of use are:
#Box, G. E. P., W. G. Hunter, et al. (1978). Statistics for Experimenters.
#New York, John Wiley & Sons. (They provide a graph you can use.)
#Agresti, A. and B. A. Coull (1998). "Approximate is better than "exact"
#for interval estimation of binomial proportions." American Statistician
52:119--126.
#Clopper, C. J. and E. S. Pearson (1934). "The use of confidence or
#fiducial limits illustrated in the case of the binomial." Biometrika 26:
404-413.
#I would NOT recommend using the normal distribution for tail probabilities
#unless both N*pi and N*(1-pi) are bigger than 15 or more. (Pi here is
#the binomial proportion, of course, and not 3.14.....)
#Dave Parkhurst
## Then Brad Bickerstaff pointed me to the binconf function built into the
Hmisc library.
## Frank Harrell did too!
## The version I have is:
binconf <- function(x, n, alpha = 0.05)
{
nu1 <- 2 * (n - x + 1)
nu2 <- 2 * x
ll <- if(x > 0) x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1)) else 0
nu1p <- nu2 + 2
nu2p <- nu1 - 2
pp <- if(x < n) qf(1 - alpha/2, nu1p, nu2p) else 1
ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp)
zcrit <- - qnorm(alpha/2)
z2 <- zcrit * zcrit
p <- x/n
cl <- (p + z2/2/n + c(-1, 1) * zcrit * sqrt((p * (1 - p) +
z2/4/n)/n))/(1 + z2/n)
if(x == 1)
cl[1] <- - log(1 - alpha)/n
if(x == (n - 1))
cl[2] <- 1 + log(1 - alpha)/n
res <- rbind(c(ll, ul), cl)
dimnames(res) <- list(c("Exact", "Wilson"), c("Lower", "Upper"))
res
}
## Next I got this comprehensive function from Terry Therneau
cibinom <- function(k, n, p=.95,
method=c('exact', 'arcsin', 'normal', 'anscombe')) {
# From: Terry Therneau <therneau@mayo.edu>
# Date sent: Fri, 20 Apr 2001 15:18:57 -0500 (CDT)
# To: chris1@psyctc.org
# Subject: Better binomial CI
# These routines have been used for several years at our institution:
#
# Exact binomial confidence limits
# Based on equation 10.7 of Feller volume 1
# The other approximations make use of the continuity correction
nn <- max(length(k), length(n), length(p))
if (nn>1) {
k <- rep(k, length=nn)
n <- rep(n, length=nn)
p <- rep(p, length=nn)
}
p2 <- (1-p)/2
method <- match.arg(method)
if (method=='exact') {
dummy1 <- ifelse(k==0, 1, k) #avoid an error message of qbeta
dummy2 <- ifelse(k==n, n-1, k)
upper <- ifelse(k==n, 1, 1- qbeta(p2, n-dummy2, k+1))
lower <- ifelse(k==0, 0, 1- qbeta(1-p2, n+1-k, dummy1))
}
else if (method=='normal'){ #normal approx, poorest
pp <- k/n
std <- sqrt(pp * (1-pp) /n)
upper <- pp + .5/n - qnorm(p2)*std
lower <- pp + qnorm(p2)*std - .5/n
}
else if (method=='arcsin'){ # use the arcsin transform
ap <- asin(sqrt(k/n))
ap1<- asin(sqrt(ifelse(k==n, k, k+.5)/n))
ap2<- asin(sqrt(ifelse(k==0, k, k-.5)/n))
std<- sqrt(.25/n)
upper <- (sin(ap1 - qnorm(p2)*std))^2
lower <- (sin(ap2 + qnorm(p2)*std))^2
}
else if (method=='anscombe'){ # anscombe's method
con <- 3/8
ap <- asin(sqrt((k+con)/(n + 2*con)))
ap1<- asin(sqrt((con+ifelse(k==n, k, k+.5))/(n + 2*con)))
ap2<- asin(sqrt((con+ifelse(k==0, k, k-.5))/(n + 2*con)))
std<- sqrt(.25/(n+.5))
upper <- ((sin(ap1 - qnorm(p2)*std))^2)
lower <- ((sin(ap2 + qnorm(p2)*std))^2)
}
else stop("Invalid method")
if (nn==1) c(lower=lower, upper=upper)
else cbind(lower=lower, upper=upper)
}
## I got this message from Richard Howarth
#From: "Richard J. Howarth" <r.howarth@ucl.ac.uk>
#To: Chris Evans <chris1@psyctc.org>
#Subject: Re: [S] confidence intervals for proportions
#Date sent: Mon, 23 Apr 2001 18:03:01 +0100
#Blyth (1986) showed that accurate values of the one-sided lower and
#upper bounds on the proportion 100(n/N), where n is the number of occurrences
in
#a total count of N items, is given by:
#p(n)u = 100[Beta(1-a, n+1, N-n)]
#p(n)l = 100[1-Beta(1-a, N-n+1, n)]
#where Beta(1-a, v1,v2) is the 100(1-a)th percentile of the inverse beta
#distribution whith shape parameters v1 and v2, the qbeta() function in
#Splus. Use (a/2) in the above for two-sided bounds.
#In the special case where x might be expected to be present, but is not
#observed,
#p(0)u = 100[1-a^(1/N)]
#See Blyth, CR (1986) Approximate Binomial Confidence Limits, JASA, 81:
#843-855.
#Regards,
#Richard Howarth
## The first bit of that seemed to square with what the other "exact" methods
## were offering (which makes me puzzled by the "Approximate" in the title
## of Blyth's article but I'll follow that up.
## The second bit turns out to be a simple definition of qbeta(1-a,1,N) and
## related to a "3/n" rule of thumb I'd read somewhere.
## So I knocked up:
upr.not.seen <- function(n,ci=.95) {
alpha <- 1-ci
upr.blyth.prop <- 1-alpha^(1/n)
upr.blyth.perc <- 100*upr.blyth.prop
upr.blyth.n <- floor(1/upr.blyth.prop)
upr.JV.JL.JZ.k <- upr.blyth.prop*(n+1)
as.data.frame(cbind(upr.blyth.prop,upr.blyth.perc,upr.blyth.n,upr.JV.JL.JZ.k))
}
upr.not.seen(40)
upr.not.seen(30)
upr.not.seen(20)
upr.not.seen(10)
print(" ## ")
##Ugly but confirmed the utility of the rule of thumb
##I said that to Richard
## while seeking his permission to circulate his response and got this too:
#Dear Chris,
#Thanks for your message. Jovanovic & Viana (1986), Jovanovic & Levy (1987)
#and Jovanovic & Zalenski (1997) showed that a useful (and slightly
#conservative) approximations for p(0)u 90%, 95%, 99% and 99.9% when n=0,
#N>20 are given by k/(N+1) where k = 2.25, 3, 4.5 and 7 respectively.
## so here goes:
print(" ## ")
upr.not.seen(40,ci=.9)
upr.not.seen(30,ci=.9)
upr.not.seen(20,ci=.9)
upr.not.seen(10,ci=.9)
upr.not.seen(40,ci=.99)
upr.not.seen(30,ci=.99)
upr.not.seen(20,ci=.99)
upr.not.seen(10,ci=.99)
upr.not.seen(40,ci=.999)
upr.not.seen(30,ci=.999)
upr.not.seen(20,ci=.999)
upr.not.seen(10,ci=.999)
## Hm. Doesn't suggest that 2.25 or 7 are particularly well chosen but perhaps
## I've got something wrong here.
print(" ## ")
#Jovanovic BD & Levy PS 1997, A look at the rule of three. American
#Statistician 51: 137-139.
#-- & Viana MAG 1996, Upper confidence bounds for binomial probability in
#safety evaluations. Proc 1996 ASA Ann Conf., Chicago, Biopharm.
#Section., ASA, Alexandria, Virginia.
#-- & Zalenski RJ 1997, Safety evaluation and confidence intervals when the
#number of observed events is small or zero. Ann. Emergency Medicine, 30;
#301-306. [that could be where you saw it!].
## Certainly sounds nearer my journal reading than many of the refs. so far
## but I think I saw it in a Martin Bland short article in the BMJ (British
## Medical Journal, but I haven't made a note of it.
## That's everything I've had and stunning it's been. Thanks to everyone.
ci.prop(0/100,100)
cibinom(0,100)
cibinom(0,1000,method="normal")
cibinom(0,100,method="arcsin")
cibinom(0,100,method="normal")
cibinom(0,100,method="anscombe")
binconf(0,100) # from hmisc library
exact.binom.CI(.95,100,0)
#
#
# data from Gardner & Altman, results check with the book:
ci.prop(81/263,263)
# Terry Therneau's function showing different result
# owing to inclusion of a continuity correction:
cibinom(81,263,method="normal")
|