Didn't seem like Kendall's W was around so here's a crude start I've
cooked up. Suggestions for improvements, exact p values for small
N and k and correct p values for data with ties all gratefully received!
(And sorry if this is embarrassingly simple!)
##crude script providing a function for Kendall's
coefficient of concordance
##formulae from Siegel(1956) Nonparametric statistics
for the behavioral sciences
##data is expected as a complete matrix in which rows
are the judges' ratings,
##columns are the ratings of the objects
##data from Siegel supplied as:
#data <-
matrix(c(1,6,3,2,5,4,1,5,6,4,2,3,6,3,2,5,4,1),nrow=3
,ncol=6,byrow=T)
##Siegel tie data as;
tiedata<-
matrix(c(1,4.5,2,4.5,3,7.5,6,9,7.5,10,2.5,1,2.5,4.5,
4.5,8,9,6.5,10,6.5,2,1,4.5,4.5,4.5,4.5,8,8,8,10),
nrow=3,ncol=10,byrow=T)
##my own data as:
data<- matrix(c(1, 3, 2, 1, 2, 1, 2, 2, 2, 3, 2, 1,
1, 4, 2, 1, 2, 3,1, 2, 2, 2, 3, 2, 3, 3, 2, 2, 2,
1, 2, 3, 1, 1, 1, 3,1, 3, 2, 3, 3, 2, 3, 3, 2, 1,
2, 1, 2, 3, 1, 1, 1, 4,1, 4, 2, 1, 2, 2, 2, 3, 1,
1, 2, 2, 1, 3, 2, 1, 1, 4,2, 2, 2, 2, 2, 2, 3, 2,
2, 2, 2, 3, 2, 2, 2, 2, 2, 3,1, 2, 2, 3, 4, 2, 3,
3, 1, 2, 3, 1, 1, 3, 2, 1, 1, 3,2, 2, 1, 3, 2, 2,
2, 2, 1, 3, 3, 2, 1, 2, 2, 1, 1,
4),nrow=7,ncol=18,byrow=T)
##little utility function to check for missing values
in a vector
all.ok<- function(x)
{
sum(is.na(x)) == 0
}
##another utility function to find number of tied
values in a ranked vector
ties<- function(x)
{
length(x) - length(unique(x))
}
tie.correct<- function(x)
{
## function to calculate the correction factor for
ties in a vector
tablex <- table(x)
tietable <- tablex[tablex>1]
sum(tietable^3-tietable)/12
}
W<- function(data)
{
## first remove objects which have incomplete data
ok <- apply(data,2,all.ok)
torank <- data[,ok]
N <- ncol(torank) # number of objects with
complete data
k <- nrow(torank) # number of judges
ranked <- t(apply(torank,1,rank)) # transposed as
rank returns vectors, hence columns
## work out if there are ties in the data
ties.y <- sum(apply(torank,1,ties))
# get the sum ranks for each object
rj <- apply(ranked,2,sum)
ssq <- var(rj,unbiased=F,SumSquares=T)
den <- k^2*(N^3 - N)/12
W <- ssq/den
rSav <- (k*W - 1)/(k-1)
cat("There were complete data for N = ",N,"
objects",fill=T)
cat(" ranked by k = ",k," judges",fill=T)
cat(" Giving Kendall's coefficient of
concordance, W = ",round(W,3),fill=T)
cat(" uncorrected for any ties.",fill=T)
cat(" Average Spearman correlation between the
judges was: ",round(rSav,3),fill=T)
if (ties.y > 0) {
cat(" ",fill=T)
cat(" However, there were a total of
",ties.y," ties in the data",fill=T)
## deal with the ties
tiefactor <- sum(apply(ranked,1,tie.correct))
tieden <- den - k*tiefactor
Wtie <- ssq/tieden
cat(" W corrected for ties =
",round(Wtie,4),fill=T)
rSav <- (k*Wtie - 1)/(k-1)
cat(" Corrected average Spearman correlation
between the judges was: ",round(rSav,3),fill=T)
cat(" NB: any significance test that follows
is not corrected for ties.",fill=T)
}
cat(" ",fill=T)
if (N > 7) {
## can work out statistical significance as
distribution of k(N-1)W is chisquare
## with df = N-1
chisqval <- k*(N-1)*W
pval <- 1-pchisq(chisqval,N-1)
cat(" Since N>7, significance estimated by chi
squared approximation",fill=T)
cat(" Chi square = ",round(chisqval,5),fill=T)
cat(" p = ",round(pval,4),fill=T)
}
}
W(tiedata)
Chris Evans <chris@psyctc.org>
Consultant Psychiatrist in Psychotherapy,
Rampton Hospital; Associate R&D Director,
Tavistock & Portman NHS Trust;
Hon. SL Institute of Psychiatry
*** My views are my own and not representative
of those institutions ***
|