\name{cate.potassium}
\alias{cate.potassium}
\docType{data}
\title{
  Relative cotton yield for different soil potassium concentrations
}
\description{
  Relative cotton yield for different soil potassium concentrations
}

\format{
  A data frame with 24 observations on the following 2 variables.
  \describe{
    \item{\code{yield}}{Relative yield}
    \item{\code{potassium}}{Soil potassium, ppm}
  }
}
\details{
  Cate & Nelson used this data to determine the minimum optimal amount
  of soil potassium to achieve maximum yield.

  Note, Fig 1 of Cate & Nelson does not match the data from Table 2.  It
  appears that points with high concentrations of potassium were
  shifted left to a truncation point.  Also, the calculations in the
  example below do not quite match the results in Table 1.  Perhaps the
  published data were rounded?
}
\source{
  Cate, R.B. and Nelson, L.A. (1971).
  A simple statistical procedure for partitioning soil test correlation
  data into two classes. \emph{Soil Science Society of America Journal},
  35, 658--660.
  \url{http://www.crops.org/publications/sssaj/abstracts/35/4/SS0350040658}
}

\examples{

# Work on a copy of the data with short column names:
# first column becomes y, second column becomes x
dat <- setNames(cate.potassium, c('y','x'))

CateNelson <- function(dat){
  # For every candidate split of the data (ordered by x) into a low-x
  # group and a high-x group, compute the group means, the within-group
  # corrected sums of squares, and the R-squared of the two-group model.
  #
  # dat: data.frame with columns x and y.
  # Returns a data.frame with columns x, mean1, css1, mean2, css2, r2.
  # Row i describes the split that puts the first i observations (in
  # sorted-by-x order) into group 1 and the rest into group 2.
  # Row 1 stays NA since the loop starts at i = 2.

  dat <- dat[order(dat$x),] # Sort the data by x
  x <- dat$x
  y <- dat$y
  n <- length(y)

  # Both groups need at least 2 observations.  Without this check the
  # sequence 2:(n-2) would count backwards for short inputs.
  if(n < 4) stop("CateNelson needs at least 4 observations")

  # Create a data.frame to store the results
  out <- data.frame(x=NA, mean1=NA, css1=NA, mean2=NA, css2=NA, r2=NA)

  # Corrected sum of squares: squared deviations from the mean, summed
  css <- function(v) { var(v) * (length(v)-1) }
  tcss <- css(y) # Total corrected sum of squares

  for(i in 2:(n-2)){
    y1 <- y[1:i]     # Group 1: the first i observations
    y2 <- y[-(1:i)]  # Group 2: everything else

    out[i, 'x'] <- x[i]
    out[i, 'mean1'] <- mean(y1)
    out[i, 'mean2'] <- mean(y2)
    out[i, 'css1'] <- css1 <- css(y1)
    out[i, 'css2'] <- css2 <- css(y2)
    # Share of the total variation explained by the two-group split
    out[i, 'r2'] <-  ( tcss - (css1+css2)) / tcss
  }
  return(out)
}

cn <- CateNelson(dat)
ix <- which.max(cn$r2) # Row of cn with the largest R-squared
with(dat, plot(y~x, ylim=c(0,110), xlab="Potassium", ylab="Yield"))
title("cate.potassium - Cate-Nelson analysis")
# ix indexes the rows of cn, which are in sorted-by-x order, not the rows
# of dat, so the split location must be read from cn
abline(v=cn$x[ix], col='wheat')

}
\keyword{datasets}
