
\name{centerScale}
\alias{centerScale}
\alias{ctr}
\title{center and scale a vector to zero median, unit mad  }
\description{center and scale a vector to zero median, unit mad  }
\usage{
centerScale(x)
}
\arguments{
  \item{x}{ a numeric vector}
}
%\details{
%}
\value{ a shifted and scaled version of x with zero median, unit mad
}
%\references{  }
\author{VJ Carey <stvjc@channing.harvard.edu>  }
%\note{  }
%
%
%\seealso{  }

\examples{
summary(centerScale(runif(200)))
}
\keyword{ models }

\eof
\name{edd}
\alias{edd}
\title{ new expression density diagnostics interface }
\description{
this will replace edd.unsupervised; has more
sensible parameters
}
\usage{
edd(eset, distList=eddDistList, tx=c(sort,flatQQNormY)[[1]],
	refDist=c("multiSim", "theoretical")[1], 
	method=c("knn", "nnet", "test")[1], nRowPerCand=100, ...)
}
\arguments{
  \item{eset}{ eset -- instance of Biobase \code{\link{exprSet}} class}
  \item{distList}{ distList -- list comprised of eddDist objects}
  \item{tx}{ tx -- transformation of data and reference prior
to classification }
  \item{refDist}{ refDist -- type of reference distribution system to use}
  \item{method}{ method -- type of classifier to use.  knn is k-nearest
neighbors, nnet is neural net, test is max p-value from ks.test}
  \item{nRowPerCand}{ nRowPerCand -- number of realizations for
a multiSim reference system}
  \item{\dots}{ \dots -- parameters to classifiers}
}
\details{
Classifies genes according to distributional shape, by
comparing observed expression distributions to
a collection of references, which may be simulated
or evaluated theoretically.

The distList argument is important.  It enumerates the
catalog of distributions for classification of gene
expression vectors by distributional shape.  See the
HOWTO-edd vignette for information on how this list
is constructed and how it can be extended.

The tx argument specifies how the data are processed
for comparison to the reference catalog.  This is
a function on a vector returning a vector, but the
input and the output need not have the same length.
The default value of tx is sort, which entails that
the order statistics are treated as multivariate data
for classification.  

The refDist argument selects the type of reference
catalog.  Options are 'multiSim', for which the
reference consists of nRowPerCand realizations of
each catalog entry, and 'theoretical', for which
the reference consists of one vector of quantiles
for each catalog entry.

The method argument selects the type of classifier.
It would be desirable to allow this to be a function,
but classifier arguments and return values are not
sufficiently standardized to permit this
at present; see the e1071 package for some work
on handling various classifiers programmatically
(e.g., \code{tune}).

}
\value{
a character vector or factor depending on the classifier
}
%\references{  }
\author{Vince Carey <stvjc@channing.harvard.edu>}
%\note{  }


\seealso{\code{\link{exprSet}}  }

\examples{
require(Biobase)
data(eset)
# should filter to genes with reasonable variation
table( edd(eset, meth="nnet", size=10, decay=.2) )
library(golubEsets)
data(golubMerge)
madvec <- apply(exprs(golubMerge),1,mad)
minvec <- apply(exprs(golubMerge),1,min)
keep <- (madvec > median(madvec)) & (minvec > 300)
gmfilt <- golubMerge[keep==TRUE,]
ALL <- gmfilt$ALL.AML=="ALL"
gall <- gmfilt[,ALL==TRUE]
gaml <- gmfilt[,ALL==FALSE]
alldists <- edd(gall, meth="nnet", size=10, decay=.2)
amldists <- edd(gaml, meth="nnet", size=10, decay=.2)
table(alldists,amldists)
}
\keyword{ models }

\eof
\name{eddDist-class}
\docType{class}
\alias{eddDist-class}
% [1] "B28"              "B82"              "cdfName"          "CS1"
% [5] "eddDistList"      "edd.unsupervised" "genName"          "LN01"
% [9] "Mad"              "med"              "MIXN1"            "MIXN2"
%[13] "N01"              "parms"            "qfName"           "qfun"
%[17] "qmixnorm"         "rmixnorm.alt"     "stub"             "T3"
%[21] "tag"              "testVec"          "U01"
\alias{plotlim}
\alias{qfName}
\alias{qfun}
\alias{stub}
\alias{tag}
\alias{testVec}
\alias{B28}
\alias{T3}
\alias{B82}
\alias{CS1}
\alias{cdfName}
\alias{cdfName,eddDist-method}
\alias{eddDistList}
\alias{genName}
\alias{genName,eddDist-method}
\alias{LN01}
\alias{U01}
\alias{Mad}
\alias{Mad,eddDist-method}
\alias{med}
\alias{med,eddDist-method}
\alias{parms}
\alias{parms,eddDist-method}
\alias{qfName,eddDist-method}
\alias{qfun,eddDist-method}
\alias{stub,eddDist-method}
\alias{tag,eddDist-method}
\alias{latexTag}
\alias{latexTag,eddDist-method}
\alias{plotlim,eddDist-method}
\alias{testVec,eddDist-method}
\alias{testVec,numeric,eddDist,logical-method}
\alias{qmixnorm}
\alias{dmixnorm}
\alias{pmixnorm}
\alias{rmixnorm.alt}
\alias{MIXN1}
\alias{MIXN2}
\alias{N01}
\title{Class "eddDist" }
\description{  objects from this class can be used to
simulate or tabulate reference distributions for edd}
\section{Objects from the Class}{
Objects can be created by calls of the form \code{new("eddDist", ...)}.
These objects include a simple stub (like "norm") naming a
distribution system in R; the stub can be modified to obtain
the name of a generator (prepend "r"), cdf (prepend "p"), etc.
}
\section{Slots}{
  \describe{
    \item{\code{stub}:}{Object of class \code{"character"} stub
of a distribution system in R, to which "r" is prepended
to get a generator, "p" to get a cdf, "q" to get a quantile
function... }
    \item{\code{parms}:}{Object of class \code{"numeric"} named
vector of parameters for a member of the family }
    \item{\code{median}:}{Object of class \code{"numeric"} median
of the distribution (sometimes has to be computed by simulation) }
    \item{\code{mad}:}{Object of class \code{"numeric"}  MAD
of the distribution (sometimes has to be computed by simulation)}
    \item{\code{tag}:}{Object of class \code{"character"}  an
informative character tag }
    \item{\code{latexTag}:}{Object of class \code{"character"}  an
informative character tag in latex format, which can use
subscripts, greek letters, etc. }
  }
}

\section{Methods}{
  \describe{
    \item{cdfName}{\code{signature(x = "eddDist")}: prepend "p" to stub(x) }
    \item{genName}{\code{signature(x = "eddDist")}: prepend "r" to stub(x) }
    \item{Mad}{\code{signature(x = "eddDist")}: accessor }
    \item{med}{\code{signature(x = "eddDist")}: accessor }
    \item{parms}{\code{signature(x = "eddDist")}: accessor }
    \item{qfName}{\code{signature(x = "eddDist")}: prepend "q" to stub(x) }
    \item{qfun}{\code{signature(e = "eddDist")}: construct a quantile
function from the object }
    \item{stub}{\code{signature(x = "eddDist")}: accessor }
    \item{tag}{\code{signature(x = "eddDist")}: accessor }
    \item{latexTag}{\code{signature(x = "eddDist")}: accessor }
    \item{testVec}{\code{signature(x = "numeric", eddd = "eddDist", is.centered = "logical")}: apply ks.test to a given vector x against the dist specified
by the eddDist components }
  }
}
%\references{ ~put references to the literature/web site here ~ }
%\author{ ~~who you are~~ }
%\note{ ~~further notes~~ }
%
% ~Make other sections like Warning with \section{Warning }{....} ~
%
%\seealso{ ~~objects to See Also as \code{\link{~~fun~~}}, ~~~
% or\code{\link{CLASSNAME-class}} for links to other classes }

\examples{
library(edd)
eddDistList[[1]]
qfun(eddDistList[[1]])
}
\keyword{classes}

\eof
\name{eddObsolete}
\alias{eddObsolete}
\alias{fq.matrows}
\alias{makeCandmat.theor}
\alias{s.rmix1norm}
\alias{s.rmix2norm}
\alias{mkt}
\alias{testcl}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ expression density diagnostics (obsolete interface) }
\description{
classify cohort distributions of gene expression values
}
\usage{
eddObsolete(eset, 
   ref=c("multiCand", "uniCand", "test", "nnet")[1], 
   k=10, l=6, nnsize=6, nniter=200)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{eset}{ instance of Biobase class exprSet }
  \item{ref}{ one of 'multiCand', 'uniCand', 'test' or 'nnet'.
see details.}
  \item{k}{ k setting for knn -- number of nearest neighbors to poll }
  \item{l}{ l setting for knn -- minimum number of concordant assents }
  \item{nnsize}{ size parameter for nnet }
  \item{nniter}{ iter setting for nnet }
}
\details{
Four options are available for classifying expression densities.
Data on each gene are shifted and scaled to have median zero and mad 1.
They are then compared to shapes of reference distributions (standard Gaussian,
chisq(1), lognorm(0,1), t(3), .75 N(0,1) + .25 N(4,1), .25 N(0,1) + .75 N(4,1), Beta(2,8), Beta(8,2),
U(0,1)) after each of these has been transformed to have median 0 and mad 1.
Classification proceeds by one of four methods, selected by setting of
the 'ref' argument.  Suppose there are S samples in the exprSet.

multiCand -- 100 samples of size S are drawn from each reference
distribution and then scaled to med 0, mad 1.  The knn(k,l) procedure
is used to classify the genes based on proximity to representatives
in this set

uniCand -- one representative of size S is created from each reference
distribution, using the theoretical quantiles. knn(1,0) is used
to classify genes based on proximity to these representatives

test -- classification of each gene is based on maximum p-value
of Kolmogorov-Smirnov tests vs each reference distribution.  If
the p-value never exceeds .1, 'doubt' is declared

nnet -- 100 samples of size S are drawn from each reference distribution
and then scaled to med 0, mad 1.  A neural net is fit to this dataset
and the associated labels.  The net is then applied to the
scaled gene expression data and the predictions are used
for classification.
}
\value{
the vector of classifications, with NAs for nonclassifiable
genes
}
\author{ VJ Carey }

\examples{
require(Biobase)
data(eset)
print(summary(eddObsolete(eset,k=10,l=2)))

# 6 x 20 x 50 test problem
set.seed(1234)
test <- matrix(NA,nr=120,nc=50)
test[1:20,] <- rnorm(1000)
test[21:40,] <- rt(1000,3)
test[41:60,] <- rexp(1000,4)
test[61:80,] <- rmixnorm(1000,.750,0,1,4,1)
test[81:100,] <- runif(1000)
test[101:120,] <- rlnorm(1000)
labs <- c(rep("n01",20),rep("t3",20),
rep("exp",20),rep("mix1",20),rep("u01",20),rep("ln01",20))

TT <- new("exprSet", exprs=test) # should require phenoData
multrun <- eddObsolete( TT, k=10, l=2 )
print(table(given=labs, multiCand=multrun))
netrun <- eddObsolete( TT, ref="nnet" )
print(table(given=labs, netout=netrun))
newrun <- edd( TT, meth="nnet", size=10, decay=.2 )
print( table( given=labs, newout=newrun ) )
newrun <- edd( TT, meth="test" )
print( table( given=labs, newout=newrun ) )


}
\keyword{ models }% at least one, from doc/KEYWORDS


\eof
\name{flatQQNorm}
\alias{flatQQNorm}
\alias{flatQQNormY}
\title{ QQ difference plot }
\description{
standard normal transforms to horizontal line at 0
}
\usage{
flatQQNorm(y)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{y}{ numeric vector }
}
\value{
list with elements x and y describing the trace of the
QQ difference plot
}

\examples{
set.seed(1234)
plot(flatQQNorm(rnorm(40)),ylim=c(-3,3),ylab="QQNorm - line of identity ")
}
\keyword{ models }


\eof
\name{latEDtable}
\alias{latEDtable}
\title{ use latex tags for dimnames of table created from edd classification }
\description{use latex tags for dimnames of table created from edd classification
}
\usage{
latEDtable(x, baselist=eddDistList, reorder=NULL)
}
\arguments{
  \item{x}{ x -- a table (possibly two dimensional) of results
of applying edd}
  \item{baselist}{ baselist -- a list of eddDist objects}
  \item{reorder}{ reorder -- a numeric vector describing
how to reorder the table rows/columns}
}
\details{
for use with xtable rendering.  table will give
results with margin names in lexicographic order;
reorder can be used to get a different order.
}
\value{
a matrix with dimnames computed from the latexTag slots
of the corresponding eddDist objects
}
%\references{  }
\author{Vince Carey <stvjc@channing.harvard.edu>}
%\note{  }


%\seealso{  }

\examples{
require(Biobase)
data(eset)
# should filter to genes with reasonable variation
rawTab <- table( edd(eset, meth="nnet", size=10, decay=.2) )
latEDtable(rawTab)
library(xtable)
xtable(latEDtable(rawTab))
#
realTags <- sapply(eddDistList,tag)
reo <- match(realTags,names(rawTab))
xtable(latEDtable(rawTab,reorder=reo))
}
\keyword{ models }

\eof
\name{makeCandmat.raw}
\alias{makeCandmat.raw}
\alias{rmixnorm}
\alias{pmix1n}
\alias{dmix1n}
\alias{qmix1n}
\alias{pmix2n}
\alias{dmix2n}
\alias{qmix2n}
\title{create and store reference distributions for edd   }
\description{
create and store reference distributions for edd
}
\usage{
makeCandmat.raw (nPerRow=20, nRowPerCand=20, dists=
  eddDistList, centerScale=TRUE)
}
\arguments{
  \item{nPerRow}{ size of each reference sample}
  \item{nRowPerCand}{ number of samples per candidate}
  \item{dists}{ list of eddDist objects specifying
reference distributions }
  \item{centerScale}{ logical indicating that
simulated data should be centered and scaled}
}
%\details{
%includes various utilities for quantiles of two gaussian mixtures
%}
\value{
matrix with rows realized from reference distributions
}
%\references{  }
\author{VJ Carey <stvjc@channing.harvard.edu>  }
%\note{  }


%\seealso{  }

\examples{
makeCandmat.raw(5,2,eddDistList[1:3])
}
\keyword{ models }


\eof
\name{maxKSp}
\alias{maxKSp}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ classify densities by maximum KS p-value }
\description{
compares a sample to reference distributions, identifying
the closest reference by maximum KS p
}
\usage{
maxKSp( x, is.centered=TRUE, dists=eddDistList, thresh=.1 )
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{x}{ matrix of samples, distributions to be classified by row }
  \item{is.centered}{ have data been brought to median 0, mad 1 }
  \item{dists}{ list of instances of class eddDist }
  \item{thresh}{ p-value above which some test
must lie, or else 'outlier' is declared }
}
%\details{
%}
\value{
a vector of classifications
}

\examples{
X <- rbind( rnorm(30), runif(30), rchisq(30,1))
tX <- t(apply(X,1,centerScale))
apply(tX,1,maxKSp)
}
\keyword{ models }% at least one, from doc/KEYWORDS

\eof
\name{plotED}
\alias{plotED}
\title{visualize an eddDist object  }
\description{
plots an eddDist model; can superimpose density sketch
from transformed data
}
\usage{
plotED(x, y, data=NULL, is.centered=FALSE, ...)
}
\arguments{
  \item{x}{ x: eddDist object}
  \item{y}{ y: not used}
  \item{data}{ data: optional vector of data to be
superimposed in the form of a kernel density estimate}
  \item{is.centered}{ is.centered: logical indicating
that the data vector has zero median and unit mad}
  \item{\dots}{ \dots: options passed to curve}
}
%\details{
%}
%\value{
%}
%\references{  }
\author{Vince Carey <stvjc@channing.harvard.edu>}
%\note{  }


%\seealso{  }

\examples{
#
# show the first 8 supplied reference dists
if (interactive()){
omf <- par()$mfrow
oas <- par()$ask
on.exit(par(mfrow=omf,ask=oas))
par(mfrow=c(4,2))
par(ask=TRUE)
}
set.seed(1234)
for (i in 1:8) plotED(eddDistList[[i]])
# illustrate the superposition
if (interactive()) par(mfrow=c(1,1))
x <- rnorm(30,3,4)
plotED(N01,data=x) # relocates/scales x
y <- 12*rbeta(30,2,8)+4
plotED(B28,data=y)
}
\keyword{ models }

\eof
