Simulated Amounts        package:compositions        R Documentation

_S_i_m_u_l_a_t_e_d _a_m_o_u_n_t _d_a_t_a_s_e_t_s

_D_e_s_c_r_i_p_t_i_o_n:

     Several simulated datasets intended as reference examples for
     various conceptual and statistical models of compositions and
     amounts.

_U_s_a_g_e:

     data(SimulatedAmounts)

_F_o_r_m_a_t:

     Data matrices with 60 cases and 3 or 5 variables.

_D_e_t_a_i_l_s:

     The statistical analysis of amounts and compositions is still a
     matter of discussion. Four essentially different approaches are
     provided in this package around the classes "rplus", "aplus",
     "rcomp", "acomp". There is no absolutely "right" approach, since
     there is a connection between these approaches and the processes
     originating the data. We provide here simulated standard datasets
     and the corresponding simulation procedures following these
     several models to provide ``good'' analysis examples and to show
     what these models actually look like in data.

     The data sets are simulated according to correlated lognormal
     distributions (sa.lognormals, sa.lognormals5), winsorised
     correlated normal distributions (sa.tnormals, sa.tnormals5),
     Dirichlet distributions on the simplex (sa.dirichlet,
     sa.dirichlet5), uniform distributions on the simplex (sa.uniform,
     sa.uniform5), and a grouped dataset (sa.groups, sa.groups5) with
     three groups (given in sa.groups.area and sa.groups5.area), all
     distributed according to a lognormal distribution with
     group-dependent means.

     We can imagine that amounts evolve in nature, e.g. in part of the
     soil they are diluted and transported in a transport medium,
     usually water, which comes from an independent source (the rain,
     for instance), and this new composition is normalized by taking a
     sample of standard size. For each of the datasets sa.X there is a
     corresponding sa.X'.dil' dataset which is built by simulating
     exactly that process on the corresponding sa.X dataset. The
     amounts in the sa.X'.dil' datasets are given in ppm. This idea of
     a transport medium is a major argument for a compositional
     approach, because the total amount given by the sum of the parts
     is induced by the dilution given by the medium and thus
     uninformative for the original investigated process.

     If we imagine now these amounts flowing into a river and
     sedimenting, the different contributions are accumulated along the
     river and renormalized to a unit portion on taking samples again.
     For each of the datasets sa.X'.dil' there is a corresponding
     sa.X'.mix' dataset which is built from it by simulating exactly
     that accumulation process. Mixing of different compositions is a
     major argument against the log based approaches ('aplus',
     'acomp') since mixing is a highly nonlinear operation in terms of
     the logratios.

_S_o_u_r_c_e:

     The datasets are simulated for this package and are under the GNU
     Public Library Licence Version 2 or newer.

_R_e_f_e_r_e_n_c_e_s:

     http://statistic.boogaart.de/compositions/data

     Aitchison, J. (1986) _The Statistical Analysis of Compositional
     Data_ Monographs on Statistics and Applied Probability. Chapman &
     Hall Ltd., London (UK). 416p.

     Zier Rehder

     xxx Something recommending to use log-transforms

     xxx Something warning against log-transforms

_E_x_a_m_p_l_e_s:

     # Load the simulated datasets (the sa.* objects) described on this page.
     data(SimulatedAmounts)

     # Lognormal datasets: original, diluted (.dil) and mixed (.mix) versions.
     plot.acomp(sa.lognormals)
     plot.acomp(sa.lognormals.dil)
     plot.acomp(sa.lognormals.mix)
     plot.acomp(sa.lognormals5)
     plot.acomp(sa.lognormals5.dil)
     plot.acomp(sa.lognormals5.mix)

     library(MASS)
     # Winsorised normal datasets, drawn with the rcomp plot method.
     plot.rcomp(sa.tnormals)
     plot.rcomp(sa.tnormals.dil)
     plot.rcomp(sa.tnormals.mix)
     plot.rcomp(sa.tnormals5)
     plot.rcomp(sa.tnormals5.dil)
     plot.rcomp(sa.tnormals5.mix)

     # Grouped datasets, coloured by group membership.  The 5-part variants
     # are coloured with their own grouping factor sa.groups5.area.
     plot.acomp(sa.groups, col = as.numeric(sa.groups.area), pch = 20)
     plot.acomp(sa.groups.dil, col = as.numeric(sa.groups.area), pch = 20)
     plot.acomp(sa.groups.mix, col = as.numeric(sa.groups.area), pch = 20)
     plot.acomp(sa.groups5, col = as.numeric(sa.groups5.area), pch = 20)
     plot.acomp(sa.groups5.dil, col = as.numeric(sa.groups5.area), pch = 20)
     plot.acomp(sa.groups5.mix, col = as.numeric(sa.groups5.area), pch = 20)

     # Datasets documented as uniform on the simplex.
     plot.acomp(sa.uniform)
     plot.acomp(sa.uniform.dil)
     plot.acomp(sa.uniform.mix)
     plot.acomp(sa.uniform5)
     plot.acomp(sa.uniform5.dil)
     plot.acomp(sa.uniform5.mix)

     # Datasets documented as Dirichlet distributed on the simplex.
     plot.acomp(sa.dirichlet)
     plot.acomp(sa.dirichlet.dil)
     plot.acomp(sa.dirichlet.mix)
     plot.acomp(sa.dirichlet5)
     plot.acomp(sa.dirichlet5.dil)
     plot.acomp(sa.dirichlet5.mix)

     # The data was simulated with the following commands:

     library(MASS)
     # Simulate dilution in a transport medium: a lognormal medium amount
     # (log-mean 5, log-sd 1) is appended to x as an extra column, the
     # result is passed through clo(), the medium column is dropped again
     # and the remaining parts are multiplied by 1E6 (the .dil datasets
     # are documented as ppm).
     dilution <- function(x) {
       n_parts <- ncol(x)
       with_medium <- cbind(x, exp(rnorm(nrow(x), mean = 5, sd = 1)))
       clo(with_medium)[, 1:n_parts] * 1E6
     }
     # Simulate sequential mixing: accumulate each column of x down the
     # rows with cumsum(), pass the running totals through clo() and
     # multiply by 1E6.
     seqmix <- function(x) {
       accumulated <- apply(x, 2, cumsum)
       clo(accumulated) * 1E6
     }

     # Column names used for the 5-part and the 3-part datasets; the
     # 3-part names are the first three of the 5-part names.
     vars5 <- c("Cu", "Zn", "Pb", "Cd", "Co")
     vars  <- vars5[1:3]

     # 3-part correlated lognormal amounts: 60 rows of standard normal
     # draws are multiplied by the Cholesky factor of the (symmetric)
     # covariance matrix, shifted by the column means 1, 2, 3 and
     # exponentiated.
     sa.lognormals <- exp(
       matrix(rnorm(3 * 60), ncol = 3) %*%
         chol(rbind(c( 1.0,  0.8, -0.2),
                    c( 0.8,  1.0, -0.2),
                    c(-0.2, -0.2,  1.0))) +
         matrix(rep(1:3, each = 60), ncol = 3)
     )
     dimnames(sa.lognormals) <- list(NULL, vars)

     plot.acomp(sa.lognormals)
     pairs(sa.lognormals)

     # Diluted version of the dataset.
     sa.lognormals.dil <- dilution(sa.lognormals)
     plot.acomp(sa.lognormals.dil)
     pairs(sa.lognormals.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.lognormals.mix <- seqmix(sa.lognormals.dil)
     plot.acomp(sa.lognormals.mix)
     pairs(sa.lognormals.mix)

     # 5-part correlated lognormal amounts: the first three parts use the
     # same covariance as the 3-part case; the last two parts form a
     # separate, strongly correlated pair (variance 5, covariance 4.9)
     # with column means of -2.
     sa.lognormals5 <- exp(
       matrix(rnorm(5 * 60), ncol = 5) %*%
         chol(rbind(c( 1.0,  0.8, -0.2, 0.0, 0.0),
                    c( 0.8,  1.0, -0.2, 0.0, 0.0),
                    c(-0.2, -0.2,  1.0, 0.0, 0.0),
                    c( 0.0,  0.0,  0.0, 5.0, 4.9),
                    c( 0.0,  0.0,  0.0, 4.9, 5.0))) +
         matrix(rep(c(1:3, -2, -2), each = 60), ncol = 5)
     )
     dimnames(sa.lognormals5) <- list(NULL, vars5)

     plot.acomp(sa.lognormals5)
     pairs(sa.lognormals5)

     # Diluted version of the dataset.
     sa.lognormals5.dil <- dilution(sa.lognormals5)
     plot.acomp(sa.lognormals5.dil)
     pairs(sa.lognormals5.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.lognormals5.mix <- seqmix(sa.lognormals5.dil)
     plot.acomp(sa.lognormals5.mix)
     pairs(sa.lognormals5.mix)


     # Grouping factor: three areas with 20 consecutive cases each.
     sa.groups.area <- factor(rep(c("Upper", "Middle", "Lower"), each = 20))

     # 3-part grouped lognormal amounts: all groups share half the
     # covariance of the ungrouped 3-part case; the means depend on the
     # group (rows 1-20, 21-40, 41-60) and on the part (one column each).
     sa.groups <- structure(exp(matrix(rnorm(3 * 20 * 3), ncol = 3) %*%
                                  chol(0.5 * matrix(c(1, 0.8, -0.2, 0.8, 1,
                                                      -0.2, -0.2, -0.2, 1),
                                                    ncol = 3)) +
                                  matrix(rep(c(1, 2, 2.5, 2, 2.9, 5, 4, 2, 5),
                                             each = 20), ncol = 3)),
                            dimnames = list(NULL, c("clay", "sand", "gravel")))

     plot.acomp(sa.groups, col = as.numeric(sa.groups.area), pch = 20)
     # Scatterplot matrix of the grouped data itself (not of sa.lognormals).
     pairs(sa.groups, col = as.numeric(sa.groups.area), pch = 20)

     # Diluted version of the dataset.
     sa.groups.dil <- dilution(sa.groups)
     plot.acomp(sa.groups.dil, col = as.numeric(sa.groups.area), pch = 20)
     pairs(sa.groups.dil, col = as.numeric(sa.groups.area), pch = 20)

     # Sequentially mixed version, derived from the diluted data.
     sa.groups.mix <- seqmix(sa.groups.dil)
     plot.acomp(sa.groups.mix, col = as.numeric(sa.groups.area), pch = 20)
     pairs(sa.groups.mix, col = as.numeric(sa.groups.area), pch = 20)


     # Grouping factor for the 5-part data: three areas, 20 cases each.
     sa.groups5.area <- factor(rep(c("Upper", "Middle", "Lower"), each = 20))

     # 5-part grouped lognormal amounts.  Each cbind() argument holds the
     # three group means of one part; indexing with rep(1:3, each = 20)
     # repeats every group's row of means for its 20 cases.
     sa.groups5 <- exp(
       matrix(rnorm(5 * 20 * 3), ncol = 5) %*%
         chol(rbind(c( 1.0,  0.8, -0.2, 0.0, 0.0),
                    c( 0.8,  1.0, -0.2, 0.0, 0.0),
                    c(-0.2, -0.2,  1.0, 0.0, 0.0),
                    c( 0.0,  0.0,  0.0, 5.0, 4.9),
                    c( 0.0,  0.0,  0.0, 4.9, 5.0))) +
         cbind(c( 1,  2.0,  2.5),
               c( 2,  2.9,  5.0),
               c( 4,  2.5,  0.0),
               c(-2, -1.0, -1.0),
               c(-1, -2.0, -3.0))[rep(1:3, each = 20), ]
     )
     dimnames(sa.groups5) <- list(NULL, vars5)

     plot.acomp(sa.groups5, col = as.numeric(sa.groups5.area), pch = 20)
     pairs(sa.groups5, col = as.numeric(sa.groups5.area), pch = 20)

     # Diluted version of the dataset.
     sa.groups5.dil <- dilution(sa.groups5)
     plot.acomp(sa.groups5.dil, col = as.numeric(sa.groups5.area), pch = 20)
     pairs(sa.groups5.dil, col = as.numeric(sa.groups5.area), pch = 20)

     # Sequentially mixed version, derived from the diluted data.
     sa.groups5.mix <- seqmix(sa.groups5.dil)
     plot.acomp(sa.groups5.mix, col = as.numeric(sa.groups5.area), pch = 20)
     pairs(sa.groups5.mix, col = as.numeric(sa.groups5.area), pch = 20)


     # 3-part winsorised normal amounts: correlated normal draws with
     # column means 0, 1, 2; negative values are replaced by 0 via pmax().
     sa.tnormals <- structure(pmax(matrix(rnorm(3 * 60), ncol = 3) %*%
                                     chol(matrix(c(1, 0.8, -0.2, 0.8, 1,
                                                   -0.2, -0.2, -0.2, 1),
                                                 ncol = 3)) +
                                     matrix(rep(0:2, each = 60), ncol = 3),
                                   0),
                              dimnames = list(NULL, c("clay", "sand", "gravel")))

     plot.rcomp(sa.tnormals)
     pairs(sa.tnormals)

     # Diluted version.  The data can contain zeros, so the derived
     # datasets are drawn with plot.rcomp as well, matching the plots of
     # the shipped sa.tnormals* datasets earlier in these examples.
     sa.tnormals.dil <- dilution(sa.tnormals)
     plot.rcomp(sa.tnormals.dil)
     pairs(sa.tnormals.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.tnormals.mix <- seqmix(sa.tnormals.dil)
     plot.rcomp(sa.tnormals.mix)
     pairs(sa.tnormals.mix)


     # 5-part winsorised normal amounts: the first three parts are as in
     # the 3-part case; the last two parts are a strongly correlated pair
     # (variance 0.05, covariance 0.049) with mean 0.1.  Negative values
     # are replaced by 0 via pmax().
     sa.tnormals5 <- structure(pmax(matrix(rnorm(5 * 60), ncol = 5) %*%
                                      chol(matrix(c(1, 0.8, -0.2, 0, 0,
                                                    0.8, 1, -0.2, 0, 0,
                                                    -0.2, -0.2, 1, 0, 0,
                                                    0, 0, 0, 0.05, 0.049,
                                                    0, 0, 0, 0.049, 0.05),
                                                  ncol = 5)) +
                                      matrix(rep(c(0:2, 0.1, 0.1), each = 60),
                                             ncol = 5),
                                    0),
                               dimnames = list(NULL, vars5))

     plot.rcomp(sa.tnormals5)
     pairs(sa.tnormals5)

     # Diluted version.  The data can contain zeros, so the derived
     # datasets are drawn with plot.rcomp as well, matching the plots of
     # the shipped sa.tnormals5* datasets earlier in these examples.
     sa.tnormals5.dil <- dilution(sa.tnormals5)
     plot.rcomp(sa.tnormals5.dil)
     pairs(sa.tnormals5.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.tnormals5.mix <- seqmix(sa.tnormals5.dil)
     plot.rcomp(sa.tnormals5.mix)
     pairs(sa.tnormals5.mix)


     # 3-part dataset documented as Dirichlet on the simplex: one column
     # of 60 gamma draws per shape parameter (0.2, 2 and 3).
     sa.dirichlet <- vapply(c(0.2, 2, 3),
                            function(shape) rgamma(60, shape = shape),
                            numeric(60))
     colnames(sa.dirichlet) <- vars

     plot.acomp(sa.dirichlet)
     pairs(sa.dirichlet)

     # Diluted version of the dataset.
     sa.dirichlet.dil <- dilution(sa.dirichlet)
     plot.acomp(sa.dirichlet.dil)
     pairs(sa.dirichlet.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.dirichlet.mix <- seqmix(sa.dirichlet.dil)
     plot.acomp(sa.dirichlet.mix)
     pairs(sa.dirichlet.mix)


     # 5-part dataset documented as Dirichlet on the simplex: one column
     # of 60 gamma draws per shape parameter (0.2, 2, 3, 0.1 and 0.1).
     sa.dirichlet5 <- vapply(c(0.2, 2, 3, 0.1, 0.1),
                             function(shape) rgamma(60, shape = shape),
                             numeric(60))
     colnames(sa.dirichlet5) <- vars5

     plot.acomp(sa.dirichlet5)
     pairs(sa.dirichlet5)

     # Diluted version of the dataset.
     sa.dirichlet5.dil <- dilution(sa.dirichlet5)
     plot.acomp(sa.dirichlet5.dil)
     pairs(sa.dirichlet5.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.dirichlet5.mix <- seqmix(sa.dirichlet5.dil)
     plot.acomp(sa.dirichlet5.mix)
     pairs(sa.dirichlet5.mix)

     # 3-part dataset documented as uniform on the simplex: three columns
     # of 60 gamma draws, each with shape 1.
     sa.uniform <- vapply(rep(1, 3),
                          function(shape) rgamma(60, shape = shape),
                          numeric(60))
     colnames(sa.uniform) <- vars

     plot.acomp(sa.uniform)
     pairs(sa.uniform)

     # Diluted version of the dataset.
     sa.uniform.dil <- dilution(sa.uniform)
     plot.acomp(sa.uniform.dil)
     pairs(sa.uniform.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.uniform.mix <- seqmix(sa.uniform.dil)
     plot.acomp(sa.uniform.mix)
     pairs(sa.uniform.mix)


     # 5-part dataset documented as uniform on the simplex: five columns
     # of 60 gamma draws, each with shape 1.
     sa.uniform5 <- vapply(rep(1, 5),
                           function(shape) rgamma(60, shape = shape),
                           numeric(60))
     colnames(sa.uniform5) <- vars5

     plot.acomp(sa.uniform5)
     pairs(sa.uniform5)

     # Diluted version of the dataset.
     sa.uniform5.dil <- dilution(sa.uniform5)
     plot.acomp(sa.uniform5.dil)
     pairs(sa.uniform5.dil)

     # Sequentially mixed version, derived from the diluted data.
     sa.uniform5.mix <- seqmix(sa.uniform5.dil)
     plot.acomp(sa.uniform5.mix)
     pairs(sa.uniform5.mix)

     # List the simulated objects.  `pattern` is a regular expression, so
     # it is anchored and the dot escaped to match only names that start
     # with "sa." (the unanchored "sa.*" matches any name containing "sa").
     objects(pattern = "^sa\\.")
      

