# Simulation utilities: draw a random correlation structure, generate
# multivariate Gaussian data from it, and fit a one-factor model.

library(MASS)  # mvrnorm(): multivariate normal random vectors
library(stats) # factanal(); attached by default, kept for explicitness

# Draw a random correlation matrix, simulate Gaussian data from it, fit
# a maximum-likelihood factor analysis with one factor, and report on
# the fitted factor.
#
# Args:
#   d      Number of observed variables.
#   n      Number of simulated observations (default 1000).
#   maxcor Upper bound on off-diagonal correlations (default 2/(d-1)).
#   mincor Lower bound on off-diagonal correlations (default 0).
#   output One of "variance" (fraction of total variance explained by
#          the factor), "loadings" (vector of factor loadings), or
#          "everything" (list with loadings, the correlation matrix,
#          variance explained, and the simulated data).
#
# Returns: per `output`; an explanatory string for unrecognized choices.
factor.from.random.corr <- function(d, n = 1000, maxcor = 2/(d - 1),
                                    mincor = 0, output = "variance") {
  # Random correlation matrix with off-diagonal entries in [mincor, maxcor]
  sigma <- make.corr.matrix(d, maxcor = maxcor, mincor = mincor,
                            diag.dom = FALSE)
  # Multiply by the standard variance of 15^2 to go from the correlation
  # matrix to the covariance matrix (IQ-style scale: mean 100, sd 15)
  varcovar <- sigma * 15 * 15
  # Set the means to 100
  mu <- rep(100, d)
  # Make up n random Gaussian vectors
  x <- mvrnorm(n, mu, varcovar)
  # Maximum-likelihood factor analysis with 1 factor
  my.fa <- factanal(x, 1)
  # NOTE: fixed extraction garbling here — the original read
  # `my.fa\$loadings`, which is a syntax error in R.
  v <- as.vector(my.fa$loadings)
  switch(output,
         # Fraction of total variance described by the single factor
         variance = sum(v^2)/d,
         loadings = v,
         everything = list(loadings = v, sigma = sigma,
                           varexp = sum(v^2)/d, data = x),
         # Unnamed final element = switch() default for character input
         "I can't tell what you want me to output")
}

# Create a random symmetric positive-definite matrix with all
# non-negative entries to serve as a correlation matrix, via rejection
# sampling: keep drawing from the easy symmetric-uniform distribution
# until the draw passes the acceptance test.
#
# Args:
#   d        Matrix dimension.
#   maxcor   Upper bound on off-diagonal entries (default 2/(d-1)).
#   mincor   Lower bound on off-diagonal entries (default 0).
#   diag.dom If TRUE, accept only diagonally-dominated matrices
#            (a stricter, sufficient condition for positive
#            definiteness); if FALSE, accept any positive-definite draw.
#
# Returns: a d x d correlation matrix (unit diagonal, symmetric,
# positive-definite).
make.corr.matrix <- function(d, maxcor = 2/(d - 1), mincor = 0,
                             diag.dom = FALSE) {
  if (diag.dom) {
    # A symmetric matrix that is diagonally dominated is positive
    # definite. "Diagonally dominated" means (1) the diagonal entries
    # are positive, and (2) for each row, the sum of the absolute
    # values of the off-diagonal entries is less than the diagonal
    # entry. For a correlation matrix the diagonal is all 1s, which
    # settles (1) and reduces (2) to: every off-diagonal row sum < 1.
    testf <- function(m) {
      diag(m) <- 0  # local copy; simplifies the row sums
      # Entries are non-negative by construction, so no abs() needed;
      # vectorized replacement for the original row-by-row loop.
      all(rowSums(m) < 1)
    }
  } else {
    # Accept any positive-definite draw. chol(m) attempts the Cholesky
    # decomposition, which only succeeds for symmetric positive-definite
    # input (it errors on non-positive-definite m, though it does not
    # check symmetry). try(..., silent = TRUE) converts that error into
    # a "try-error" object, so is.matrix() of the result tells us
    # whether m was positive definite. Admittedly this wastes some work
    # by actually performing the decomposition when it succeeds.
    testf <- function(m) {
      is.matrix(try(chol(m), silent = TRUE))
    }
  }
  # Rejection loop: redraw until the acceptance test passes
  pos.def <- FALSE
  while (!pos.def) {
    sigma <- make.symm.matrix(d, maxentry = maxcor, minentry = mincor)
    diag(sigma) <- 1
    pos.def <- testf(sigma)
  }
  return(sigma)
}

# Create a symmetric d x d matrix with uniformly-distributed
# non-negative entries and zeroes on the diagonal.
#
# Args:
#   d        Matrix dimension.
#   minentry Lower bound of the uniform entries (default 0).
#   maxentry Upper bound of the uniform entries (default 1).
#
# Returns: a symmetric d x d matrix with zero diagonal.
make.symm.matrix <- function(d, minentry = 0, maxentry = 1) {
  # Number of distinct components = entries strictly above the
  # diagonal: (d-1) + (d-2) + ... + 1 = (d-1)d/2
  m <- matrix(data = 0, nrow = d, ncol = d)
  entries <- runif(d * (d - 1)/2, min = minentry, max = maxentry)
  # Walk the upper triangle row by row, mirroring into the lower
  # triangle. Indexing with k replaces the original's entries[-1]
  # shrinking, which copied the vector on every iteration (O(d^4)
  # total work); seq_len() also makes d = 1 safe (empty loop).
  k <- 1
  for (i in seq_len(d - 1)) {
    for (j in (i + 1):d) {
      m[i, j] <- entries[k]
      m[j, i] <- entries[k]
      k <- k + 1
    }
  }
  return(m)
}