Imputing binary missing data with hmi and mice -- Error: C stack usage 7969776 is too close to the limit - multilevel-analysis

I'm running HMI on two level data (students in courses) with missing data at the student level. The code throws the following error (Error: C stack usage 7969776 is too close to the limit) when I include a binary value (gender) with missing data.
Below is a reproducible example.
library(MCMCglmm)
library(hmi)
library(mice)
df <- data.frame(post = rtnorm(100,60,20,0,100),
pre = rtnorm(100,40,20,0,100),
gender = rbinom(n=100, size=1, prob=0.20),
course = rep( c("A","B","C","D"), 100*c(0.1,0.2,0.65,0.05) ))
df$post[1:round((0.3)*length(df$post),digits = 0)] <- NA
df$gender[round((0.2)*length(df$post),digits = 0):round((0.5)*length(df$post),
digits = 0)] <- NA
hmi_test <- hmi(df, model_formula = post ~ 1 + pre + gender + (1|course ),
M = 2, maxit = 5, list_of_types = NULL, nitt = 3000, burnin = 1000)
list_of_types_maker(df)

Related

Correlation of error terms in time-series model

I am reading this statistics book where they have mentioned that the attached top plot has no correlation between adjacent residuals. Whereas, the bottom most has correlation with p-0.9. Can anybody please provide some direction as to how to analyze this? Thank you very much for your time.
Correlated errors mean that the lag 1 correlation is p. That is, Cor(Yi, Yi-1) = p. This can be modelled using Yi = mu + p epsiloni-1 + epsiloni where epsiloni ~ N(0, 1) for all i. We can verify that the correlation between adjacent data points is p: Cov(Yi, Yi-1) = Cov(p epsiloni-1 + epsiloni, p epsiloni-2 + epsiloni-1) = Cov(p epsiloni-1, epsiloni-1) = p Var(epsiloni-1) = p. Code to demonstrate appears below:
set.seed(123)
epsilonX <- rnorm(100, 0, 1)
epsilonY <- rnorm(100, 0, 1)
epsilonZ <- rnorm(100, 0, 1)
X <- NULL
Y <- NULL
Z <- NULL
Y[1] <- epsilonY[1]
X[1] = epsilonX[1]
Z[1] = epsilonZ[1]
rhoX = 0
rhoY = 0.5
rhoZ = 0.9
for (i in 2:100) {
Y[i] <- rhoY * epsilonY[i-1] + epsilonY[i]
X[i] <- rhoX * epsilonX[i-1] + epsilonX[i]
Z[i] <- rhoZ * epsilonZ[i-1] + epsilonZ[i]
}
param = par(no.readonly = TRUE)
par(mfrow=c(3,1))
plot(X, type='o', xlab='', ylab='Residual', main=expression(rho*"=0.0"))
abline(0, 0, lty=2)
plot(Y, type='o', xlab='', ylab='Residual', main=expression(rho*"=0.5"))
abline(0, 0, lty=2)
plot(Z, type='o', xlab='', ylab='Residual', main=expression(rho*"=0.9"))
abline(0, 0, lty=2)
#par(param)
acf(X)
acf(Y)
acf(Z)
Note from the acf plots that the lag 1 correlation is insignificant for p = 0, higher for p = 0.5 data (~0.3), and still higher for p = 0.9 data (~0.5).

Error in FUN(X[[i]], ...) : object 'Year' not found when plotting ordination in ggplot

I am having an issue with the ggplot code line where R doesn't like the "group = Year".
Here is what my data looks like:
> head(data.scores.pa)
NMDS1 NMDS2 NMDS3 Site Year Elevation Fire history
1 -0.737547 0.73473457 0.7575643 BF 2004 1710 Burnt
......
> head(spp.scrs2)
species MDS1 MDS2 pval
1 Acrothamnus.montanus 0.8383 -0.02382347 1e-04
........
> head(vec.sp.df.pa)
MDS1 MDS2 species pvals
Elevation 0.834847 0.747474 Elevation 0.005
Here is the code I am using:
>xy <- ggplot(data.scores.pa, aes(x = NMDS1, y = NMDS2, group = Year)) +
geom_point(size = 3, aes(shape = Fire history, colour = Year))+
stat_ellipse(mapping = NULL, data = NULL, geom = "path", position = "identity", type = "t", level = 0.95, segments = 51, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) +
geom_segment(data=vec.sp.df.pa, aes(x=0,xend=MDS1,y=0,yend=MDS2),
arrow = arrow(length = unit(0.5,"cm")),colour="grey")+
geom_text_repel(data=vec.sp.df.pa,aes(x=MDS1,y=MDS2,label=species),size=2)+
geom_segment(data=spp.scrs2,aes(x=0,xend=MDS1,y=0,yend=MDS2),
arrow = arrow(length = unit(0.5, "cm")),colour="black")+
geom_text_repel(data=spp.scrs2, aes(x=MDS1,y=MDS2,label=species),size=2)+
annotate("text", x = -1.6, y = 1, label = paste0("3D stress: ", format(ord.pa$stress, digits = 4)), hjust = 0) +
theme_cowplot() + scale_color_brewer(palette = "BrBG", direction = 1) +
theme(panel.border = element_rect(colour = "black"))+
ggtitle("All Sites - distance data using Bray-Curtis")+
labs(x = "NMDS1", y = "NMDS2")
> Error in FUN(X[[i]], ...) : object 'Year' not found
However, when I remove the geom_segment and geom_text_repel code lines it fixes the problem and I am able to plot the graph...
Is anyone able to provide some insight into this issue?
Thank you!

How to send data frame from r to SQL?

#setwd('Desktop/IE332')
install.packages("wakefield")
install.packages("RMySQL")
install.packages("randomNames")
install.packages('password')
install.packages('OpenRepGrid')
library(RMySQL)
library(password)
library(wakefield)
library(randomNames)
industriesData <- read.csv('Industries.csv')
skills <- read.csv('Skills.csv')
sp500 <- read.csv("http://www.princeton.edu/~otorres/sandp500.csv")
companies <- sample(sp500$Name, 100)
locations <- c('Northwest', 'Midwest', 'Northeast', 'South', 'Southwest', 'Southeast',
'International') # Locations
gpas <- c(4,3.5,3,2.5,2)
n <- 100
locPrefs <- numeric(n)
studentSkills <- matrix(nrow=100,ncol=10)
studentInd <- matrix(nrow=100,ncol=5)
jobSkills <- matrix(nrow=100,ncol=5)
for(j in 1:n){ # Samples random skills assigned to students
studentSkills[j,] <- sample(skills[,1],10,replace=FALSE)
studentInd[j,] <- sample(industriesData[,1],5,replace=FALSE)
jobSkills[j,] <- sample(skills[,1],5,replace=FALSE)
}
studentData <- data.frame('first names'=randomNames(n, which.names = 'first'),'last
names'=randomNames(n, which.names = 'last'),'username'=seq(1,
n),'password'=password(8,numbers=TRUE),'gpa'=gpa(n, mean = 85.356, sd = 3.2, name =
"GPA"),'visa'=sample(c("N","Y"), size = n, replace = TRUE, prob = c(.78, .22)), 'loc
pref'=sample(locations,n,replace = TRUE), 'skill'=studentSkills, 'Industry'=studentInd) # Student data
employerData <- data.frame('company names'=companies, 'pref
gpa'=sample(gpas,n,replace=TRUE), 'sponser?'=sample(c('N','Y'), size=n, replace = TRUE, prob
= c(.78, .22)), 'job id'=sample(seq(100,999),n,replace=FALSE),'pref skill'=jobSkills,
'industry'=sample(industriesData[,1],n,replace=TRUE),'location'=sample(locations,n,replace =
TRUE)) # Employer data
I am trying to send certain columns of the studentData and employerData to tables in SQL, how would i go about doing that? I have a table named students where I would like to upload the first and last names of the studentsData data frame into this SQL table.

Vpython greyscreen crash

I have found many times a solution for my problems from here, but this time I am totally baffled. I don't know what's wrong at my code.
I made a code to create a box with charged particles inside with Vpython. As I launch the program, I get only a grey screen and the program crash. No error message, nothing.
from visual import *
from random import *
def electronizer(num):
list = []
electron_charge = -1.60217662e-19
electron_mass = 9.10938356e-31
for i in range(num):
another_list = []
e = sphere(pos=(random(), random(),random()), radius=2.818e-15,
color=color.cyan)
e.v = vector(random(), random(), random())
another_list.append(e)
another_list.append(e.v)
another_list.append(electron_charge)
another_list.append(electron_mass)
list.append(another_list)
return list
def protonizer(num):
list = []
proton_charge = 1.60217662e-19
proton_mass = 1.6726219e-27
for i in range(num):
another_list = []
p = sphere(pos=(random(), random(),random()), radius=0.8408739e-15, color=color.red)
p.v = vector(random(), random(), random())
another_list.append(p)
another_list.append(p.v)
another_list.append(proton_charge)
another_list.append(proton_mass)
list.append(another_list)
return list
def cross(a, b):
c = vector(a[1]*b[2] - a[2]*b[1],
a[2]*b[0] - a[0]*b[2],
a[0]*b[1] - a[1]*b[0])
return c
def positioner(work_list):
k = 8.9875517873681764e3 #Nm2/C2
G = 6.674e-11 # Nm2/kg2
vac_perm = 1.2566370614e-6 # H/m
pi = 3.14159265
dt = 0.1e-3
constant = 1
force = vector(0,0,0)
for i in range(len(work_list)):
for j in range(len(work_list)):
if i != j:
r = work_list[i][0].pos - work_list[j][0].pos
r_mag = mag(r)
r_norm = norm(r)
F = k * ((work_list[i][2] * work_list[j][2]) / (r_mag**2)) * r_norm
force += F
B = constant*(vac_perm / 4*pi) * (cross(work_list[j][2] * work_list[j][1], norm(r)))/r_mag**2
F = cross(work_list[i][2] * work_list[i][1], B)
force += F
F = -(G * work_list[i][3] * work_list[j][3]) / r_mag**2 * r_norm
force += F
acceleration = force / work_list[i][3]
difference_in_velocity = acceleration * dt
work_list[i][1] += difference_in_velocity
difference_in_position = work_list[i][1] * dt
work_list[i][0].pos += difference_in_position
if abs(work_list[i][0].pos[0]) > 2.5e-6:
work_list[i][1][0] = -work_list[i][1][0]
elif abs(work_list[i][0][1]) > 2.5e-6:
work_list[i][1][1] = -work_list[i][1][1]
elif abs(work_list[i][0][2]) > 2.5e-6:
work_list[i][1][2] = -work_list[i][1][2]
return work_list
box = box(pos=(0, 0, 0), length = 5e-6, width = 5e-6, height = 5e-6, opacity = 0.5)
protons_num = raw_input("number of protons: ")
electrons_num = raw_input("number of electrons: ")
list_of_electrons = electronizer(int(electrons_num))
list_of_protons = protonizer(int(protons_num))
work_list = list_of_electrons + list_of_protons
while True:
work_list = positioner(work_list)
You should ask your question on the VPython.org forum where the VPython experts hang out and will be able to answer your question. You should mention which operating system you are using and which version of python you are using. From your code I see that you are using classic VPython. There is a newer version of VPython 7 that just came out but the VPython syntax has changed.

How to remove scientific notation for Rplot chart

I developed this R-script to drive a decision flow Rplot chart, but I can't get it to show numeric values instead of scientific notation. I spent half of the work day yesterday trying to make it numeric by following examples I found on stackoverflow, but so far no luck. See code and screenshot for details.
#automatically convert columns with few unique values to factors
convertCol2factors<-function(data, minCount = 3)
{
for (c in 1:ncol(data))
if(is.logical(data[, c])){
data[, c] = as.factor(data[, c])
}else{
uc<-unique(data[, c])
if(length(uc) <= minCount)
data[, c] = as.factor(data[, c])
}
return(data)
}
#compute root node error
rootNodeError<-function(labels)
{
ul<-unique(labels)
g<-NULL
for (u in ul) g = c(g, sum(labels == u))
return(1-max(g)/length(labels))
}
# this function is almost identical to fancyRpartPlot{rattle}
# it is duplicated here because the call for library(rattle) may trigger GTK load,
# which may be missing on user's machine
replaceFancyRpartPlot<-function (model, main = "", sub = "", palettes, ...)
{
num.classes <- length(attr(model, "ylevels"))
default.palettes <- c("Greens", "Blues", "Oranges", "Purples",
"Reds", "Greys")
if (missing(palettes))
palettes <- default.palettes
missed <- setdiff(1:6, seq(length(palettes)))
palettes <- c(palettes, default.palettes[missed])
numpals <- 6
palsize <- 5
pals <- c(RColorBrewer::brewer.pal(9, palettes[1])[1:5],
RColorBrewer::brewer.pal(9, palettes[2])[1:5], RColorBrewer::brewer.pal(9,
palettes[3])[1:5], RColorBrewer::brewer.pal(9, palettes[4])[1:5],
RColorBrewer::brewer.pal(9, palettes[5])[1:5], RColorBrewer::brewer.pal(9,
palettes[6])[1:5])
if (model$method == "class") {
yval2per <- -(1:num.classes) - 1
per <- apply(model$frame$yval2[, yval2per], 1, function(x) x[1 +
x[1]])
}
else {
per <- model$frame$yval/max(model$frame$yval)
}
per <- as.numeric(per)
if (model$method == "class")
col.index <- ((palsize * (model$frame$yval - 1) + trunc(pmin(1 +
(per * palsize), palsize)))%%(numpals * palsize))
else col.index <- round(per * (palsize - 1)) + 1
col.index <- abs(col.index)
if (model$method == "class")
extra <- 104
else extra <- 101
rpart.plot::prp(model, type = 2, extra = extra, box.col = pals[col.index],
nn = TRUE, varlen = 0, faclen = 0, shadow.col = "grey",
fallen.leaves = TRUE, branch.lty = 3, ...)
title(main = main, sub = sub)
}
###############Upfront input correctness validations (where possible)#################
pbiWarning<-""
pbiInfo<-""
dataset <- dataset[complete.cases(dataset[, 1]), ] #remove rows with corrupted labels
dataset = convertCol2factors(dataset)
nr <- nrow( dataset )
nc <- ncol( dataset )
nl <- length( unique(dataset[, 1]))
goodDim <- (nr >=minRows && nc >= 2 && nl >= 2)
##############Main Visualization script###########
set.seed(randSeed)
opt = NULL
dtree = NULL
if(autoXval)
xval<-autoXvalFunc(nr)
dNames <- names(dataset)
X <- as.vector(dNames[-1])
form <- as.formula(paste('`', dNames[1], '`', "~ .", sep = ""))
# Run the model
if(goodDim)
{
for(a in 1:maxNumAttempts)
{
dtree <- rpart(form, dataset, control = rpart.control(minbucket = minBucket, cp = complexity, maxdepth = maxDepth, xval = xval)) #large tree
rooNodeErr <- rootNodeError(dataset[, 1])
opt <- optimalCPbyXError(as.data.frame(dtree$cptable))
dtree<-prune(dtree, cp = opt$CP)
if(opt$ind > 1)
break;
}
}
#info for classifier
if( showInfo && !is.null(dtree) && dtree$method == 'class')
pbiInfo <- paste("Rel error = ", d2form(opt$relErr * rooNodeErr),
"; CVal error = ", d2form(opt$xerror * rooNodeErr),
"; Root error = ", d2form(rooNodeErr),
";cp = ", d2form(opt$CP, 3), sep = "")
if(goodDim && opt$ind>1)
{
#fancyRpartPlot(dtree, sub = pbiInfo)
replaceFancyRpartPlot(dtree, sub = pbiInfo)
}else{
if( showWarnings )
pbiWarning <- ifelse(goodDim, paste("The tree depth is zero. Root error = ", d2form(rooNodeErr), sep = ""),
"Wrong data dimensionality" )
plot.new()
title( main = NULL, sub = pbiWarning, outer = FALSE, col.sub = "gray40" )
}
remove("dataset")
Also, how can I tell what "n" means from the photo below? (I copied this code from a project).
Try adding digits = -2 to the prp call in your code