I have a code in OpenBUGS but the error is "variable CR is not defined" - openbugs

model
{
for( i in 1 : N ) {
dgf[i] ~ dbin(p[i],n[i])
logit(p[i]) <- a[subject[i]] + beta[1] * CR[i]
}
for (j in 1:94)
{
a[j]~dnorm(beta0,prec.tau)
}
beta[1] ~ dnorm(0.0,.000001)
beta0 ~ dnorm(0.0,.000001)
prec.tau ~ dgamma(0.001,.001)
tau<-sqrt(1/prec.tau)
}
list(
n=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
dgf=c(0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0),
subject=c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63,64,64,65,65,66,66,67,67,68,68,69,69,70,70,71,71,72,72,73,73,74,74,75,75,76,76,77,77,78,78,79,79,80,80,81,81,82,82,83,83,84,84,85,85,86,86,87,87,88,88,89,89,90,90,91,91,92,92,93,93,94,94),
CR=c(NA,NA,1.41,0.85,1.13,0.65,NA,NA,2.13,1.61,7.31,3.8,1.65,2.32,1.13,2.3,0.99,1.5,1.32,3.95,7.2,2.97,0.83,1.55,NA,6.5,0.89,1.2,1.52,7,8.68,7.41,NA,0.86,NA,1.92,NA,1.31,7.8,1.78,NA,1.67,NA,NA,NA,NA,NA,2.25,0.98,0.82,3.94,1.14,12,2.58,2.42,2.59,NA,NA,NA,NA,6.6,3.22,NA,2.02,2.43,1.96,0.82,1.64,1.81,1.53,1.01,5.21,8.33,1.14,1.49,6,5.6,2,3.33,4.08,NA,NA,1.14,1.25,0.85,5.42,0.85,0.65,1.02,1.33,1.1,1.12,NA,NA,1.53,1.76,2,0.85,2.9,5,4.09,2.68,0.98,1.48,0.66,0.57,5.72,2.34,0.93,2.39,1.39,1.44,4.77,2.39,1.79,1.2,0.81,1.25,4.69,1.22,1.92,1.48,2.46,NA,NA,2.53,1.12,1.74,3.45,1.22,1.27,2.61,1.75,0.82,NA,1.4,NA,5.1,1.24,1.5,1.94,1.24,1.04,1.24,NA,2.39,NA,2.07,2.19,1.6,6,6.38,1.17,1.2,5.62,6.39,1.82,1.31,NA,1.18,3.71,2.03,5.4,2.17,NA,1.94,1.57,1.44,1.35,1.63,1.24,1.54,1.5,NA,NA,NA,NA,1.44,NA,2.19,7.98,2.15,1.71,1.45,NA,0.98,2.37,1.58), N = 188)
I know that the error is because of "NA" in variable "CR" but i don't know how to solve it. i'll appreciate any help.

You have 2 options:
1) Remove the missing CR and the corresponding elements of n, dgf, and subject (and reduce N accordingly).
2) Define a stochastic relation for CR within your model, so that the model estimates the missing CR and uses these estimates in the logistic regression. Something like:
for(i in 1:N){
CR[i] ~ dnorm(CR_mu, CR_tau)
}
CR_mu ~ dnorm(0, 10^-6)
CR_tau ~ drama(0.001, 0.001)
CR_mu and CR_tau are probably not of interest but can be monitored if you want.
Note that both approaches assume that CR are missing at random (and not e.g. censored) - if CR are missing not at random this will give you biased results.
Matt

Related

"Error: Attempt to redefine node" in Mixture that changes size every iteration

My data has three columns Time, Interval, Count. I have a mixture of Poissons that goes like this
mod_string = " model{
for(i in 2:length(Count)){
Count[i] ~ dpois(lambda.hacked[i]*z[i]+0.0001)
z[i] ~dbern(p)
lambda.hacked[i] <- mu[ clust[i] ]
Prob <- p^-(1:i) * (1-p) / p
mu <- (Time[1:i] - Interval[1:i])*lambda
clust[i] ~ dcat( Prob)
}
## Priors
lambda ~ dgamma(0.01,0.02)
p ~ dbeta(1,1)
}"
mu changes size at every iteration. As i grows, the number of clusters also grows.
How can I adapt this?

rjags error Invalid vector argument to ilogit

I'd like to compare a betareg regression vs. the same regression using rjags
library(betareg)
d = data.frame(p= sample(c(.1,.2,.3,.4),100, replace= TRUE),
id = seq(1,100,1))
# I am looking to reproduce this regression with jags
b=betareg(p ~ id, data= d,
link = c("logit"), link.phi = NULL, type = c("ML"))
summary(b)
Below I am trying to do the same regression with rjags
#install.packages("rjags")
library(rjags)
jags_str = "
model {
#model
y ~ dbeta(alpha, beta)
alpha <- mu * phi
beta <- (1-mu) * phi
logit(mu) <- a + b*id
#priors
a ~ dnorm(0, .5)
b ~ dnorm(0, .5)
t0 ~ dnorm(0, .5)
phi <- exp(t0)
}"
id = d$id
y = d$p
model <- jags.model(textConnection(jags_str),
data = list(y=y,id=id)
)
update(model, 10000, progress.bar="none"); # Burnin for 10000 samples
samp <- coda.samples(model,
variable.names=c("mu"),
n.iter=20000, progress.bar="none")
summary(samp)
plot(samp)
I get an error on this line
model <- jags.model(textConnection(jags_str),
data = list(y=y,id=id)
)
Error in jags.model(textConnection(jags_str), data = list(y = y, id = id)) :
RUNTIME ERROR:
Invalid vector argument to ilogit
Can you advise
(1) how to fix the error
(2) how to set priors for the beta regression
Thank you.
This error occurs because you have supplied the id vector to the scalar function logit. In Jags inverse link functions cannot be vectorized. To address this, you need to use a for loop to go through each element of id. To do this I would probably add an additional element to your data list that denotes how long id is.
d = data.frame(p= sample(c(.1,.2,.3,.4),100, replace= TRUE),
id = seq(1,100,1), len_id = length(seq(1,100,1)))
From there you just need to make a small edit to your jags code.
for(i in 1:(len_id)){
y[i] ~ dbeta(alpha[i], beta[i])
alpha[i] <- mu[i] * phi
beta[i] <- (1-mu[i]) * phi
logit(mu[i]) <- a + b*id[i]
}
However, if you track mu it is going to be a matrix that is 20000 (# of iterations) by 100 (length of id). You are likely more interested in the actual parameters (a, b, and phi).

Dirichlet-Multinomial WinBUGS code

I'm trying to code a dirichlet-multinomial model using BUGS.
Basically I have 18 regions and 3 categories per region. In example,
Region 1: 0.50 belongs to Low, 0.30 belongs to Middle, and 0.20 belongs to High. The list goes on to Region 18 of course with varying proportions.The only code I got is this
`model {
for (i in 1:N) {
x[1:3] ~ dmulti(p[],n[i])
p[1:3] ~ ddirch(alpha[])
}
for (k in 1:3) {
alpha[k] <- 1
}
}
DATA list(n=c(38483, 2259, 1900),x=c(29256.42719, 1857.431404, 1548.007808, 29256.42719, 1857.431404, 1548.007808, 29256.42719, 1857.431404, 1548.007808), N=3)`
I shortened it to 3 regions first just for example. It states 'Dirichlet36' after clicking 'gen inits'. Please help me to code this.
This may be helpful (source):
Learning about the parameters of a Dirichlet distribution
Suppose as part of a model there are J probability arrays p[j, 1:K], j = 1, ..., J, where K is the dimension of each array and sum(p[j, 1:K]) = 1 for all j. We give each of them a Dirichlet prior:
p[j, 1:K] ~ ddirch(alpha[])
and we would like to learn about alpha[]. However, the parameters alpha[] of a Dirichlet distribution cannot be stochastic nodes. The trick is to note that if delta[k] ~ dgamma(alpha[k], 1), then the vector with elements delta[k] / sum(delta[1:K]), k = 1, ..., K, is Dirichlet with parameters alpha[k], k = 1, ..., K. So the following construction should allow learning about the parameters alpha[]:
for (k in 1:K) {
p[j, k] <- delta[j, k] / sum(delta[j,])
delta[j, k] ~ dgamma(alpha[k], 1)
}
A prior can be put directly on the alpha[k]'s.

Why the jags result and depmixS4 are sometimes different?

I have a data set like the following simulated data:
Pi = matrix(c(0.9,0.1,0.3,0.7),2,2,byrow=TRUE)
delta = c(.5,.5)
z = sample(c(1,2),1,prob=delta)
T = 365
for( t in 2:T){
z[t] = sample(x=c(1,2),1,prob=Pi[z[t-1],])
}
x <- sample(x=seq(-1, 1.5, length.out=T),T,replace=TRUE)
alpha = c(-1, -3.2)
Beta = c(-4,3)
y<-NA
for(i in 1:T){
y[i] = rbinom(1,size=10,prob=1/(1+exp(-Beta[z[i]]*x[i]-alpha[z[i]])))
}
SimulatedBinomData <- data.frame('y' = y, 'x' = x , size=rep(10,T), 'z' = z)
yy<-NA
xx<-NA
for(i in 1:dim(SimulatedBinomData)[1]){
yy<-c(yy,c(rep(1,SimulatedBinomData$y[i]),rep(0,(SimulatedBinomData$size[i]-SimulatedBinomData$y[i]))))
xx<-c(xx,rep(SimulatedBinomData$x[i],SimulatedBinomData$size[i]))
}
yy<-yy[-1]
xx<-xx[-1]
SimulatedBernolliData<-data.frame(y=yy,x=xx, tt=rep(c(1:T),rep(10,T)))
This is a HMM problem with two states meaning that the Hidden Markov chain z_t belongs to {1,2}. To estimate alpha and Beta in two different states I can use the package 'depmixS4' and find the Maximum Likelihood estimates or I can use MCMC in 'rjags' package.
I expect that these two estimations be almost the same while when I run the following program for different simulated data, in several times, the answers are not the same and very different!!
library("rjags")
library("depmixS4")
mod <- depmix(cbind(y,(size-y))~x, data=SimulatedBinomData, nstates=2, family=binomial(logit))
fm <- fit(mod)
getpars(fm)
n<-length(SimulatedBernolliData$y)
T<-max(SimulatedBernolliData$tt)
cat("model {
# Transition Probability
Ptrans[1,1:2] ~ ddirch(a)
Ptrans[2,1:2] ~ ddirch(a)
# States
Pinit[1] <- 0.5 #failor
Pinit[2] <- 0.5 #success
state[1] ~ dbern(Pinit[2])
for (t in 2:T) {
state[t] ~ dbern(Ptrans[(state[t-1]+1),2])
}
# Parameters
alpha[1] ~ dunif(-1.e10, 1.e10)
alpha[2] ~ dunif(-1.e10, 1.e10)
Beta[1] ~ dunif(-1.e10, 1.e10)
Beta[2] ~ dunif(-1.e10, 1.e10)
# Observations
for (i in 1:n){
z[i] <- state[tt[i]]
y[i] ~ dbern(1/(1+exp(-(alpha[(z[i]+1)]+Beta[(z[i]+1)]*x[i]))))
}
}",
file="LeftBehindHiddenMarkov.bug")
jags <- jags.model('LeftBehindHiddenMarkov.bug', data = list('x' = SimulatedBernolliData$x, 'y' = SimulatedBernolliData$y, 'tt' = SimulatedBernolliData$tt, T=T, n = n, a = c(1,1) ))
res <- coda.samples(jags,c('alpha', 'Beta', 'Ptrans','state'),1000)
res.median = apply(res[[1]],2,median)
res.median[1:8]
res.mean = apply(res[[1]],2,mean)
res.mean[1:8]
res.sd = apply(res[[1]],2,sd)
res.sd[1:8]
res.mode = apply(res[[1]],2,function(x){as.numeric(names(table(x))
[which.max(table(x))]) })
res.mode[1:8]
You are having a problem of label switching in your JAGS code, that is, states z[i]=1 is not bounded to the lower posterior value for Beta and z[i]=2 to the higher Beta. Therefore, for each iteration of the MCMC they can switch. There are several ways to solve this problem. One of them is the partial reordering, that is, for every MCMC iteration, draw two independent values for Beta and order them so that Beta[1] < Beta[2].
You can do that by substituting
Beta[1] ~ dunif(-1.e10, 1.e10)
Beta[2] ~ dunif(-1.e10, 1.e10)
for
Beta[1:2] <- sort(Betaaux)
Betaaux[1] ~ dunif(-1.e10, 1.e10)
Betaaux[2] ~ dunif(-1.e10, 1.e10)
Of course, the ordering could also be done on the alpha parameters instead. The election of which parameter to use for the partial reordering depends on the problem.

Checking if lines intersect and if so return the coordinates

I've written some code below to check if two line segments intersect and if they do to tell me where. As input I have the (x,y) coordinates of both ends of each line. It appeared to be working correctly but now in the scenario where line A (532.87,787.79)(486.34,769.85) and line B (490.89,764.018)(478.98,783.129) it says they intersect at (770.136, 487.08) when the lines don't intersect at all.
Has anyone any idea what is incorrect in the below code?
double dy[2], dx[2], m[2], b[2];
double xint, yint, xi, yi;
WsqT_Location_Message *location_msg_ptr = OPC_NIL;
FIN (intersect (<args>));
dy[0] = y2 - y1;
dx[0] = x2 - x1;
dy[1] = y4 - y3;
dx[1] = x4 - x3;
m[0] = dy[0] / dx[0];
m[1] = dy[1] / dx[1];
b[0] = y1 - m[0] * x1;
b[1] = y3 - m[1] * x3;
if (m[0] != m[1])
{
//slopes not equal, compute intercept
xint = (b[0] - b[1]) / (m[1] - m[0]);
yint = m[1] * xint + b[1];
//is intercept in both line segments?
if ((xint <= max(x1, x2)) && (xint >= min(x1, x2)) &&
(yint <= max(y1, y2)) && (yint >= min(y1, y2)) &&
(xint <= max(x3, x4)) && (xint >= min(x3, x4)) &&
(yint <= max(y3, y4)) && (yint >= min(y3, y4)))
{
if (xi && yi)
{
xi = xint;
yi = yint;
location_msg_ptr = (WsqT_Location_Message*)op_prg_mem_alloc(sizeof(WsqT_Location_Message));
location_msg_ptr->current_latitude = xi;
location_msg_ptr->current_longitude = yi;
}
FRET(location_msg_ptr);
}
}
FRET(location_msg_ptr);
}
There is an absolutely great and simple theory about lines and their intersections that is based on adding an extra dimensions to your points and lines. In this theory a line can be created from two points with one line of code and the point of line intersection can be calculated with one line of code. Moreover, points at the Infinity and lines at the Infinity can be represented with real numbers.
You probably heard about homogeneous representation when a point [x, y] is represented as [x, y, 1] and the line ax+by+c=0 is represented as [a, b, c]?
The transitioning to Cartesian coordinates for a general homogeneous representation of a point [x, y, w] is [x/w, y/w]. This little trick makes all the difference including representation of lines at infinity (e.g. [1, 0, 0]) and making line representation look similar to point one. This introduces a GREAT symmetry into formulas for numerous line/point manipulation and is an
absolute MUST to use in programming. For example,
It is very easy to find line intersections through vector product
p = l1xl2
A line can be created from two points is a similar way:
l=p1xp2
In the code of OpenCV it it just:
line = p1.cross(p2);
p = line1.cross(line2);
Note that there are no marginal cases (such as division by zero or parallel lines) to be concerned with here. My point is, I suggest to rewrite your code to take advantage of this elegant theory about lines and points.
Finally, if you don't use openCV, you can use a 3D point class and create your own cross product function similar to this one:
template<typename _Tp> inline Point3_<_Tp> Point3_<_Tp>::cross(const Point3_<_Tp>& pt) const
{
return Point3_<_Tp>(y*pt.z - z*pt.y, z*pt.x - x*pt.z, x*pt.y - y*pt.x);
}