search.conf file:
# Sphinx data source: pulls rows for the "app_main" index from PostgreSQL.
source app_main
{
type = pgsql
sql_host = localhost
sql_user = blizzard_moz455_1_3
sql_pass = adminpwd
sql_db = blizzard_moz455_1_3
sql_port = 5432
# Main fetch query; the trailing backslashes continue it onto the next line.
sql_query = \
SELECT "id", "header", "date", "is_paid", "text", 10 as content_type\
FROM app_main
sql_query_info = SELECT * FROM "app_main" WHERE "id" = $id
sql_attr_uint = content_type
sql_attr_timestamp = date
}
# Index built from the source above.
index app_main
{
source = app_main
# NOTE: the last path component ("sphinx") is a filename PREFIX, not a
# directory — the index files are written into "app" as sphinx.spa ... sphinx.sps.
path = D:/blizzard/Projects/Python/Web/moz455/app/sphinx
docinfo = extern
morphology = stem_enru
min_word_len = 2
charset_type = utf-8
html_strip = 1
html_remove_elements = script
min_prefix_len = 0
min_infix_len = 3
enable_star = 1
}
# Resource limits for the indexer tool.
indexer
{
mem_limit = 32M
}
# Settings for the searchd daemon.
searchd
{
listen = 127.0.0.1:3312
log = searchd.log
query_log = query.log
read_timeout = 5
max_children = 30
pid_file = searchd.pid
max_matches = 1000
}
Output of the command "indexer --config sphinx.conf --all":
using config file 'sphinx.conf'...
indexing index 'app_main'...
collected 1 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 1 docs, 143 bytes
total 0.065 sec, 2172 bytes/sec, 15.19 docs/sec
total 2 reads, 0.000 sec, 2.5 kb/call avg, 0.0 msec/call avg
total 9 writes, 0.000 sec, 1.2 kb/call avg, 0.0 msec/call avg
I.e. no errors. But index files are not created.
"D:/blizzard/Projects/Python/Web/moz455/app/sphinx" isn't actually a folder — the last component ("sphinx") is a filename prefix. The index files were created in the "app" folder: sphinx.spa, ..., sphinx.sps.
Related
when I run my jags model, I got this error message :
module glm loaded
Error in jags.model(model.file, data = data, inits = init.values, n.chains = n.chains, :
Error parsing model file:
syntax error on line 5 near "="
And here is my code:
install.packages('R2jags')
install.packages('rjags')
library(rjags)
library('R2jags')
library(lattice)
# JAGS model, written as an R function for R2jags' model.file argument.
# BUG (this is the reported parse error): JAGS distributions take positional
# arguments only — dunif(min = 0.2, max = 1.4) triggers "syntax error near '='".
binomial.model.JAGS = function(){
y ~ dbin(p,n)           # likelihood: y successes in n trials
p=lambda*mu+rho*(1-mu)  # success probability, a mixture of lambda and rho
lambda ~ dunif(min = 0.2,max = 1.4)  # BUG: named args are invalid in JAGS
mu ~ dunif(min = 0,max = 1)          # BUG: same problem
rho ~ dunif(min = 0.1,max = 1.7)     # BUG: same problem
}
# Data and settings for the MCMC run.
n = 100000  # number of trials
y = 30000   # observed successes
data.JAGS = list(y = y, n = n)
inits.JAGS = list(list(lambda=0.8,mu=0.5,rho=0.9))  # initial values, one chain
para.JAGS = c("p", "lambda", "mu", "rho")           # parameters to monitor
# Fit with R2jags::jags; model.file accepts an R function defining the model.
fit.JAGS = jags(
data=data.JAGS,inits=inits.JAGS,
parameters.to.save=para.JAGS,
n.chains=1,
n.iter=9000,
n.burnin = 1000,
model.file = binomial.model.JAGS)
I really don't know where it went wrong. Could anybody help me, please? Thank you a lot!!
Just remove all the argument naming in your priors and you should be good to go.
# Corrected model: the prior bounds are now positional, which is the only
# argument form JAGS accepts.
binomial.model.JAGS = function(){
y ~ dbin(p,n)            # likelihood: y successes in n trials
p=lambda*mu+rho*(1-mu)   # derived success probability
lambda ~ dunif(0.2,1.4)  # uniform priors with positional bounds
mu ~ dunif(0,1)
rho ~ dunif(0.1,1.7)
}
# Data and settings for the MCMC run (unchanged from the question).
n = 100000  # number of trials
y = 30000   # observed successes
data.JAGS = list(y = y, n = n)
inits.JAGS = list(list(lambda=0.8,mu=0.5,rho=0.9))  # initial values, one chain
para.JAGS = c("p", "lambda", "mu", "rho")           # parameters to monitor
# Fit with R2jags::jags, 1 chain, 9000 iterations, 1000 burn-in.
fit.JAGS = jags(
data=data.JAGS,inits=inits.JAGS,
parameters.to.save=para.JAGS,
n.chains=1,
n.iter=9000,
n.burnin = 1000,
model.file = binomial.model.JAGS)
mu.vect sd.vect 2.5% 25% 50% 75% 97.5%
lambda 0.286 0.025 0.239 0.270 0.287 0.298 0.351
mu 0.936 0.064 0.758 0.930 0.957 0.977 0.996
p 0.300 0.001 0.297 0.299 0.300 0.301 0.303
rho 0.794 0.459 0.114 0.355 0.774 1.166 1.611
deviance 12.799 1.403 11.791 11.883 12.230 13.105 16.491
DIC info (using the rule, pD = var(deviance)/2)
pD = 1.0 and DIC = 13.8
DIC is an estimate of expected predictive error (lower deviance is better).
I have numbers in a range from -4 to 4, including 0, as in
-0.526350041828112
-0.125648350883331
0.991377353361933
1.079241128983
1.06322905224238
1.17477528478982
-0.0651086035371559
0.818471811380787
0.0355593553368815
I need to create histogram like buckets, and have being trying to use this
# Bucket counter (from "Create bins with awk histogram-like").
# delta is the bucket width; override it with: awk -v delta=0.3 ...
BEGIN { delta = (delta == "" ? 0.1 : delta) }
{
# NOTE(review): int() truncates toward zero, so this mapping is only valid
# for non-negative input — negative values land in the wrong buckets,
# which is exactly the problem described above.
bucketNr = int(($0+delta) / delta)
cnt[bucketNr]++
numBuckets = (numBuckets > bucketNr ? numBuckets : bucketNr)  # highest bucket seen
}
END {
# beg is uninitialized, so it starts at awk's default 0 and the report
# always begins at 0.0.
for (bucketNr=1; bucketNr<=numBuckets; bucketNr++) {
end = beg + delta
printf "%0.1f %0.1f %d\n", beg, end, cnt[bucketNr]
beg = end
}
}
from Create bins with awk histogram-like
The output would look like
-2.4 -2.1 8
-2.1 -1.8 25
-1.8 -1.5 108
-1.5 -1.2 298
-1.2 -0.9 773
-0.9 -0.6 1067
-0.6 -0.3 1914
-0.3 0.0 4174
0.0 0.3 3969
0.3 0.6 2826
0.6 0.9 1460
0.9 1.2 752
1.2 1.5 396
1.5 1.8 121
1.8 2.1 48
2.1 2.4 13
2.4 2.7 1
2.7 3.0 1
I'm thinking I would have to run this 2x, one with delta let's say 0.3 and another with delta -0.3, and cat the two together.
But I'm not sure this intuition is correct.
This might work for you:
# Same bucketing as the code in the question, extended to handle negatives.
BEGIN { delta = (delta == "" ? 0.1 : delta) }
{
# Shift negative values down by one bucket width before the division so that
# int()'s truncation toward zero still places them in the bucket below zero.
# NOTE(review): a value that is an exact negative multiple of delta (e.g.
# -0.3 with delta=0.3) is shifted one bucket too low; unlikely with
# arbitrary-precision input, but a possible boundary case.
bucketNr = int(($0<0?$0-delta:$0)/delta)
cnt[bucketNr]++
maxBucket = (maxBucket > bucketNr ? maxBucket : bucketNr)  # highest bucket seen
minBucket = (minBucket < bucketNr ? minBucket : bucketNr)  # lowest bucket seen
}
END {
beg = minBucket*delta  # left edge of the lowest occupied bucket
for (bucketNr=minBucket; bucketNr<=maxBucket; bucketNr++) {
end = beg + delta
printf "%0.1f %0.1f %d\n", beg, end, cnt[bucketNr]  # empty buckets print 0
beg = end
}
}
It's basically the code you posted + handling negative numbers.
I would like to create bins to get histogram with totals and percentage, e.g. starting from 0.
If possible to set the minimum and maximum value in the bins ( in my case value min=0 and max=20 )
Input file
8 5
10 1
11 4
12 4
12 4
13 5
16 7
18 9
16 9
17 7
18 5
19 5
20 1
21 7
output desired
0 0 0.0%
0 - 2 0 0.0%
2 - 4 0 0.0%
4 - 6 0 0.0%
6 - 8 0 0.0%
8 - 10 5 6.8%
10 - 12 5 6.8%
12 - 14 13 17.8%
14 - 16 0 0.0%
16 - 18 23 31.5%
18 - 20 19 26.0%
> 20 8 11.0%
---------------------
Total: 73
I use this code from Mr Ed Morton, it works perfectly but the percentage is missed.
# Ed Morton's bucket counter: counts rows per delta-wide bucket of the first
# number on each line and prints "start end count" per bucket.
awk 'BEGIN { delta = (delta == "" ? 2 : delta) }
{
# $0 is the whole line; awk numeric coercion uses its leading number (col 1).
# int((x+delta)/delta) maps [0,delta) to bucket 1, [delta,2*delta) to 2, etc.
bucketNr = int(($0+delta) / delta)
cnt[bucketNr]++
numBuckets = (numBuckets > bucketNr ? numBuckets : bucketNr)
}
END {
for (bucketNr=1; bucketNr<=numBuckets; bucketNr++) {
end = beg + delta
printf "%0.1f %0.1f %d\n", beg, end, cnt[bucketNr]
beg = end
}
}' file
Thanks in advance
Your expected output doesn't seem to correspond to your sample input data, but try this variation of that awk code in your question (intended to be put in an executable file and run as a script, not as a one-liner, due to its size):
#!/usr/bin/awk -f
# Per-bucket report: bucket ranges of column 1, the maximum of column 2 seen
# in each bucket, the percentage of rows in the bucket, and a grand total.
BEGIN { delta = (delta == "" ? 2 : delta) }
{
# $0 coerces to its leading number in arithmetic, i.e. column 1.
bucketNr = int(($0+delta) / delta)
cnt[bucketNr]++
max[bucketNr] = max[bucketNr] < $2 ? $2 : max[bucketNr]  # max of col 2 per bucket
sum += $2                                                # grand total of col 2
numBuckets = (numBuckets > bucketNr ? numBuckets : bucketNr)
}
END {
# NOTE(review): the desired output above looks like a per-bucket SUM of
# column 2 (e.g. 12-14 -> 4+4+5 = 13), not a max; if so, this should
# accumulate sum[bucketNr] += $2 and print that instead of max[bucketNr].
for (bucketNr=1; bucketNr<=numBuckets; bucketNr++) {
end = beg + delta
printf "%d-%d %d %.1f\n", beg, end, max[bucketNr],
(cnt[bucketNr] / NR) * 100  # percentage of all input rows in this bucket
beg = end
}
print "-------------"
print "Total " sum
}
It adds tracking the maximum of the second column for each bin the first column falls in, and prints out a percentage instead of a count of how many rows were in each bin. Plus some tweaks to the output format to better match your desired output.
I am trying to learn Bayes Network and I have a problem that I would like some clarification on.
Given the table
CPT
What would the p(Aggression=high|Anger=Partly,Hostility=Yes) be? My answer is 0.5.
My thought process is that Anger and Hostility are dependent, so according to the info given, the probability of partly anger and yes hostility is 0.5.
Aggression is independent of the two, so it would just be P(aggression)*0.5= 0.5.
Would this be a correct assumption?
Short answer: My value for p(Aggression=high|Anger=Partly,Hostility=Yes) is 100%.
If Aggression were independent of Hostility and Anger, it would not matter what evidence you have.
Then p(Aggression) would simply be the maximum of the 3 values p(Agg=low), p(Agg=high), p(Agg=veryhigh).
However the 3*9 table implies p(Agg) = p(Hos, Ang) and it is not independent.
I have tried to model your CPT (upper table) with the free software "Samiam".
In doing so, I've entered the values from the CPT for the Aggression node in Samiam.
For the priors: I am assuming someone who is in Anger 5% of the time, partly angry 15% of the time, and 80% not angry; and hostile 10% of the time, partly hostile 30% or not hostile 60% of the time.
See screenshots:
Table values for Aggression Node:
With Observed Evidence - Value of Aggression=High goes up to 100%:
I've also attached the samiam file:
% HUGIN/SamIam network file: a three-node Bayes net in which Aggression
% has parents Anger and Hostility.
net
{
propagationenginegenerator1791944048146838126L = "edu.ucla.belief.approx.BeliefPropagationSettings#20ece334";
recoveryenginegenerator6944530267470113528l = "edu.ucla.util.SettingsImpl#49f77e1b";
node_size = (130.0 55.0);
huginenginegenerator3061656038650325130L = "edu.ucla.belief.inference.JoinTreeSettings#71a1d859";
}
% Query node, states Low / High / VeryHigh.
node Aggression
{
states = ("Low" "High" "VeryHigh" );
position = (268 -263);
diagnosistype = "AUXILIARY";
DSLxSUBMODEL = "Root Submodel";
ismapvariable = "false";
ID = "variable2";
label = "Aggression";
DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY";
excludepolicy = "include whole CPT";
}
% Parent node, states no / partly / yes.
node Anger
{
states = ("no" "partly" "yes" );
position = (118 -48);
diagnosistype = "AUXILIARY";
DSLxSUBMODEL = "Root Submodel";
ismapvariable = "false";
ID = "variable0";
label = "Anger";
DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY";
excludepolicy = "include whole CPT";
}
% Parent node, states No / Partly / Yes.
node Hostility
{
states = ("No" "Partly" "Yes" );
position = (351 -46);
diagnosistype = "AUXILIARY";
DSLxSUBMODEL = "Root Submodel";
ismapvariable = "false";
ID = "variable1";
label = "Hostility";
DSLxEXTRA_DEFINITIONxDIAGNOSIS_TYPE = "AUXILIARY";
excludepolicy = "include whole CPT";
}
% CPT P(Aggression | Anger, Hostility); each innermost triple is
% (Low High VeryHigh).  NOTE(review): assuming HUGIN's convention that the
% outer index runs over the first parent (Anger) and the middle index over
% Hostility — to be confirmed — the entry for Anger=partly, Hostility=Yes is
% (0.0 1.0 0.0), i.e. Aggression=High with probability 1, which matches the
% 100% result reported above.
potential ( Aggression | Anger Hostility )
{
data = ((( 1.0 0.0 0.0 )
( 0.5 0.5 0.0 )
( 0.5 0.0 0.5 ))
(( 0.5 0.5 0.0 )
( 0.5 0.5 0.0 )
( 0.0 1.0 0.0 ))
(( 0.5 0.0 0.5 )
( 0.0 0.5 0.5 )
( 0.0 0.0 1.0 )));
}
% Prior over Anger: (no partly yes) = (0.8 0.15 0.05).
potential ( Anger | )
{
data = ( 0.8 0.15 0.05 );
}
% Prior over Hostility: (No Partly Yes) = (0.6 0.3 0.1).
potential ( Hostility | )
{
data = ( 0.6 0.3 0.1 );
}
I have a file that looks like this:
> loc.38167 h3k4me1 1.8299 1.5343 0.0 0.0 1.8299 1.5343 0.0 ....
> loc.08652 h3k4me3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ....
I want to plot 500 random 'loc.' points on a graph. Each loc. has 100 values. I use the following python script:
# Load tab-separated rows: key = column 0 (the "loc.*" id), value = the
# numeric columns from index 2 onward.
file = open('h3k4me3.tab.data')
data = {}
for line in file:
    cols = line.strip().split('\t')
    # NOTE(review): on Python 3 map() returns a lazy map object, not a list —
    # wrap it in list(...) before storing.
    vals = map(float,cols[2:])
    data[cols[0]] = vals
# BUG: missing () — this only references the method and never closes the file.
file.close
# BUG: not random (takes the first 500 keys in dict order), and dict_keys is
# not subscriptable on Python 3 — use random.sample(list(data), 500) instead.
randomA = data.keys()[:500]
# NOTE(review): math is not imported in this snippet.  window = ceil(5000/100) = 50.
window = int(math.ceil(5000.0 / 100))
xticks = range(-2500,2500,window)  # 100 tick positions: -2500, -2450, ..., 2450
# "ValueError: arrays must all be same length" — presumably some rows do not
# have exactly 100 values; verify len(v) for each stored row (and that it
# matches len(xticks)) before plotting.
sns.tsplot([data[k] for k in randomA],time=xticks)
However, I get
ValueError: arrays must all be same length