Undefined variable error in optimization program using Julia, JuMP, DataFrames - dataframe

Trying to update this optimization program that worked fine with julia 1.6.2, but now
returns the following error using julia 1.7.2.
The error occurs in line 24.
#constraint(m, sum(pitchers_lineup[i], i=1:num_pitchers) == 1)
using DataFrames
using GLPK
using JuMP
using CSV
num_lineups = 25
num_overlap = 3
path_hitters = "Hitters.csv"
path_pitchers = "Pitchers.csv"
path_to_output = "output.csv"
# This is a function that creates one lineup using the Type 4 formulation from the paper
function one_lineup_Type_4(hitters, pitchers, lineups, num_overlap, num_hitters, num_pitchers, catcher, first_baseman, second_baseman, third_baseman, shortstop, outfielders, num_teams, hitters_teams, pitchers_opponents)
m = Model(GLPK.Optimizer)
# Variable for Hitters in lineup
#variable(m, hitters_lineup[i=1:num_hitters], Bin)
# Variable for Pitcher in lineup
#variable(m, pitchers_lineup[i=1:num_pitchers], Bin)
# One Pitcher constraint
#constraint(m, sum(pitchers_lineup[i], i=1:num_pitchers) == 1)
# Eight Hitters constraint
#constraint(m, sum(hitters_lineup[i], i=1:num_hitters) == 8)
# between 1 and 2 catchers + first baseman
#constraint(m, sum((catcher)[i]*hitters_lineup[i], i=1:num_hitters) <= 2)
#constraint(m, 1 <= sum(catcher[i]*hitters_lineup[i], i=1:num_hitters))
# between 1 and 2 second basemen
#constraint(m, sum(second_baseman[i]*hitters_lineup[i], i=1:num_hitters) <= 2)
#constraint(m, 1 <= sum(second_baseman[i]*hitters_lineup[i], i=1:num_hitters))
# between 1 and 2 third basemen
#constraint(m, sum(third_baseman[i]*hitters_lineup[i], i=1:num_hitters) <= 2)
#constraint(m, 1 <= sum(third_baseman[i]*hitters_lineup[i], i=1:num_hitters))
# between 1 and 2 shortstops
#constraint(m, sum(shortstop[i]*hitters_lineup[i], i=1:num_hitters) <= 2)
#constraint(m, 1 <= sum(shortstop[i]*hitters_lineup[i], i=1:num_hitters))
# between 3 and 4 outfielders
#constraint(m, 3 <= sum(outfielders[i]*hitters_lineup[i], i=1:num_hitters))
#constraint(m, sum(outfielders[i]*hitters_lineup[i], i=1:num_hitters) <= 4)
# Financial Constraint
#constraint(m, sum(hitters[i,:Salary]*hitters_lineup[i], i=1:num_hitters) + sum(pitchers[i,:Salary]*pitchers_lineup[i], i=1:num_pitchers) <= 35000)
# exactly x different teams for the 8 hitters constraint
#variable(m, used_team[i=1:num_teams], Bin)
constr = #constraint(m, [i=1:num_teams], used_team[i] <= sum(hitters_teams[t, i]*hitters_lineup[t], t=1:num_hitters))
constr = #constraint(m, [i=1:num_teams], sum(hitters_teams[t, i]*hitters_lineup[t], t=1:num_hitters) == 4*used_team[i])
#constraint(m, sum(used_team[i], i=1:num_teams) == 2)
# No pitchers going against hitters
constr = #constraint(m, [i=1:num_pitchers], 6*pitchers_lineup[i] + sum(pitchers_opponents[k, i]*hitters_lineup[k], k=1:num_hitters)<=6)
# Overlap Constraint
constr = #constraint(m, [i=1:size(lineups)[2]], sum(lineups[j,i]*hitters_lineup[j], j=1:num_hitters) + sum(lineups[num_hitters+j,i]*pitchers_lineup[j], j=1:num_pitchers) <= num_overlap)
# Objective
#objective(m, Max, sum(hitters[i,:FPPG]*hitters_lineup[i], i=1:num_hitters) + sum(pitchers[i,:FPPG]*pitchers_lineup[i], i=1:num_pitchers) )
# Solve the integer programming problem
println("Solving Problem...")
print("\n")
status = solve(m);
# Puts the output of one lineup into a format that will be used later
if status==:Optimal
hitters_lineup_copy = Array(Int64)(0)
for i=1:num_hitters
if getvalue(hitters_lineup[i]) >= 0.9 && getvalue(hitters_lineup[i]) <= 1.1
hitters_lineup_copy = vcat(hitters_lineup_copy, fill(1,1))
else
hitters_lineup_copy = vcat(hitters_lineup_copy, fill(0,1))
end
end
for i=1:num_pitchers
if getvalue(pitchers_lineup[i]) >= 0.9 && getvalue(pitchers_lineup[i]) <= 1.1
hitters_lineup_copy = vcat(hitters_lineup_copy, fill(1,1))
else
hitters_lineup_copy = vcat(hitters_lineup_copy, fill(0,1))
end
end
return(hitters_lineup_copy)
end
end
#=
formulation is the type of formulation that you would like to use. Feel free to customize the formulations. In our paper we considered
the Type 4 formulation in great detail, but we have included the code for all of the formulations dicussed in the paper here. For instance,
if you would like to create lineups without stacking, change one_lineup_Type_4 below to one_lineup_no_stacking
=#
formulation = one_lineup_Type_4
function create_lineups(num_lineups, num_overlap, path_hitters, path_pitchers, formulation, path_to_output)
#=
num_lineups is an integer that is the number of lineups
num_overlap is an integer that gives the overlap between each lineup
path_hitters is a string that gives the path to the hitters csv file
path_pitchers is a string that gives the path to the pitchers csv file
formulation is the type of formulation you would like to use (for instance one_lineup_Type_1, one_lineup_Type_2, etc.)
path_to_output is a string where the final csv file with your lineups will be
=#
# Load information for hitters table
hitters = CSV.read(path_hitters, DataFrames.DataFrame)
# Load information for pitchers table
pitchers = CSV.read(path_pitchers, DataFrames.DataFrame)
# Number of hitters
num_hitters = size(hitters)[1]
# Number of pitchers
num_pitchers = size(pitchers)[1]
# catchers stores the information on which players are catchers
catcher = Int64[]
# first baseman stores the information on which players are first baseman
first_baseman = Int64[]
# second baseman stores the information on which players are second baseman
second_baseman = Int64[]
# third baseman stores the information on which players are third baseman
third_baseman = Int64[]
# shortstop stores the information on which players are shortsops
shortstop = Int64[]
# outfielders stores the information on which players are outfielders
outfielders = Int64[]
#=
Process the position information in the hitters file to populate C, 1B, 2B, 3B, SS & OF's with the
corresponding correct information
=#
for i =1:num_hitters
if hitters[i,:Position] == "C"
catcher=vcat(catcher,fill(1,1))
first_baseman=vcat(first_baseman,fill(0,1))
second_baseman=vcat(second_baseman,fill(0,1))
third_baseman=vcat(third_baseman,fill(0,1))
shortstop=vcat(shortstop,fill(0,1))
outfielders=vcat(outfielders,fill(0,1))
elseif hitters[i,:Position] == "1B"
catcher=vcat(catcher,fill(1,1))
first_baseman=vcat(first_baseman,fill(0,1))
second_baseman=vcat(second_baseman,fill(0,1))
third_baseman=vcat(third_baseman,fill(0,1))
shortstop=vcat(shortstop,fill(0,1))
outfielders=vcat(outfielders,fill(0,1))
elseif hitters[i,:Position] == "2B"
catcher=vcat(catcher,fill(0,1))
first_baseman=vcat(first_baseman,fill(0,1))
second_baseman=vcat(second_baseman,fill(1,1))
third_baseman=vcat(third_baseman,fill(0,1))
shortstop=vcat(shortstop,fill(0,1))
outfielders=vcat(outfielders,fill(0,1))
elseif hitters[i,:Position] == "3B"
catcher=vcat(catcher,fill(0,1))
first_baseman=vcat(first_baseman,fill(0,1))
second_baseman=vcat(second_baseman,fill(0,1))
third_baseman=vcat(third_baseman,fill(1,1))
shortstop=vcat(shortstop,fill(0,1))
outfielders=vcat(outfielders,fill(0,1))
elseif hitters[i,:Position] == "SS"
catcher=vcat(catcher,fill(0,1))
first_baseman=vcat(first_baseman,fill(0,1))
second_baseman=vcat(second_baseman,fill(0,1))
third_baseman=vcat(third_baseman,fill(0,1))
shortstop=vcat(shortstop,fill(1,1))
outfielders=vcat(outfielders,fill(0,1))
else
catcher=vcat(catcher,fill(0,1))
first_baseman=vcat(first_baseman,fill(0,1))
second_baseman=vcat(second_baseman,fill(0,1))
third_baseman=vcat(third_baseman,fill(0,1))
shortstop=vcat(shortstop,fill(0,1))
outfielders=vcat(outfielders,fill(1,1))
end
end
catcher = catcher+first_baseman
# Create team indicators from the information in the hitters file
teams = unique(hitters[!, :Team])
# Total number of teams
num_teams = size(teams)[1]
# player_info stores information on which team each player is on
player_info = zeros(Int, size(teams)[1])
# Populate player_info with the corresponding information
for j=1:size(teams)[1]
if hitters[1, :Team] == teams[j]
player_info[j] =1
end
end
hitters_teams = player_info'
for i=2:num_hitters
player_info = zeros(Int, size(teams)[1])
for j=1:size(teams)[1]
if hitters[i, :Team] == teams[j]
player_info[j] =1
end
end
hitters_teams = vcat(hitters_teams, player_info')
end
# Create pitcher identifiers so you know who they are playing
opponents = pitchers[!, :Opponent]
pitchers_teams = pitchers[!, :Team]
pitchers_opponents=[]
for num = 1:size(teams)[1]
if opponents[1] == teams[num]
pitchers_opponents = hitters_teams[:, num]
end
end
for num = 2:size(opponents)[1]
for num_2 = 1:size(teams)[1]
if opponents[num] == teams[num_2]
pitchers_opponents = hcat(pitchers_opponents, hitters_teams[:,num_2])
end
end
end
# Lineups using formulation as the stacking type
the_lineup= formulation(hitters, pitchers, hcat(zeros(Int, num_hitters + num_pitchers), zeros(Int, num_hitters + num_pitchers)), num_overlap, num_hitters, num_pitchers, catcher, first_baseman, second_baseman, third_baseman, shortstop, outfielders, num_teams, hitters_teams, pitchers_opponents)
the_lineup2 = formulation(hitters, pitchers, hcat(the_lineup, zeros(Int, num_hitters + num_pitchers)), num_overlap, num_hitters, num_pitchers, catcher, first_baseman, second_baseman, third_baseman, shortstop, outfielders, num_teams, hitters_teams, pitchers_opponents)
tracer = hcat(the_lineup, the_lineup2)
for i=1:(num_lineups-2)
try
thelineup=formulation(hitters, pitchers, tracer, num_overlap, num_hitters, num_pitchers, catcher, first_baseman, second_baseman, third_baseman, shortstop, outfielders, num_teams, hitters_teams, pitchers_opponents)
tracer = hcat(tracer,thelineup)
catch
break
end
end
# Create the output csv file
lineup2 = ""
for j = 1:size(tracer)[2]
lineup = ["" "" "" "" "" "" "" "" ""]
for i =1:num_hitters
if tracer[i,j] == 1
if catcher[i]==1
if lineup[2]==""
lineup[2] = string(hitters[i,1])
elseif lineup[9] ==""
lineup[9] = string(hitters[i,1])
end
elseif first_baseman[i] == 1
if lineup[2] == ""
lineup[2] = string(hitters[i,1])
elseif lineup[9] == ""
lineup[9] = string(hitters[i,1])
end
elseif second_baseman[i] == 1
if lineup[3] == ""
lineup[3] = string(hitters[i,1])
elseif lineup[9] == ""
lineup[9] = string(hitters[i,1])
end
elseif third_baseman[i] == 1
if lineup[4] == ""
lineup[4] = string(hitters[i,1])
elseif lineup[9] == ""
lineup[9] = string(hitters[i,1])
end
elseif shortstop[i] == 1
if lineup[5] == ""
lineup[5] = string(hitters[i,1])
elseif lineup[9] == ""
lineup[9] = string(hitters[i,1])
end
elseif outfielders[i] == 1
if lineup[6] == ""
lineup[6] = string(hitters[i,1])
elseif lineup[7] == ""
lineup[7] = string(hitters[i,1])
elseif lineup[8] == ""
lineup[8] = string(hitters[i,1])
elseif lineup[9] == ""
lineup[9] = string(hitters[i,1])
end
end
end
end
for i =1:num_pitchers
if tracer[num_hitters+i,j] == 1
lineup[1] = string(pitchers[i,1])
end
end
for name in lineup
lineup2 = string(lineup2, name, ",")
end
lineup2 = chop(lineup2)
lineup2 = string(lineup2, """
""")
end
outfile = open(path_to_output, "w")
write(outfile, lineup2)
close(outfile)
end
# Running the code
create_lineups(num_lineups, num_overlap, path_hitters, path_pitchers, formulation, path_to_output)
UndefVarError: i not defined
Stacktrace:
[1] macro expansion
# ~/.julia/packages/MutableArithmetics/0Y9ZS/src/rewrite.jl:279 [inlined]
[2] macro expansion
# ~/.julia/packages/JuMP/klrjG/src/macros.jl:676 [inlined]
[3] one_lineup_Type_4(hitters::DataFrame, pitchers::DataFrame, lineups::Matrix{Int64}, num_overlap::Int64, num_hitters::Int64, num_pitchers::Int64, catcher::Vector{Int64}, first_baseman::Vector{Int64}, second_baseman::Vector{Int64}, third_baseman::Vector{Int64}, shortstop::Vector{Int64}, outfielders::Vector{Int64}, num_teams::Int64, hitters_teams::Matrix{Int64}, pitchers_opponents::Matrix{Int64})
# Main ./In[30]:24
[4] create_lineups(num_lineups::Int64, num_overlap::Int64, path_hitters::String, path_pitchers::String, formulation::typeof(one_lineup_Type_4), path_to_output::String)
# Main ./In[30]:265
[5] top-level scope
# In[30]:348
[6] eval
# ./boot.jl:373 [inlined]
[7] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
# Base ./loading.jl:1196

The correct syntax is
sum(pitchers_lineup[i] for i=1:num_pitchers)
Note the for, not a comma.

Related

Solving 15Puzzle with Julia

I'm trying to use Julia to solve the common tile game 15 Puzzle using Julia using A* algorithm. I am quite new to the language and my style may seem very C like. When I try the following code, I run out of memory. I'm not sure if its related to the use of a pointer style in my structs or just bad design.
struct Node
parent
f::Int64
board::Array{Int64,1}
end
function findblank(A::Array{Int64,1})
x = size(A,1)
for i = 1:x
if A[i] == x
return i
end
end
return -1
end
function up(A::Array{Int64,1})
N = size(A,1)
Nsq = isqrt(N)
blank = findblank(A)
B = copy(A)
if blank / Nsq <= 1
return nothing
end
B[blank-Nsq],B[blank] = B[blank],B[blank-Nsq]
return B
end
function down(A::Array{Int64,1})
N = size(A,1)
Nsq = isqrt(N)
blank = findblank(A)
B = copy(A)
if (blank / Nsq) > (Nsq -1)
return nothing
end
B[blank+Nsq],B[blank] = B[blank],B[blank+Nsq]
return B
end
function left(A::Array{Int64,1})
N = size(A,1)
Nsq = isqrt(N)
blank = findblank(A)
B = copy(A)
if (blank % Nsq) == 1
return nothing
end
B[blank-1],B[blank] = B[blank],B[blank-1]
return B
end
function right(A::Array{Int64,1})
N = size(A,1)
Nsq = isqrt(N)
blank = findblank(A)
B = copy(A)
if (blank % Nsq) == 0
return nothing
end
B[blank+1],B[blank] = B[blank],B[blank+1]
return B
end
function manhattan(A::Array{Int64,1})
N = size(A,1)
Nsq = isqrt(N)
r = 0
for i in 1:N
if (A[i]==i || A[i]==N)
continue
end
row1 = floor((A[i]-1) / Nsq)
col1 = (A[i]-1) % Nsq
row2 = floor((i-1) / Nsq)
col2 = (i-1) % Nsq
r+= abs(row1 - row2) + abs(col1 - col2)
end
return r
end
# start = [1,2,3,4,5,6,7,9,8]
# start = [6,5,4,1,7,3,9,8,2] #26 moves
start = [7,8,4,11,12,14,10,15,16,5,3,13,2,1,9,6] # 50 moves
goal = [x for x in 1:length(start)]
# println("The manhattan distance of $start is $(manhattan(start))")
g = 0
f = g + manhattan(start)
pq = PriorityQueue()
actions = [up,down,left,right]
dd = Dict{Array{Int64,1},Int64}()
snode = Node(C_NULL,f,start)
enqueue!(pq,snode,f)
pos_seen = 0
moves = 0
while (!isempty(pq))
current = dequeue!(pq)
if haskey(dd,current.board)
continue
else
push!(dd, current.board =>current.f)
end
if (current.board == goal)
while(current.board != start)
println(current.board)
global moves +=1
current = current.parent[]
end
println(start)
println("$start solved in $moves moves after looking at $pos_seen positions")
break
end
global pos_seen+=1
global g+=1
for i in 1:4
nextmove = actions[i](current.board)
if (nextmove === nothing || nextmove == current.board || haskey(dd,nextmove))
continue
else
global f = g+manhattan(nextmove)
n = Node(Ref(current),f,nextmove)
enqueue!(pq,n,f)
end
end
end
println("END")

How to speed up simple linear algebra optimization probelm in Julia?

I implemented the LSDD changepoint detection method decribed in [1] in Julia, to see if I could make it faster than the existing python implementation [2], which is based on a grid search that looks for the optimal parameters.
I obtain the desired results but despite my best efforts, my grid search version of it takes about the same time to compute as the python one, which is still way too long for real applications.
I also tried using the Optimize package which only makes things worse (2 or 3 times slower).
Here is the grid search that I implemented :
using Random
using LinearAlgebra
function squared_distance(X::Array{Float64,1},C::Array{Float64,1})
sqd = zeros(length(X),length(C))
for i in 1:length(X)
for j in 1:length(C)
sqd[i,j] = X[i]^2 + C[j]^2 - 2*X[i]*C[j]
end
end
return sqd
end
function lsdd(x::Array{Float64,1},y::Array{Float64,1}; folds = 5, sigma_list = nothing , lambda_list = nothing)
lx,ly = length(x), length(y)
b = min(lx+ly,300)
C = shuffle(vcat(x,y))[1:b]
CC_dist2 = squared_distance(C,C)
xC_dist2, yC_dist2 = squared_distance(x,C), squared_distance(y,C)
Tx,Ty = length(x) - div(lx,folds), length(y) - div(ly,folds)
#Define the training and testing data sets
cv_split1, cv_split2 = floor.(collect(1:lx)*folds/lx), floor.(collect(1:ly)*folds/ly)
cv_index1, cv_index2 = shuffle(cv_split1), shuffle(cv_split2)
tr_idx1,tr_idx2 = [findall(x->x!=i,cv_index1) for i in 1:folds], [findall(x->x!=i,cv_index2) for i in 1:folds]
te_idx1,te_idx2 = [findall(x->x==i,cv_index1) for i in 1:folds], [findall(x->x==i,cv_index2) for i in 1:folds]
xTr_dist, yTr_dist = [xC_dist2[i,:] for i in tr_idx1], [yC_dist2[i,:] for i in tr_idx2]
xTe_dist, yTe_dist = [xC_dist2[i,:] for i in te_idx1], [yC_dist2[i,:] for i in te_idx2]
if sigma_list == nothing
sigma_list = [0.25, 0.5, 0.75, 1, 1.2, 1.5, 2, 2.5, 2.2, 3, 5]
end
if lambda_list == nothing
lambda_list = [1.00000000e-03, 3.16227766e-03, 1.00000000e-02, 3.16227766e-02,
1.00000000e-01, 3.16227766e-01, 1.00000000e+00, 3.16227766e+00,
1.00000000e+01]
end
#memory prealocation
score_cv = zeros(length(sigma_list),length(lambda_list))
H = zeros(b,b)
hx_tr, hy_tr = [zeros(b,1) for i in 1:folds], [zeros(b,1) for i in 1:folds]
hx_te, hy_te = [zeros(1,b) for i in 1:folds], [zeros(1,b) for i in 1:folds]
#h_tr,h_te = zeros(b,1), zeros(1,b)
theta = zeros(b)
for (sigma_idx,sigma) in enumerate(sigma_list)
#the expression of H is different for higher dimension
#H = sqrt((sigma^2)*pi)*exp.(-CC_dist2/(4*sigma^2))
set_H(H,CC_dist2,sigma,b)
#check if the sum is performed along the right dimension
set_htr(hx_tr,xTr_dist,sigma,Tx), set_htr(hy_tr,yTr_dist,sigma,Ty)
set_hte(hx_te,xTe_dist,sigma,lx-Tx), set_hte(hy_te,yTe_dist,sigma,ly-Ty)
for i in 1:folds
h_tr = hx_tr[i] - hy_tr[i]
h_te = hx_te[i] - hy_te[i]
#set_h(h_tr,hx_tr[i],hy_tr[i],b)
#set_h(h_te,hx_te[i],hy_te[i],b)
for (lambda_idx,lambda) in enumerate(lambda_list)
set_theta(theta,H,lambda,h_tr,b)
score_cv[sigma_idx,lambda_idx] += dot(theta,H*theta) - 2*dot(theta,h_te)
end
end
end
#retrieve the value of the optimal parameters
sigma_chosen = sigma_list[findmin(score_cv)[2][2]]
lambda_chosen = lambda_list[findmin(score_cv)[2][2]]
#calculating the new "optimal" solution
H = sqrt((sigma_chosen^2)*pi)*exp.(-CC_dist2/(4*sigma_chosen^2))
H_lambda = H + lambda_chosen*Matrix{Float64}(I, b, b)
h = (1/lx)*sum(exp.(-xC_dist2/(2*sigma_chosen^2)),dims = 1) - (1/ly)*sum(exp.(-yC_dist2/(2*sigma_chosen^2)),dims = 1)
theta_final = H_lambda\transpose(h)
f = transpose(theta_final).*sum(exp.(-vcat(xC_dist2,yC_dist2)/(2*sigma_chosen^2)),dims = 1)
L2 = 2*dot(theta_final,h) - dot(theta_final,H*theta_final)
return L2
end
function set_H(H::Array{Float64,2},dist::Array{Float64,2},sigma::Float64,b::Int16)
for i in 1:b
for j in 1:b
H[i,j] = sqrt((sigma^2)*pi)*exp(-dist[i,j]/(4*sigma^2))
end
end
end
function set_theta(theta::Array{Float64,1},H::Array{Float64,2},lambda::Float64,h::Array{Float64,2},b::Int64)
Hl = (H + lambda*Matrix{Float64}(I, b, b))
LAPACK.posv!('L', Hl, h)
theta = h
end
function set_htr(h::Array{Float64,1},dists::Array{Float64,2},sigma::Float64,T::Int16)
for (CVidx,dist) in enumerate(dists)
for (idx,value) in enumerate((1/T)*sum(exp.(-dist/(2*sigma^2)),dims = 1))
h[CVidx][idx] = value
end
end
end
function set_hte(h::Array{Float64,1},dists::Array{Float64,2},sigma::Array{Float64,1},T::Int16)
for (CVidx,dist) in enumerate(dists)
for (idx,value) in enumerate((1/T)*sum(exp.(-dist/(2*sigma^2)),dims = 1))
h[CVidx][idx] = value
end
end
end
function set_h(h,h1,h2,b)
for i in 1:b
h[i] = h1[i] - h2[i]
end
end
The set_H, set_h and set_theta functions are there because I read somewhere that modifying prealocated memory in place with a function was faster, but it did not make a great difference.
To test it, I use two random distribution as input data :
x,y = rand(500),1.5*rand(500)
lsdd(x,y) #returns a value around 0.3
Now here is the version of the code where I try to use Optimizer :
function Theta(sigma::Float64,lambda::Float64,x::Array{Float64,1},y::Array{Float64,1},folds::Int8)
lx,ly = length(x), length(y)
b = min(lx+ly,300)
C = shuffle(vcat(x,y))[1:b]
CC_dist2 = squared_distance(C,C)
xC_dist2, yC_dist2 = squared_distance(x,C), squared_distance(y,C)
#the subsets are not be mutually exclusive !
Tx,Ty = length(x) - div(lx,folds), length(y) - div(ly,folds)
shuffled_x, shuffled_y = [shuffle(1:lx) for i in 1:folds], [shuffle(1:ly) for i in 1:folds]
cv_index1, cv_index2 = floor.(collect(1:lx)*folds/lx)[shuffle(1:lx)], floor.(collect(1:ly)*folds/ly)[shuffle(1:ly)]
tr_idx1,tr_idx2 = [i[1:Tx] for i in shuffled_x], [i[1:Ty] for i in shuffled_y]
te_idx1,te_idx2 = [i[Tx:end] for i in shuffled_x], [i[Ty:end] for i in shuffled_y]
xTr_dist, yTr_dist = [xC_dist2[i,:] for i in tr_idx1], [yC_dist2[i,:] for i in tr_idx2]
xTe_dist, yTe_dist = [xC_dist2[i,:] for i in te_idx1], [yC_dist2[i,:] for i in te_idx2]
score_cv = 0
Id = Matrix{Float64}(I, b, b)
H = sqrt((sigma^2)*pi)*exp.(-CC_dist2/(4*sigma^2))
hx_tr, hy_tr = [transpose((1/Tx)*sum(exp.(-dist/(2*sigma^2)),dims = 1)) for dist in xTr_dist], [transpose((1/Ty)*sum(exp.(-dist/(2*sigma^2)),dims = 1)) for dist in yTr_dist]
hx_te, hy_te = [(lx-Tx)*sum(exp.(-dist/(2*sigma^2)),dims = 1) for dist in xTe_dist], [(ly-Ty)*sum(exp.(-dist/(2*sigma^2)),dims = 1) for dist in yTe_dist]
for i in 1:folds
h_tr, h_te = hx_tr[i] - hy_tr[i], hx_te[i] - hy_te[i]
#theta = (H + lambda * Id)\h_tr
theta = copy(h_tr)
Hl = (H + lambda*Matrix{Float64}(I, b, b))
LAPACK.posv!('L', Hl, theta)
score_cv += dot(theta,H*theta) - 2*dot(theta,h_te)
end
return score_cv,(CC_dist2,xC_dist2,yC_dist2)
end
function cost(params::Array{Float64,1},x::Array{Float64,1},y::Array{Float64,1},folds::Int8)
s,l = params[1],params[2]
return Theta(s,l,x,y,folds)[1]
end
"""
Performs the optinization
"""
function lsdd3(x::Array{Float64,1},y::Array{Float64,1}; folds = 4)
start = [1,0.1]
b = min(length(x)+length(y),300)
lx,ly = length(x),length(y)
#result = optimize(params -> cost(params,x,y,folds),fill(0.0,2),fill(50.0,2),start, Fminbox(LBFGS(linesearch=LineSearches.BackTracking())); autodiff = :forward)
result = optimize(params -> cost(params,x,y,folds),start, BFGS(),Optim.Options(f_calls_limit = 5, iterations = 5))
#bboptimize(rosenbrock2d; SearchRange = [(-5.0, 5.0), (-2.0, 2.0)])
#result = optimize(cost,[0,0],[Inf,Inf],start, Fminbox(AcceleratedGradientDescent()))
sigma_chosen,lambda_chosen = Optim.minimizer(result)
CC_dist2, xC_dist2, yC_dist2 = Theta(sigma_chosen,lambda_chosen,x,y,folds)[2]
H = sqrt((sigma_chosen^2)*pi)*exp.(-CC_dist2/(4*sigma_chosen^2))
h = (1/lx)*sum(exp.(-xC_dist2/(2*sigma_chosen^2)),dims = 1) - (1/ly)*sum(exp.(-yC_dist2/(2*sigma_chosen^2)),dims = 1)
theta_final = (H + lambda_chosen*Matrix{Float64}(I, b, b))\transpose(h)
f = transpose(theta_final).*sum(exp.(-vcat(xC_dist2,yC_dist2)/(2*sigma_chosen^2)),dims = 1)
L2 = 2*dot(theta_final,h) - dot(theta_final,H*theta_final)
return L2
end
No matter, which kind of option I use in the optimizer, I always end up with something too slow. Maybe the grid search is the best option, but I don't know how to make it faster... Does anyone have an idea how I could proceed further ?
[1] : http://www.mcduplessis.com/wp-content/uploads/2016/05/Journal-IEICE-2014-CLSDD-1.pdf
[2] : http://www.ms.k.u-tokyo.ac.jp/software.html

Trying to do a gaussian bell in Scilab

I'm trying to do a Gaussian bell using the data I am obtaining from a matrix but everytime I try to run the program I obtain this message:
"Error: syntax error, unexpected identifier, expecting end"
The data used to obtain the gaussina bell is a matrix which includes the last point of every n displacements, which are the last position of a particle. I want to know if there is an easier way to obtain the gaussian bell in scilab because I have to also do a fit with an histogram using the same data.
function bla7()
t=4000
n=1000
l=0.067
p=%pi*2
w1=zeros(t,1);
w2=zeros(t,1);
for I=1:t
a=(grand(n,1,"unf",0,p));
x=l*cos(a)
y=l*sin(a)
z1=zeros(n,1);
z2=zeros(n,1);
for i=2:n
z1(i)=z1(i-1)+x(i);
z2(i)=z2(i-1)+y(i);
end
w1(I)=z1($)
w2(I)=z2($)
end
n=10000
w10=zeros(t,1);
w20=zeros(t,1);
for I=1:t
a=(grand(n,1,"unf",0,p));
x=l*cos(a)
y=l*sin(a)
z1=zeros(n,1);
z2=zeros(n,1);
for i=2:n
z1(i)=z1(i-1)+x(i);
z2(i)=z2(i-1)+y(i);
end
w10(I)=z1($)
w20(I)=z2($)
end
n=100
w100=zeros(t,1);
w200=zeros(t,1);
for I=1:t
a=(grand(n,1,"unf",0,p));
x=l*cos(a)
y=l*sin(a)
z1=zeros(n,1);
z2=zeros(n,1);
for i=2:n
z1(i)=z1(i-1)+x(i);
z2(i)=z2(i-1)+y(i);
end
w100(I)=z1($)
w200(I)=z2($)
end
k=70
v=12/k
c1=zeros(k,1)
for r=1:t
c=w1(r)
m=-6+v
n=-6
for g=1:k
if (c<m & c>=n) then
c1(g)=c1(g)+1
m=m+v
n=n+v
else
m=m+v
n=n+v
end
end
end
c2=zeros(k,1)
c2(1)=-6+(6/k)
for b=2:k
c2(b)=c2(b-1)+v
end
y = stdev(w1)
normal1=zeros(k,1)
normal2=zeros(k,1)
bb=-6
bc=-6+v
for wa=1:k
bd=(bb+bc)/2
gauss1=(1/(y*sqrt(2*%pi)))exp(-0.5(bb/y)^2)
gauss2=(1/(y*sqrt(2*%pi)))exp(-0.5(bc/y)^2)
gauss3=(1/(y*sqrt(2*%pi)))exp(-0.5(bd/y)^2)
gauss4=((bc-bb)/6)*(gauss1+gauss2+4*gauss3)
bb=bb+v
bc=bc+v
normal2(wa,1)=gauss4
end
normal3=normal2*4000
k=100
v=24/k
c10=zeros(k,1)
for r=1:t
c=w10(r)
m=-12+v
n=-12
for g=1:k
if (c<m & c>=n) then
c10(g)=c10(g)+1
m=m+v
n=n+v
else
m=m+v
n=n+v
end
end
end
c20=zeros(k,1)
c20(1)=-12+(12/k)
for b=2:k
c20(b)=c20(b-1)+v
end
y = stdev(w10)
normal10=zeros(k,1)
normal20=zeros(k,1)
bb=-12
bc=-12+v
for wa=1:k
bd=(bb+bc)/2
gauss10=(1/(y*sqrt(2*%pi)))exp(-0.5(bb/y)^2)
gauss20=(1/(y*sqrt(2*%pi)))exp(-0.5(bc/y)^2)
gauss30=(1/(y*sqrt(2*%pi)))exp(-0.5(bd/y)^2)
gauss40=((bc-bb)/6)*(gauss10+gauss20+4*gauss30)
bb=bb+v
bc=bc+v
normal20(wa,1)=gauss40
end
normal30=normal20*4000
k=70
v=12/k
c100=zeros(k,1)
for r=1:t
c=w100(r)
m=-6+v
n=-6
for g=1:k
if (c<m & c>=n) then
c100(g)=c100(g)+1
m=m+v
n=n+v
else
m=m+v
n=n+v
end
end
end
c200=zeros(k,1)
c200(1)=-6+(6/k)
for b=2:k
c200(b)=c200(b-1)+v
end
y = stdev(w100)
normal100=zeros(k,1)
normal200=zeros(k,1)
bb=-6
bc=-6+v
for wa=1:k
bd=(bb+bc)/2
gauss100=(1/(y*sqrt(2*%pi)))exp(-0.5(bb/y)^2)
gauss200=(1/(y*sqrt(2*%pi)))exp(-0.5(bc/y)^2)
gauss300=(1/(y*sqrt(2*%pi)))exp(-0.5(bd/y)^2)
gauss400=((bc-bb)/6)*(gauss100+gauss200+4*gauss300)
bb=bb+v
bc=bc+v
normal200(wa,1)=gauss400
end
normal300=normal200*4000
bar(c20,c10,1.0,'white')
plot(c20, normal30, 'b-')
bar(c2,c1,1.0,'white')
plot(c2, normal3, 'r-')
bar(c200,c100,1.0,'white')
plot(c200, normal300, 'm-')
poly1.thickness=3;
xlabel(["x / um"]);
ylabel("molecules");
gcf().axes_size=[500,500]
a=gca();
a.zoom_box=[-12,12;0,600];
a.font_size=4;
a.labels_font_size=5;
a.x_label.font_size = 5;
a.y_label.font_size = 5;
ticks = a.x_ticks
ticks.labels =["-12";"-10";"-8";"-6";"-4";"-2";"0";"2";"4";"6";"8";"10";"12"]
ticks.locations = [-12;-10;-8;-6;-4;-2;0;2;4;6;8;10;12]
a.x_ticks = ticks
endfunction
Each and every one of your gauss variables are missing the multiplication operator in two places. Check every line at it will run. For example, this:
gauss1=(1/(y*sqrt(2*%pi)))exp(-0.5(bb/y)^2)
should be this:
gauss1=(1/(y*sqrt(2*%pi))) * exp(-0.5 * (bb/y)^2)
As for the Gaussian bell, there is no standard function in Scilab. However, you could define a new function to make things more clear in your case:
function x = myGauss(s,b_)
x = (1/(s*sqrt(2*%pi)))*exp(-0.5*(b_/s)^2)
endfunction
Actually, while we're at it, your whole code is really difficult to read. You should define functions instead of repeating code: it helps clarify what you mean, and if there is a mistake, you need to fix only one place. Also, I personally do not recommend that you enclose everything in a function like bla7() because it makes things harder to debug. Your example could be rewritten like this:
The myGauss function;
A function w_ to calculate w1, w2, w10, w20, w100 and w200;
A function c_ to calculate c1, c2, c10, c20, c100 and c200;
A function normal_ to calculate normal1, normal2, normal10, normal20, normal100 and normal200;
Call all four functions as many times as needed with different inputs for different results.
If you do that, your could will look like this:
function x = myGauss(s,b_)
x = (1 / (s * sqrt(2 * %pi))) * exp(-0.5 * (b_/s)^2);
endfunction
function [w1_,w2_] = w_(t_,l_,n_,p_)
w1_ = zeros(t_,1);
w2_ = zeros(t_,1);
for I = 1 : t_
a = (grand(n_,1,"unf",0,p_));
x = l_ * cos(a);
y = l_ * sin(a);
z1 = zeros(n_,1);
z2 = zeros(n_,1);
for i = 2 : n_
z1(i) = z1(i-1) + x(i);
z2(i) = z2(i-1) + y(i);
end
w1_(I) = z1($);
w2_(I) = z2($);
end
endfunction
function [c1_,c2_] = c_(t_,k_,v_,w1_,x_)
c1_ = zeros(k_,1)
for r = 1 : t_
c = w1_(r);
m = -x_ + v_;
n = -x_;
for g = 1 : k_
if (c < m & c >= n) then
c1_(g) = c1_(g) + 1;
m = m + v_;
n = n + v_;
else
m = m + v_;
n = n + v_;
end
end
end
c2_ = zeros(k_,1);
c2_(1) = -x_ + (x_/k_);
for b = 2 : k_
c2_(b) = c2_(b-1) + v_;
end
endfunction
function [normal1_,normal2_,normal3_] = normal_(k_,bb_,bc_,v_,w1_)
y = stdev(w1_);
normal1_ = zeros(k_,1);
normal2_ = zeros(k_,1);
for wa = 1 : k_
bd_ = (bb_ + bc_) / 2;
gauss1 = myGauss(y,bb_);
gauss2 = myGauss(y,bc_);
gauss3 = myGauss(y,bd_);
gauss4 = ((bc_ - bb_) / 6) * (gauss1 + gauss2 + 4 * gauss3);
bb_ = bb_ + v_;
bc_ = bc_ + v_;
normal2_(wa,1) = gauss4;
end
normal3_ = normal2_ * 4000;
endfunction
t = 4000;
l = 0.067;
p = 2 * %pi;
n = 1000;
k = 70;
v = 12 / k;
x = 6;
bb = -x;
bc = -x + v;
[w1,w2] = w_(t,l,n,p);
[c1,c2] = c_(t,k,v,w1,x);
[normal1,normal2,normal3] = normal_(k,bb,bc,v,w1);
bar(c2,c1,1.0,'white');
plot(c2, normal3, 'r-');
n = 10000;
k = 100;
v = 24 / k;
x = 12;
bb = -x;
bc = -x + v;
[w10,w20] = w_(t,l,n,p);
[c10,c20] = c_(t,k,v,w10,x);
[normal10,normal20,normal30] = normal_(k,bb,bc,v,w10);
bar(c20,c10,1.0,'white');
plot(c20, normal30, 'b-');
n = 100;
k = 70;
v = 12 / k;
x = 6;
bb = -x;
bc = -x + v;
[w100,w200] = w_(t,l,n,p);
[c100,c200] = c_(t,k,v,w100,x);
[normal100,normal200,normal300] = normal_(k,bb,bc,v,w100);
bar(c200,c100,1.0,'white');
plot(c200, normal300, 'm-');
poly1.thickness=3;
xlabel(["x / um"]);
ylabel("molecules");
gcf().axes_size=[500,500]
a=gca();
a.zoom_box=[-12,12;0,600];
a.font_size=4;
a.labels_font_size=5;
a.x_label.font_size = 5;
a.y_label.font_size = 5;
ticks = a.x_ticks
ticks.labels =["-12";"-10";"-8";"-6";"-4";"-2";"0";"2";"4";"6";"8";"10";"12"]
ticks.locations = [-12;-10;-8;-6;-4;-2;0;2;4;6;8;10;12]
a.x_ticks = ticks

Code or Logic to find number of char appearances in a string composed of consecutive numbers

I am struggling with this exercise where I have to find a number (y) so that when counting the times (nr) the value "1" appears in a string (x) composed of all the consecutive numbers starting from 1 to y, the following conditions are met: nr=y and nr is divisible by 10.
example:
x (string with consecutive from 1 to 12)= 123456789101112
y (the number) = 12
nr (times of "1" appearances) = 5
so i need to find the situation where nr=y and y mod 10 = 0
I've tried creating a vba sub to do this, but it takes forever and cannot seem to find a suitable result:
Sub abc2()
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Dim i As Double
Dim y As Double
Dim nr As Double
Dim x As String
x = 1
y = 1
For i = 1 To 500001
x = x & (y + 1)
y = y + 1
nr = Len(x) - Len(Replace(x, "1", ""))
If nr = y And nr Mod 10 = 0 Then
Range("E1") = y
GoTo out
End If
Next i
out:
Range("A1") = x
Range("B1") = y
Range("C1") = nr
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
End Sub
I'd really appreciate some suggestions. Maybe it can be solved in some other ingenious way.
Thank you!
Python:
x = ''
y = 0
####################################
# BRUTE FORCE, FINDS ANSWER 199990 #
####################################
#for iteration in range(100000):
# for index in range(10):
# y+=1
# x+=str(y)
# if (y == x.count('1')):
# print 'Found: ' + str(y) + ': ' + x
####################################
# More elegantly and efficiently, just track how many '1's we've added in each step
ones = 0
for iteration in range(100000):
x = ''
for index in range(10):
y += 1
x += str(y)
ones += x.count('1')
if (y == ones):
print 'Found: ' + str(y)
The commented-out solution takes about 2 minutes to execute. The second solution finishes in .46 seconds.

Trying to count all symbols and numbers in a word file. Program outputs symbols but not how many times they appear

(Visual Basic)
This is the word file I'm reading from:
`#+/084&"
#3*#%#+
8%203:
,1$&
!-*%
.#&33&
#*#71%
&-&641'2
#))85
9&330*
Download link: http://www.filehosting.org/file/details/465979/words.txt
I am trying to find all the different characters and symbols inside the word files, and then count them, and output them as a frequency. For example ("The symbol '#' appears (8) times"), ("The number(0) appears (3) times") etc.
I am using a 2 dimensional array and storing the symbols in the first column and the amount of times they appear in the second.
This is my current code:
Sub Main()
Dim UncodedWords(10) As String
Dim Symcheck(19, 3) As String
Dim X As Integer = 0
Symcheck(0, 0) = ("+")
Symcheck(0, 1) = ("0")
Symcheck(1, 0) = ("/")
Symcheck(1, 1) = ("0")
Symcheck(2, 0) = ("’")
Symcheck(2, 1) = ("0")
Symcheck(3, 0) = ("&")
Symcheck(3, 1) = ("0")
Symcheck(4, 0) = (":")
Symcheck(4, 1) = ("0")
Symcheck(5, 0) = ("$")
Symcheck(5, 1) = ("0")
Symcheck(6, 0) = ("-")
Symcheck(6, 1) = ("0")
Symcheck(7, 0) = ("!")
Symcheck(7, 1) = ("0")
Symcheck(8, 0) = (".")
Symcheck(8, 1) = ("0")
Symcheck(9, 0) = ("""")
Symcheck(9, 1) = ("0")
Symcheck(10, 0) = ("0")
Symcheck(10, 1) = ("0")
Symcheck(11, 0) = ("1")
Symcheck(11, 1) = ("0")
Symcheck(12, 0) = ("2")
Symcheck(12, 1) = ("0")
Symcheck(13, 0) = ("3")
Symcheck(13, 1) = ("0")
Symcheck(14, 0) = ("4")
Symcheck(14, 1) = ("0")
Symcheck(15, 0) = ("5")
Symcheck(15, 1) = ("0")
Symcheck(16, 0) = ("6")
Symcheck(16, 1) = ("0")
Symcheck(17, 0) = ("7")
Symcheck(17, 1) = ("0")
Symcheck(18, 0) = ("8")
Symcheck(18, 1) = ("0")
Symcheck(19, 0) = ("9")
Symcheck(19, 1) = ("0")
Dim Newtext(10) As String
Dim FileLoc As String = "C:\Users\Downloads\words.txt"
Dim StringReader As New StreamReader(FileLoc, FileMode.Open)
For Counter = 0 To 9 ' for each line in the file
UncodedWords(Counter) = StringReader.ReadLine
Next
For Counter = 0 To 9 ' for each word in the file
For length = 1 To Len(UncodedWords(Counter)) - 1
For Counter2 = 0 To 19 ' for each symbol in symcheck
If UncodedWords(Counter).Contains(Symcheck(Counter2, 0)) Then
X += 1
Else
End If
WriteLine(Symcheck(Counter2, 0))
WriteLine(Symcheck(Counter2, X))
Next
Next
Next
End Sub
End Module
There are a lot of ways to do this.
One simple, though not necessarily efficient, method is to begin with an empty Symcheck. Then check each character in the input string. If you've encountered the character before (i.e., if it's in Symcheck; use IndexOf), increment its counter. Otherwise, add it to Symcheck with zero count.
This is what a hash map (hash table) is used for.
Loop through each character in the txt file, and perform a check. If the key exists in the hash map, then increment that keys value, else add the key with a value of 0.
Psuedocode:
For each letter in txtFile
If (HashMap.KeyExists(letter)) then
HashMap(letter).Value += 1
Else
HashMap.Add(letter, 0)
End If
Next