Sudoku Solution Using Multiprocessing - python-multiprocessing

I tried sudoku solution using backtracking, but it was taking a lot time around 12sec to give output. I tried to implement a multiprocessing technique but it's taking lot more time than that of backtracking. I never ran it completely it's too slow. Please suggest what am I missing? Even better if someone can also tell me how to run this through my GPU. (using CUDA).
import concurrent.futures
import copy
A = [[0]*9 for _ in range(9)]
A[0][6] = 2
A[1][1] = 8
A[1][5] = 7
A[1][7] = 9
A[2][0] = 6
A[2][2] = 2
A[2][6] = 5
A[3][1] = 7
A[3][4] = 6
A[4][3] = 9
A[4][5] = 1
A[5][4] = 2
A[5][7] = 4
A[6][2] = 5
A[6][6] = 6
A[6][8] = 3
A[7][1] = 9
A[7][3] = 4
A[7][7] = 7
A[8][2] = 6
Boards = [A]
L = []
for i in range(9):
for j in range(9):
if A[i][j] == 0:
L.append([i,j])
def RC_Check(A,Value,N):
global L
i,j = L[N]
for x in range(9):
if A[x][j] == Value:
return False
if A[i][x] == Value:
return False
return True
def Square_Check(A,Value,N):
global L
i,j = L[N]
X, Y = int(i/3)*3,int(j/3)*3
for x in range(X,X+3):
for y in range(Y,Y+3):
if A[x][y] == Value:
return False
return True
def New_Boards(Board,N):
global L
i,j = L[N]
Boards = []
with concurrent.futures.ProcessPoolExecutor() as executor:
RC_Process = executor.map(RC_Check,[Board]*10,list(range(1,10)),[N]*10)
Square_Process = executor.map(Square_Check,[Board]*10,list(range(1,10)),[N]*10)
for Value, (RC_Process, Square_Process) in enumerate(zip(RC_Process,Square_Process)):
if RC_Process and Square_Process:
Board[i][j] = Value+1
Boards.append(copy.deepcopy(Board))
return Boards
def Solve_Boards(Boards,N):
Results = []
with concurrent.futures.ProcessPoolExecutor() as executor:
Process = executor.map(New_Boards,Boards,[N]*len(Boards))
for new_boards in Process:
if len(new_boards):
Results.extend(new_boards)
return Results
if __name__ == "__main__":
N = 0
while N < len(L):
Boards = Solve_Boards(Boards,N)
N+=1
print(len(Boards),N)
print(Boards)

Multi processing is NOT a silver bullet. Backtracking is pretty more efficient than exhaustive search parallelly in most cases. I tried running this code on my PC which has 32 cores 64 threads, but it takes long time.
And you look like to want to use GPGPU to solve this problem, but i doesn't suit, Because state of board depends on previous state, so can't split calculation efficiently.

Related

How to stop the iteration when the Jacobian reached to an arbitrary (small) value in Newton-CG method?

How to put a stopping condition on jacobian (or gradient) for Newton-CG methode?
I want the algorithme to stop when the jacobian reaches to 1e-2, is it possible to do with Newton-CG ??
input:
scipy.optimize.minimize(f, [5.0,1.0,2.0,5.0], args=Data, method='Newton-CG',jac=Jacf)
output:
jac: array([7.64265411e-08, 1.74985718e-08, 4.12408407e-07, 5.02972841e-08])
message: 'Optimization terminated successfully.'
nfev: 12
nhev: 0
nit: 11
njev: 68
status: 0
success: True
x: array([0.22545395, 0.3480084 , 1.06811724, 1.64873479])
in BFGS method, which is symilar to Newton-CG, there is a gtol option, it allows to stop the iteration when the gradient reaches to some value. But in Newton-CG theres no that type of option.
Does anyone know how to stop the iteration when the jacobien reaches to 1e-2.
Here are some details to reproduce my code:
def convert_line2matrix(a):
n = len(a)
if (np.sqrt(n) % 1 == 0) :
d = int(np.sqrt(n))
Mat = np.zeros((d,d))
for i in range(d):
for j in range(d):
Mat[i,j] = a[j+d*i]
else:
raise ValueError(f"{a} cant be converted into a (n x n) matrix. The array has {len(a)} elements, \n\t thus impossible to build a square matrix with {len(a)} elements.")
return Mat
def convert_matrix2line(Matrix):
result = []
dim = len(Matrix)
for i in range(dim):
for j in range(dim):
result.append(Matrix[i,j])
return np.array(result)
my_data = np.array([[0.21530249, 0.32450331, 0 ],
[0.1930605 , 0.31788079, 0 ],
[0.17793594, 0.31788079, 0 ],
[0.16459075, 0.31125828, 1 ],
[0.24822064, 0.31125828, 0 ],
[0.28647687, 0.32450331, 0 ],
[0.32829181, 0.31788079, 0 ],
[0.38879004, 0.32450331, 0 ],
[0.42882562, 0.32450331, 0 ],
[0.47419929, 0.32450331, 0 ],
[0.5044484 , 0.32450331, 0 ],
[0.1797153 , 0.31125828, 0 ],
[0.16548043, 0.31125828, 1 ],
[0.17793594, 0.29801325, 1 ],
[0.1930605 , 0.31788079, 0 ]])
Data = pd.DataFrame(my_data, columns=['X_1','X_2', 'Allum'])
def logLB(params,Data):
B = convert_line2matrix(params)
X = np.array(Data.iloc[:,:len(B)])
Y = np.array(Data.iloc[:,len(B)])
result = 0
n = len(Data)
BB = np.transpose(B) # B
for i in range(n):
if(1-np.exp(-X[i].T # BB # X[i]) > 0):
result += Y[i]*(-np.transpose(X[i]) # BB # X[i]) + (1 - Y[i])*np.log(1-np.exp(-X[i].T # BB # X[i]))
return result
def f(params, Data):
return -logLB(params, Data)
def dlogLB(params, Data):
B = convert_line2matrix(params)
X = np.array(Data.iloc[:,:len(B)])
Y = np.array(Data.iloc[:,len(B)])
BB = B.T # B
N = len(Data)
M = len(B)
Jacobian = np.zeros(np.shape(B))
for n in range(len(B)):
for m in range(len(B)):
result = 0
for c in range(N):
som = 0
for i in range(M):
som += X[c,m]*B[n,i]*X[c,i]
if (1 - np.exp(-X[c].T # BB # X[c]) > 0):
result += -2*Y[c]*som + (1-Y[c])*np.exp(-X[c].T # BB # X[c])*(2*som)/(1 - np.exp(-X[c].T # BB # X[c]))
Jacobian[n,m] = result
return convert_matrix2line(Jacobian)
def Jacf(params, Data):
return -dlogLB(params, Data)
I assume that you want to stop the optimizer as soon as the euclidian norm of the gradient reaches a specific value, which is exactly the meaning of the BFGS method's gtol option. Otherwise, it doesn't make any sense mathematically, since the evaluated gradient is a vector and thus can't be compared to a scalar value.
The Newton-CG method doesn't provide a similar option. However, you could use a simple callback that is called after each iteration and terminates the algorithm when the callback returns True. Unfortunately, you can only terminate the optimizer by a callback with the trust-constr method. For all other methods, the callback's return value is ignored, so it's very limited.
A possible hacky and ugly way to terminate the optimizer by the callback anyway would be raising an exception:
import numpy as np
from scipy.optimize import minimize
class Callback:
def __init__(self, eps, args, jac):
self.eps = eps
self.args = args
self.jac = jac
self.x = None
self.gtol = None
def __call__(self, xk):
self.x = xk
self.gtol = np.linalg.norm(self.jac(xk, *self.args))
if self.gtol <= self.eps:
raise Exception("Gradient norm is below threshold")
Here, xk is the current iterate, eps your desired tolerance, args a tuple containing your optional objective und gradient arguments and jac the gradient. Then, you can use it like this:
from scipy.optimize import minimize
cb = Callback(1.0e-1, (Data,), Jacf)
try:
res = minimize(f, [5.0,1.0,2.0,5.0], args=Data, method='Newton-CG',
jac=Jacf, callback=cb)
except:
x = cb.x
gtol = cb.gtol
print(f"gtol = {gtol:E}, x = {x}")
which yields
gtol = 5.515263E-02, x = [14.43322108 -5.18163542 0.22582261 -0.04859385]

GEKKO - MINLP in Matrix Form - Errors using m.axb()

I am trying to solve a MINLP problem using GEKKO. My code is the following:
m = GEKKO(remote = True)
m.options.SOLVER = 3
m.solver_options = ['minlp_maximum_iterations 500', \
# minlp iterations with integer solution
'minlp_max_iter_with_int_sol 10', \
# treat minlp as nlp
'minlp_as_nlp 0', \
# nlp sub-problem max iterations
'nlp_maximum_iterations 50', \
# 1 = depth first, 2 = breadth first
'minlp_branch_method 1', \
# maximum deviation from whole number
'minlp_integer_tol 0.05', \
# covergence tolerance
'minlp_gap_tol 0.01']
# Array Variable
rows = nb_phases + 3*b_max*(nb_phases+1)#48
columns = 1
x = np.empty((rows,columns),dtype=object)
for i in range(3*nb_phases*b_max+nb_phases+1):
for j in range(columns):
x[i,j] = m.Var(value = xinit[i,j], lb = LB[i,j], ub = UB[i,j], integer = False)
for i in range(3*nb_phases*b_max+nb_phases+1, (3*nb_phases+3)*b_max+nb_phases):
for j in range(columns):
x[i,j] = m.Var(value = xinit[i,j], lb = LB[i,j], ub = UB[i,j], integer = True)
# Constraints
#m.axb(A = A,b = B, x = x, etype = '<=', sparse = False)
m.axb(A,B, etype = '<=',sparse=False)
#m.axb(A = A_eq,b = B_eq, x = x, etype = '=', sparse = False)
m.axb(A_eq,B_eq, etype = '=',sparse=False)
for i in range(rows):
for j in range(columns):
m.Minimize((x[i,j]-i*j)**2)
#Solver
m.solve(disp = True)
When calling the axb function, if I declare the variable x in the arguments as the following:
m.axb(A = A,b = B, x = x, etype = '<=', sparse = False)
I get the error : List x must be composed of GEKKO parameters or variables. I don't really understand why I get this error since x is a gekko variable.
If I don't declare the variable x in the arguments of the axb function:
m.axb(A,B, etype = '<=',sparse=False)
I get the following error: AXB Missing Configuration File, Error: AXB object missing: axb1.txt, Example config file: axb1.txt
I was thinking maybe the issue is that x is not defined as an array. Therefore, considering x[i,j], I tried to explicit the equation Ax<=b by coding the matrix product A.x in a loop to avoid calling m.axb but I am not sure how to declare the equations after. My code is the following:
Ax = []
for i in range(rows):
temp = []
for j in range(columns):
temp.append(A[i,j]*x[j,0])
Ax.append(sum(temp))
for i in range(rows):
m.Equations(Ax[i] <= B[i])
I get the error: 'int' object is not subscriptable
Is anyone able to help me figure out how to solve this problem?
Is there a way of defining x as an array? (Since some of its elements are integers and some aren't)
Thanks a lot !
Here is a solution that works with the newer version of Gekko that is not yet released but is available on GitHub. You'll need to put the newest version of gekko.py (v1.0) in the Lib/site_packages/gekko folder and the local executable (apm.exe for Windows, apm_mac for MacOS, apm for Linux) in the Lib/site_packages/gekko/bin folder to use remote=False.
from gekko import GEKKO
import numpy as np
m = GEKKO(remote = False)
m.options.SOLVER = 3
nb_phases = 2
b_max = 3
m.solver_options = ['minlp_maximum_iterations 500', \
# minlp iterations with integer solution
'minlp_max_iter_with_int_sol 10', \
# treat minlp as nlp
'minlp_as_nlp 0', \
# nlp sub-problem max iterations
'nlp_maximum_iterations 50', \
# 1 = depth first, 2 = breadth first
'minlp_branch_method 1', \
# maximum deviation from whole number
'minlp_integer_tol 0.05', \
# covergence tolerance
'minlp_gap_tol 0.01']
# Array Variable
rows = nb_phases + 3*b_max*(nb_phases+1)#48
columns = 1
xinit = np.ones(rows)
LB = np.zeros(rows)
UB = np.ones(rows)*10.0
#x = m.Array(m.Var,(rows))
x = np.empty(rows,dtype=object)
for i in range(3*nb_phases*b_max+nb_phases+1):
x[i] = m.Var(value = xinit[i], lb = LB[i], ub = UB[i], integer = False)
for i in range(3*nb_phases*b_max+nb_phases+1, (3*nb_phases+3)*b_max+nb_phases):
x[i] = m.Var(value = xinit[i], lb = LB[i], ub = UB[i], integer = True)
# Constraints
#m.axb(A = A,b = B, x = x, etype = '<=', sparse = False)
A = np.ones((1,rows)); B = np.zeros(1)
m.axb(A,B,x,etype = '<=',sparse=False)
#m.axb(A = A_eq,b = B_eq, x = x, etype = '=', sparse = False)
m.axb(A,B,x,etype = '=',sparse=False)
for i in range(rows):
m.Minimize((x[i]-i)**2)
#Solver
m.options.SOLVER = 1
m.solve(disp = True)
This produces the solution:
----------------------------------------------------------------
APMonitor, Version 1.0.0
APMonitor Optimization Suite
----------------------------------------------------------------
--------- APM Model Size ------------
Each time step contains
Objects : 2
Constants : 0
Variables : 29
Intermediates: 0
Connections : 58
Equations : 29
Residuals : 29
Number of state variables: 29
Number of total equations: - 2
Number of slack variables: - 0
---------------------------------------
Degrees of freedom : 27
----------------------------------------------
Steady State Optimization with APOPT Solver
----------------------------------------------
Iter: 1 I: 0 Tm: -0.00 NLPi: 2 Dpth: 0 Lvs: 0 Obj: 7.71E+03 Gap: 0.00E+00
Successful solution
---------------------------------------------------
Solver : APOPT (v1.0)
Solution time : 0.019000000000000003 sec
Objective : 7714.
Successful solution
---------------------------------------------------

jDE(Adaptive Differential Evolution)

In jDE, each individual has its own F and CR values. How to assign these values to each individuals programmatically. How to update these values.
A pseudo-code will help.
If you want each individual to have its own F and CR values, you can simply save it in a list. (Pseudo-code: Python)
ID_POS = 0
ID_FIT = 1
ID_F = 2
ID_CR = 3
def create_solution(problem_size):
pos = np.random.uniform(lower_bound, upper_bound, problem_size)
fit = fitness_function(pos)
F = your_values
CR = your values
return [pos, fit, F, CR]
def training(problem_size, pop_size, max_iteration):
# Initialization
pop = [create_solution(problem_size) for _ in range(0, pop_size)]
# Evolution process
for iteration in range(0, max_iteration):
for i in range(0, pop_size):
# Do your stuff here
pos_new = ....
fit_new = ....
F_new = ...
CR_new = ...
if pop[i][ID_FIT] < fit_new: # meaning the new solution has better fitness than the old one.
pop[i][ID_F] = F_new
pop[i][ID_CR] = CR_new # This is how you update F and CR for every individual.
...
You can check out my repo's contains most of the state-of-the-art meta-heuristics here.
https://github.com/thieunguyen5991/metaheuristics

Efficient implementation of factorization machine with matrix operations?

Link is here : https://www.csie.ntu.edu.tw/~r01922136/slides/ffm.pdf (slides 5-6)
Given the following matrices:
X : n * d
W : d * k
Is there an efficient way to calculate the n x 1 matrix using only matrix operations (eg. numpy, tensorflow), where the jth element is :
EDIT:
Current attempt is this, but obviously it's not very space efficient, as it requires storing matrices of size n*d*d :
n = 1000
d = 256
k = 32
x = np.random.normal(size=[n,d])
w = np.random.normal(size=[d,k])
xxt = np.matmul(x.reshape([n,d,1]),x.reshape([n,1,d]))
wwt = np.matmul(w.reshape([1,d,k]),w.reshape([1,k,d]))
output = xxt*wwt
output = np.sum(output,(1,2))
Avoid large temporary arrays
Not all types of algorithms are that easily or obviously to vectorize. The np.sum(xxt*wwt) can be rewritten using np.einsum. This should be faster than your solution, but has some other limitations (eg. no multithreading).
I would therefor suggest using a compiler like Numba.
Example
import numpy as np
import numba as nb
import time
#nb.njit(fastmath=True,parallel=True)
def factorization_nb(w,x):
n = x.shape[0]
d = x.shape[1]
k = w.shape[1]
output=np.empty(n,dtype=w.dtype)
wwt=np.dot(w.reshape((d,k)),w.reshape((k,d)))
for i in nb.prange(n):
sum=0.
for j in range(d):
for jj in range(d):
sum+=x[i,j]*x[i,jj]*wwt[j,jj]
output[i]=sum
return output
def factorization_orig(w,x):
n = x.shape[0]
d = x.shape[1]
k = w.shape[1]
xxt = np.matmul(x.reshape([n,d,1]),x.reshape([n,1,d]))
wwt = np.matmul(w.reshape([1,d,k]),w.reshape([1,k,d]))
output = xxt*wwt
output = np.sum(output,(1,2))
return output
Mesuring Performance
n = 1000
d = 256
k = 32
x = np.random.normal(size=[n,d])
w = np.random.normal(size=[d,k])
#first call has some compilation overhead
res_1=factorization_nb(w,x)
t1=time.time()
for i in range(100):
res_1=factorization_nb(w,x)
#res_2=factorization_orig(w,x)
print(time.time()-t1)
Timings
factorization_nb: 4.2 ms per iteration
factorization_orig: 460 ms per iteration (110x speedup)
For an einsum implemtnation in pytorch, it would be something like
V = torch.randn([50, 10])
x = torch.randn([50])
result = (torch.einsum('ik,jk,i,j->', V, V, x, x)-torch.einsum('ik,ik,i,i->', V, V, x, x))/2
where we subtract the contribution from the feature weight being dotted with itself.

Pandas Apply(), Transform() ERROR = invalid dtype determination in get_concat_dtype

Flowing on from this question, which i link as background, but question is standalone.
4 questions:
I cannot understand the error I see when using apply or transform:
"invalid dtype determination in get_concat_dtype"
Why does ClipNetMean work but the other 2 methods not?
Unsure if or why i need the .copy(deep=True)
Why the slightly different syntax needed to call the InnerFoo function
The DataFrame:
cost
section item
11 1 25
2 100
3 77
4 10
12 5 50
1 39
2 7
3 32
13 4 19
1 21
2 27
The code:
import pandas as pd
import numpy as np
df = pd.DataFrame(data = {'section' : [11,11,11,11,12,12,12,12,13,13,13]
,'item' : [1,2,3,4,5,1,2,3,4,1,2]
,'cost' : [25.,100.,77.,10.,50.,39.,7.,32.,19.,21.,27.]
})
df.set_index(['section','item'],inplace=True)
upper =50
lower = 10
def ClipAndNetMean(cost,upper,lower):
avg = cost.mean()
new_cost = (cost- avg).clip(lower,upper)
return new_cost
def MiniMean(cost,upper,lower):
cost_clone = cost.copy(deep=True)
cost_clone['A'] = lower
cost_clone['B'] = upper
v = cost_clone.apply(np.mean,axis=1)
return v.to_frame()
def InnerFoo(lower,upper):
def inner(group):
group_clone = group.copy(deep=True)
group_clone['lwr'] = lower
group_clone['upr'] = upper
v = group_clone.apply(np.mean,axis=1)
return v.to_frame()
return inner
#These 2 work fine.
print df.groupby(level = 'section').apply(ClipAndNetMean,lower,upper)
print df.groupby(level = 'section').transform(ClipAndNetMean,lower,upper)
#apply works but not transform
print df.groupby(level = 'section').apply(MiniMean,lower,upper)
print df.groupby(level = 'section').transform(MiniMean,lower,upper)
#apply works but not transform
print df.groupby(level = 'section').apply(InnerFoo(lower,upper))
print df.groupby(level = 'section').transform(InnerFoo(lower,upper))
exit()
So to Chris's answer, note that if I add back the column header the methods will work in a Transform call.
see v.columns = ['cost']
def MiniMean(cost,upper,lower):
cost_clone = cost.copy(deep=True)
cost_clone['A'] = lower
cost_clone['B'] = upper
v = cost_clone.apply(np.mean,axis=1)
v = v.to_frame()
v.columns = ['cost']
return v
def InnerFoo(lower,upper):
def inner(group):
group_clone = group.copy(deep=True)
group_clone['lwr'] = lower
group_clone['upr'] = upper
v = group_clone.apply(np.mean,axis=1)
v = v.to_frame()
v.columns = ['cost']
return v
return inner
1 & 2) transform expects something "like-indexed", while apply is flexible. The two failing functions are adding additional columns.
3) In some cases, (e.g. if you're passing a whole DataFrame into a function) it can be necessary to copy to avoid mutating the original. It should not be necessary here.
4) The first two functions take a DataFrame with two parameters and returns data. InnerFoo actually returns another function, so it needs to be called before being passed into apply.