WORHP very slow on locally affine function

I have a problem where I need to minimize a locally affine function, and I chose to use WORHP (later the locally affine property will be replaced by other local conditions). The function value as well as the derivative and the Hessian (which is trivial) are provided as user input and are computed very quickly. I only have simple box constraints, i.e. all variables should lie between 0 and 1, and no other constraints. I consider this a rather easy problem. Still, WORHP needs more than 350 iterations, leading to a computation time that is rather high for such an easy problem.
Is there a way to choose better parameters for this kind of situation?
The Python code I use is given below (the input values are computed by another process):
import worhp  # WORHP Python bindings

def worhp_minimize(n, f, df, hmf, xl, xu, init_x):
    # f: R^n -> R, df: R^n -> R^n, g: R^n -> R^m, gl in R^m, gu in R^m,
    # dg in R^nxm, hmf: R^n -> R^k, hmg: R^(n+m) -> R^k

    def vectorize(attribute):
        return [float(attribute[i]) for i in range(0, len(attribute))]

    def assign(attribute, list):
        for i in range(0, len(list)):
            if list[i] == "inf":
                attribute[i] = par.infty
            elif list[i] == "-inf":
                attribute[i] = -par.infty
            else:
                if isinstance(list[i], int):
                    attribute[i] = list[i]
                else:
                    attribute[i] = float(list[i])

    def user_f(opt, wsp, par, cnt):
        opt.f = wsp.scale_obj * f(vectorize(opt.x))

    def user_df(opt, wsp, par, cnt):
        assign(wsp.df.val, wsp.scale_obj * df(vectorize(opt.x)))

    #def user_dg(opt, wsp, par, cnt):
    #    assign(wsp.dg.val, dg(vectorize(opt.x)))

    def user_hm(opt, wsp, par, cnt):
        hess = wsp.scale_obj * hmf(vectorize(opt.x))
        l = len(hess)
        index = 0
        for j in range(0, l):
            wsp.hm.val[l*(l-1)//2 + j] = hess[j][j]
            for i in range(j, l):
                wsp.hm.val[index] = hess[i][j]
                index += 1

    if worhp.check_version(worhp.MAJOR, worhp.MINOR, worhp.PATCH):
        exit(1)

    opt = worhp.OptVar()
    wsp = worhp.Workspace()
    par = worhp.Params()
    cnt = worhp.Control()

    worhp.pre_init(opt, wsp, par, cnt)
    worhp.init_params(par)
    par.NLPprint = 1
    status = worhp.read_params_no_init("worhp.xml", par)
    if status == worhp.DATA_ERROR or status == worhp.INIT_ERROR:
        exit(1)

    opt.n = n
    opt.m = 0
    worhp.init(opt, wsp, par, cnt)
    if cnt.status != worhp.FIRST_CALL:
        print("Main: Initialisation failed.")
        exit(1)

    assign(opt.x, init_x)
    assign(opt.Lambda, [0.]*n)
    assign(opt.xl, xl)
    assign(opt.xu, xu)

    while cnt.status < worhp.TERMINATE_SUCCESS and cnt.status > worhp.TERMINATE_ERROR:
        if worhp.get_user_action(cnt, worhp.Action.CALL_WORHP):
            #print("call worhp")
            worhp.worhp(opt, wsp, par, cnt)
        if worhp.get_user_action(cnt, worhp.Action.ITER_OUTPUT):
            #print("iter output")
            worhp.iteration_output(opt, wsp, par, cnt)
            worhp.done_user_action(cnt, worhp.Action.ITER_OUTPUT)
        if worhp.get_user_action(cnt, worhp.Action.EVAL_F):
            #print("eval f")
            user_f(opt, wsp, par, cnt)
            worhp.done_user_action(cnt, worhp.Action.EVAL_F)
        if worhp.get_user_action(cnt, worhp.Action.EVAL_DF):
            #print("eval df")
            user_df(opt, wsp, par, cnt)
            worhp.done_user_action(cnt, worhp.Action.EVAL_DF)
        if worhp.get_user_action(cnt, worhp.Action.EVAL_HM):
            #print("eval hm")
            user_hm(opt, wsp, par, cnt)
            worhp.done_user_action(cnt, worhp.Action.EVAL_HM)
        if worhp.get_user_action(cnt, worhp.Action.FIDIF):
            #print("fidif")
            worhp.fidif(opt, wsp, par, cnt)
            # No done_user_action!

    worhp.status_msg(opt, wsp, par, cnt)
    return opt.f, vectorize(opt.x)

If the Hessian is 0, WORHP applies some regularisation to make it positive definite, which can slow down convergence. You could try setting the parameter StartBettsTau to a very small value like 1e-16 to avoid most of the regularisation.
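For example, you could set it directly from Python right after the parameter file is read, in the same way the code above sets NLPprint (this assumes the Python bindings expose the XML parameters as attributes of par; otherwise the value can be put into worhp.xml itself):

status = worhp.read_params_no_init("worhp.xml", par)
par.StartBettsTau = 1e-16  # damp the initial Hessian regularisation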

How to get the indices of x smallest elements in a large numpy matrix/multi-dimensional array (works for any number of dimensions)?

Given a large numpy matrix/multi-dimensional array, what is the best and fastest way to get the indices of the x smallest elements?
from typing import Tuple
import numpy as np
def get_indices_of_k_smallest_as_array(arr: np.ndarray, k: int) -> np.ndarray:
    idx = np.argpartition(arr.ravel(), k)
    return np.array(np.unravel_index(idx, arr.shape))[:, range(k)].transpose().tolist()

def get_indices_of_k_smallest_as_tuple(arr: np.ndarray, k: int) -> Tuple:
    idx = np.argpartition(arr.ravel(), k)
    return tuple(np.array(np.unravel_index(idx, arr.shape))[:, range(min(k, 0), max(k, 0))])
This answer gives the correct indices, but those indices aren't sorted by the size of the elements. That's just how the introselect algorithm used by np.argpartition under the hood works, https://en.wikipedia.org/wiki/Introselect.
It would be nice if the return value were also sorted by the size of the elements, e.g. index 0 of the return points to the smallest element, index 1 points to the 2nd smallest element, etc.
Here's how to do it with sorting. Keep in mind that sorting the results after np.argpartition is going to be much faster than sorting the entire multi-dimensional array.
def get_indices_of_k_smallest_as_array(arr: np.ndarray, k: int) -> np.ndarray:
    ravel_array = arr.ravel()
    indices_on_ravel = np.argpartition(ravel_array, k)
    sorted_indices_on_ravel = sorted(indices_on_ravel, key=lambda x: ravel_array[x])
    sorted_indices_on_original = np.array(np.unravel_index(sorted_indices_on_ravel, arr.shape))[:, range(k)].transpose().tolist()
    # for the fun of numpy indexing, you can do it this way too
    # indices_on_original = np.array(np.unravel_index(indices_on_ravel, arr.shape))[:, range(k)].transpose().tolist()
    # sorted_indices_on_original = sorted(indices_on_original, key=lambda x: arr[tuple(np.array(x).T)])
    return sorted_indices_on_original

def get_indices_of_k_smallest_as_tuple(arr: np.ndarray, k: int) -> Tuple:
    ravel_array = arr.ravel()
    indices_on_ravel = np.argpartition(ravel_array, k)
    sorted_indices_on_ravel = sorted(indices_on_ravel, key=lambda x: ravel_array[x])
    sorted_indices_on_original = tuple(
        np.array(np.unravel_index(sorted_indices_on_ravel, arr.shape))[:, range(min(k, 0), max(k, 0))]
    )
    return sorted_indices_on_original
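A quick check on a tiny array (the values here are purely illustrative and not part of the original post), showing that the indices now come back ordered from smallest to largest element:

arr = np.array([[5., 1., 9.],
                [3., 7., 2.]])
get_indices_of_k_smallest_as_array(arr, 3)
# [[0, 1], [1, 2], [1, 0]]  -> positions of 1., 2. and 3., smallest first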

Binary-search without an explicit array

I want to perform a binary search using, e.g., np.searchsorted; however, I do not want to create an explicit array containing the values. Instead, I want to define a function giving the value to be expected at the desired position of the array, e.g. p(i) = i, where i denotes the position within the array.
Generating an array of values from the function would, in my case, be neither efficient nor elegant. Is there any way to achieve this?
What about something like:
import collections.abc

class GeneratorSequence(collections.abc.Sequence):
    def __init__(self, func, size):
        self._func = func
        self._len = size

    def __len__(self):
        return self._len

    def __getitem__(self, i):
        if 0 <= i < self._len:
            return self._func(i)
        else:
            raise IndexError

    def __iter__(self):
        for i in range(self._len):
            yield self[i]
This would work with np.searchsorted(), e.g.:
import numpy as np
gen_seq = GeneratorSequence(lambda x: x ** 2, 100)
np.searchsorted(gen_seq, 9)
# 3
You could also write your own binary search function; you do not really need NumPy in this case, and it can actually be beneficial:
def bin_search(seq, item):
    first = 0
    last = len(seq) - 1
    found = False
    while first <= last and not found:
        midpoint = (first + last) // 2
        if seq[midpoint] == item:
            first = midpoint
            found = True
        else:
            if item < seq[midpoint]:
                last = midpoint - 1
            else:
                first = midpoint + 1
    return first
Which gives identical results:
all(bin_search(gen_seq, i) == np.searchsorted(gen_seq, i) for i in range(100))
# True
Incidentally, this is also WAY faster, since np.searchsorted first converts the sequence into an actual array (evaluating all one million elements), while the hand-written binary search only evaluates the handful of elements it actually visits:
gen_seq = GeneratorSequence(lambda x: x ** 2, 1000000)
%timeit np.searchsorted(gen_seq, 10000)
# 1 loop, best of 3: 1.23 s per loop
%timeit bin_search(gen_seq, 10000)
# 100000 loops, best of 3: 16.1 µs per loop
Inspired by @norok2's comment, I think you can use something like this:
from collections.abc import Sequence

def f(i):
    return i*2  # Just an example

class MySeq(Sequence):
    def __init__(self, f, maxi):
        self.maxi = maxi
        self.f = f

    def __getitem__(self, x):
        if x < 0 or x > self.maxi:
            raise IndexError()
        return self.f(x)

    def __len__(self):
        return self.maxi + 1
In this case f is your function and maxi is the maximum index. This of course only works if the function f returns values in sorted order.
At this point you can use an object of type MySeq inside np.searchsorted.
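For instance (a small usage sketch that is not part of the original answer, using f and MySeq as defined above):

import numpy as np

seq = MySeq(f, 1000)      # values 0, 2, 4, ..., 2000
np.searchsorted(seq, 10)
# 5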

Speeding up Euclidean Distance in python [duplicate]

How do you optimize this code?
At the moment it is running too slow for the amount of data that goes through this loop. This code runs 1-nearest neighbour: it predicts the label of training_element based on p_data_set.
import numpy as np
from scipy.spatial import distance

# [x] , [[x1],[x2],[x3]], [l1, l2, l3]
def prediction(training_element, p_data_set, p_label_set):
    temp = np.array([], dtype=float)
    for p in p_data_set:
        temp = np.append(temp, distance.euclidean(training_element, p))
    minIndex = np.argmin(temp)
    return p_label_set[minIndex]
Use a k-D tree for fast nearest-neighbour lookups, e.g. scipy.spatial.cKDTree:
from scipy.spatial import cKDTree
# I assume that p_data_set is (nsamples, ndims)
tree = cKDTree(p_data_set)
# training_elements is also assumed to be (nsamples, ndims)
dist, idx = tree.query(training_elements, k=1)
predicted_labels = p_label_set[idx]
You could use distance.cdist to directly get the distances temp and then use .argmin() to get min-index, like so -
minIndex = distance.cdist(training_element[None],p_data_set).argmin()
Here's an alternative approach using np.einsum -
subs = p_data_set - training_element
minIndex = np.einsum('ij,ij->i',subs,subs).argmin()
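For reference, np.einsum('ij,ij->i', subs, subs) just computes the row-wise squared norms of subs, so the line above is equivalent to (spelled out here for clarity, not part of the original answer):

minIndex = ((p_data_set - training_element)**2).sum(axis=1).argmin()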
Runtime test
Well, I was thinking cKDTree would easily beat cdist, but I guess training_element being a 1D array isn't too heavy for cdist, and I am seeing it beat cKDTree instead by a good 10x+ margin!
Here's the timing results -
In [422]: # Setup arrays
...: p_data_set = np.random.randint(0,9,(40000,100))
...: training_element = np.random.randint(0,9,(100,))
...:
In [423]: def tree_based(p_data_set,training_element): ##ali_m's soln
...: tree = cKDTree(p_data_set)
...: dist, idx = tree.query(training_element, k=1)
...: return idx
...:
...: def einsum_based(p_data_set,training_element):
...: subs = p_data_set - training_element
...: return np.einsum('ij,ij->i',subs,subs).argmin()
...:
In [424]: %timeit tree_based(p_data_set,training_element)
1 loops, best of 3: 210 ms per loop
In [425]: %timeit einsum_based(p_data_set,training_element)
100 loops, best of 3: 17.3 ms per loop
In [426]: %timeit distance.cdist(training_element[None],p_data_set).argmin()
100 loops, best of 3: 14.8 ms per loop
Python can be quite a fast programming language if used properly.
This is my suggestion (faster_prediction):
import numpy as np
import time

def euclidean(a, b):
    return np.linalg.norm(a-b)

def prediction(training_element, p_data_set, p_label_set):
    temp = np.array([], dtype=float)
    for p in p_data_set:
        temp = np.append(temp, euclidean(training_element, p))
    minIndex = np.argmin(temp)
    return p_label_set[minIndex]

def faster_prediction(training_element, p_data_set, p_label_set):
    temp = np.tile(training_element, (p_data_set.shape[0], 1))
    temp = np.sqrt(np.sum((temp - p_data_set)**2, 1))
    minIndex = np.argmin(temp)
    return p_label_set[minIndex]

training_element = [1, 2, 3]
p_data_set = np.random.rand(100000, 3)*10
p_label_set = np.r_[0:p_data_set.shape[0]]

t1 = time.time()
result_1 = prediction(training_element, p_data_set, p_label_set)
t2 = time.time()

t3 = time.time()
result_2 = faster_prediction(training_element, p_data_set, p_label_set)
t4 = time.time()

print "Execution time 1:", t2-t1, "value: ", result_1
print "Execution time 2:", t4-t3, "value: ", result_2
print "Speed up: ", (t2-t1) / (t4-t3)
I get the following result on a pretty old laptop:
Execution time 1: 21.6033108234 value: 9819
Execution time 2: 0.0176379680634 value: 9819
Speed up: 1224.81857013
which makes me think I must have done some stupid mistake :) (Most of the gap comes from the original loop calling np.append on every iteration, which copies the growing array each time, on top of the per-row Python overhead.)
In case of very large data, where memory might be an issue, I suggest using Cython or implementing the function in C++ and wrapping it in Python.

`scipy.optimize` functions hang even with `maxiter=0`

I am trying to train the MNIST data (which I downloaded from Kaggle) with simple multi-class logistic regression, but the scipy.optimize functions hang.
Here's the code:
import csv
from math import exp
from numpy import *
from scipy.optimize import fmin, fmin_cg, fmin_powell, fmin_bfgs

# Prepare the data

def getIiter(ifname):
    """
    Get the iterator from a csv file with filename ifname
    """
    ifile = open(ifname, 'r')
    iiter = csv.reader(ifile)
    iiter.__next__()
    return iiter

def parseRow(s):
    y = [int(x) for x in s]
    lab = y[0]
    z = y[1:]
    return (lab, z)

def getAllRows(ifname):
    iiter = getIiter(ifname)
    x = []
    l = []
    for row in iiter:
        lab, z = parseRow(row)
        x.append(z)
        l.append(lab)
    return x, l

def cutData(x, y):
    """
    70% training
    30% testing
    """
    m = len(x)
    t = int(m * .7)
    return [(x[:t], y[:t]), (x[t:], y[t:])]

def num2IndMat(l):
    t = array(l)
    tt = [vectorize(int)((t == i)) for i in range(10)]
    return array(tt).T

def readData(ifname):
    x, l = getAllRows(ifname)
    t = [[1] + y for y in x]
    return array(t), num2IndMat(l)

# Calculate the cost function

def sigmoid(x):
    return 1 / (1 + exp(-x))

vSigmoid = vectorize(sigmoid)
vLog = vectorize(log)

def costFunction(theta, x, y):
    sigxt = vSigmoid(dot(x, theta))
    cm = (- y * vLog(sigxt) - (1 - y) * vLog(1 - sigxt)) / m / N
    return sum(cm)

def unflatten(flatTheta):
    return [flatTheta[i * N : (i + 1) * N] for i in range(n + 1)]

def costFunctionFlatTheta(flatTheta):
    return costFunction(unflatten(flatTheta), trainX, trainY)

def costFunctionFlatTheta1(flatTheta):
    return costFunction(flatTheta.reshape(785, 10), trainX, trainY)

x, y = readData('train.csv')
[(trainX, trainY), (testX, testY)] = cutData(x, y)

m = len(trainX)
n = len(trainX[0]) - 1
N = len(trainY[0])

initTheta = zeros(((n + 1), N))
flatInitTheta = ndarray.flatten(initTheta)
flatInitTheta1 = initTheta.reshape(1, -1)
In the last two lines we flatten initTheta because the fmin{,_cg,_bfgs,_powell} functions seem to only take vectors as the initial-value argument x0. I also flatten initTheta using reshape in the hope that this answer can be of help.
There is no problem computing the cost function, which takes less than 2 seconds on my computer:
print(costFunctionFlatTheta(flatInitTheta), costFunctionFlatTheta1(flatInitTheta1))
# 0.69314718056 0.69314718056
But all the fmin functions hang, even if I set maxiter=0.
e.g.
newFlatTheta = fmin(costFunctionFlatTheta, flatInitTheta, maxiter=0)
or
newFlatTheta1 = fmin(costFunctionFlatTheta1, flatInitTheta1, maxiter=0)
When I interrupt the program, it seems to me it all hangs at lines in optimize.py calling the cost functions, lines like this:
return function(*(wrapper_args + args))
For example, if I use fmin_cg, this would be line 292 in optimize.py (Version 0.5).
How do I solve this problem?
OK I found a way to stop fmin_cg from hanging.
Basically I just need to write a function that computes the gradient of the cost function, and pass it to the fprime parameter of fmin_cg.
def gradient(theta, x, y):
    return dot(x.T, vSigmoid(dot(x, theta)) - y) / m / N

def gradientFlatTheta(flatTheta):
    return ndarray.flatten(gradient(flatTheta.reshape(785, 10), trainX, trainY))
Then
newFlatTheta = fmin_cg(costFunctionFlatTheta, flatInitTheta, fprime=gradientFlatTheta, maxiter=0)
terminates within seconds, and by setting maxiter to a higher number (say 100) one can train the model within a reasonable amount of time.
The documentation of fmin_cg says the gradient is computed numerically if no fprime is given, which is what I suspect caused the apparent hanging: with 785 * 10 = 7850 parameters, a single finite-difference gradient already needs thousands of cost-function evaluations, each taking a second or two, so the maxiter check is never even reached in a reasonable time.
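On newer SciPy versions the same fix can also be expressed through the scipy.optimize.minimize interface (a sketch of the equivalent call, not part of the original answer):

from scipy.optimize import minimize

res = minimize(costFunctionFlatTheta, flatInitTheta,
               jac=gradientFlatTheta, method='CG',
               options={'maxiter': 100})
newFlatTheta = res.x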
Thanks to this notebook by zgo2016@Kaggle, which helped me find the solution.

how to use Apache Commons Math Optimization in Jython?

I want to port Matlab code to a Jython version, and found that fminsearch in Matlab might be replaced by Apache Commons Math optimization.
I'm coding in the Mango medical image script manager, which uses Jython 2.5.3 as its scripting language, and the Commons Math version is 3.6.1.
Here is my code:
import sys

def f(x, y):
    return x^2+y^2

sys.path.append('/home/shujian/APPs/Mango/lib/commons-math3-3.6.1.jar')

sys.add_package('org.apache.commons.math3.analysis')
from org.apache.commons.math3.analysis import MultivariateFunction
sys.add_package('org.apache.commons.math3.optim.nonlinear.scalar.noderiv')
from org.apache.commons.math3.optim.nonlinear.scalar.noderiv import NelderMeadSimplex, SimplexOptimizer
sys.add_package('org.apache.commons.math3.optim.nonlinear.scalar')
from org.apache.commons.math3.optim.nonlinear.scalar import ObjectiveFunction
sys.add_package('org.apache.commons.math3.optim')
from org.apache.commons.math3.optim import MaxEval, InitialGuess
sys.add_package('org.apache.commons.math3.optimization')
from org.apache.commons.math3.optimization import GoalType

initialSolution = [2.0, 2.0]
simplex = NelderMeadSimplex([2.0, 2.0])
opt = SimplexOptimizer(2**(-6), 2**(-10))
solution = opt.optimize(MaxEval(300), ObjectiveFunction(f), simplex, GoalType.MINIMIZE, InitialGuess([2.0, 2.0]))
skewParameters2 = solution.getPointRef()
print skewParameters2;
And I got the error below:
TypeError: optimize(): 1st arg can't be coerced to
I'm quite confused about how to use the optimizer from Jython, and the examples are all for the Java version.
I've given up on this plan and found another way to perform fminsearch in Jython. Below is the Jython version of the code:
import sys
sys.path.append('.../jnumeric-2.5.1_ra0.1.jar') #add the jnumeric path
import Numeric as np
def nelder_mead(f, x_start,
                step=0.1, no_improve_thr=10e-6,
                no_improv_break=10, max_iter=0,
                alpha=1., gamma=2., rho=-0.5, sigma=0.5):
    '''
    @param f (function): function to optimize, must return a scalar score
        and operate over a numpy array of the same dimensions as x_start
    @param x_start (float list): initial position
    @param step (float): look-around radius in initial step
    @no_improv_thr, no_improv_break (float, int): break after no_improv_break iterations with
        an improvement lower than no_improv_thr
    @max_iter (int): always break after this number of iterations.
        Set it to 0 to loop indefinitely.
    @alpha, gamma, rho, sigma (floats): parameters of the algorithm
        (see Wikipedia page for reference)
    return: tuple (best parameter array, best score)
    '''
    # init
    dim = len(x_start)
    prev_best = f(x_start)
    no_improv = 0
    res = [[np.array(x_start), prev_best]]
    for i in range(dim):
        x = np.array(x_start)
        x[i] = x[i] + step
        score = f(x)
        res.append([x, score])

    # simplex iter
    iters = 0
    while 1:
        # order
        res.sort(key=lambda x: x[1])
        best = res[0][1]

        # break after max_iter
        if max_iter and iters >= max_iter:
            return res[0]
        iters += 1

        # break after no_improv_break iterations with no improvement
        print '...best so far:', best
        if best < prev_best - no_improve_thr:
            no_improv = 0
            prev_best = best
        else:
            no_improv += 1
        if no_improv >= no_improv_break:
            return res[0]

        # centroid
        x0 = [0.] * dim
        for tup in res[:-1]:
            for i, c in enumerate(tup[0]):
                x0[i] += c / (len(res)-1)

        # reflection
        xr = x0 + alpha*(x0 - res[-1][0])
        rscore = f(xr)
        if res[0][1] <= rscore < res[-2][1]:
            del res[-1]
            res.append([xr, rscore])
            continue

        # expansion
        if rscore < res[0][1]:
            xe = x0 + gamma*(x0 - res[-1][0])
            escore = f(xe)
            if escore < rscore:
                del res[-1]
                res.append([xe, escore])
                continue
            else:
                del res[-1]
                res.append([xr, rscore])
                continue

        # contraction
        xc = x0 + rho*(x0 - res[-1][0])
        cscore = f(xc)
        if cscore < res[-1][1]:
            del res[-1]
            res.append([xc, cscore])
            continue

        # reduction
        x1 = res[0][0]
        nres = []
        for tup in res:
            redx = x1 + sigma*(tup[0] - x1)
            score = f(redx)
            nres.append([redx, score])
        res = nres
And the test example is as below:
def f(x):
    return x[0]**2 + x[1]**2 + x[2]**2

print nelder_mead(f, [3.4, 2.3, 2.2])
Actually, the original version is for Python (with NumPy), and the link below is the source:
https://github.com/fchollet/nelder-mead