scipy optimization with multiple constraints

How do I find the solution to a linear regression with multiple constraints on the coefficients in Python?

cvxpy is a good choice:
import cvxpy as cp
import numpy as np
np.random.seed(1)
y = 100*np.random.random(1)           # single target value
x = 200*np.random.random(1000) - 100  # 1000 regressors in [-100, 100)
b = cp.Variable(1000)
# box constraints on each coefficient, plus an equality constraint on their sum
constraints = [-100 <= b, b <= 100, cp.sum(b) == 2]
obj = cp.Minimize(cp.square(y - x @ b))
prob = cp.Problem(obj, constraints)
val = prob.solve()
print(f"Objective value {val}")
print("b values: {0}".format(b.value))

Related

numpy ndarray error in lmfit when model is passed using sympy

I got the following error:
<lambdifygenerated-1>:2: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  return numpy.array((A1*exp(-1/2*(x - xc1)**2/sigma1**2), 0, 0))
Here I have just one model, but the code is written to combine models for fitting with lmfit. Please kindly let me know what is going wrong.
import matplotlib.pyplot as plt
import numpy as np
import sympy
from sympy.parsing import sympy_parser
import lmfit
gauss_peak1 = sympy_parser.parse_expr('A1*exp(-(x-xc1)**2/(2*sigma1**2))')
gauss_peak2 = 0
exp_back = 0
model_list = sympy.Array((gauss_peak1, gauss_peak2, exp_back))
model = sum(model_list)
print(model)
model_list_func = sympy.lambdify(list(model_list.free_symbols), model_list)
model_func = sympy.lambdify(list(model.free_symbols), model)
np.random.seed(1)
x = np.linspace(0, 10, 40)
param_values = dict(x=x, A1=2, sigma1=1, xc1=2)
y = model_func(**param_values)
yi = model_list_func(**param_values)
yn = y + np.random.randn(y.size)*0.4
plt.plot(x, yn, 'o')
plt.plot(x, y)
lm_mod = lmfit.Model(model_func, independent_vars=['x'])
res = lm_mod.fit(data=yn, **param_values)
res.plot_fit()
plt.plot(x, y, label='true')
plt.legend()
plt.show()
lmfit.Model takes a model function that is a Python function. It parses the function arguments and expects those to be the Parameters for the model.
I don't think sympy-created functions will do that. Do you need to use sympy here? I don't see why; the usage here seems designed to make the code more complex than it needs to be. It seems you want to make a model with a Gaussian-like peak and a constant(?) background. If so, why not do
from lmfit.models import GaussianModel, ConstantModel
model = GaussianModel(prefix='p1_') + ConstantModel()
params = model.make_params(p1_amplitude=2, p1_center=2, p1_sigma=1, c=0)
That just seems way easier to me, and it is very easy to add a second Gaussian peak to that model.
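Adding that second peak is then a one-line change, and fitting follows the usual lmfit pattern. A minimal sketch, assuming the x and yn arrays from the question (the p2_ initial values are arbitrary):
model = GaussianModel(prefix='p1_') + GaussianModel(prefix='p2_') + ConstantModel()
params = model.make_params(p1_amplitude=2, p1_center=2, p1_sigma=1,
                           p2_amplitude=1, p2_center=5, p2_sigma=1, c=0)
result = model.fit(yn, params, x=x)
print(result.fit_report())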
But even if you have your own preferred mathematical expression, don't use that as a sympy string, use it as Python code:
def myfunction(x, A1, xc1, sigma1):
    return A1*np.exp(-(x-xc1)**2/(2*sigma1**2))
and then
from lmfit import Model
mymodel = Model(myfunction)
params = mymodel.make_params(A1=2, xc1=2, sigma1=1)
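The fit then works the same way; a minimal sketch, assuming the x and yn arrays from the question:
result = mymodel.fit(yn, params, x=x)
print(result.fit_report())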
In short: sympy is an amazing tool, but lmfit does not use it.

Inconsistent Objective Value when solving NLP

The objective is a quadratic function.
final = np.zeros_like(m.time)
final[-1] = 1
final = m.Param(final)
final_obj = ((m.Cs_n_avg[0]*final-0.0226981-0.25)*100)**2
m.Obj(final_obj)
m.options.IMODE = 6
m.options.SOLVER = 3
m.options.MAX_ITER = 500
m.solve(disp=True)
m.options.OBJ
3718.2126872
m.Cs_n_avg[0]
[0.0226981, 0.059400736803, 0.093924942354, 0.13029876847, 0.17044062163, 0.21694119575, 0.27269809999]
If I plug the final value of m.Cs_n_avg[0] into the objective function, the value is 0.
Here is a related problem (see Problem 1b) from the Dynamic Optimization course where the final value is minimized. The current question is incomplete because the code is missing several dependencies. Please update the question, or use the following as a working example.
import numpy as np
import matplotlib.pyplot as plt
from gekko import GEKKO
m = GEKKO()
nt = 101
m.time = np.linspace(0,1,nt)
# Variables
x1 = m.Var(value=1)
x2 = m.Var(value=0)
u = m.Var(value=-0.48)
p = np.zeros(nt)
p[-1] = 1.0
final = m.Param(value=p)
# Equations
m.Equation(x1.dt()==u)
m.Equation(x2.dt()==x1**2 + u**2)
m.Equation(final*(x1-1)==0)
# Objective Function
m.Minimize(x2*final)
m.options.IMODE = 6
m.solve()
plt.figure(1)
plt.plot(m.time,x1.value,'k:',lw=2,label=r'$x_1$')
plt.plot(m.time,x2.value,'b-',lw=2,label=r'$x_2$')
plt.plot(m.time,u.value,'r--',lw=2,label=r'$u$')
plt.legend(loc='best')
plt.xlabel('Time')
plt.ylabel('Value')
plt.show()
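A likely explanation for the mismatch in the original question (an assumption, since the full code is missing): in IMODE=6 the objective is summed over every evaluated time point, not just the last one. The constant terms -0.0226981-0.25 therefore contribute ((0-0.2726981)*100)**2 ≈ 743.64 at each node where final is 0, and five such nodes give ≈ 3718.21, matching the reported objective value. Keeping the whole expression inside the final multiplier penalizes only the last point:
m.Minimize(final*((m.Cs_n_avg[0]-0.0226981-0.25)*100)**2)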

Getting a score of zero using cross val score

I am trying to use cross_val_score on my dataset, but I keep getting zeros as the score:
This is my code:
df = pd.read_csv("Flaveria.csv")
df = pd.get_dummies(df, columns=["N level", "species"], drop_first=True)
# Extracting the target value from the dataset
X = df.iloc[:, df.columns != "Plant Weight(g)"]
y = np.array(df.iloc[:, 0], dtype="S6")
logreg = LogisticRegression()
loo = LeaveOneOut()
scores = cross_val_score(logreg, X, y, cv=loo)
print(scores)
The features are categorical values, while the target is a float value. I am not exactly sure why I am only getting zeros.
The data looks like this before creating dummy variables
N level,species,Plant Weight(g)
L,brownii,0.3008
L,brownii,0.3288
M,brownii,0.3304
M,brownii,0.388
M,brownii,0.406
H,brownii,0.3955
H,brownii,0.3797
H,brownii,0.2962
Updated code where I am still getting zeros:
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
# Creating dummies for the non numerical features in the dataset
df = pd.read_csv("Flaveria.csv")
df = pd.get_dummies(df, columns=["N level", "species"], drop_first=True)
# Extracting the target value from the dataset
X = df.iloc[:, df.columns != "Plant Weight(g)"]
y = df.iloc[:, 0]
forest = RandomForestRegressor()
loo = LeaveOneOut()
scores = cross_val_score(forest, X, y, cv=loo)
print(scores)
cross_val_score splits the data into train and test folds with the given iterator, fits the model on the train fold, and scores on the test fold. For regressions, r2_score is the default scorer in scikit-learn.
You have specified LeaveOneOut() as your cv iterator, so each fold will contain a single test case. In this case, R^2 will always be 0.
Looking at the formula for R^2 on Wikipedia:
R^2 = 1 - (SS_res / SS_tot)
where
SS_tot = sum((y_i - y_mean)^2)
For a single test case, y_mean is equal to the y value itself, so the denominator is 0 and R^2 is undefined (NaN). In this case, scikit-learn sets the value to 0 instead of NaN.
Changing LeaveOneOut() to any other CV iterator, like KFold, will give you non-zero results, as you have already observed.
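A minimal sketch of the same evaluation with KFold, assuming the X, y, and forest objects from the updated code above:
from sklearn.model_selection import KFold
scores = cross_val_score(forest, X, y, cv=KFold(n_splits=5))
print(scores, scores.mean())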

Implementing minimization in SciPy

I am trying to implement the 'Iterative Hessian Sketch' algorithm from https://arxiv.org/abs/1411.0347 (page 12). However, I am struggling with step two, which requires minimizing a matrix-vector function.
Imports and basic data generating function
import numpy as np
import scipy as sp
from sklearn.datasets import make_regression
from scipy.optimize import minimize
import matplotlib.pyplot as plt
%matplotlib inline
from numpy.linalg import norm
def generate_data(nsamples, nfeatures, variance=1):
    '''Generates a data matrix of size (nsamples, nfeatures)
    which defines a linear relationship on the variables.'''
    X, y = make_regression(n_samples=nsamples, n_features=nfeatures,
                           n_informative=nfeatures, noise=variance)
    X[:, 0] = np.ones(shape=(nsamples,))  # add bias terms
    return X, y
To minimize the matrix-vector function, I have tried implementing a function which computes the quantity I would like to minimise:
def f2min(x, data, target, offset):
    A = data
    S = np.eye(A.shape[0])
    # S = gaussian_sketch(nrows=A.shape[0]//2, ncols=A.shape[0])
    y = target
    xt = np.ravel(offset)
    norm_val = (1/(2*S.shape[0]))*norm(S @ A @ (x - xt))**2  # 1/(2m) scaling, consistent with the gradient below
    inner_prod = (y - A @ xt).T @ A @ x
    return norm_val - inner_prod
I would eventually like to replace S with some random matrices which can reduce the dimensionality of the problem, however, first I need to be confident that this optimisation method is working.
def grad_f2min(x, data, target, offset):
    A = data
    y = target
    S = np.eye(A.shape[0])
    xt = np.ravel(offset)
    S_A = S @ A
    grad = (1/S.shape[0])*S_A.T @ S_A @ (x - xt) - A.T @ (y - A @ xt)
    return grad
X, y = generate_data(nsamples=100, nfeatures=2)  # assumed sizes; the data generation call was not shown
x0 = np.zeros((X.shape[0], 1))
xt = np.zeros((2, 1))
x_new = np.zeros((2, 1))
for it in range(1):
    result = minimize(f2min, x0=xt, args=(X, y, x_new),
                      method='CG', jac=False)
    print(result)
    x_new = result.x
I don't think that this loop is correct at all because at the very least there should be some local convergence before moving on to the next step. The output is:
fun: 0.0
jac: array([ 0.00745058, 0.00774882])
message: 'Desired error not necessarily achieved due to precision loss.'
nfev: 416
nit: 0
njev: 101
status: 2
success: False
x: array([ 0., 0.])
Does anyone have an idea:
(1) why I'm not achieving convergence at each step, and
(2) whether I can implement step 2 in a better way?
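One thing worth checking (an observation on the code as posted, not a verified fix): grad_f2min is defined but never used because jac=False, so CG falls back to finite differences, and the starting point should have the feature dimension (2) rather than the sample dimension. A sketch of the call with the analytic gradient:
result = minimize(f2min, x0=np.zeros(X.shape[1]), args=(X, y, x_new),
                  method='CG', jac=grad_f2min)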

2nd ODE with mixed initial and boundary conditions, i.e. y'(0) and y(L). How to solve?

This is a 2nd-order ODE with boundary conditions that I'm trying to solve, but I can't figure it out. It is a heat transfer problem. If you have insights, they would be much appreciated.
Basically it is a boundary value problem, but with the conditions at different locations: one at 0 and the other at the end.
T(0) is unknown, y'(0) is a function of T(0), and T(0.06) is given.
The key is how to use the known value, T(0.06)=300, to solve the problem.
y''=0, y(0)=T0, y'(0)=(4.82e-08*T0**4-208.0)/1.2, y(0.06)=300
I tried this code, but no luck.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.integrate import odeint
def dU_dx(U, x):
    return [U[1], 0]
#set initial values
y0 = T0   # T0 is unknown here, which is the problem
z0 = (4.82e-08*T0**4-208.0)/1.2
U0 = [y0, z0]
yL = 300 # how do I use this boundary condition?
L = 0.06
#solve 2nd ode
xs = np.linspace(0, L, 100)
Us = odeint(dU_dx, U0, xs)
ys = Us[:,0]
plt.xlabel("x")
plt.ylabel("T")
plt.title("2nd ODE")
plt.plot(xs,ys);
from scipy.interpolate import interp1d
g = interp1d(xs,ys)
T=g(0)
print("Temp(at 0)=",T)
This is definitely not elegant code, but it is how I got the answer I wanted. Please feel free to post if you know a better approach.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.integrate import odeint
def dU_dx(U, x):
    return [U[1], 0]
#Assume initial value
T0=292.75
y0 = T0
z0 = (4.82e-08*T0**4-208.0)/1.2
U0 = [y0, z0]
L=0.06
xs = np.linspace(0, L, 100)
Us = odeint(dU_dx, U0, xs)
ys = Us[:,0]
plt.xlabel("x")
plt.ylabel("T")
plt.title("2nd ODE")
plt.plot(xs,ys);
from scipy.interpolate import interp1d
g = interp1d(xs,ys)
#Adjust T0 above and repeat until T(0.06)=300
TL = g(L)
print("Temp(L)=", TL)