Issue with the shape of the residual array when using optimize.least_squares with only one unknown parameter

I am confronted with an issue I cannot solve myself. I am trying to iteratively solve a system of five nonlinear equations with only one unknown parameter, but I keep getting the error shown below. The error does not come up when I solve for three unknown parameters instead (using the same code, but replacing some numbers with parameters to be estimated). My code is the following:
import scipy.optimize as spo
from math import log

def equations(L):
    f1 = 5.519 - ((1.343876648526599/0.4)*(log(18/3.2441500954852724) - (-5*(148/L))))
    f2 = 8.940 - ((1.343876648526599/0.4)*(log(56/3.2441500954852724) - (-5*(148/L))))
    f3 = 11.658 - ((1.343876648526599/0.4)*(log(98/3.2441500954852724) - (-5*(148/L))))
    f4 = 12.590 - ((1.343876648526599/0.4)*(log(123/3.2441500954852724) - (-5*(148/L))))
    f5 = 12.983 - ((1.343876648526599/0.4)*(log(148/3.2441500954852724) - (-5*(148/L))))
    return (f1, f2, f3, f4, f5)

x0 = 300
root = spo.least_squares(equations, x0, bounds = ((10), (500)), max_nfev = 100).x
Error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-34-7315f1f7ce27> in <module>()
---> 27 root = spo.least_squares(equations, x0, bounds = ((10), (500)), max_nfev = 100).x
/usr/local/lib/python3.6/dist-packages/scipy/optimize/_lsq/least_squares.py in least_squares(fun, x0, jac, bounds, method, ftol, xtol, gtol, x_scale, loss, f_scale, diff_step, tr_solver, tr_options, jac_sparsity, max_nfev, verbose, args, kwargs)
809 if f0.ndim != 1:
810 raise ValueError("`fun` must return at most 1-d array_like. "
--> 811 "f0.shape: {0}".format(f0.shape))
812
813 if not np.all(np.isfinite(f0)):
ValueError: `fun` must return at most 1-d array_like. f0.shape: (5, 1)
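least_squares requires fun to return a 1-D array. With a single unknown, x0 becomes an array of shape (1,), so each fi is itself a length-1 array and the returned tuple stacks to shape (5, 1); with three unknowns the parameters are presumably indexed out as scalars, which is why that variant works. A minimal sketch of a fix, with the constants from f1..f5 collected into lists (depths and targets are illustrative names, not from the original post):

import numpy as np
import scipy.optimize as spo
from math import log

def equations(L):
    L = np.asarray(L).ravel()[0]  # unpack the length-1 parameter array to a plain scalar
    depths  = [18, 56, 98, 123, 148]
    targets = [5.519, 8.940, 11.658, 12.590, 12.983]
    # -(-5*(148/L)) simplifies to +5*(148/L)
    return np.array([t - (1.343876648526599/0.4) * (log(d/3.2441500954852724) + 5*(148/L))
                     for t, d in zip(targets, depths)])

root = spo.least_squares(equations, x0=300, bounds=(10, 500), max_nfev=100).x

Unpacking L to a scalar keeps every residual a plain float, so the returned array has shape (5,), which is what least_squares expects.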

Related

Linear regression on TensorFlow in Google Colab

I am trying to code a linear regression, but I am stuck on this cell, as it returns an error and I don't understand how to correct it. I would appreciate some detailed feedback on how to change my code to avoid this.
Here is the cell that raises the error
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")
After that I get:
/usr/local/lib/python3.7/dist-packages/matplotlib/axes/_axes.py:6630: RuntimeWarning: All-NaN slice encountered
xmin = min(xmin, np.nanmin(xi))
/usr/local/lib/python3.7/dist-packages/matplotlib/axes/_axes.py:6631: RuntimeWarning: All-NaN slice encountered
xmax = max(xmax, np.nanmax(xi))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-30-c4d487a4d6e3> in <module>()
1 error = test_predictions - test_labels
----> 2 plt.hist(error, bins = 25)
3 plt.xlabel("Prediction Error [MPG]")
4 _ = plt.ylabel("Count")
5 frames
<__array_function__ internals> in histogram(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/numpy/lib/histograms.py in _get_outer_edges(a, range)
322 if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
323 raise ValueError(
--> 324 "autodetected range of [{}, {}] is not finite".format(first_edge, last_edge))
325
326 # expand empty range to avoid divide by zero
ValueError: autodetected range of [nan, nan] is not finite
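The two RuntimeWarnings indicate that error is entirely NaN before plt.hist() ever runs. A plausible cause (an assumption, since the data isn't shown) is NaNs already present in test_predictions, e.g. from diverged training or NaNs in the inputs, or an index-aligned subtraction of two mismatched pandas Series. A minimal diagnostic sketch with stand-in data:

import numpy as np
import matplotlib.pyplot as plt

# Stand-ins for the question's variables (shapes and values are assumptions):
test_predictions = np.array([22.0, np.nan, 31.5, 28.2])
test_labels = np.array([21.0, 25.0, 30.0, 29.0])

# Flatten both sides to plain arrays so no pandas index alignment can occur
error = np.asarray(test_predictions).ravel() - np.asarray(test_labels).ravel()
print(f"{np.isnan(error).sum()} NaNs out of {error.size}")  # diagnose first

plt.hist(error[~np.isnan(error)], bins=25)  # histogram only the finite values
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")

If the NaN count equals the array size, the problem is upstream of the plotting code.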

ValueError: `f0` passed has more than 1 dimension

Dimension error appears when trying to call the minimize function
import numpy as np
import math
import scipy
from scipy import optimize
from scipy.optimize import minimize, line_search

x1=[1,2,1] ; y1=0
x2=[1,1,2] ; y2=0
x3=[2,3,3] ; y3=1
x4=[2,2,1] ; y4=1
x5=[1,2,3] ; y5=0
x6=[1,3,1] ; y6=1
x7=[1,1,1] ; y7=0
x8=[1,2,2] ; y8=0
x9=[1,2,1] ; y9=0
x10=[1,1,1] ; y10=0
x11=[2,2,2] ; y11=1
x12=[1,2,2] ; y12=0

X= np.array([x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12])
y=np.array([y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12])
n=len(y)

# logistic (sigmoid) function
def h(x):
    return 1/(1+np.exp(-x))

# objective: averaged logistic loss over the n samples
def r(beta):
    f=0
    for i in range(n):
        f=f+ (1-y[i])* np.dot(X[i],beta) + np.log( 1+np.exp(-np.dot(X[i],beta) ))
    return np.array([f/n])

# gradient of r
def gradr(beta):
    f=0
    for i in range(n):
        mu= h(np.dot(X[i],beta))
        f=f+ (mu-y[i])*X[i]
    return (f/n).reshape(3,1)

# exact line search: minimize r along direction d
def exactsearch(beta_0,d):
    phi_aux = lambda alfa : r(beta_0+ alfa*d)
    alfa_0=np.array([1])
    bds=[(0,None)]
    res = minimize(phi_aux, alfa_0, bounds=bds)
    alfa=np.array([res.x])
    return alfa

# gradient descent with step size chosen by f
def GradientMethod(beta,f):
    N=0
    e=10**(-5)
    p=-gradr(beta)
    alfa=f(beta,p)
    while True:
        if r(beta)==r(beta+alfa*p):break
        if N==10000:break
        if alfa<=e:break
        else:
            N=N+1
            beta=beta+alfa*p
            p=-gradr(beta)
            alfa=f(beta,p)
    return [beta,r(beta),N]

GradientMethod(np.array([1,1,1]),exactsearch)
X is a 12 by 3 matrix (one 3-vector per row); r is a function that takes a size-3 vector and combines it with the rows of X.
When changing np.exp to math.exp, the error changes to TypeError: only size-1 arrays can be converted to Python scalars. Earlier I also encountered ValueError: shapes (3,) and (1,3) not aligned: 3 (dim 0) != 1 (dim 0), but it went away when I reshaped gradr.
I should add that I don't fully understand the function exactsearch, since it was given to me.
Full error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-20-bb9e6dc26271> in <module>
62 return [beta,r(beta),N]
63
---> 64 GradientMethod(np.array([1,1,1]),exactsearch)
<ipython-input-20-bb9e6dc26271> in GradientMethod(beta, f)
50 e=10**(-5)
51 p=-gradr(beta)
---> 52 alfa=f(beta,p)
53 while True:
54 if r(beta)==r(beta+alfa*p):break
<ipython-input-20-bb9e6dc26271> in exactsearch(beta_0, d)
42 alfa_0=np.array([1])
43 bds=[(0,None)]
---> 44 res = minimize(phi_aux, alfa_0, bounds=bds)
45 alfa=np.array([res.x])
46 return alfa
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
615 **options)
616 elif meth == 'l-bfgs-b':
--> 617 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
618 callback=callback, **options)
619 elif meth == 'tnc':
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
304 iprint = disp
305
--> 306 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
307 bounds=new_bounds,
308 finite_diff_rel_step=finite_diff_rel_step)
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
259 # ScalarFunction caches. Reuse of fun(x) during grad
260 # calculation reduces overall function evaluations.
--> 261 sf = ScalarFunction(fun, x0, args, grad, hess,
262 finite_diff_rel_step, bounds, epsilon=epsilon)
263
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
93
94 self._update_grad_impl = update_grad
---> 95 self._update_grad()
96
97 # Hessian Evaluation
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in _update_grad(self)
169 def _update_grad(self):
170 if not self.g_updated:
--> 171 self._update_grad_impl()
172 self.g_updated = True
173
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in update_grad()
89 self._update_fun()
90 self.ngev += 1
---> 91 self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
92 **finite_diff_options)
93
~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_numdiff.py in approx_derivative(fun, x0, method, rel_step, abs_step, f0, bounds, sparsity, as_linear_operator, args, kwargs)
386 f0 = np.atleast_1d(f0)
387 if f0.ndim > 1:
--> 388 raise ValueError("`f0` passed has more than 1 dimension.")
389
390 if np.any((x0 < lb) | (x0 > ub)):
ValueError: `f0` passed has more than 1 dimension.
So apparently the problem was with the dimensions of the arrays: somewhere, for some reason, an array went from being like [1,2,3] to [[1,2,3]], changing its shape from (3,) to (1,3). I tried to fix it by reshaping gradr to (3,1), but this just made everything worse. Instead, the solution was to eliminate the reshape in gradr and reshape every beta to (3,) before it is operated on.
def r(beta):
    f=0
    beta=beta.reshape(3,)
    for i in range(n):
        f=f+ (1-y[i])* np.dot(X[i],beta) + np.log( 1+np.exp(-np.dot(X[i],beta) ))
    return np.array([f/n])

# gradient of r
def gradr(beta):
    f=0
    beta=beta.reshape(3,)
    for i in range(n):
        mu= h(np.dot(X[i],beta))
        f=f+ (mu-y[i])*X[i]
    return (f/n)
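As an aside (a sketch, not part of the original question or answer): since the step-size search is one-dimensional, scipy.optimize.minimize_scalar avoids the array-shape bookkeeping entirely, as long as the objective is reduced to a plain float. The upper bound of 10 below is an arbitrary assumption.

from scipy.optimize import minimize_scalar

def exactsearch(beta_0, d):
    # phi maps a scalar step size alfa to a scalar objective value
    phi = lambda alfa: float(r(beta_0 + alfa * d))
    res = minimize_scalar(phi, bounds=(0, 10), method='bounded')
    return res.x  # a plain float, so alfa*p broadcasts cleanly in GradientMethod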

ValueError: The two structures don't have the same sequence length. Input structure has length 1, while shallow structure has length 2

What is the solution to the following error in TensorFlow?
ValueError: The two structures don't have the same sequence length.
Input structure has length 1, while shallow structure has length 2.
I tried TensorFlow versions 2.9.1 and 2.4.0.
The following toy example reproduces the error.
import tensorflow as tf

d1 = tf.data.Dataset.range(10)
d1 = d1.map(lambda x: tf.cast([x], tf.float32))

def func1(x):
    y1 = 2.0 * x
    y2 = -3.0 * x
    return tuple([y1, y2])

d2 = d1.map(lambda x: tf.py_function(func1, [x], [tf.float32, tf.float32]))
d3 = d2.padded_batch(3, padded_shapes=(None,))
for x, y in d2.as_numpy_iterator():
    pass
The full error is:
ValueError Traceback (most recent call last)
~/Documents/pythonProject/tfProjects/asr/transformer/dataset.py in <module>
256 return tuple([y1, y2])
257 d2 = d1.map(lambda x: tf.py_function(func1, [x], [tf.float32, tf.float32]))
---> 258 d3 = d2.padded_batch(3, padded_shapes=(None,))
259 for x, y in d2.as_numpy_iterator():
260 pass
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/ops/dataset_ops.py in padded_batch(self, batch_size, padded_shapes, padding_values, drop_remainder, name)
1887 padding_values,
1888 drop_remainder,
-> 1889 name=name)
1890
1891 def map(self,
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, batch_size, padded_shapes, padding_values, drop_remainder, name)
5171
5172 input_shapes = get_legacy_output_shapes(input_dataset)
-> 5173 flat_padded_shapes = nest.flatten_up_to(input_shapes, padded_shapes)
5174
5175 flat_padded_shapes_as_tensors = []
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/util/nest.py in flatten_up_to(shallow_tree, input_tree)
377 `input_tree`.
378 """
--> 379 assert_shallow_structure(shallow_tree, input_tree)
380 return list(_yield_flat_up_to(shallow_tree, input_tree))
381
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/util/nest.py in assert_shallow_structure(shallow_tree, input_tree, check_types)
290 if len(input_tree) != len(shallow_tree):
291 raise ValueError(
--> 292 "The two structures don't have the same sequence length. Input "
293 f"structure has length {len(input_tree)}, while shallow structure "
294 f"has length {len(shallow_tree)}.")
ValueError: The two structures don't have the same sequence length. Input structure has length 1, while shallow structure has length 2.
The following modification to the padded_shapes argument resolves the error. Each element of d2 is a tuple of two tensors, so padded_batch needs one padded shape per component:
import tensorflow as tf

d1 = tf.data.Dataset.range(10)
d1 = d1.map(lambda x: tf.cast([x], tf.float32))

def func1(x):
    y1 = 2.0 * x
    y2 = -3.0 * x
    return tuple([y1, y2])

d2 = d1.map(lambda x: tf.py_function(func1, [x], [tf.float32, tf.float32]))
d3 = d2.padded_batch(3, padded_shapes=([None], [None]))
for x, y in d2.as_numpy_iterator():
    pass
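As a quick sanity check (not in the original answer), note that the final loop still iterates d2; iterating the padded d3 instead confirms the batch shapes:

for y1_batch, y2_batch in d3.as_numpy_iterator():
    print(y1_batch.shape, y2_batch.shape)  # (3, 1) for full batches, (1, 1) for the last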

Numba / Numpy - Understanding Error Message

I'm experimenting with Numba to try to speed up a union-find algorithm I'm working on. Here's some example code. When I run it on some sample data, I cannot make sense of the type complaint that Numba raises.
from numba import jit
import numpy as np

indices = np.arange(8806806, dtype=np.int64)
sizes = np.ones(8806806, dtype=np.int64)
connected_components = 8806806

@jit(nopython=True)
def root(p: int) -> int:
    while p != indices[p]:
        indices[p] = indices[indices[p]]
        p = indices[p]
    return p

@jit(nopython=True)
def connected(p: int, q: int) -> bool:
    return root(p) == root(q)

@jit(nopython=True)
def union(p: int, q: int) -> None:
    root1 = root(p)
    root2 = root(q)
    if root1 == root2:
        return
    if (sizes[root1] < sizes[root2]):
        indices[root1] = root2
        sizes[root2] += sizes[root1]
    else:
        indices[root2] = root1
        sizes[root1] += sizes[root2]
    connected_components -= 1

@jit(nopython=True)
def process_values(arr):
    for row in arr:
        typed_arr = row.astype('int64')
        for first, second in zip(arr, arr[1:]):
            union(first, second)

process_values(
    np.array(
        [np.array([8018361, 4645960]),
         np.array([1137555, 7763897]),
         np.array([7532943, 2248813]),
         np.array([5352737, 71466, 3590473, 5352738, 2712260])], dtype='object'))
I cannot understand this error:
TypingError Traceback (most recent call last)
<ipython-input-45-62735e65f581> in <module>
44 np.array([1137555, 7763897]),
45 np.array([7532943, 2248813]),
---> 46 np.array([5352737, 71466, 3590473, 5352738, 2712260])], dtype='object'))
/opt/conda/lib/python3.7/site-packages/numba/core/dispatcher.py in _compile_for_args(self, *args, **kws)
399 e.patch_message(msg)
400
--> 401 error_rewrite(e, 'typing')
402 except errors.UnsupportedError as e:
403 # Something unsupported is present in the user code, add help info
/opt/conda/lib/python3.7/site-packages/numba/core/dispatcher.py in error_rewrite(e, issue_type)
342 raise e
343 else:
--> 344 reraise(type(e), e, None)
345
346 argtypes = []
/opt/conda/lib/python3.7/site-packages/numba/core/utils.py in reraise(tp, value, tb)
78 value = tp()
79 if value.__traceback__ is not tb:
---> 80 raise value.with_traceback(tb)
81 raise value
82
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
non-precise type array(pyobject, 1d, C)
[1] During: typing of argument at <ipython-input-45-62735e65f581> (36)
File "<ipython-input-45-62735e65f581>", line 36:
def process_values(arr):
for row in arr:
^
Does this have anything to do with process_values taking an array of irregularly shaped arrays? Any pointers? Thanks!
The problem is that Numba does not accept arrays of dtype 'object'. You seem to be placing arrays inside arrays; you will have to use lists inside lists instead. Look at the typed.List class in Numba: https://numba.pydata.org/numba-doc/dev/reference/pysupported.html#typed-list
Alternatively, you can use Awkward Array: https://github.com/scikit-hep/awkward-1.0
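A minimal sketch of the typed-list route (the loop body is a stand-in, not the asker's union-find, which also decrements a module-level counter that nopython mode does not support):

import numpy as np
from numba import njit
from numba.typed import List

@njit
def process_values(rows):
    # rows is a numba.typed.List of 1-D int64 arrays, which Numba can type
    total = 0
    for row in rows:
        for first, second in zip(row[:-1], row[1:]):
            total += first + second  # stand-in for union(first, second)
    return total

rows = List()
for r in [np.array([8018361, 4645960], dtype=np.int64),
          np.array([5352737, 71466, 3590473, 5352738, 2712260], dtype=np.int64)]:
    rows.append(r)

print(process_values(rows))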

TypeError using sns.distplot() on dataframe with one row

I'm plotting subsets of a dataframe, and one subset happens to have only one row. This is the only reason I can think of that it would cause problems. This is what it looks like:
problem_dataframe = prob_df[prob_df['Date']==7]
problem_dataframe.head()
I try to do:
sns.distplot(problem_dataframe['floatTime'])
But I get the error:
TypeError: len() of unsized object
Would someone please tell me what's causing this and how to work around it?
The TypeError is resolved by setting bins=1.
But that uncovers a different error, ValueError: x must be 1D or 2D, which gets triggered by an internal function in Matplotlib's hist(), called _normalize_input():
import pandas as pd
import seaborn as sns
df = pd.DataFrame(['Tue','Feb',7,'15:37:58',2017,15.6196]).T
df.columns = ['Day','Month','Date','Time','Year','floatTime']
sns.distplot(df.floatTime, bins=1)
Output:
ValueError Traceback (most recent call last)
<ipython-input-25-858df405d200> in <module>()
6 df.columns = ['Day','Month','Date','Time','Year','floatTime']
7 df.floatTime.values.astype(float)
----> 8 sns.distplot(df.floatTime, bins=1)
/home/andrew/anaconda3/lib/python3.6/site-packages/seaborn/distributions.py in distplot(a, bins, hist, kde, rug, fit, hist_kws, kde_kws, rug_kws, fit_kws, color, vertical, norm_hist, axlabel, label, ax)
213 hist_color = hist_kws.pop("color", color)
214 ax.hist(a, bins, orientation=orientation,
--> 215 color=hist_color, **hist_kws)
216 if hist_color != color:
217 hist_kws["color"] = hist_color
/home/andrew/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1890 warnings.warn(msg % (label_namer, func.__name__),
1891 RuntimeWarning, stacklevel=2)
-> 1892 return func(ax, *args, **kwargs)
1893 pre_doc = inner.__doc__
1894 if pre_doc is None:
/home/andrew/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
6141 x = np.array([[]])
6142 else:
-> 6143 x = _normalize_input(x, 'x')
6144 nx = len(x) # number of datasets
6145
/home/andrew/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in _normalize_input(inp, ename)
6080 else:
6081 raise ValueError(
-> 6082 "{ename} must be 1D or 2D".format(ename=ename))
6083 if inp.shape[1] < inp.shape[0]:
6084 warnings.warn(
ValueError: x must be 1D or 2D
_normalize_input() was removed from Matplotlib (it looks like sometime last year), so I guess this Seaborn version is relying on an older Matplotlib under the hood.
You can see _normalize_input() in this old commit:
def _normalize_input(inp, ename='input'):
    """Normalize 1 or 2d input into list of np.ndarray or
    a single 2D np.ndarray.

    Parameters
    ----------
    inp : iterable
    ename : str, optional
        Name to use in ValueError if `inp` can not be normalized
    """
    if (isinstance(x, np.ndarray) or
            not iterable(cbook.safe_first_element(inp))):
        # TODO: support masked arrays;
        inp = np.asarray(inp)
        if inp.ndim == 2:
            # 2-D input with columns as datasets; switch to rows
            inp = inp.T
        elif inp.ndim == 1:
            # new view, single row
            inp = inp.reshape(1, inp.shape[0])
        else:
            raise ValueError(
                "{ename} must be 1D or 2D".format(ename=ename))
    ...
I can't figure out why inp.ndim!=1, though. Performing the same np.asarray().ndim on the input returns 1 as expected:
np.asarray(df.floatTime).ndim # 1
So you're facing a few obstacles if you want to make a single-valued input work with sns.distplot().
Suggested Workaround
Check for a single-element df.floatTime, and if that's the case, just use plt.hist() instead (which is what distplot calls internally anyway, along with the KDE):
plt.hist(df.floatTime)
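A sketch of that check (assuming the df built above; the astype(float) guards against the object dtype that DataFrame gets from the mixed row):

import matplotlib.pyplot as plt

vals = df.floatTime.astype(float)
if len(vals) == 1:
    plt.hist(vals)        # single observation: plain histogram, no KDE
else:
    sns.distplot(vals)    # normal case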