Why is a numpy array treated as global by default? - numpy

I find it strange that the following program passes a numpy array around like a global variable even though I have not defined it to be global inside a def:
import numpy as np
a = np.zeros([5])
a[:] = np.array([1,2,3,4,5])
print("outside a is ",a)


def func1():
    """Print the module-level array ``a``."""
    # ``a`` is never assigned inside this function, so the name resolves to
    # the module-level binding; no ``global`` statement is needed to read it.
    print("func1: a is ",a)


func1()


def func2():
    """Double element 2 of the module-level array ``a`` in place, then print it."""
    # Item assignment mutates the existing array object rather than rebinding
    # the name ``a``, so this also works without a ``global`` statement.
    a[2] = a[2]*2
    print("func2: a is ",a)


func2()
print("outside a is ",a)
What am I missing here?

Related

Use of scipy.ndimage.generic_filter1d ()

I am trying to make sense of the generic_filter1d() function in scipy.ndimage. My understanding is that this function allows a function to be applied to a 1d array by 'extracting' a subset of this array and applying a function that returns a single value (analogous to generic_filter()). Is this correct? I have the following simple code:
def test(x,y):
    """Return the sum of the element-wise quotient ``x / y``."""
    return np.sum(x/y)
and
import numpy as np
from scipy.ndimage import generic_filter1d
# Sample input: the integers 0..9.
dummy_data= np.arange(10)
size = 5
# One divisor per position in the filter window (``size`` values from 0.1 to size*0.1).
y= np.linspace(0.1, size*0.1, size)
# NOTE(review): the surrounding text reports that this call fails with an
# "extra argument" complaint. The scipy docs appear to describe the callback as
# function(input_line, output_line, *extra_arguments) - confirm against them.
rstl = generic_filter1d(dummy_data, test, filter_size= size, extra_arguments =(y,))
And it fails. It says that I have an extra argument.

How do you add dataclasses as valid index values to a plotly chart?

I am trying to switch from the matplotlib pandas plotting backend to plotly. However, I am being held back by a common occurrence of this error:
TypeError: Object of type Quarter is not JSON serializable
Where Quarter is a dataclass in my codebase.
For a minimal example, consider:
from dataclasses import dataclass


@dataclass
class Foo:
    """Minimal dataclass holding one integer, used as an x value in the plot."""

    val:int
# Frame whose 'x' column holds Foo instances and whose 'y' column holds 0..9.
df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y':list(range(10))})
# Scatter plot through the pandas plotting API, using whichever backend is active.
df.plot.scatter(x='x', y='y')
As expected, the above returns:
TypeError: Object of type Foo is not JSON serializable
Now, I don't expect plotly to be magical, but adding a __float__ magic method allows the Foo objects to be used with the matplotlib backend:
# This works
from dataclasses import dataclass


@dataclass
class Foo:
    """Minimal dataclass holding one integer that can be converted with ``float()``."""

    val:int

    def __float__(self):
        """Return ``val`` as a float so ``float(foo)`` works."""
        return float(self.val)
# Frame whose 'x' column holds Foo instances and whose 'y' column holds 0..9.
df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y':list(range(10))})
# Scatter plot through the pandas plotting API, using whichever backend is active.
df.plot.scatter(x='x', y='y')
How can I update my dataclass to allow for it to be used with the plotly backend?
You can get pandas to cast the column to float before invoking the plotting backend.
from dataclasses import dataclass
import pandas as pd
@dataclass
class Foo:
    """Minimal dataclass holding one integer that can be converted with ``float()``."""

    val:int

    def __float__(self):
        """Return ``val`` as a float; this is what ``astype(float)`` relies on."""
        return float(self.val)
# Frame whose 'x' column holds Foo instances and whose 'y' column holds 0..9.
df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y':list(range(10))})
# The result of this cast is not stored; the cast that feeds the plot is the
# one inside assign() below.
df["x"].astype(float)
# Route DataFrame.plot through the plotly backend.
pd.options.plotting.backend = "plotly"
# Plot a copy of the frame whose 'x' column has been cast to float; df itself
# is left unchanged by assign().
df.assign(x=lambda d: d["x"].astype(float)).plot.scatter(x='x', y='y')
Monkey patching
If you don't want to change your code, you can monkey patch the plotly implementation of the pandas plotting API:
https://pandas.pydata.org/pandas-docs/stable/development/extending.html#plotting-backends
from dataclasses import dataclass
import pandas as pd
import wrapt, json
import plotly
@wrapt.patch_function_wrapper(plotly, 'plot')
def new_plot(wrapped, instance, args, kwargs):
    """Wrap ``plotly.plot`` so an x column that cannot be JSON-encoded is cast to float.

    ``wrapped`` is the original callable; ``args`` and ``kwargs`` are the
    positional and keyword arguments of the intercepted call. The column is
    looked up as ``args[0][kwargs["x"]]``, so the call must pass ``x`` by
    keyword and the data object as the first positional argument.
    ``instance`` is not used.

    Returns whatever the wrapped callable returns.
    """
    try:
        # Probe only: the encoded string is discarded.
        json.dumps(args[0][kwargs["x"]])
    except TypeError:
        # Replaces the column on the object that was passed in, so the
        # caller's data (presumably a DataFrame) is modified in place.
        args[0][kwargs["x"]] = args[0][kwargs["x"]].astype(float)
    return wrapped(*args, **kwargs)
@dataclass
class Foo:
    """Minimal dataclass holding one integer that can be converted with ``float()``."""

    val:int

    def __float__(self):
        """Return ``val`` as a float; this is what ``astype(float)`` relies on."""
        return float(self.val)
# Frame whose 'x' column holds Foo instances and whose 'y' column holds 0..9.
df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y':list(range(10))})
# The result of this cast is not stored.
df["x"].astype(float)
# Route DataFrame.plot through the plotly backend.
pd.options.plotting.backend = "plotly"
# Unmodified plotting call; presumably the patched plotly.plot performs the
# float cast - confirm that pandas passes x by keyword to the backend.
df.plot.scatter(x='x', y='y')

Invoke root_scalar from a numba decorated function

The following code fails when find_root is decorated with nb.jit. This is a toy example, but the idea is to have the ability to find the root of a scalar function (or potentially a multivariate function using root) for an array of values and store them in a numpy array.
Error message: TypingError: cannot determine Numba type of <class 'function'>
import numba as nb
import numpy as np
from scipy.optimize import root_scalar
a = 3.0
b = 1.0
c = -10.5
@nb.jit(nopython=True)
def f(x):
    """Evaluate the quadratic ``a*x**2 + b*x + c`` with the module-level coefficients."""
    return a*x**2 + b*x + c
@nb.jit(nopython=True)
def fprime(x):
    """Evaluate ``2*a*x + b``, the first derivative of the quadratic ``f``."""
    return 2*a*x + b
@nb.jit(nopython=True)
def fprime2(x):
    """Return ``2*a``, the constant second derivative of ``f``; ``x`` is not used."""
    return 2*a
@nb.jit(nopython=True) # <-- Commenting this line makes the code work but it is slow
def findroot(arr):
    """Overwrite every element of ``arr`` in place with a root of ``f``.

    Each element receives the ``.root`` of ``root_scalar`` started from
    ``x0=0`` with the first and second derivatives supplied. Nothing is
    returned. Per the surrounding text, this fails under ``nb.jit`` with
    "cannot determine Numba type of <class 'function'>".
    """
    for i in range(len(arr)):
        arr[i] = root_scalar(f, fprime=fprime, fprime2=fprime2, x0=0).root
if __name__ == '__main__':
    import time

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy, so use the builtin directly.
    arr = np.zeros(20, float)
    # Use the time module directly instead of reaching it through ``timeit.time``.
    start = time.process_time()
    findroot(arr)
    end = time.process_time()
    # Elapsed CPU time of the findroot call.
    print(end - start)

python numpy vectorize an array of object instances

I'd like to encapsulate my calc function and all its parameters inside an object, but vectorize the execution for millions of objects, much like numpy would do it. Any suggestions?
The calculation is still basic arithmetic, which numpy should be able to vectorize.
Example code:
import numpy as np
myarray = np.random.rand(3, 10000000)
############################# This works fine: FAST ###################################
def calc(a,b,c):
    """Return ``(a + b/c)**b / a``.

    Uses only arithmetic operators, so it accepts scalars or arrays alike.
    """
    return (a+b/c)**b/a
res1 = calc(*myarray) #0.7 seconds
############################# What I'd like to do (unsuccessfully): SLOW ###################################
class MyClass():
    """Bundle the operands ``a``, ``b`` and ``c`` with the calculation on them."""

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = ['a','b','c']

    def __init__(self, a,b,c):
        """Store the three operands."""
        self.a, self.b, self.c = a,b,c

    def calc(self):
        """Return ``(a + b/c)**b / a`` for the stored operands."""
        return (self.a + self.b / self.c) ** self.b / self.a
def classCalc(myClass:MyClass):
    """Return ``myClass.calc()``; a plain-function form of the method call."""
    return myClass.calc()
# NOTE(review): the NumPy docs describe np.vectorize as a convenience that is
# essentially a Python-level loop, which matches the timings noted below - confirm.
vectorizedClassCalc = np.vectorize(classCalc)
# One MyClass instance per column of myarray (each row of the transpose is one (a, b, c) triple).
myobjects = np.array([MyClass(*args) for args in myarray.transpose()])
res2 = vectorizedClassCalc(myobjects) #8 seconds no different from a list comprehension
res3 = [obj.calc() for obj in myobjects] #7.5 seconds
perhaps pandas has additional features?

passing numpy array as parameter in theano function

As a beginner, I was trying to simply compute the dot product of two matrices using theano.
My code is very simple.
import theano
import theano.tensor as T
import numpy as np
from theano import function
def covarience(array):
    """Build a symbolic dot product and try to wrap it in a theano function.

    ``array`` is only used for its length and as an entry in the input list
    handed to ``function``. Nothing is returned.
    """
    input_array=T.matrix('input_array')
    # Not used anywhere else in this function.
    deviation_matrix = T.matrix('deviation_matrix')
    matrix_filled_with_1s=T.matrix('matrix_filled_with_1s')
    z = T.dot(input_array, matrix_filled_with_1s)
    # Despite the name, this is a square matrix of ones, not an identity matrix.
    identity=np.ones((len(array),len(array)))
    # NOTE: the inputs given here are NumPy arrays rather than the symbolic
    # variables defined above; the surrounding text reports a TypeError
    # ("Unknown parameter type") when this runs.
    f=function([array,identity],z)
    # print(f)
covarience(np.array([[2,4],[6,8]]))
But the problem is that each time I run this code, I get an error message like "TypeError: Unknown parameter type: "
Can anyone tell me what's wrong with my code?
You cannot pass a numpy array when defining a theano function; theano functions can only be defined in terms of theano.tensor variables. You define the computation symbolically, as interactions between tensor/symbolic variables, and then call the resulting function on values/real data to perform the actual computation. It doesn't make sense to define the theano function itself with a numpy array.
This should work:
import theano
import theano.tensor as T
import numpy as np
# Symbolic matrix variables; no data is attached to them.
a = T.matrix('a')
b = T.matrix('b')
# Symbolic dot product of the two variables.
z = T.dot(a, b)
# Function defined on the symbolic inputs (a, b) with output z.
f = theano.function([a, b], z)
# Concrete NumPy data in theano's configured float dtype.
a_d = np.asarray([[2, 4], [6, 8]], dtype=theano.config.floatX)
# Matrix of ones with the same shape as a_d.
b_d = np.ones(a_d.shape, dtype=theano.config.floatX)
# The real data is supplied only when the function is called.
print(f(a_d, b_d))