Can you return a numpy scalar with numba?

Numpy scalars returned by a function decorated with (n)jit are cast to Python float/int types by default. This came as a surprise to me, as it breaks the return contract. I am curious whether this can be overruled. Any help or explanation is much appreciated.
import numpy as np
import numba as nb
from numba import uint64

@nb.njit
def foo():
    return np.float32(1)

@nb.njit(uint64())
def bar():
    return np.float32(1)

float64_not_np_float32 = foo()
int64_not_np_uint64 = bar()

print(type(float64_not_np_float32))
print(foo.nopython_signatures)
print(type(int64_not_np_uint64))
print(bar.nopython_signatures)
--> <class 'float'>
--> [() -> float32]
--> <class 'int'>
--> [() -> uint64]
Expected behaviour:
def foo():
    return np.float32(1)

np_float32 = foo()
print(type(np_float32))
--> <class 'numpy.float32'>
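One workaround (a sketch, not a way to change the casting itself): return a one-element array instead of a bare scalar. Arrays returned from nopython code keep their dtype, so indexing the result on the Python side yields a numpy scalar.
import numpy as np
import numba as nb

@nb.njit
def foo_arr():
    # arrays returned from njit code keep their dtype, unlike bare scalars
    out = np.empty(1, dtype=np.float32)
    out[0] = 1.0
    return out

x = foo_arr()[0]
print(type(x))  # <class 'numpy.float32'>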

Related

I get "TypeError: only size-1 arrays can be converted to Python scalars", how do I fix that?

import numpy as np
from math import *
from matplotlib.pyplot import *

def f(x, y):
    return np.exp(-2*x)((sqrt(3)/6)*sin(2*sqrt(3)*x) + (1/2)*cos(2*sqrt(3)*x))

dx = 0.1
a = 0
b = 6
N = int((b-a)/dx)
x = np.linspace(a, b, N+1)
y = np.zeros(N+1)
y[0] = 6
y[1] = y[0]
for i in range(N-1):
    y[i+2] = (2-4*dx)*y[i+1] + (-1+4*dx-16*dx**2)*y[i]
Y2 = f(x, y)
Err = abs(Y2 - y)
plot(x, y, 'r', x, Y2, 'b')
show()
plot(x, Err)
show()
How do I fix that? I keep getting TypeError: only size-1 arrays can be converted to Python scalars.
First of all, please don't post code examples formatted like that.
I cleaned up your code and I think you can use this:
import numpy as np
from matplotlib.pyplot import *

def f(x, y):
    return np.exp(-2*x)*((np.sqrt(3)/6)*np.sin(2*np.sqrt(3)*x) + (1/2)*np.cos(2*np.sqrt(3)*x))

dx = 0.1
a = 0
b = 6
N = int((b-a)/dx)
x = np.linspace(a, b, N+1)
y = np.zeros(N+1)
y[0] = 6
y[1] = y[0]
for i in range(N-1):
    y[i+2] = (2-4*dx)*y[i+1] + (-1+4*dx-16*dx**2)*y[i]
Y2 = f(x, y)
Err = abs(Y2 - y)
plot(x, y, 'r', x, Y2, 'b')
show()
plot(x, Err)
show()
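For reference, the original error comes from calling sqrt/sin/cos from the math module on numpy arrays: those functions only accept single floats, while their numpy counterparts work element-wise. A small illustration:
import numpy as np
from math import sin

x = np.linspace(0, 1, 5)
# sin(x) from the math module raises
# "TypeError: only size-1 arrays can be converted to Python scalars"
np.sin(x)  # the numpy version works element-wise on arrays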

Why is a numpy array treated as global by default?

I find it strange that the following program passes a numpy array around like a global variable even though I have not defined it to be global inside a def:
import numpy as np

a = np.zeros([5])
a[:] = np.array([1,2,3,4,5])
print("outside a is ", a)

def func1():
    print("func1: a is ", a)

func1()

def func2():
    a[2] = a[2]*2
    print("func2: a is ", a)

func2()
print("outside a is ", a)
What am I missing here?
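For what it's worth, a minimal sketch of the distinction Python actually makes: reading or mutating an object reached through a global name needs no global statement; only rebinding the name does.
import numpy as np

a = np.zeros(3)

def mutate():
    # reads the global name `a` and modifies the array it refers to in place;
    # no `global` statement is needed for this
    a[0] = 1.0

def rebind():
    # assignment to the bare name creates a new *local* variable;
    # changing the module-level `a` here would require `global a`
    a = np.ones(3)

mutate()
rebind()
print(a)  # [1. 0. 0.] -- only the in-place change is visible outside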

How do you add dataclasses as valid index values to a plotly chart?

I am trying to switch from the matplotlib pandas plotting backend to plotly. However, I am being held back by a common occurrence of this error:
TypeError: Object of type Quarter is not JSON serializable
Where Quarter is a dataclass in my codebase.
For a minimal example, consider:
from dataclasses import dataclass
import pandas as pd

@dataclass
class Foo:
    val: int

df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y': list(range(10))})
df.plot.scatter(x='x', y='y')
As expected, the above returns:
TypeError: Object of type Foo is not JSON serializable
Now, I don't expect plotly to be magical, but adding a __float__ magic method allows the Foo objects to be used with the matplotlib backend:
# This works with the matplotlib backend
@dataclass
class Foo:
    val: int

    def __float__(self):
        return float(self.val)

df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y': list(range(10))})
df.plot.scatter(x='x', y='y')
How can I update my dataclass to allow for it to be used with the plotly backend?
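The reason __float__ is not enough here appears to be that the matplotlib backend coerces values numerically, while the plotly backend serializes the data to JSON, and json.dumps has no hook comparable to __float__. A small illustration:
from dataclasses import dataclass
import json

@dataclass
class Foo:
    val: int

    def __float__(self):
        return float(self.val)

print(float(Foo(3)))       # 3.0 -- numeric coercion honours __float__
print(json.dumps(Foo(3)))  # TypeError: Object of type Foo is not JSON serializable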
You can get pandas to cast the column to float before invoking the plotting backend.
from dataclasses import dataclass
import pandas as pd

@dataclass
class Foo:
    val: int

    def __float__(self):
        return float(self.val)

df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y': list(range(10))})
df["x"].astype(float)  # note: astype returns a new Series; df itself is unchanged

pd.options.plotting.backend = "plotly"
df.assign(x=lambda d: d["x"].astype(float)).plot.scatter(x='x', y='y')
Monkey patching
If you don't want to change your code, you can monkey patch the plotly implementation of the pandas plotting API:
https://pandas.pydata.org/pandas-docs/stable/development/extending.html#plotting-backends
from dataclasses import dataclass
import json

import pandas as pd
import plotly
import wrapt

@wrapt.patch_function_wrapper(plotly, 'plot')
def new_plot(wrapped, instance, args, kwargs):
    # if the x column cannot be JSON serialized, cast it to float before plotting
    try:
        json.dumps(args[0][kwargs["x"]])
    except TypeError:
        args[0][kwargs["x"]] = args[0][kwargs["x"]].astype(float)
    return wrapped(*args, **kwargs)

@dataclass
class Foo:
    val: int

    def __float__(self):
        return float(self.val)

df = pd.DataFrame({'x': [Foo(i) for i in range(10)], 'y': list(range(10))})

pd.options.plotting.backend = "plotly"
df.plot.scatter(x='x', y='y')

Error with pyspark foreachPartition() code restructuring

My code structure is as follows:
class SampleClass:
    def __init__(self, spark):
        df = spark.read.table("sampletable").repartition(100)

        def func1(partition):
            print(partition.getPartitionId)
            print(partition.count())
            # some operations on the partition DF

        df.rdd.foreachPartition(func1)

def main():
    s = SampleClass()

if __name__ == "__main__":
    main()
But I get an error like:
Could not serialize object: TypeError: can't pickle _thread.RLock objects
How can I fix this calling structure? I am using Databricks Runtime 7.3 LTS and Spark 3.0.
Thanks.
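A common cause of "can't pickle _thread.RLock objects" is that the function shipped to the executors ends up capturing something non-serializable, typically the SparkSession or SparkContext reachable through self. A minimal sketch of a restructuring that avoids capturing them (the table name is taken from the question; prints inside foreachPartition go to the executor logs, not the driver):
from pyspark.sql import SparkSession

def process_partition(rows):
    # `rows` is an iterator over the Row objects of one partition
    count = sum(1 for _ in rows)
    print(f"partition had {count} rows")

class SampleClass:
    def __init__(self, spark):
        df = spark.read.table("sampletable").repartition(100)
        # a module-level function closes over nothing, so neither `spark`
        # nor `self` has to be pickled and sent to the executors
        df.rdd.foreachPartition(process_partition)

def main():
    spark = SparkSession.builder.getOrCreate()
    SampleClass(spark)

if __name__ == "__main__":
    main()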

Invoke root_scalar from a numba decorated function

The following code fails when findroot is decorated with nb.jit. This is a toy example, but the idea is to be able to find the root of a scalar function (or potentially a multivariate function using root) for an array of values and store the results in a numpy array.
Error message: TypingError: cannot determine Numba type of <class 'function'>
import numba as nb
import numpy as np
from scipy.optimize import root_scalar

a = 3.0
b = 1.0
c = -10.5

@nb.jit(nopython=True)
def f(x):
    return a*x**2 + b*x + c

@nb.jit(nopython=True)
def fprime(x):
    return 2*a*x + b

@nb.jit(nopython=True)
def fprime2(x):
    return 2*a

@nb.jit(nopython=True)  # <-- Commenting this line makes the code work, but it is slow
def findroot(arr):
    for i in range(len(arr)):
        arr[i] = root_scalar(f, fprime=fprime, fprime2=fprime2, x0=0).root

if __name__ == '__main__':
    arr = np.zeros(20, np.float)
    import timeit
    start = timeit.time.process_time()
    findroot(arr)
    end = timeit.time.process_time()
    print(end - start)
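One possible workaround (a sketch, not the only option): root_scalar is a regular Python function and cannot be called from nopython-mode code, but a plain Newton iteration is simple enough to write directly inside the jitted code. The newton helper below is a hypothetical name, not part of Numba or SciPy.
import numba as nb
import numpy as np

a = 3.0
b = 1.0
c = -10.5

@nb.njit
def f(x):
    return a*x**2 + b*x + c

@nb.njit
def fprime(x):
    return 2*a*x + b

@nb.njit
def newton(x0, tol, maxiter):
    # plain Newton iteration; converges quadratically near a simple root
    x = x0
    for _ in range(maxiter):
        step = f(x) / fprime(x)
        x = x - step
        if abs(step) < tol:
            break
    return x

@nb.njit
def findroot(arr):
    for i in range(len(arr)):
        arr[i] = newton(0.0, 1e-12, 50)

arr = np.zeros(20, np.float64)
findroot(arr)
print(arr[:3])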