I created a simple class that works without @jitclass. When I try to improve it with @jitclass it stops working. What is happening here?

Following example 12.4 from https://python-programming.quantecon.org/numba.html#id4, I constructed a simple class to model an AR(1) process.
The code works fine with the @jitclass decorator commented out, but it stops working as soon as I remove the "#".
import numpy as np
import numba
import matplotlib.pyplot as plt
from numba import float64
from numba import int32
from numba.experimental import jitclass

#ar_1_data = [('ρ', float64), ('z_0', float64), ('μ', float64), ('σ', float64)]
#@jitclass(ar_1_data)
class ar_1:
    def __init__(self, ρ = 0.5, z_0 = 1, μ = 0, σ = 1):
        self.ρ = ρ
        self.z = z_0
        self.lnz = np.log(z_0)
        self.μ = μ
        self.σ = σ

    def update(self):
        self.z = self.z**(self.ρ) * np.e**(np.random.normal(self.μ, self.σ))

    def sequence(self, n):
        path = []
        path_log = []
        for i in range(n):
            path.append(self.z)
            path_log.append(np.log(self.z))
            self.update()
        self.sequence = path
        self.sequence_log = path_log

a = ar_1()
a.sequence(100)
Here is the error I'm getting after removing the "#":
---------------------------------------------------------------------------
TypingError Traceback (most recent call last)
Input In [83], in <cell line: 1>()
----> 1 a = ar_1()
2 a.sequence(100)
File ~\anaconda3\lib\site-packages\numba\experimental\jitclass\base.py:124, in JitClassType.__call__(cls, *args, **kwargs)
122 bind = cls._ctor_sig.bind(None, *args, **kwargs)
123 bind.apply_defaults()
--> 124 return cls._ctor(*bind.args[1:], **bind.kwargs)
File ~\anaconda3\lib\site-packages\numba\core\dispatcher.py:468, in _DispatcherBase._compile_for_args(self, *args, **kws)
464 msg = (f"{str(e).rstrip()} \n\nThis error may have been caused "
465 f"by the following argument(s):\n{args_str}\n")
466 e.patch_message(msg)
--> 468 error_rewrite(e, 'typing')
469 except errors.UnsupportedError as e:
470 # Something unsupported is present in the user code, add help info
471 error_rewrite(e, 'unsupported_error')
File ~\anaconda3\lib\site-packages\numba\core\dispatcher.py:409, in _DispatcherBase._compile_for_args.<locals>.error_rewrite(e, issue_type)
407 raise e
408 else:
--> 409 raise e.with_traceback(None)
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Failed in nopython mode pipeline (step: nopython frontend)
Cannot resolve setattr: (instance.jitclass.ar_1#2658a3e4d30<ρ:float64,z_0:float64,μ:float64,σ:float64>).z = int64
File "..\..\..\..\..\AppData\Local\Temp\ipykernel_17336\4275445632.py", line 9:
<source missing, REPL/exec in use?>
During: typing of set attribute 'z' at C:\Users\Hogar\AppData\Local\Temp\ipykernel_17336\4275445632.py (9)
File "..\..\..\..\..\AppData\Local\Temp\ipykernel_17336\4275445632.py", line 9:
<source missing, REPL/exec in use?>
During: resolving callee type: jitclass.ar_1#2658a3e4d30<ρ:float64,z_0:float64,μ:float64,σ:float64>
During: typing of call at <string> (3)
During: resolving callee type: jitclass.ar_1#2658a3e4d30<ρ:float64,z_0:float64,μ:float64,σ:float64>
During: typing of call at <string> (3)
File "<string>", line 3:
<source missing, REPL/exec in use?>
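For reference, the "Cannot resolve setattr: ... .z = int64" line points at the likely cause: the spec lists only the constructor arguments (ρ, z_0, μ, σ), while the class also assigns self.z and self.lnz, and with @jitclass every attribute assigned to self must be declared in the spec. Below is a minimal sketch of a spec that typechecks; it is my own rewrite, not the original class, the float defaults are an extra precaution, and the sequence method is omitted because it stores a Python list on the instance.

import numpy as np
from numba import float64
from numba.experimental import jitclass

ar_1_spec = [('ρ', float64), ('z', float64), ('lnz', float64),
             ('μ', float64), ('σ', float64)]

@jitclass(ar_1_spec)
class AR1:
    def __init__(self, ρ=0.5, z_0=1.0, μ=0.0, σ=1.0):
        self.ρ = ρ
        self.z = z_0              # 'z' is now declared in the spec
        self.lnz = np.log(z_0)    # so is 'lnz'
        self.μ = μ
        self.σ = σ

    def update(self):
        # same update rule as in the question, written with np.exp
        self.z = self.z ** self.ρ * np.exp(np.random.normal(self.μ, self.σ))

a = AR1()
a.update()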

Related

How to subset a 1-d array using a boolean 1-d array in numba decorated function?

I gotta say, Numba seems to be usable only in extremely simplistic use cases carefully designed to be presented in talks.
I can run the following code just fine:
import numpy as np

def rt(hi):
    for i in hi:
        hi_ = i == hi
        t = hi[hi_]
    return None

rt(np.array(['a','b','c','d'], dtype='U'))
But when I decorate the above code with @njit:
import numpy as np
from numba import njit

@njit
def rt(hi):
    for i in hi:
        hi_ = i == hi
        t = hi[hi_]
    return None

rt(np.array(['a','b','c','d'], dtype='U'))
I get the following error:
---------------------------------------------------------------------------
TypingError Traceback (most recent call last)
<ipython-input-34-eadef1d0ecee> in <module>
5 t = hi[hi_]
6 return None
----> 7 rt(np.array(['a','b','c','d'],dtype='U'))
~/miniconda/envs/IndusInd_credit_cards_collections_scorecard_/lib/python3.8/site-packages/numba/core/dispatcher.py in _compile_for_args(self, *args, **kws)
418 e.patch_message(msg)
419
--> 420 error_rewrite(e, 'typing')
421 except errors.UnsupportedError as e:
422 # Something unsupported is present in the user code, add help info
~/miniconda/envs/IndusInd_credit_cards_collections_scorecard_/lib/python3.8/site-packages/numba/core/dispatcher.py in error_rewrite(e, issue_type)
359 raise e
360 else:
--> 361 raise e.with_traceback(None)
362
363 argtypes = []
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
No implementation of function Function(<built-in function getitem>) found for signature:
>>> getitem(array([unichr x 1], 1d, C), Literal[bool](False))
There are 22 candidate implementations:
- Of which 20 did not match due to:
Overload of function 'getitem': File: <numerous>: Line N/A.
With argument(s): '(array([unichr x 1], 1d, C), bool)':
No match.
- Of which 1 did not match due to:
Overload in function 'GetItemBuffer.generic': File: numba/core/typing/arraydecl.py: Line 162.
With argument(s): '(array([unichr x 1], 1d, C), bool)':
Rejected as the implementation raised a specific error:
TypeError: unsupported array index type bool in [bool]
raised from /home/sarthak/miniconda/envs/IndusInd_credit_cards_collections_scorecard_/lib/python3.8/site-packages/numba/core/typing/arraydecl.py:68
- Of which 1 did not match due to:
Overload in function 'GetItemBuffer.generic': File: numba/core/typing/arraydecl.py: Line 162.
With argument(s): '(array([unichr x 1], 1d, C), Literal[bool](False))':
Rejected as the implementation raised a specific error:
TypeError: unsupported array index type Literal[bool](False) in [Literal[bool](False)]
raised from /home/sarthak/miniconda/envs/IndusInd_credit_cards_collections_scorecard_/lib/python3.8/site-packages/numba/core/typing/arraydecl.py:68
During: typing of intrinsic-call at <ipython-input-34-eadef1d0ecee> (5)
File "<ipython-input-34-eadef1d0ecee>", line 5:
def rt(hi):
<source elided>
hi_ = i == hi
t = hi[hi_]
^
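For what it's worth, the same boolean-mask pattern compiles fine for numeric dtypes, which suggests the failure is specific to how the 'U' string dtype gets typed (the error shows i == hi resolving to a scalar Literal[bool] rather than a boolean array). A minimal numeric sketch of my own, not from the question:

import numpy as np
from numba import njit

@njit
def subset_numeric(arr, value):
    mask = arr == value   # element-wise comparison yields a boolean array
    return arr[mask]      # boolean indexing of a 1-d array is supported

subset_numeric(np.array([1, 2, 3, 2], dtype=np.int64), 2)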

Numba / Numpy - Understanding Error Message

I'm experimenting with Numba to try and speed up a union-find algorithm I'm working on. Here's some example code. When I experiment with some sample data I cannot understand the type complaint that Numba appears to be raising.
from numba import jit
import numpy as np

indices = np.arange(8806806, dtype=np.int64)
sizes = np.ones(8806806, dtype=np.int64)
connected_components = 8806806

@jit(nopython=True)
def root(p: int) -> int:
    while p != indices[p]:
        indices[p] = indices[indices[p]]
        p = indices[p]
    return p

@jit(nopython=True)
def connected(p: int, q: int) -> bool:
    return root(p) == root(q)

@jit(nopython=True)
def union(p: int, q: int) -> None:
    root1 = root(p)
    root2 = root(q)
    if root1 == root2:
        return
    if sizes[root1] < sizes[root2]:
        indices[root1] = root2
        sizes[root2] += sizes[root1]
    else:
        indices[root2] = root1
        sizes[root1] += sizes[root2]
    connected_components -= 1

@jit(nopython=True)
def process_values(arr):
    for row in arr:
        typed_arr = row.astype('int64')
        for first, second in zip(arr, arr[1:]):
            union(first, second)

process_values(
    np.array(
        [np.array([8018361, 4645960]),
         np.array([1137555, 7763897]),
         np.array([7532943, 2248813]),
         np.array([5352737, 71466, 3590473, 5352738, 2712260])], dtype='object'))
I cannot understand this error:
TypingError Traceback (most recent call last)
<ipython-input-45-62735e65f581> in <module>
44 np.array([1137555, 7763897]),
45 np.array([7532943, 2248813]),
---> 46 np.array([5352737, 71466, 3590473, 5352738, 2712260])], dtype='object'))
/opt/conda/lib/python3.7/site-packages/numba/core/dispatcher.py in _compile_for_args(self, *args, **kws)
399 e.patch_message(msg)
400
--> 401 error_rewrite(e, 'typing')
402 except errors.UnsupportedError as e:
403 # Something unsupported is present in the user code, add help info
/opt/conda/lib/python3.7/site-packages/numba/core/dispatcher.py in error_rewrite(e, issue_type)
342 raise e
343 else:
--> 344 reraise(type(e), e, None)
345
346 argtypes = []
/opt/conda/lib/python3.7/site-packages/numba/core/utils.py in reraise(tp, value, tb)
78 value = tp()
79 if value.__traceback__ is not tb:
---> 80 raise value.with_traceback(tb)
81 raise value
82
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
non-precise type array(pyobject, 1d, C)
[1] During: typing of argument at <ipython-input-45-62735e65f581> (36)
File "<ipython-input-45-62735e65f581>", line 36:
def process_values(arr):
for row in arr:
^
Does this have anything to do with process_values taking an array of irregularly shaped arrays? Any pointers? Thanks!
The problem is that Numba does not accept arrays of dtype 'object'. You seem to be placing arrays inside arrays; you will have to use lists inside lists instead. Look at the typed.List class in Numba: https://numba.pydata.org/numba-doc/dev/reference/pysupported.html#typed-list
Alternatively, you can use Awkward Array: https://github.com/scikit-hep/awkward-1.0
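A rough sketch of the typed.List route, assuming the goal is just to walk consecutive pairs in each ragged integer row (my example; the union-find calls are stubbed out):

from numba import njit
from numba.typed import List

@njit
def count_pairs(rows):
    # stand-in for process_values: visit consecutive pairs in each ragged row
    total = 0
    for row in rows:
        for i in range(len(row) - 1):
            first, second = row[i], row[i + 1]
            total += 1  # union(first, second) would go here
    return total

rows = List()
for r in ([8018361, 4645960], [1137555, 7763897], [7532943, 2248813]):
    inner = List()
    for v in r:
        inner.append(v)
    rows.append(inner)

count_pairs(rows)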

Tensorflow Custom Layer in high-level API: throws "object has no attribute '_expects_mask_arg'" error

I am trying to reconstruct an image from three inputs produced by previous layers: normal (None, 128, 128, 3), albedo (None, 128, 128, 3) and lighting (27). But the code still throws an "object has no attribute '_expects_mask_arg'" error. Below is my code, in which I implement a custom layer using the TensorFlow v2 beta high-level API.
import math
import numpy as np
import tensorflow as tf

class Reconstruction_Layer(tf.keras.layers.Layer):
    def __init__(self, input_shape):
        super(Reconstruction_Layer, self).__init__()
        #self.num_outputs = num_outputs
        #self.pixel=np.zeros((9),dtype=int)
        self.sphar=np.zeros((9),dtype=float)
        self.y=np.zeros((9),dtype=float)
        self.reconstructed_img=np.zeros((128,128,3),dtype=float)
        #self.y=tf.zeros([128,128,9])
        self.normal_light=np.zeros((128,128,9),dtype=float)
        self.y_temp=np.zeros((9),dtype=float)
        w_init = tf.random_normal_initializer()
        self.r_img = tf.Variable(initial_value=w_init(shape=input_shape),dtype='float32',trainable=True)

    def build(self, input_shape):
        super(MyLayer, self).build(input_shape)

    def call(self, input_layer):
        self.normal, self.albedo, self.light = input_layer
        for i in range(128):
            for j in range(128):
                #self.y=spherical_harmonic_calc(self.normal(i,j))
                self.pixel=self.normal[i,j,:]
                #self.normal_light(i,j)= self.y
                self.sphar[0]=(1/((4*math.pi)**0.5))
                self.sphar[1]=((3/(4*math.pi))**0.5)*self.pixel[2]
                self.sphar[3]=(((3/(4*math.pi))**0.5)*self.pixel[1])
                self.sphar[4]=((1/2)*((5/(4*math.pi))**0.5)*(3*(self.pixel[2]**2) - 1))
                self.sphar[5]=(3*((5/(12*math.pi))**0.5)*self.pixel[2]*self.pixel[0])
                self.sphar[6]=(3*((5/(12*math.pi))**0.5)*self.pixel[2]*self.pixel[1])
                self.sphar[7]=((3/2)*((5/(12*math.pi))**0.5)*((self.pixel[0]**2)-(self.pixel[1]**2)))
                self.sphar[8]=(3*((5/(12*math.pi))**0.5)*self.pixel[0]*self.pixel[1])
                self.normal_light[i,j,:]=self.sphar
        for j in range(128):
            for k in range(128):
                for i in range(3):
                    self.reconstructed_img[j,k,i]=self.albedo[j,k,i]*tf.tensordot(self.normal_light[j,k],self.light[i*9:(i+1)*9],axes=1)
        self.reconstructed_img=tf.convert_to_tensor(self.reconstructed_img)
        self.r_img=self.reconstructed_img
        return self.r_img
"""
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-74-06759ef5b0b5> in <module>
1 import numpy as np
----> 2 x=Reconstruction_Layer((128,128,3))(d)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
580 # explicitly take priority.
581 input_masks = self._collect_input_masks(inputs, args, kwargs)
--> 582 if (self._expects_mask_arg and input_masks is not None and
583 not self._call_arg_was_passed('mask', args, kwargs)):
584 kwargs['mask'] = input_masks
AttributeError: 'Reconstruction_Layer' object has no attribute '_expects_mask_arg'
"""
I just had the same error, and it was due to me forgetting to call .__init__() after super(). You did call it, but this makes me think the error comes from incorrect initialization of the base layer you are deriving from.
I notice that in the doc example it's not necessary to call build() on the base layer, and it works for me if you remove that function (as it does nothing related to your layer).
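As a point of comparison, here is a minimal sketch of the initialization pattern both answers point at (a hypothetical ScaleLayer, not the poster's reconstruction layer): the base Layer.__init__ must run so internal attributes such as _expects_mask_arg exist, and build() does not need to call the base class's build():

import tensorflow as tf

class ScaleLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)   # sets up base-layer state, including mask bookkeeping

    def build(self, input_shape):
        # no super().build() call needed; just create the weights
        self.w = self.add_weight(shape=(int(input_shape[-1]),),
                                 initializer="ones", trainable=True)

    def call(self, inputs):
        return inputs * self.w

x = tf.keras.Input(shape=(4,))
y = ScaleLayer()(x)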

tf.keras.layers.Concatenate() works with a list but fails on a tuple of tensors

This will work:
tf.keras.layers.Concatenate()([features['a'], features['b']])
While this:
tf.keras.layers.Concatenate()((features['a'], features['b']))
Results in:
TypeError: int() argument must be a string or a number, not 'TensorShapeV1'
Is that expected? If so, why does it matter what kind of sequence I pass?
Thanks,
Zach
EDIT (adding a code example):
import pandas as pd
import numpy as np
import tensorflow as tf

data = {
    'a': [1.0, 2.0, 3.0],
    'b': [0.1, 0.3, 0.2],
}

with tf.Session() as sess:
    ds = tf.data.Dataset.from_tensor_slices(data)
    ds = ds.batch(1)
    it = ds.make_one_shot_iterator()
    features = it.get_next()
    concat = tf.keras.layers.Concatenate()((features['a'], features['b']))
    try:
        while True:
            print(sess.run(concat))
    except tf.errors.OutOfRangeError:
        pass
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-135-0e1a45017941> in <module>()
6 features = it.get_next()
7
----> 8 concat = tf.keras.layers.Concatenate()((features['a'], features['b']))
9
10
google3/third_party/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
751 # the user has manually overwritten the build method do we need to
752 # build it.
--> 753 self.build(input_shapes)
754 # We must set self.built since user defined build functions are not
755 # constrained to set self.built.
google3/third_party/tensorflow/python/keras/utils/tf_utils.py in wrapper(instance, input_shape)
148 tuple(tensor_shape.TensorShape(x).as_list()) for x in input_shape]
149 else:
--> 150 input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())
151 output_shape = fn(instance, input_shape)
152 if output_shape is not None:
google3/third_party/tensorflow/python/framework/tensor_shape.py in __init__(self, dims)
688 else:
689 # Got a list of dimensions
--> 690 self._dims = [as_dimension(d) for d in dims_iter]
691
692 #property
google3/third_party/tensorflow/python/framework/tensor_shape.py in as_dimension(value)
630 return value
631 else:
--> 632 return Dimension(value)
633
634
google3/third_party/tensorflow/python/framework/tensor_shape.py in __init__(self, value)
183 raise TypeError("Cannot convert %s to Dimension" % value)
184 else:
--> 185 self._value = int(value)
186 if (not isinstance(value, compat.bytes_or_text_types) and
187 self._value != value):
TypeError: int() argument must be a string or a number, not 'TensorShapeV1'
https://github.com/keras-team/keras/blob/master/keras/layers/merge.py#L329
The comment on the Concatenate class states that it requires a list.
That class calls the backend's concatenate function,
https://github.com/keras-team/keras/blob/master/keras/backend/tensorflow_backend.py#L2041
which also states it requires a list.
In TensorFlow, https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/python/ops/array_ops.py#L1034 also states it requires a list of tensors. Why? I don't know. In that function the tensors (the variable called "values") actually get checked for being a list or tuple, but somewhere along the way you still get an error.
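So the practical workaround (a small sketch of my own, written TF2-style rather than with the Session-based code above) is simply to hand the layer a list, converting the tuple if that is what you already have:

import tensorflow as tf

a = tf.keras.Input(shape=(1,))
b = tf.keras.Input(shape=(1,))
pair = (a, b)                                       # tuple of tensors
concat = tf.keras.layers.Concatenate()(list(pair))  # passing a list works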

How to fit two numpy matrices with Pyspark's SVM?

I have two numpy matrices like this:
Features:
(878049, 6)
<type 'numpy.ndarray'>
Labels:
(878049,)
<type 'numpy.ndarray'>
I was curious whether I could use Pyspark's random forest to fit the previously mentioned matrices. From the documentation, the RF algorithm can be used as follows:
model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
                                     numTrees=3, featureSubsetStrategy="auto",
                                     impurity='gini', maxDepth=4, maxBins=32)

# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
Thus, my question is: do I need to transform the numpy arrays into an RDD, and into which format do I need to convert the features and labels matrices in order to fit them with MLlib's RF implementation?
Update
Then, based on @CafeFeed's answer, I tried the following:

# CV
(trainingData, testData) = data.randomSplit([0.7, 0.3])

from pyspark.mllib.tree import DecisionTree, DecisionTreeModel
from pyspark.mllib.util import MLUtils
import numpy as np

# Train a DecisionTree model.
# Empty categoricalFeaturesInfo indicates all features are continuous.
model = DecisionTree.trainClassifier(trainingData, numClasses=np.unique(y))

# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification tree model:')
print(model.toDebugString())
However, I got this exception:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-27-ded4b074521b> in <module>()
6 # Empty categoricalFeaturesInfo indicates all features are continuous.
7
----> 8 model = DecisionTree.trainClassifier(trainingData, numClasses=np.unique(y), categoricalFeaturesInfo={},impurity='gini', maxDepth=5, maxBins=32)
9
10 # Evaluate model on test instances and compute test error
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/pyspark/mllib/tree.pyc in trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
183 """
184 return cls._train(data, "classification", numClasses, categoricalFeaturesInfo,
--> 185 impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
186
187 #classmethod
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/pyspark/mllib/tree.pyc in _train(cls, data, type, numClasses, features, impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
124 assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint"
125 model = callMLlibFunc("trainDecisionTreeModel", data, type, numClasses, features,
--> 126 impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
127 return DecisionTreeModel(model)
128
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/pyspark/mllib/common.pyc in callMLlibFunc(name, *args)
128 sc = SparkContext._active_spark_context
129 api = getattr(sc._jvm.PythonMLLibAPI(), name)
--> 130 return callJavaFunc(sc, api, *args)
131
132
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/pyspark/mllib/common.pyc in callJavaFunc(sc, func, *args)
120 def callJavaFunc(sc, func, *args):
121 """ Call Java Function """
--> 122 args = [_py2java(sc, a) for a in args]
123 return _java2py(sc, func(*args))
124
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/pyspark/mllib/common.pyc in _py2java(sc, obj)
86 else:
87 data = bytearray(PickleSerializer().dumps(obj))
---> 88 obj = sc._jvm.SerDe.loads(data)
89 return obj
90
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
--> 538 self.target_id, self.name)
539
540 for temp_arg in temp_args:
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/pyspark/sql/utils.pyc in deco(*a, **kw)
34 def deco(*a, **kw):
35 try:
---> 36 return f(*a, **kw)
37 except py4j.protocol.Py4JJavaError as e:
38 s = e.java_exception.toString()
/usr/local/Cellar/apache-spark/1.5.1/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
298 raise Py4JJavaError(
299 'An error occurred while calling {0}{1}{2}.\n'.
--> 300 format(target_id, '.', name), value)
301 else:
302 raise Py4JError(
Py4JJavaError: An error occurred while calling z:org.apache.spark.mllib.api.python.SerDe.loads.
: net.razorvine.pickle.PickleException: expected zero arguments for construction of ClassDict (for numpy.core.multiarray._reconstruct)
at net.razorvine.pickle.objects.ClassDictConstructor.construct(ClassDictConstructor.java:23)
at net.razorvine.pickle.Unpickler.load_reduce(Unpickler.java:701)
at net.razorvine.pickle.Unpickler.dispatch(Unpickler.java:171)
at net.razorvine.pickle.Unpickler.load(Unpickler.java:85)
at net.razorvine.pickle.Unpickler.loads(Unpickler.java:98)
at org.apache.spark.mllib.api.python.SerDe$.loads(PythonMLLibAPI.scala:1462)
at org.apache.spark.mllib.api.python.SerDe.loads(PythonMLLibAPI.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:207)
at java.lang.Thread.run(Thread.java:745)
The docs are clear. You need an RDD:
>>> from pyspark.mllib.regression import LabeledPoint
>>> from pyspark.mllib.tree import RandomForest
>>> import numpy as np
>>>
>>> np.random.seed(1)
>>> features = np.random.random((100, 10))
>>> labels = np.random.choice([0, 1], 100)
>>> data = sc.parallelize(zip(labels, features)).map(lambda x: LabeledPoint(x[0], x[1]))
>>> RandomForest.trainClassifier(data, numClasses=2, categoricalFeaturesInfo={}, numTrees=2)
TreeEnsembleModel classifier with 2 trees
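Applying the same conversion to the shapes in the question (a sketch that assumes a live SparkContext sc plus the (878049, 6) features array and the (878049,) labels array from the post):

from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import RandomForest

# build LabeledPoint rows from the two numpy matrices, then split and train
data = sc.parallelize(zip(labels, features)).map(lambda x: LabeledPoint(x[0], x[1]))
trainingData, testData = data.randomSplit([0.7, 0.3])
model = RandomForest.trainClassifier(trainingData, numClasses=2,
                                     categoricalFeaturesInfo={}, numTrees=3)
predictions = model.predict(testData.map(lambda p: p.features))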