Invoke root_scalar from a numba decorated function - numpy

The following code fails when find_root is decorated with nb.jit. This is a toy example, but the idea is to have the ability to find the root of a scalar function (or potentially a multivariate function using root) for an array of values and store them in a numpy array.
Error message: TypingError: cannot determine Numba type of <class 'function'>
import numba as nb
import numpy as np
from scipy.optimize import root_scalar
# Coefficients of the quadratic a*x**2 + b*x + c used by the functions below.
a, b, c = 3.0, 1.0, -10.5
#nb.jit(nopython=True)
def f(x):
    """Evaluate a*x**2 + b*x + c at x using the module-level coefficients."""
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c
#nb.jit(nopython=True)
def fprime(x):
    """Evaluate 2*a*x + b, the first derivative of f, at x."""
    slope_term = 2 * a * x
    return slope_term + b
#nb.jit(nopython=True)
def fprime2(x):
    """Return 2*a, the second derivative of f; x is accepted but not read."""
    curvature = 2 * a
    return curvature
#nb.jit(nopython=True) # <-- Commenting this line makes the code work but it is slow
def findroot(arr):
    """Overwrite every entry of arr, in place, with a root found by root_scalar.

    Each entry receives the ``.root`` attribute of a ``root_scalar`` call on
    ``f`` with ``fprime`` and ``fprime2`` supplied and a starting point of
    ``x0=0``.  Nothing is returned.
    """
    for position, _ in enumerate(arr):
        solution = root_scalar(f, fprime=fprime, fprime2=fprime2, x0=0)
        arr[position] = solution.root
if __name__ == '__main__':
    # Time one findroot pass over a 20-element array and print the CPU seconds.
    import time

    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 is the same dtype and exists on every release.
    arr = np.zeros(20, np.float64)
    # Use the time module directly instead of reaching it through timeit's
    # internal import (timeit.time).
    start = time.process_time()
    findroot(arr)
    end = time.process_time()
    print(end - start)

Related

ThreadPoolExecutor DataFrame

I am dealing with a simple loop.
My real dataframe is somewhat larger, and I would like to make better use of the processor (utilization is currently about 2%).
I tried this:
import pandas as pd
import numpy as np
import time
from concurrent.futures import ThreadPoolExecutor
# Sample frame: three rows, two (st, nd) column pairs.
scan = pd.DataFrame(
    {
        'st1': [0, 4, 5],
        'nd1': [2, 2, 6],
        'st2': [3, 7, 2],
        'nd2': [5, 7, 3],
    }
)
def task(value):
    """Build one wide frame holding each column pair of ``scan`` plus its sum.

    For the column pairs starting at positions 0 and 2 of the module-level
    ``scan`` frame, the two columns are copied, a ``'th'`` column holding
    their row-wise sum is added, and the result is appended column-wise to
    the frame that is returned.  ``value`` is accepted but never read.
    """
    combined = pd.DataFrame()
    for first in (0, 2):
        pair = scan.iloc[:, [first, first + 1]].copy()
        pair['th'] = pair.iloc[:, 0] + pair.iloc[:, 1]
        combined = pd.concat([combined, pair], axis=1)
        # NOTE(review): the paste lost its indentation; the sleep is kept
        # inside the loop (once per column pair) -- confirm against the original.
        time.sleep(1) #tested time
    return combined
if __name__ == '__main__':
    # Run task twice on a two-thread pool and print each result in order.
    with ThreadPoolExecutor(2) as exe:
        results = exe.map(task, range(2))
        for result in results:
            print(result)
It's not faster. What did I do wrong?

Symbolic use of hyp1f1 with sympy

How to write hypergeometric function 1F1 in sympy?
I have the following code and I would like to correctly write the hypergeometric function 1F1 into the expression y. Could you explain how to do it?
import sympy as sp
import scipy as sc
import numpy as np
def Psii():
    """Build a hypergeometric expression in r, differentiate it and evaluate.

    Returns:
        A 3-tuple ``(y, yprime, value)``: the symbolic expression, its
        derivative with respect to ``r``, and the derivative evaluated
        numerically at ``r = 3``.
    """
    r = sp.symbols('r')
    n = 2
    # The original passed the undefined name ``d`` as the argument, which
    # raises NameError; the only symbol defined here is ``r``.
    # hyper(ap, bq, z) takes the numerator parameters, the denominator
    # parameters and the argument, so 1F1(a; b; z) is hyper([a], [b], z).
    # NOTE(review): [1, 1] / [1] gives two numerator parameters -- confirm
    # which parameters the intended 1F1 should have.
    y = sp.hyper([1, 1], [1], r) / n ** 2
    yprime = sp.diff(y, r)
    # NOTE(review): the "numpy" backend may not provide an implementation of
    # hyper; if the call below fails, try modules="mpmath" -- confirm.
    f = sp.lambdify(r, yprime, "numpy")
    return y, yprime, f(3)


print(Psii())

How to improve this Matplotlib animation?

I have written a program that plots a logarithm gradually. However, there are two issues:
When the animation has completed and I minimize the animation window and then maximize it again, the graph is gone.
When I close the animation window while it is still plotting, then the thread myDataLoop continues, and still prints 'done' after some time. The next time I run the program, it will take longer to start the animation, and the animation becomes glitchy. (This continues until I restart the kernel)
How can I solve this?
import numpy as np
import time
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *
import matplotlib
matplotlib.use("Qt5Agg")
from matplotlib.figure import Figure
from matplotlib.animation import TimedAnimation
from matplotlib.lines import Line2D
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
import threading
#Time in seconds needed to construct the figure
Time = 10
Modified_length = 10**9
#We plot a log function: one sample for every 8000 units of Modified_length
cumulative = [
    np.log(step * 8000)
    for step in range(1, int(Modified_length / 8000) + 1)
]
class CustomMainWindow(QMainWindow):
    """Main window that hosts the animated matplotlib canvas.

    Construction sets up the frame and grid layout, embeds a
    ``CustomFigCanvas``, starts a daemon thread running ``dataSendLoop`` and
    shows the window.
    """

    def __init__(self):
        super().__init__()
        # Define the geometry of the main window
        self.setGeometry(0, 30, 1600, 830)
        self.setWindowTitle(" ")
        # Create FRAME_A
        background = QColor(210, 210, 235, 255).name()
        self.FRAME_A = QFrame(self)
        self.FRAME_A.setStyleSheet("QWidget { background-color: %s }" % background)
        self.LAYOUT_A = QGridLayout()
        self.FRAME_A.setLayout(self.LAYOUT_A)
        self.setCentralWidget(self.FRAME_A)
        # Place the matplotlib figure at grid row 0, column 1
        self.myFig = CustomFigCanvas()
        self.LAYOUT_A.addWidget(self.myFig, 0, 1)
        # Hand the callback to the producer thread, which feeds it samples
        producer = threading.Thread(
            name='myDataLoop',
            target=dataSendLoop,
            daemon=True,
            args=(self.addData_callbackFunc,),
        )
        producer.start()
        self.show()

    def addData_callbackFunc(self, value):
        """Forward one received sample to the embedded canvas."""
        # print("Add data: " + str(value))
        self.myFig.addData(value)
''' End Class '''
class CustomFigCanvas(FigureCanvas, TimedAnimation):
    """Qt canvas that animates a single line as samples arrive.

    Samples are queued with ``addData`` and moved into the plotted series on
    every animation frame by ``_draw_frame``.
    """

    def __init__(self):
        # Samples received but not yet moved into the plotted series.
        self.addedData = []
        # print(matplotlib.__version__)
        # The data
        self.xlim = int(Modified_length)
        self.n = np.linspace(0, self.xlim - 1, int(self.xlim/8000))
        # Starts as a list; becomes an ndarray once the first batch is appended.
        self.y = [0]
        # The window
        self.fig = Figure(figsize=(5,5), dpi=120)
        self.ax1 = self.fig.add_subplot(111)
        # self.ax1 settings
        self.line1 = Line2D([], [], color='blue')
        self.ax1.add_line(self.line1)
        self.ax1.set_xlim(0, self.xlim - 1)
        self.ax1.set_ylim(0,100)
        self.ax2 = self.ax1.twinx()
        FigureCanvas.__init__(self, self.fig)
        TimedAnimation.__init__(self, self.fig, interval = 50, blit = True)

    def new_frame_seq(self):
        """Return an iterator with one frame index per x sample."""
        return iter(range(self.n.size))

    def _init_draw(self):
        """Reset the animated line to an empty data set."""
        for line in (self.line1,):
            line.set_data([], [])

    def addData(self, value):
        """Queue one sample; it is plotted on the next drawn frame."""
        self.addedData.append(value)

    def _step(self, *args):
        """Run the base-class step and stop the animation if it raises."""
        # Extends the _step() method for the TimedAnimation class.
        try:
            TimedAnimation._step(self, *args)
        except Exception:
            # Deliberate best effort: any failure while stepping ends the
            # animation instead of propagating.
            TimedAnimation._stop(self)

    def _draw_frame(self, framedata):
        """Move all queued samples into the series and update the line."""
        if self.addedData:
            # Take the queued samples in one batch.  The original removed
            # them one at a time with ``del list[0]`` and one ``np.append``
            # each, which is quadratic in the number of queued samples.
            # Only the entries that were copied are deleted, so a sample
            # appended meanwhile stays queued for the next frame.
            pending = self.addedData[:]
            del self.addedData[:len(pending)]
            self.y = np.append(self.y, pending)
        count = len(self.y)
        self.line1.set_data(self.n[0:count], self.y[0:count])
        self._drawn_artists = [self.line1]
''' End Class '''
# A signal-slot mechanism is needed to send data to the GUI in a
# thread-safe way.  If this is not done right, things go very wrong.
class Communicate(QObject):
    """Holder for the float-valued signal that carries one sample."""

    data_signal = pyqtSignal(float)
''' End Class '''
def dataSendLoop(addData_callbackFunc):
    """Emit every value of ``cumulative`` to the callback, 100 per pause.

    Args:
        addData_callbackFunc: callable connected to the signal; it is invoked
            with one float per emitted sample.

    Prints ``'done'`` once all samples have been emitted.
    """
    # Setup the signal-slot mechanism.
    mySrc = Communicate()
    mySrc.data_signal.connect(addData_callbackFunc)
    # Use the log data
    totaly = cumulative
    pause = 100 * Time / Modified_length
    # Walk the data in slices of 100.  The original advanced a counter by 100
    # unchecked emits per pass (and carried an unreachable ``i = 0`` after its
    # ``break``); slicing cannot index past the end when the length is not a
    # multiple of 100.
    for start in range(0, len(totaly), 100):
        time.sleep(pause)
        for sample in totaly[start:start + 100]:
            mySrc.data_signal.emit(sample) # <- Here you emit a signal!
    print('done')
###
###
if __name__ == '__main__':
    # Create the application, apply the requested style, open the main
    # window and exit with the event loop's return code.
    app = QApplication(sys.argv)
    requested_style = QStyleFactory.create('Plastique')
    QApplication.setStyle(requested_style)
    myGUI = CustomMainWindow()
    exit_code = app.exec_()
    sys.exit(exit_code)

python numpy vectorize an array of object instances

I'd like to encapsulate my calc function and all its parameters inside an object, but vectorize the execution for millions of objects much like how numpy would do it. Any suggestions?
The calculation is still basic arithmetic, which numpy should be able to vectorize.
Example code:
import numpy as np
# Three rows of ten million random samples; each row supplies one argument of calc below.
myarray = np.random.rand(3, 10000000)
############################# This works fine: FAST ###################################
def calc(a,b,c):
    """Return ((a + b / c) ** b) / a; array arguments are handled elementwise."""
    base = a + b / c
    powered = base ** b
    return powered / a
# Unpacking along the first axis passes the three rows as a, b and c, so the arithmetic runs on whole arrays.
res1 = calc(*myarray) #0.7 seconds
############################# What I'd like to do (unsuccessfully): SLOW ###################################
class MyClass():
    """Holds three values a, b and c and evaluates ((a + b / c) ** b) / a on them."""

    __slots__ = ['a','b','c']

    def __init__(self, a,b,c):
        self.a = a
        self.b = b
        self.c = c

    def calc(self):
        """Return ((a + b / c) ** b) / a for the stored values."""
        base = self.a + self.b / self.c
        return base ** self.b / self.a
def classCalc(myClass:MyClass):
    """Call calc() on the given instance and return its result."""
    outcome = myClass.calc()
    return outcome
# Wrap the per-object call so it can be mapped over an object array.
vectorizedClassCalc = np.vectorize(classCalc)
# One MyClass instance per column of myarray.
myobjects = np.array([MyClass(a, b, c) for a, b, c in myarray.T])
res2 = vectorizedClassCalc(myobjects) #8 seconds no different from a list comprehension
res3 = [instance.calc() for instance in myobjects] #7.5 seconds
Perhaps pandas has additional features that would help?

Tensorflow - Read and Save TFRecords to Dict and Use Multiprocessing

I am trying to speed up the conversion of select tfrecords to a series of python dictionaries. Here's what I have. Initially the CPU utilization spikes, but then goes to almost zero, suggesting my code is not working correctly.
My goal is to have 3 dictionaries saved and pickled. There are 14,000+ tfrecord files (2 gigs appx). At the current rate, it will take about 84 hours to run on a single process.
Are there any problems with my use of Manager dicts?
import glob
import tensorflow as tf
import cPickle
import numpy as np
from tqdm import tqdm
import collections
from multiprocessing import Process, Manager, Pool
def get_multihot_encoding(example_label):
    """Return a 10-element multi-hot list for the labels in example_label.

    A label sets a slot to 1 only if it appears among the values of the
    module-level ``lookup``; the slot index is read from ``lookup_inverted``.
    Neither mapping is defined in this snippet.
    """
    encoding = np.zeros(10)
    for label in example_label:
        if label not in lookup.values():
            continue
        slot = lookup_inverted[label]
        encoding[slot] = 1
    return list(encoding)
# Set-up MultiProcessing
# The three dictionaries are Manager proxies, so every worker process writes
# into the same shared mappings, keyed by video id in process_tfrecord.
manager = Manager()
audio_embeddings_dict = manager.dict()
audio_labels_dict = manager.dict()
audio_multihot_dict = manager.dict()
# NOTE(review): this session is created at module level, before the worker
# pool further down exists -- confirm that sharing it with workers is intended.
sess = tf.Session()
# The iterable which gets passed to the function
all_tfrecord_filenames = glob.glob('/Users/jeff/features/audioset_v1_embeddings/unbal_train/*.tfrecord')
def process_tfrecord(tfrecord):
    """Read one tfrecord file into the shared dictionaries and checkpoint them.

    For every record whose labels intersect the module-level ``label_filters``
    (not defined in this snippet), the audio embedding frames, the raw labels
    and the multi-hot encoding are stored under the record's video id.  On
    every 100th record index the three shared dictionaries are pickled.

    Args:
        tfrecord: path of the tfrecord file to read.

    NOTE(review): the paste lost its indentation; the periodic save is placed
    at loop level (it runs for every 100th record, matched or not) -- confirm.
    """
    for idx, example in enumerate(tf.python_io.tf_record_iterator(tfrecord)):
        tf_example = tf.train.Example.FromString(example)
        vid_id = tf_example.features.feature['video_id'].bytes_list.value[0].decode(encoding='UTF-8')
        example_label = list(np.asarray(tf_example.features.feature['labels'].int64_list.value))
        # Non zero intersect of 2 sets is True - only create dict entries if this is true!
        matched = set(example_label) & label_filters
        if matched:
            print(matched, " Is the intersection of the two")
            tf_seq_example = tf.train.SequenceExample.FromString(example)
            frames = tf_seq_example.feature_lists.feature_list['audio_embedding'].feature
            # Decode each frame's raw bytes as uint8 and widen to float32 with
            # NumPy.  This yields the same values as decode_raw + cast, but
            # without adding a new graph op and running the session for
            # every frame.
            audio_frame = [
                np.frombuffer(frame.bytes_list.value[0], dtype=np.uint8).astype(np.float32)
                for frame in frames
            ]
            audio_embeddings_dict[vid_id] = audio_frame
            audio_labels_dict[vid_id] = example_label
            audio_multihot_dict[vid_id] = get_multihot_encoding(example_label)
            #print(get_multihot_encoding(example_label), "Is the encoded label")
        if idx % 100 == 0:
            print ("Saving dictionary at loop: {}".format(idx))
            # Pickling a Manager proxy stores a reference to the proxy, not
            # the data, so a plain-dict snapshot (.copy()) is dumped instead.
            # The files are closed deterministically by the with-blocks.
            with open('audio_embeddings_dict_unbal_train_multi_{}.pkl'.format(idx), 'wb') as handle:
                cPickle.dump(audio_embeddings_dict.copy(), handle)
            with open('audio_multihot_dict_bal_untrain_multi_{}.pkl'.format(idx), 'wb') as handle:
                cPickle.dump(audio_multihot_dict.copy(), handle)
            # Fix: the labels file previously received the multi-hot dictionary.
            with open('audio_labels_unbal_dict_multi_{}.pkl'.format(idx), 'wb') as handle:
                cPickle.dump(audio_labels_dict.copy(), handle)
# Fan the tfrecord files out over 50 worker processes.
pool = Pool(50)
try:
    result = pool.map(process_tfrecord, all_tfrecord_filenames)
finally:
    # Shut the workers down once the map has finished or failed; the
    # original never closed or joined the pool.
    pool.close()
    pool.join()