Calling Numba-generated PyCFunctionWithKeywords from Python

I serialized a jitted Numba function to a byte array and now want to deserialize and call it. This works fine for primitive data types with llvm_cfunc_wrapper_name:
import numba, ctypes
import llvmlite.binding as llvm

@numba.njit("f8(f8)")
def foo(x):
    return x + 0.5

# serialize function to byte array
sig = foo.signatures[0]
lib = foo.overloads[sig].library
cfunc_name = foo.overloads[sig].fndesc.llvm_cfunc_wrapper_name
function_bytes = lib._get_compiled_object()

# deserialize function_bytes to func
llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
backing_mod = llvm.parse_assembly("")
engine = llvm.create_mcjit_compiler(backing_mod, target_machine)
engine.add_object_file(llvm.ObjectFileRef.from_data(function_bytes))
func_ptr = engine.get_function_address(cfunc_name)

func = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)(func_ptr)
print(func(0.25))
But I want to call functions with NumPy array arguments. There is an llvm_cpython_wrapper_name for that, which uses PyCFunctionWithKeywords, but unfortunately my best guess segfaults:
import numba, ctypes
import llvmlite.binding as llvm
import numpy as np

@numba.njit("f8[:](f8[:])")
def foo(x):
    return x + 0.5

# serialize function to byte array
sig = foo.signatures[0]
lib = foo.overloads[sig].library
cpython_name = foo.overloads[sig].fndesc.llvm_cpython_wrapper_name
function_bytes = lib._get_compiled_object()

# deserialize function_bytes to func
llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
backing_mod = llvm.parse_assembly("")
engine = llvm.create_mcjit_compiler(backing_mod, target_machine)
engine.add_object_file(llvm.ObjectFileRef.from_data(function_bytes))
func_ptr = engine.get_function_address(cpython_name)

def func(*args, **kwargs):
    py_obj_ptr = ctypes.POINTER(ctypes.py_object)
    return ctypes.CFUNCTYPE(py_obj_ptr, py_obj_ptr, py_obj_ptr, py_obj_ptr)(func_ptr)(
        ctypes.cast(id(None), py_obj_ptr),
        ctypes.cast(id(args), py_obj_ptr),
        ctypes.cast(id(kwargs), py_obj_ptr))

# segfaults here
print(func(np.ones(3)))
Here are some links to the Numba source code (unfortunately very hard to follow) which might help in figuring this out; a guess at the calling convention follows the links.
https://github.com/numba/numba/blob/61ec1fd0f69aeadece218dccf4c39ebc5c7dfbc4/numba/core/callwrapper.py#L105
https://github.com/numba/numba/blob/61ec1fd0f69aeadece218dccf4c39ebc5c7dfbc4/numba/core/pythonapi.py#L1456
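One possible culprit (an assumption on my part, not a verified fix) is the ctypes prototype: PyCFunctionWithKeywords takes and returns PyObject * values directly, so the wrapper should be declared with py_object arguments rather than POINTER(py_object) casts of id(...), and with ctypes.PYFUNCTYPE rather than CFUNCTYPE so the GIL stays held during the call:

# A hedged sketch, not a confirmed fix: declare the standard
# PyCFunctionWithKeywords layout, PyObject *(*)(PyObject *self,
# PyObject *args, PyObject *kwargs), and let ctypes convert the objects.
proto = ctypes.PYFUNCTYPE(ctypes.py_object,   # return value
                          ctypes.py_object,   # self
                          ctypes.py_object,   # args tuple
                          ctypes.py_object)   # kwargs dict
wrapper = proto(func_ptr)

def func(*args, **kwargs):
    return wrapper(None, args, kwargs)

print(func(np.ones(3)))

Even with the right prototype this can still crash if the object file references unresolved Numba runtime (NRT) symbols, e.g. for array allocation, so treat this as a sketch rather than a complete solution.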

Related

Can I use OR-tools for TSP with a partial distance matrix (for a huge set of nodes)?

I'm trying to solve a TSP with OR-tools for a problem of roughly 80,000 nodes. The problem is that I need a huge distance matrix, which takes too much memory, so it's infeasible and I don't get a solution.
So:
Is there an option to work with a partial distance matrix in OR-tools? (see the matrix-free sketch after the code below)
If not, is there a way to improve my code?
Is there another external solver that can work for this task in Python?
import math
from collections import namedtuple
import random
import time
from sklearn.metrics.pairwise import euclidean_distances
import numpy as np
import numba
from scipy.spatial import distance_matrix
from math import sqrt

Point = namedtuple("Point", ['x', 'y'])

def length(point1, point2):
    # helper used below to score the tour (implementation assumed; not shown in the original post)
    return math.sqrt((point1.x - point2.x)**2 + (point1.y - point2.y)**2)

def solve_it(input_data):
    # Modify this code to run your optimization algorithm
    global POINTS
    # parse the input
    lines = input_data.split('\n')
    nodeCount = int(lines[0])
    points = []
    for i in range(1, nodeCount+1):
        line = lines[i]
        parts = line.split()
        points.append(Point(float(parts[0]), float(parts[1])))

    # 2. routing with OR-tools
    def dist_matrix(nodeCount, points):
        data = []
        for k in range(len(points)):
            data.append([int(points[k].x), int(points[k].y)])
        D = euclidean_distances(data, data)
        return D

    def create_data_model(D):
        """Stores the data for the problem."""
        data = {}
        data['distance_matrix'] = D  # yapf: disable
        data['num_vehicles'] = 1
        data['depot'] = 0
        return data

    def print_solution(manager, routing, solution):
        index = routing.Start(0)
        plan_output = []  # Route for vehicle 0
        route_distance = 0
        while not routing.IsEnd(index):
            plan_output.append(manager.IndexToNode(index))
            index = solution.Value(routing.NextVar(index))
        return plan_output

    def or_main(nodeCount, points):
        from ortools.constraint_solver import routing_enums_pb2
        from ortools.constraint_solver import pywrapcp
        """Entry point of the program."""
        # Instantiate the data problem.
        global sol
        D = dist_matrix(nodeCount, points)
        data = create_data_model(D)
        # Create the routing index manager.
        manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),
                                               data['num_vehicles'], data['depot'])
        # Create Routing Model.
        routing = pywrapcp.RoutingModel(manager)

        def distance_callback(from_index, to_index):
            """Returns the distance between the two nodes."""
            # Convert from routing variable Index to distance matrix NodeIndex.
            from_node = manager.IndexToNode(from_index)
            to_node = manager.IndexToNode(to_index)
            return data['distance_matrix'][from_node][to_node]

        transit_callback_index = routing.RegisterTransitCallback(distance_callback)
        # Define cost of each arc.
        routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
        # Setting first solution heuristic.
        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.local_search_metaheuristic = (
            routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
        k = 100
        if nodeCount <= 100:
            k = 30
        elif 100 <= nodeCount <= 1000:
            k = 300
        elif nodeCount > 1000:
            k = 17000
        search_parameters.time_limit.seconds = k
        search_parameters.log_search = True
        # Solve the problem.
        solution = routing.SolveWithParameters(search_parameters)
        # Print solution on console.
        if solution:
            sol = print_solution(manager, routing, solution)
            return sol
    ######################################################################
    solution = or_main(nodeCount, points)
    # calculate the length of the tour
    obj = length(points[solution[-1]], points[solution[0]])
    for index in range(0, nodeCount-1):
        obj += length(points[solution[index]], points[solution[index+1]])
    # prepare the solution in the specified output format
    output_data = '%.2f' % obj + ' ' + str(0) + '\n'
    output_data += ' '.join(map(str, solution))
    return output_data

if __name__ == '__main__':
    import sys
    if len(sys.argv) > 1:
        file_location = sys.argv[1].strip()
        with open(file_location, 'r') as input_data_file:
            input_data = input_data_file.read()
        print(solve_it(input_data))
    else:
        print('This test requires an input file. Please select one from the data directory. (i.e. python solver.py ./data/tsp_51_1)')
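On the first question: OR-tools does not actually require a precomputed matrix at all. The transit callback registered with RegisterTransitCallback can compute each arc cost on demand from the coordinates, which avoids allocating an 80,000 x 80,000 matrix entirely. A minimal sketch (my illustration, not from the original post; solve_without_matrix is an illustrative name):

import math
from ortools.constraint_solver import pywrapcp

def solve_without_matrix(points):
    # one vehicle, depot at node 0, no distance matrix anywhere
    manager = pywrapcp.RoutingIndexManager(len(points), 1, 0)
    routing = pywrapcp.RoutingModel(manager)

    def distance_callback(from_index, to_index):
        # distance is computed lazily from coordinates; OR-tools expects
        # integer arc costs, hence the rounding
        a = points[manager.IndexToNode(from_index)]
        b = points[manager.IndexToNode(to_index)]
        return int(round(math.hypot(a.x - b.x, a.y - b.y)))

    transit = routing.RegisterTransitCallback(distance_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit)
    return routing.SolveWithParameters(pywrapcp.DefaultRoutingSearchParameters())

This trades memory for repeated distance computations, which is usually the right trade at this scale.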

NumPy, OOP and callables

I'm implementing a Markov chain Monte Carlo with Metropolis and Barker alphas for numerical integration. I've created a class called MCMCIntegrator(). I've loaded it with some attributes; one of them is the pdf of the function (a lambda) we're trying to integrate, called g.
import numpy as np
import scipy.stats as st

class MCMCIntegrator:
    def __init__(self):
        self.g = lambda x: st.gamma.pdf(x, 0, 1, scale=1 / 1.23452676) * np.abs(np.cos(1.123454156))
        self.size = 10000
        self.std = 0.6
        self.real_int = 0.06496359
There are other methods in this class. size is the size of the sample that the class must generate, and std is the standard deviation of the normal kernel, which you will see in a few seconds. real_int is the value of the integral of the function we're integrating, from 1 to 2; I generated it with an R script. Now, to the problem.
def _chain(self, method=None):
    """
    Markov chain heat-up with burn-in
    :param method: Metropolis or Barker alpha
    :return: np.array containing the sample
    """
    old = 0
    sample = np.zeros(int(self.size * 1.5))
    i = 0
    if method:
        def alpha(a, b): return min(1, self.g(b) / self.g(a))
    else:
        def alpha(a, b): return self.g(b) / (self.g(a) + self.g(b))
    while i != len(sample):
        new = st.norm(loc=old, scale=self.std).rvs()
        if new < 0:
            continue  # reject proposals outside the support of g
        alpha = alpha(old, new)
        u = st.uniform.rvs()
        if alpha > u:
            sample[i] = new
            old = new
            i += 1
    return np.array(sample)
When I call the _chain() method, I get the following error:
44 while i != len(sample):
45 new = st.norm(loc=old, scale=self.std).rvs()
---> 46 alpha = alpha(old, new)
47 u = st.uniform.rvs()
48
TypeError: 'numpy.float64' object is not callable
alpha returns a numpy.float64, but I don't know why it's saying it's not callable.
You define a function named alpha based on some condition in an 'early' section of the code:
if method:
    def alpha(a, b): return min(1, self.g(b) / self.g(a))
else:
    def alpha(a, b): return self.g(b) / (self.g(a) + self.g(b))
and then, in the while loop (a 'later' part of the code), you assign the return value of this function to a variable that is also named alpha.
Since the two objects share the same name, and the variable is assigned later in the code without the function being re-declared anywhere after that assignment, the variable replaces the function in the namespace, and you can't call alpha anymore, because it has ceased to be a function.
If it is not a hindrance to your program logic (and it doesn't seem to be), renaming the variable to some other nice name fixes the problem, as in the sketch below.
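A minimal sketch of the renamed loop (accept_prob is an illustrative name, not from the original post):

while i != len(sample):
    new = st.norm(loc=old, scale=self.std).rvs()
    accept_prob = alpha(old, new)  # the function alpha stays intact
    u = st.uniform.rvs()
    if accept_prob > u:
        sample[i] = new
        old = new
        i += 1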

Could not convert string to float in Python

I'm trying to do principal component analysis on a CSV file, but when I run the code I get this error:
C:\Users\Lenovo\Desktop>python pca.py
ValueError: could not convert string to float: Annee;NET;INT;SUB;LMT;DCT;IMM;EXP;VRD
This is my CSV file. I tried removing spaces and anything else I could think of. Below is my Python script; I don't know what I'm missing.
Note: I run this code under Python 2.7.
from sklearn.externals import joblib
import numpy as np
import glob
import os
import time

my_matrix = np.loadtxt(open("pca.csv", "rb"), delimiter=",", skiprows=0)

def pca(dataMat, r, autoset_r=False, autoset_rate=0.9):
    """
    purpose: principal components analysis
    """
    print("Start to do PCA...")
    t1 = time.time()
    meanVal = np.mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVal
    # normData = meanRemoved / np.std(dataMat)
    covMat = np.cov(meanRemoved, rowvar=0)
    eigVals, eigVects = np.linalg.eig(np.mat(covMat))
    eigValIndex = np.argsort(-eigVals)
    if autoset_r:
        r = autoset_eigNum(eigVals, autoset_rate)
        print("autoset: take top {} of {} features".format(r, meanRemoved.shape[1]))
    r_eigValIndex = eigValIndex[:r]
    r_eigVect = eigVects[:, r_eigValIndex]
    lowDDataMat = meanRemoved * r_eigVect
    reconMat = (lowDDataMat * r_eigVect.T) + meanVal
    t2 = time.time()
    print("PCA takes %f seconds" % (t2 - t1))
    joblib.dump(r_eigVect, './pca_args_save/r_eigVect.eig')
    joblib.dump(meanVal, './pca_args_save/meanVal.mean')
    return lowDDataMat, reconMat

def autoset_eigNum(eigValues, rate=0.99):
    eigValues_sorted = sorted(eigValues, reverse=True)
    eigVals_total = eigValues.sum()
    for i in range(1, len(eigValues_sorted) + 1):
        eigVals_sum = sum(eigValues_sorted[:i])
        if eigVals_sum / eigVals_total >= rate:
            break
    return i
It seems that NumPy has a problem parsing your header row to float.
Try setting skiprows=1 in your np.loadtxt call in order to skip the table header. Note also that the header shown in the error is semicolon-separated; if your data rows use ';' too, you will need delimiter=';' instead of ','.
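A minimal sketch of the corrected load, assuming the data rows are semicolon-separated like the header (check the file to confirm):

import numpy as np

# skiprows=1 drops the 'Annee;NET;INT;...' header line;
# delimiter=';' matches the separator visible in the error message
my_matrix = np.loadtxt("pca.csv", delimiter=";", skiprows=1)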

Python multiprocessing: how to update a complex object in a manager list without using the .join() method

I started programming in Python about 2 months ago and I've been struggling with this problem for the last 2 weeks.
I know there are many similar threads to this one, but I can't really find a solution which suits my case.
I need to have a main process which interacts with Telegram, and another process, buffer, which understands the complex object received from the main process and updates it.
I'd like to do this in a simpler and smoother way.
At the moment objects are not being updated, due to the use of multiprocessing without the join() method.
I then tried to use multithreading instead, but it gives me compatibility problems with Pyrogram, a framework which I am using to interact with Telegram.
I rewrote the "complexity" of my project below in order to reproduce the same error I am getting, and in order to get and give the best help possible from and for everyone.
a.py
class A():
    def __init__(self, length = -1, height = -1):
        self.length = length
        self.height = height
b.py
from a import A

class B(A):
    def __init__(self, length = -1, height = -1, width = -1):
        super().__init__(length = -1, height = -1)
        self.length = length
        self.height = height
        self.width = width

    def setHeight(self, value):
        self.height = value
c.py
class C():
    def __init__(self, a, x = 0, y = 0):
        self.a = a
        self.x = x
        self.y = y

    def func1(self):
        if self.x < 7:
            self.x = 7
d.py
from c import C

class D(C):
    def __init__(self, a, x = 0, y = 0, z = 0):
        super().__init__(a, x = 0, y = 0)
        self.a = a
        self.x = x
        self.y = y
        self.z = z

    def func2(self):
        self.func1()
main.py
from b import B
from d import D
from multiprocessing import Process, Manager
from buffer import buffer

if __name__ == "__main__":
    manager = Manager()
    lizt = manager.list()
    buffer = Process(target = buffer, args = (lizt, ))  # passing the list as a parameter
    buffer.start()
    # can't invoke buffer.join() here because I need the code below to keep running while
    # the buffer process takes a few minutes to finish an instance passed in the list;
    # hence I can't wait for join() to update the objects inside the buffer, but I need
    # the objects updated in order to pop them out of the list
    import datetime as dt
    t = dt.datetime.now()
    # this while loop was put here to reproduce the same error I am getting
    while True:
        if t + dt.timedelta(seconds = 10) < dt.datetime.now():
            lizt.append(D(B(5, 5, 5)))
            t = dt.datetime.now()

"""
# This is the code which looks like the one in my project
# main.py
from pyrogram import Client  # a kind-of-multithreading library (pool of 4 processes), uses asyncio
from b import B
from d import D
from multiprocessing import Process, Manager
from buffer import buffer

if __name__ == "__main__":
    api_id = 1234567
    api_hash = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    app = Client("my_account", api_id, api_hash)
    manager = Manager()
    lizt = manager.list()
    buffer = Process(target = buffer, args = (lizt, ))  # passing the list as a parameter
    buffer.start()
    # can't invoke buffer.join() here because I need the code below to run at the same
    # time as the buffer process; hence I can't wait for join() to update the objects
    # inside the buffer

    @app.on_message()
    def my_handler(client, message):
        lizt.append(complex_object_conatining_message)
"""
buffer.py
def buffer(buffer):
    print("buffer was defined")
    while True:
        if len(buffer) > 0:
            print(buffer[0].x)          # prints 0
            buffer[0].func2()           # this changes the attribute locally in the class instance, but not here
            print(buffer[0].x)          # prints 0, but I'd like it to be 7
            print(buffer[0].a.height)   # prints 5
            buffer[0].a.setHeight(10)   # and this has the same behaviour
            print(buffer[0].a.height)   # prints 5, but I'd like it to be 10
            buffer.pop(0)
This is the whole code about the problem I am having.
Literally every suggestion is welcome, hopefully constructive, thank you in advance!
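For context, the usual workaround for this exact symptom (added here as a sketch; it is not the poster's final solution below) rests on the fact that a Manager list hands out copies of its elements: in-place mutation of buffer[0] is invisible to other processes, while reassigning the element makes the proxy propagate the change.

# Sketch of the standard Manager-list workaround (illustrative):
obj = buffer[0]        # a local copy, not a shared reference
obj.func2()            # mutate the copy...
obj.a.setHeight(10)
buffer[0] = obj        # ...then write it back so the manager sees the update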
In the end I had to change my approach to this problem and solve it with asyncio, which the framework was already using as well.
This solution offers everything I was looking for:
- complex objects get updated
- the problems of multiprocessing (in particular with join()) are avoided
It is also lightweight: before, I had 2 Python processes, one about 40K and one about 75K; the current single process is about 30K (and it's also faster and cleaner).
Here's the solution; I hope it will be useful for someone else like it was for me.
The classes are skipped because this solution updates complex objects absolutely fine.
main.py
main.py
from pyrogram import Client
import asyncio
import time
from firstWorker import firstWorker  # missing import added; firstWorker.py is shown below

def cancel_tasks():
    # get all tasks in the current loop
    tasks = asyncio.Task.all_tasks()
    for t in tasks:
        t.cancel()

try:
    buffer = []
    firstWorker(buffer)  # this one is the old buffer.py file and function
    # the missing loop and loop method are explained in the next piece of code
except KeyboardInterrupt:
    print("")
finally:
    print("Closing Loop")
    cancel_tasks()
firstWorker.py
import asyncio
from pyrogram import Client            # missing import added
from secondWorker import secondWorker  # missing import added; secondWorker.py is shown below

def firstWorker(buffer):
    print("First Worker Executed")
    api_id = 1234567
    api_hash = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    app = Client("my_account", api_id, api_hash)

    @app.on_message()
    async def my_handler(client, message):
        print("Message Arrived")
        buffer.append(complex_object_conatining_message)
        await asyncio.sleep(1)

    app.run(secondWorker(buffer))  # here is the trick: I changed the run() method of the
                                   # Client class inside the Pyrogram framework, since it
                                   # was a loop itself. This way I added another task to
                                   # the existing loop, in order to let both run together.
secondWorker.py
import asyncio

async def secondWorker(buffer):
    while True:
        if len(buffer) > 0:
            print(buffer.pop(0))
        await asyncio.sleep(1)
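The underlying idea, stripped of Pyrogram, is simply to schedule both workers on the same event loop. A generic sketch (producer and consumer are illustrative names; producer stands in for the message handler):

import asyncio

async def producer(buffer):
    # stands in for the Pyrogram handler that appends incoming messages
    for i in range(3):
        buffer.append(i)
        await asyncio.sleep(1)

async def consumer(buffer):
    # same role as secondWorker above
    while True:
        if buffer:
            print(buffer.pop(0))
        await asyncio.sleep(1)

buf = []
loop = asyncio.get_event_loop()
loop.create_task(consumer(buf))         # second task on the same loop
loop.run_until_complete(producer(buf))  # both now run concurrently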
The resources to understand the asyncio used in this code can be found here:
Asyncio simple tutorial
Python Asyncio Official Documentation
This tutorial about how to fix classical Asyncio errors

Simplest and most Pythonic way to serialize/deserialize object <-> dict <-> JSON in Python 3.5+

I am new to Python, and I know there are many answers, but most of them use __dict__, which is not present in Python 3 any more.
Let's say I have object:
class A(object):
    def __init__(self, f1, f2):
        self.f1 = f1
        self.f2 = f2
Now f2 is another object:
class F2(object):
    def __init__(self, f21, f22):
        self.f21 = f21
        self.f22 = f22
So an A object is a complex object. What is the simplest way to:
serialize A to a dict and then to JSON,
and then deserialize it back from JSON -> A,
all in Python 3.5+, and preferably without additional imports, as our internal company Nexus is limited? (A __dict__-based sketch follows.)
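For what it's worth, instances do still have a __dict__ in Python 3, so a dict/JSON round trip can be hand-rolled with only the standard library. A minimal sketch (to_dict is an illustrative helper, not a library API):

import json

def to_dict(obj):
    # recursively flatten an object graph into plain dicts
    if hasattr(obj, '__dict__'):
        return {k: to_dict(v) for k, v in vars(obj).items()}
    return obj

a = A(1, F2(21, 22))
s = json.dumps(to_dict(a))        # '{"f1": 1, "f2": {"f21": 21, "f22": 22}}'

d = json.loads(s)
back = A(d['f1'], F2(**d['f2']))  # rebuilding requires knowing the class structure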
import pickle
import base64
import json

class A(object):
    def __init__(self, f1, f2):
        self.f1 = f1
        self.f2 = f2

a = pickle.dumps(A, 3)  # note: this pickles the class object itself, not an instance
j = base64.b64encode(a).decode('utf-8')
x = json.dumps([j])

print(a)
print(j)
print(x)

# output:
# b'\x80\x03c__main__\nA\nq\x00.'
# gANjX19tYWluX18KQQpxAC4=
# ["gANjX19tYWluX18KQQpxAC4="]
Deserialization would be the inverse: json.loads, str.encode, base64.b64decode, pickle.loads.
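A minimal sketch of that inverse chain, continuing from the variables above:

# round-trip the JSON payload back to the pickled object
j2 = json.loads(x)[0]                       # '["gANj..."]' -> 'gANj...'
raw = base64.b64decode(j2.encode('utf-8'))  # base64 text -> pickle bytes
obj = pickle.loads(raw)                     # the class A, as pickled above
print(obj)  # <class '__main__.A'>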