TypeError: int() argument must be a string, a bytes-like object or a number, not 'PSKeyword' - pdfminer

I tried the code below, but it raises the following error:
TypeError: int() argument must be a string, a bytes-like object or a number, not 'PSKeyword'
from pdfminer.layout import LAParams
from pdfminer.converter import PDFResourceManager, PDFPageAggregator
from pdfminer.pdfpage import PDFPage
from pdfminer.layout import LTTextBoxHorizontal
document = open('Sample.pdf', 'rb')
#Create resource manager
rsrcmgr = PDFResourceManager()
# Set parameters for analysis.
laparams = LAParams()
# Create a PDF page aggregator object.
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device)
for page in PDFPage.get_pages(document):
# receive the LTPage object for the page.
layout = device.get_result()
for element in layout:
if instanceof(element, LTTextBoxHorizontal):


AttributeError: 'Tensor' object has no attribute 'numpy' while mapping a function through my dataset

I'm trying to map a function process_image to the dataset. This function calls another function, get_label. In get_label, I'm trying to retrieve the label's name from images.
The file path is like this: C:\\Users\\sis\\Desktop\\test\\0002_c1s1_000451_03.jpg. The label is number 0002.
def get_lab(file_path):
parts = tf.strings.split(file_path, os.path.sep)
return label
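I solved it! I didn't understand exactly where the error was, but I think the previous code mixed eager mode and graph mode: a function passed to Dataset.map is traced as a graph, and graph tensors have no .numpy() method. So I changed the code of the get_label function to use only TensorFlow string ops, and it worked!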
def get_lab(file_path):
    """Return the numeric label encoded at the start of an image file name.

    The file name is expected to look like ``0002_c1s1_000451_03.jpg``; the
    text before the first underscore (``0002``) is the label.

    Only TensorFlow string ops are used, so the function also works when it
    is traced in graph mode (for example inside ``Dataset.map``), where
    tensors have no ``.numpy()`` method.

    Args:
        file_path: A string (or scalar string tensor) holding the path of
            the image, with components separated by ``os.path.sep``.

    Returns:
        A scalar tensor holding the parsed label.

    NOTE(review): the snippet previously returned an undefined name
    ``label``; parsing the extracted prefix as a number is the presumed
    intent -- confirm.
    """
    # Keep only the last path component, i.e. the file name.
    file_name = tf.strings.split(file_path, os.path.sep)[-1]
    # The label is everything before the first underscore, e.g. "0002".
    label_text = tf.strings.split(file_name, sep='_')[0]
    # to_number yields float32 by default; pass out_type=tf.int32 if an
    # integer class id is required.
    label = tf.strings.to_number(label_text)
    return label
You can also apply this to the folder name, which is sufficient to obtain the class name without using external programs.
[ Sample ]:
import os
import tensorflow as tf
def get_lab(file_path):
parts = tf.strings.split(file_path, os.path.sep)
return label.numpy()
directory = "F:\\datasets\\downloads\\Actors\\train\\Candidt Kibt\\01.tif\\"
print( 'label as number: ' + str(get_lab( directory )) )
directory = "F:\\datasets\\downloads\\Actors\\train\\"
print( 'classname: ' + str(tf.io.gfile.listdir(
[ Output ]:
label as number: 1.0
classname: ['Candidt Kibt', 'Pikaploy']

Calling Numba-generated PyCFunctionWithKeywords from Python

I serialized a jitted Numba function to a byte array and now want to deserialize and call it. This works fine for primitive data types with llvm_cfunc_wrapper_name:
import numba, ctypes
import llvmlite.binding as llvm
def foo(x):
    """Return ``x`` increased by 0.5.

    NOTE(review): the statements after this definition read
    ``foo.signatures`` and ``foo.overloads``, which a plain Python function
    does not have. Presumably a Numba JIT decorator with an explicit
    signature was lost when the snippet was copied -- confirm before
    running the serialization code.
    """
    return x + 0.5
# serialize function to byte array
sig = foo.signatures[0]
lib = foo.overloads[sig].library
cfunc_name = foo.overloads[sig].fndesc.llvm_cfunc_wrapper_name
function_bytes = lib._get_compiled_object()
# deserialize function_bytes to func
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
backing_mod = llvm.parse_assembly("")
engine = llvm.create_mcjit_compiler(backing_mod, target_machine)
func_ptr = engine.get_function_address(cfunc_name)
func = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)(func_ptr)
But I want to call functions with NumPy arguments. There is a llvm_cpython_wrapper_name for that which uses PyCFunctionWithKeywords, but unfortunately my best guess segfaults:
import numba, ctypes
import llvmlite.binding as llvm
import numpy as np
def foo(x):
    """Return ``x`` increased by 0.5 (element-wise for NumPy arrays).

    NOTE(review): the statements after this definition read
    ``foo.signatures`` and ``foo.overloads``, which a plain Python function
    does not have. Presumably a Numba JIT decorator with an explicit array
    signature was lost when the snippet was copied -- confirm before
    running the serialization code.
    """
    return x + 0.5
# serialize function to byte array
sig = foo.signatures[0]
lib = foo.overloads[sig].library
cpython_name = foo.overloads[sig].fndesc.llvm_cpython_wrapper_name
function_bytes = lib._get_compiled_object()
# deserialize function_bytes to func
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
backing_mod = llvm.parse_assembly("")
engine = llvm.create_mcjit_compiler(backing_mod, target_machine)
func_ptr = engine.get_function_address(cpython_name)
def func(*args, **kwargs):
py_obj_ptr = ctypes.POINTER(ctypes.py_object)
return ctypes.CFUNCTYPE(py_obj_ptr, py_obj_ptr, py_obj_ptr, py_obj_ptr)(func_ptr)(
ctypes.cast(id(None), py_obj_ptr),
ctypes.cast(id(args), py_obj_ptr),
ctypes.cast(id(kwargs), py_obj_ptr))
# segfaults here
Here are some links to Numba source code (unfortunately very hard to follow), which might be helpful to figure this out.

ERROR in getting file from Amazon S3 using python(3.8)

I am trying to read a file (such as a .pkl file) from Amazon S3 and store its contents in a variable.
And I am getting the following error:
expected str, bytes or os.PathLike object, not _io.BytesIO
CODE for S3
class S3Mgr:
def __init__(self,bucketName):
self.aws_access_key_id= CONFIG.S3[0]
self.aws_secret_access_key = CONFIG.S3[1]
self.region_name = CONFIG.S3[2]
self.bucketName = bucketName
def __connect(self):
self.s3 = boto3.client(
def retrieveModel(self,fileName):
a = self.s3.download_fileobj(Bucket=self.bucketName, Key="fcm/project/"+str(fileName))
return a['Body'].read()
CODE for pickle
import pickle
from io import BytesIO

# The manager class is declared as ``S3Mgr``; Python names are
# case-sensitive, so the previous spelling ``S3mgr`` raised a NameError.
S3obj = S3Mgr("mybucket")
# retrieveModel is expected to return the raw bytes of the pickled model.
model = S3obj.retrieveModel("model.pkl")
# Wrap the bytes in a file-like object because pickle.load reads from a
# stream rather than from a bytes value.
data = BytesIO(model)
# SECURITY: unpickling can execute arbitrary code. Only load objects from
# a bucket whose contents you fully trust.
model = pickle.load(data)
prediction = model.predict(inputArray)
In the code above, inputArray is the array of inputs.
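Try replacing download_fileobj with get_object in the retrieveModel method of your S3Mgr class. download_fileobj writes the object into a file-like object that you pass in and returns None, whereas get_object returns a response dictionary whose 'Body' entry is a stream you can read().
Something like this: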
def retrieveModel(self,fileName):
    """Fetch a stored model file from S3 and return its contents as bytes.

    The object key is built by prefixing ``fileName`` with
    ``"fcm/project/"``; the bucket is taken from ``self.bucketName``.

    Args:
        fileName: Name of the file inside the ``fcm/project/`` prefix. It is
            converted with ``str()`` before being appended to the key.

    Returns:
        Whatever ``read()`` on the response's ``'Body'`` entry returns.

    NOTE(review): assumes ``self.s3`` has already been initialised with a
    boto3 S3 client before this method is called -- confirm.
    """
    # get_object returns a response mapping; its 'Body' entry is a stream.
    response = self.s3.get_object(Bucket=self.bucketName, Key="fcm/project/"+str(fileName))
    return response['Body'].read()
I hope it works. Let me know if you make any progress or need any help.
Cheers 👍 !

Getting error while converting base64 string into image using pyspark

I want to extract and process image data (a 3D array) that is available in base64 format using PySpark. I'm using a pandas_udf with PyArrow as the processing function. After passing the base64 string into the pandas_udf function, I first convert the base64 string into an image, but at this step I get the error "TypeError: file() argument 1 must be encoded string without null bytes, not str".
I am using the function base64.b64decode(imgString) to convert the base64 string to an image. I'm using Python 2.7.
interested_cols = ["id","name","image_b64"]
indexed_avrodf = avrodf.select(interested_cols)
ctx_cols = ["id","name"]
result_sdf = indexed_avrodf.groupby(ctx_cols).apply(img_proc)
schema = StructType([
#pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def img_proc(df):
df['Proc_output'] = df['image_b64'].apply(is_processed)
return df
def is_processed(imgString):
import cv2
from PIL import Image, ImageDraw, ImageChops
import base64
wisimg = base64.b64decode(imgString)
image = Image.open(wisimg)
return processed_status

How to convert an R data file to Python types

I want to convert R data types to Python data types. Below is the whole code:
def convert_datafiles(datasets_folder):
import rpy2.robjects
for root, dirs, files in os.walk(datasets_folder):
for name in files:
# sort out .RData files
if name.endswith('.RData'):
name_ = os.path.splitext(name)[0]
name_path = os.path.join(datasets_folder, name_)
# creat sub-directory
if not os.path.exists(name_path):
file_path = os.path.join(root, name)
robj = robjects.r.load(file_path)
# check out subfiles in the data frame
for var in robj:
###### error happend right here
myRData = pandas2ri.ri2py_dataframe( var )
####error happend right here
# convert to DataFrame
if not isinstance(myRData, pd.DataFrame):
myRData = pd.DataFrame(myRData)
var_path = os.path.join(datasets_folder,name_,var+'.csv')
os.remove(os.path.join(datasets_folder, name)) # clean up
print ("=> Success!")
I want to convert the R data types to Python types, but this error keeps popping up: AttributeError: 'str' object has no attribute 'dtype'
How can I resolve this error?
The rpy2 documentation is somewhat incomplete when it comes to interaction with pandas, but unit tests will provide examples of conversion. For example:
rdataf = robjects.r('data.frame(a=1:2, '
' b=I(c("a", "b")), '
' c=c("a", "b"))')
with localconverter(default_converter + rpyp.converter) as cv:
pandas_df = robjects.conversion.ri2py(rdataf)