backtrader IndexError: list index out of range problem - pandas
The data comes in as a Unix timestamp in milliseconds,
so I am using dtformat=lambda x: datetime.datetime.utcfromtimestamp(int(x) / 1000).
import backtrader as bt
import datetime
cerebro = bt.Cerebro()
data = bt.feeds.GenericCSVData(
    dataname="ftx1h.csv",
    dtformat=lambda x: datetime.datetime.utcfromtimestamp(int(x) / 1000),
)
cerebro.adddata(data)
cerebro.run()
cerebro.plot()
But now I get an IndexError: list index out of range.
Traceback (most recent call last):
File "/Users/talha/VScode/Backtest/Strategy5.py", line 7, in
cerebro.run()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/backtrader/cerebro.py", line 1127, in run
runstrat = self.runstrategies(iterstrat)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/backtrader/cerebro.py", line 1212, in runstrategies
data.preload()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/backtrader/feed.py", line 688, in preload
while self.load():
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/backtrader/feed.py", line 479, in load
_loadret = self._load()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/backtrader/feed.py", line 710, in _load
return self._loadline(linetokens)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/backtrader/feeds/csvgeneric.py", line 148, in _loadline
csvfield = linetokens[csvidx]
IndexError: list index out of range
The Data
1660435200000,24454.0,24528.0,24355.0,24527.0,67147303.3027
1660438800000,24527.0,24682.0,24422.0,24598.0,116587503.2913
1660442400000,24598.0,24632.0,24514.0,24608.0,34892443.4722
1660446000000,24608.0,24635.0,24553.0,24584.0,22989205.8046
1660449600000,24584.0,24652.0,24506.0,24581.0,44973942.2299
1660453200000,24581.0,24636.0,24540.0,24603.0,47360860.8874
1660456800000,24613.0,24695.0,24603.0,24689.0,32459071.9839
1660460400000,24689.0,25065.0,24682.0,24900.0,301312472.1925
1660464000000,24900.0,24902.0,24608.0,24673.0,170952143.2637
1660467600000,24672.0,24790.0,24643.0,24758.0,70969336.6324
1660471200000,24758.0,24759.0,24618.0,24627.0,64221863.5382
1660474800000,24627.0,24682.0,24525.0,24569.0,76070172.2453
1660478400000,24569.0,24593.0,24475.0,24535.0,59713290.9481
1660482000000,24535.0,24538.0,24406.0,24481.0,89923365.2962
1660485600000,24481.0,24534.0,24466.0,24518.0,49835330.6131
1660489200000,24518.0,24549.0,24468.0,24528.0,46556537.1289
1660492800000,24528.0,24560.0,24313.0,24315.0,108465182.8929
1660496400000,24315.0,24332.0,24178.0,24232.0,176757581.0963
1660500000000,24232.0,24300.0,24155.0,24282.0,68322221.4964
1660503600000,24281.0,24327.0,24244.0,24289.0,34982570.2998
1660507200000,24289.0,24352.0,24255.0,24315.0,40799224.029
1660510800000,24315.0,24448.0,24278.0,24349.0,94709768.3767
1660514400000,24349.0,24372.0,24182.0,24265.0,75704499.2116
1660518000000,24265.0,24358.0,24240.0,24310.0,46011851.1185
1660521600000,24310.0,24418.0,24171.0,24406.0,90810435.293
1660525200000,24406.0,24665.0,24340.0,24655.0,233812433.3012
1660528800000,24654.0,24950.0,24653.0,24937.0,223521446.7061
1660532400000,24937.0,25232.0,24813.0,24883.0,373106086.273
1660536000000,24883.0,24900.0,24696.0,24863.0,172798798.982
1660539600000,24863.0,24998.0,24762.0,24835.0,110243603.1486
1660543200000,24835.0,24841.0,24088.0,24129.0,338761936.3576
1660546800000,24130.0,24218.0,23950.0,24026.0,287354302.5387
1660550400000,24025.0,24089.0,23873.0,24044.0,151992609.2842
1660554000000,24044.0,24210.0,23992.0,24176.0,132115004.5245
1660557600000,24180.0,24300.0,24133.0,24275.0,88762538.3025
1660561200000,24275.0,24316.0,24009.0,24057.0,116716391.6228
1660564800000,24057.0,24197.0,23891.0,23966.0,135401782.9878
Your data is all one really long line, at least when I pasted it into Geany. Are there any line endings? It could just be a problem with my browser, but this might be your problem.
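If you want to rule that out, here is a minimal sketch (assuming the file is the ftx1h.csv from the question) that inspects the raw bytes and reports which line endings, if any, the file actually contains:

# Minimal check: count the line-ending styles present in the raw CSV bytes.
with open("ftx1h.csv", "rb") as f:
    raw = f.read()

crlf = raw.count(b"\r\n")
print("CRLF (\\r\\n):", crlf)
print("LF only:", raw.count(b"\n") - crlf)
print("CR only:", raw.count(b"\r") - crlf)

If all three counts are zero, the file really is one long line and the feed has nothing to split into rows.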
Related
Data Length Error when Merging PDFs with PyPDF2
I am starting a project that will take specific pages out of each PDF in a folder and merge those pages into a single file. I am getting the error below, about the length of the encryption, when running the quoted code, and I don't know where I would need to address that.

from PyPDF2 import PdfFileMerger
import glob

files = glob.glob('C:/Users/Jake/Documents/UPLOAD/test_merge/*.pdf')
merger = PdfFileMerger()

for file in files:
    merger.append(file)

merger.write("merged.pdf")
merger.close()

ERROR

Traceback (most recent call last):
File "C:\Users\Jake\Documents\Work Projects\Python\Contract Merger\Merger .02", line 10, in <module>
merger.write("merged.pdf")
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_merger.py", line 312, in write
my_file, ret_fileobj = self.output.write(fileobj)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 838, in write
self.write_stream(stream)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 811, in write_stream
self._sweep_indirect_references(self._root)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 960, in _sweep_indirect_references
data = self._resolve_indirect_object(data)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 1005, in _resolve_indirect_object
real_obj = data.pdf.get_object(data)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_reader.py", line 1187, in get_object
retval = self._encryption.decrypt_object(
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 747, in decrypt_object
return cf.decrypt_object(obj)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 185, in decrypt_object
obj[dictkey] = self.decrypt_object(value)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 179, in decrypt_object
data = self.strCrypt.decrypt(obj.original_bytes)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 87, in decrypt
d = aes.decrypt(data)
File "C:\Users\Jake\Anaconda3\lib\site-packages\Crypto\Cipher\_mode_cbc.py", line 246, in decrypt
raise ValueError("Data must be padded to %d byte boundary in CBC mode" % self.block_size)
ValueError: Data must be padded to 16 byte boundary in CBC mode
[Finished in 393ms]

I wrote a basic program from a YouTube video and tried to run it, but I got an error saying that PyCryptodome is a dependency of PyPDF2. After installing that, I am getting this error about the data length for the encryption when writing the PDF. Googling that error led me to this solution. I am a bit of a novice, and I don't really understand why any kind of encryption is being applied in the first place, other than what I assume is necessary for the PDF reader/writer to operate, so I don't know where I would need to apply that solution in this code. After writing up this question, I was led to this solution; I tried to run the code below and received the same error.
from PyPDF2 import PdfFileMerger, PdfFileReader
import glob

merger = PdfFileMerger()
files = glob.glob('C:/Users/Jake/Documents/UPLOAD/test_merge/*.pdf')

for filename in files:
    with open(filename, 'rb') as source:
        tmp = PdfFileReader(source)
        merger.append(tmp)

merger.write('Result.pdf')

ERROR

Traceback (most recent call last):
File "C:\Users\Jake\Documents\Work Projects\Python\Contract Merger\Merger .03.py", line 13, in <module>
merger.write('Result.pdf')
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_merger.py", line 312, in write
my_file, ret_fileobj = self.output.write(fileobj)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 838, in write
self.write_stream(stream)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 811, in write_stream
self._sweep_indirect_references(self._root)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 960, in _sweep_indirect_references
data = self._resolve_indirect_object(data)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_writer.py", line 1005, in _resolve_indirect_object
real_obj = data.pdf.get_object(data)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_reader.py", line 1187, in get_object
retval = self._encryption.decrypt_object(
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 747, in decrypt_object
return cf.decrypt_object(obj)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 185, in decrypt_object
obj[dictkey] = self.decrypt_object(value)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 179, in decrypt_object
data = self.strCrypt.decrypt(obj.original_bytes)
File "C:\Users\Jake\Anaconda3\lib\site-packages\PyPDF2\_encryption.py", line 87, in decrypt
d = aes.decrypt(data)
File "C:\Users\Jake\Anaconda3\lib\site-packages\Crypto\Cipher\_mode_cbc.py", line 246, in decrypt
raise ValueError("Data must be padded to %d byte boundary in CBC mode" % self.block_size)
ValueError: Data must be padded to 16 byte boundary in CBC mode
[Finished in 268ms]

My thinking is that something else has gone wrong, but I am at a loss as to what that could be. What have I done wrong with this build to get this error, and how can I correct it?
It turns out this is an issue with PyPDF2 itself. There is a three-line fix that can be injected to work around the error until it is patched upstream.
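The fix itself is only alluded to above, so here is a heavily hedged illustration of the general idea rather than the actual patch: pad the ciphertext to the 16-byte AES block size before the library's internal CBC decryption runs. The CryptAES class name and decrypt signature are assumptions based on the PyPDF2/_encryption.py frames in the traceback and may not match your installed version.

from Crypto.Util.Padding import pad
import PyPDF2._encryption as _enc

# Hedged sketch only: wrap the (assumed) internal AES decrypt so short
# ciphertexts are padded to a 16-byte boundary before CBC decryption.
_orig_decrypt = _enc.CryptAES.decrypt

def _padded_decrypt(self, data):
    if len(data) % 16 != 0:  # CBC mode requires whole 16-byte blocks
        data = pad(data, 16)
    return _orig_decrypt(self, data)

_enc.CryptAES.decrypt = _padded_decrypt

Run this before building the PdfFileMerger; treat it as a stopgap, not the official fix.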
Error while converting table from spark pool to pandas in synapse notebook
I have a table in the Spark pool which I am able to access in Synapse. However, I am not able to convert it to pandas. Below is the error:

%%pyspark
rawdata = df.select("*").toPandas()

ArrowInvalid: Casting from timestamp[us, tz=Etc/UTC] to timestamp[ns] would result in out of bounds timestamp: 33460473600000000
Traceback (most recent call last):
File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", line 2132, in toPandas
pdf = table.to_pandas()
File "pyarrow/array.pxi", line 715, in pyarrow.lib._PandasConvertible.to_pandas
File "pyarrow/table.pxi", line 1565, in pyarrow.lib.Table._to_pandas
File "/home/trusted-service-user/cluster-env/env/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 779, in table_to_blockmanager
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
File "/home/trusted-service-user/cluster-env/env/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 1115, in _table_to_blocks
list(extension_columns.keys()))
File "pyarrow/table.pxi", line 1028, in pyarrow.lib.table_to_blocks
File "pyarrow/error.pxi", line 84, in pyarrow.lib.check_status
pyarrow.lib.ArrowInvalid: Casting from timestamp[us, tz=Etc/UTC] to timestamp[ns] would result in out of bounds timestamp: 33460473600000000
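For context, a quick hedged sketch of why Arrow refuses this cast: the offending value is microseconds since the Unix epoch, which lands around the year 3030, far beyond what pandas' nanosecond-resolution datetime64 can represent (its maximum is in the year 2262).

import datetime
import pandas as pd

# The out-of-bounds value from the error, interpreted as microseconds
# since the Unix epoch.
us_value = 33460473600000000
print(datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=us_value))  # roughly the year 3030
print(pd.Timestamp.max)  # datetime64[ns] cannot represent dates past 2262-04-11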
Reading keys from an .npz file with multiple workers in pytorch dataloader?
I have an .npz file where I have stored a dictionary. The dictionary has some keys and the values are numpy arrays. I want to read the dictionary in the __getitem__() method of my dataloader. When I set the dataloader num_workers to 1, everything runs fine. But when I increase the number of workers, it throws the following error when reading the data from that npz file:

Traceback (most recent call last):
File "scripts/train.py", line 235, in <module>
train(args)
File "scripts/train.py", line 186, in train
solver(args.epoch, args.verbose)
File "/local-scratch/codebase/cap/lib/solver.py", line 174, in __call__
self._feed(self.dataloader["train"], "train", epoch_id)
File "/local-scratch/codebase/cap/lib/solver.py", line 366, in _feed
for data_dict in dataloader:
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 819, in __next__
return self._process_data(data)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 846, in _process_data
data.reraise()
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/_utils.py", line 369, in reraise
raise self.exc_type(msg)
zipfile.BadZipFile: Caught BadZipFile in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/local-scratch/codebase/cap/lib/dataset.py", line 947, in __getitem__
other_bbox_feat = self.box_features['{}-{}_{}.{}'.format(scene_id, target_object_id, ann_id, object_id)]
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/numpy/lib/npyio.py", line 255, in __getitem__
pickle_kwargs=self.pickle_kwargs)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/numpy/lib/format.py", line 763, in read_array
data = _read_bytes(fp, read_size, "array data")
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/numpy/lib/format.py", line 892, in _read_bytes
r = fp.read(size - len(data))
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/zipfile.py", line 872, in read
data = self._read1(n)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/zipfile.py", line 962, in _read1
self._update_crc(data)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/zipfile.py", line 890, in _update_crc
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
zipfile.BadZipFile: Bad CRC-32 for file 'scene0519_00-13_1.0.npy'

As far as I know, the pytorch dataloader uses multiprocessing for data loading. Perhaps the issue is with multiprocessing and .npz files. I really appreciate any help.
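If the hunch about multiprocessing is right, the usual culprit is a single NpzFile handle, opened once in the parent process, being shared by all forked workers. Below is a minimal hedged sketch of the common workaround: open the archive lazily inside each worker instead of sharing one handle. The class and key names are purely illustrative, not the asker's actual dataset code.

import numpy as np
from torch.utils.data import Dataset

class BoxFeatureDataset(Dataset):
    # Illustrative dataset that reads one array per key from an .npz archive.
    def __init__(self, npz_path, keys):
        self.npz_path = npz_path
        self.keys = keys
        self._npz = None          # opened per process, not in the parent

    def __getitem__(self, idx):
        if self._npz is None:     # first access inside this worker process
            self._npz = np.load(self.npz_path)
        return self._npz[self.keys[idx]]

    def __len__(self):
        return len(self.keys)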
compute() in dask not working
I am trying a simple parallel computation in Dask. This is my code:

import time
import dask as dask
import dask.distributed as distributed
import dask.dataframe as dd
import dask.delayed as delayed
from dask.distributed import Client, progress

client = Client('localhost:8786')
df = dd.read_csv('file.csv')
ddf = df.groupby(['col1'])[['col2']].sum()
ddf = ddf.compute()
print ddf

It seems fine from the documentation, but on running it I am getting this:

Traceback (most recent call last):
File "dask_prg1.py", line 17, in <module>
ddf = ddf.compute()
File "/usr/local/lib/python2.7/site-packages/dask/base.py", line 156, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/usr/local/lib/python2.7/site-packages/dask/base.py", line 402, in compute
results = schedule(dsk, keys, **kwargs)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 2159, in get
direct=direct)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1562, in gather
asynchronous=asynchronous)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 652, in sync
return sync(self.loop, func, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 275, in sync
six.reraise(*error[0])
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 260, in f
result[0] = yield make_coro()
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1439, in _gather
traceback)
File "/usr/local/lib/python2.7/site-packages/dask/bytes/core.py", line 122, in read_block_from_file
with lazy_file as f:
File "/usr/local/lib/python2.7/site-packages/dask/bytes/core.py", line 166, in __enter__
f = SeekableFile(self.fs.open(self.path, mode=mode))
File "/usr/local/lib/python2.7/site-packages/dask/bytes/local.py", line 58, in open
return open(self._normalize_path(path), mode=mode)
IOError: [Errno 2] No such file or directory: 'file.csv'

I am not understanding what is wrong. Kindly help me with this. Thank you in advance.
You may wish to pass the absolute file path to read_csv. The reason is that you are handing the work of opening and reading the file to a dask worker, and that worker may not have been started with the same working directory as your script or session.
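A minimal sketch of that suggestion, reusing the file name and scheduler address from the question (and assuming the workers can see the same filesystem as the client):

import os
import dask.dataframe as dd
from dask.distributed import Client

client = Client('localhost:8786')        # scheduler address from the question
path = os.path.abspath('file.csv')       # absolute path, so workers resolve the same file
df = dd.read_csv(path)
result = df.groupby(['col1'])[['col2']].sum().compute()
print(result)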
Kazoo package using Jython
Kazoo works fairly well under Python, but the project I am working on requires it to run under Jython. Here is the issue:

>>> from kazoo.client import KazooClient
>>> zk = KazooClient('127.0.0.1')
>>> zk.start()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\jython2.7.0\Lib\site-packages\kazoo\client.py", line 541, in start
event = self.start_async()
File "C:\jython2.7.0\Lib\site-packages\kazoo\client.py", line 576, in start_async
self._connection.start()
File "C:\jython2.7.0\Lib\site-packages\kazoo\protocol\connection.py", line 170, in start
rw_sockets = self.handler.create_socket_pair()
File "C:\jython2.7.0\Lib\site-packages\kazoo\handlers\threading.py", line 165, in create_socket_pair
return utils.create_socket_pair(socket)
File "C:\jython2.7.0\Lib\site-packages\kazoo\handlers\utils.py", line 148, in create_socket_pair
temp_srv_sock.bind(('', port))
File "C:\jython2.7.0\Lib\_socket.py", line 1367, in meth
return getattr(self._sock,name)(*args)
File "C:\jython2.7.0\Lib\_socket.py", line 812, in bind
self.bind_addr = _get_jsockaddr(address, self.family, self.type, self.proto, AI_PASSIVE)
File "C:\jython2.7.0\Lib\_socket.py", line 1565, in _get_jsockaddr
addr = _get_jsockaddr2(address_object, family, sock_type, proto, flags)
File "C:\jython2.7.0\Lib\_socket.py", line 1594, in _get_jsockaddr2
hostname = {AF_INET: INADDR_ANY, AF_INET6: IN6ADDR_ANY_INIT}[family]
KeyError: 0

As I already said, there is no such issue when running under CPython. I am pretty sure it is connected with the Jython version of the _socket.py file, but I don't know of a workaround. What can you recommend?
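For what it is worth, a minimal sketch of the failing lookup: the temporary socket's address family appears to arrive as 0 (AF_UNSPEC), which is not a key in the AF_INET/AF_INET6 mapping that Jython's _socket.py consults, hence the KeyError: 0. The dictionary values below are placeholders; only the missing key matters.

import socket

# Reproduce the shape of the failing lookup in Jython's _socket.py.
# The real mapping maps address families to wildcard bind addresses.
mapping = {socket.AF_INET: "INADDR_ANY", socket.AF_INET6: "IN6ADDR_ANY_INIT"}
family = 0  # AF_UNSPEC -- what the socket pair's family seems to report here
try:
    mapping[family]
except KeyError as exc:
    print("KeyError:", exc)  # KeyError: 0, as in the traceback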