TypeError: Wrong number or type of arguments for overloaded function 'new_Date' - pandas

I am new to Python. I am getting an error when running the code below. The issue seems to be with the dates. Can someone help me correct it, please? I have tried changing the date format in Excel, but that does not solve the issue. The Excel file has a list of several bonds. I want to generate the coupon dates of the different bonds.
BondData = pd.read_excel (r'C:\Users\Avishen\Desktop\Python\BONDDATA.xlsx')
Data = pd.DataFrame(BondData)
def scheduledates():
    """Build a semiannual QuantLib schedule between an issue and a maturity date.

    Dates are generated backward, with no business-day adjustment and no
    end-of-month rule, on the United States calendar.

    NOTE(review): ``issueDate`` and ``maturityDate`` are not assigned anywhere
    in this snippet; they must already exist in an enclosing scope for the
    call to ``ql.Schedule`` to work.

    Returns:
        The ``ql.Schedule`` built from the settings below.
    """
    tenor = ql.Period(ql.Semiannual)
    day_count = ql.Thirty360  # assigned but not used by the schedule below
    calendar = ql.UnitedStates()
    businessConvention = ql.Unadjusted
    dateGeneration = ql.DateGeneration.Backward
    monthEnd = False
    # Dates in Bond Period
    # The same (unadjusted) convention is passed twice: once for the regular
    # dates and once for the termination date.
    return ql.Schedule(issueDate, maturityDate, tenor, calendar, businessConvention,
                       businessConvention, dateGeneration, monthEnd)
new_df["Dates"]= Data.apply(lambda x: scheduledates(),axis = 1)
new_df["ISIN"] = Data.ISIN
new_df
Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-4-877415e9cf83> in <module>
21 businessConvention , dateGeneration, monthEnd)
22
---> 23 new_df["Dates"]= Data.apply(lambda x: scheduledates(),axis = 1)
24 new_df["ISIN"] = Data.ISIN
25 new_df
~\anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7546 kwds=kwds,
7547 )
-> 7548 return op.get_result()
7549
7550 def applymap(self, func) -> "DataFrame":
~\anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
178 return self.apply_raw()
179
--> 180 return self.apply_standard()
181
182 def apply_empty_result(self):
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
269
270 def apply_standard(self):
--> 271 results, res_index = self.apply_series_generator()
272
273 # wrap results
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
298 for i, v in enumerate(series_gen):
299 # ignore SettingWithCopy here in case the user mutates
--> 300 results[i] = self.f(v)
301 if isinstance(results[i], ABCSeries):
302 # If we have a view on v, we need to make a copy because
<ipython-input-4-877415e9cf83> in <lambda>(x)
21 businessConvention , dateGeneration, monthEnd)
22
---> 23 new_df["Dates"]= Data.apply(lambda x: scheduledates(),axis = 1)
24 new_df["ISIN"] = Data.ISIN
25 new_df
<ipython-input-4-877415e9cf83> in scheduledates()
8
9 def scheduledates():
---> 10 issueDate = ql.Date(Data.issuedate)
11 maturityDate = ql.Date(Data.maturitydate)
12 tenor = ql.Period(ql.Semiannual)
~\anaconda3\lib\site-packages\QuantLib\QuantLib.py in __init__(self, *args)
425
426 def __init__(self, *args):
--> 427 _QuantLib.Date_swiginit(self, _QuantLib.new_Date(*args))
428
429 def weekdayNumber(self):
TypeError: Wrong number or type of arguments for overloaded function 'new_Date'.
Possible C/C++ prototypes are:
Date::Date()
Date::Date(Day,Month,Year)
Date::Date(Day,Month,Year,Hour,Minute,Second,Millisecond,Microsecond)
Date::Date(Day,Month,Year,Hour,Minute,Second,Millisecond)
Date::Date(Day,Month,Year,Hour,Minute,Second)
Date::Date(BigInteger)
Date::Date(std::string const &,std::string)
---------------------------------------------------------------------------
Data = pd.DataFrame(BondData)
Fields from Bond Data
ISIN
issuedate
maturitydate
coupon
Tradeyield
Bond_Price
MarketPrice
Nominal_Amount

From the traceback, the problem is the line:
issueDate = ql.Date(Data.issuedate)
(which for some reason is not in the code you pasted). Coming from Excel, issuedate should be an integer and thus compatible with the ql.Date constructor, but it's possible that pandas is reading it as a string or some other type. You should examine the data frame and check the type of the column. If it's not what you expect, you'll have to figure out whether there are data in that column that pandas can't interpret as integers, and either clean them up or force the conversion somehow before passing them to ql.Date.

Related

Pandas Error Handling - "day is out of range for month"

How do I spot and handle data that is in the wrong format in pandas?
I tried to convert strings into pandas datetimes; however,
somewhere in the middle of the data a value did not match the expected format (I assume), and an error was raised.
I want to print the offending value
and fix it into the right format so that I can keep moving on.
How do I handle this issue?
My code is as below.
def date_format(df):
    """Parse the 'Issue Date' column of ``df`` into pandas datetimes.

    Args:
        df: DataFrame containing an 'Issue Date' column.

    Returns:
        The result of ``pd.to_datetime`` on that column; ``df`` itself is
        not modified.

    Raises:
        Whatever ``pd.to_datetime`` raises for a value it cannot parse; no
        ``errors=`` argument is passed, so bad values are not coerced.
    """
    return pd.to_datetime(df['Issue Date'])
The error message is
--------------------------------------------------------------------------- TypeError Traceback (most recent call
last)
~/anaconda3/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py
in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors,
require_iso8601, allow_object) 2053 try:
-> 2054 values, tz_parsed = conversion.datetime_to_datetime64(data) 2055 # If
tzaware, these values represent unix timestamps, so we
pandas/_libs/tslibs/conversion.pyx in
pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call
last) in
----> 1 seg0=generateDb(csv,2017,1000000,0)
in generateDb(csv, year, segment,
index)
2
3 targetSeg0=segment_df(csv,segment,index)
----> 4 targetSeg0['Issue Date']=date_format(targetSeg0)
5 targetSeg0=remove_etc(targetSeg0)
6 filter_date(targetSeg0,year)
in date_format(df)
1 def date_format(df):
----> 2 target = pd.to_datetime(df['Issue Date'])
3 return target
~/anaconda3/lib/python3.8/site-packages/pandas/core/tools/datetimes.py
in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact,
unit, infer_datetime_format, origin, cache)
797 result = result.tz_localize(tz)
798 elif isinstance(arg, ABCSeries):
--> 799 cache_array = _maybe_cache(arg, format, cache, convert_listlike)
800 if not cache_array.empty:
801 result = arg.map(cache_array)
~/anaconda3/lib/python3.8/site-packages/pandas/core/tools/datetimes.py
in _maybe_cache(arg, format, cache, convert_listlike)
168 unique_dates = unique(arg)
169 if len(unique_dates) < len(arg):
--> 170 cache_dates = convert_listlike(unique_dates, format)
171 cache_array = Series(cache_dates, index=unique_dates)
172 return cache_array
~/anaconda3/lib/python3.8/site-packages/pandas/core/tools/datetimes.py
in _convert_listlike_datetimes(arg, format, name, tz, unit, errors,
infer_datetime_format, dayfirst, yearfirst, exact)
457 assert format is None or infer_datetime_format
458 utc = tz == "utc"
--> 459 result, tz_parsed = objects_to_datetime64ns(
460 arg,
461 dayfirst=dayfirst,
~/anaconda3/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py
in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors,
require_iso8601, allow_object) 2057 return
values.view("i8"), tz_parsed 2058 except (ValueError,
TypeError):
-> 2059 raise e 2060 2061 if tz_parsed is not None:
~/anaconda3/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py
in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors,
require_iso8601, allow_object) 2042 2043 try:
-> 2044 result, tz_parsed = tslib.array_to_datetime( 2045 data, 2046 errors=errors,
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslib.pyx in
pandas._libs.tslib.array_to_datetime_object()
pandas/_libs/tslib.pyx in
pandas._libs.tslib.array_to_datetime_object()
pandas/_libs/tslibs/parsing.pyx in
pandas._libs.tslibs.parsing.parse_datetime_string()
pandas/_libs/tslibs/parsing.pyx in
pandas._libs.tslibs.parsing._parse_delimited_date()
datetime.pxd in cpython.datetime.datetime_new()
ValueError: day is out of range for month
I'm eventually gonna filter the data as below.
def filter_date(df, year):
    """Return the rows of ``df`` whose 'Issue Date' falls in calendar year ``year``.

    Args:
        df: DataFrame with an 'Issue Date' column that can be compared
            against pandas timestamps.
        year: The calendar year to keep (anything that formats to a
            four-digit year, e.g. ``2017``).

    Returns:
        A new DataFrame holding only the matching rows.
    """
    issue_dates = df['Issue Date']
    year_start = pd.to_datetime(f"{year}-01-01")
    # Half-open upper bound: comparing against "<= Dec 31" means midnight on
    # Dec 31, which would drop timestamps later on the last day of the year.
    next_year_start = year_start + pd.DateOffset(years=1)
    return df[(issue_dates >= year_start) & (issue_dates < next_year_start)]
Sample data form is as below.
The error occurs because of bad data like the row below (a 51st month doesn't exist). I guess this is where the error comes from.
Based on your comments I assume this will help:
Using as example (99/99/9999 is the incorrect data):
df = pd.DataFrame(["09/26/2016", "06/14/2017", "09/05/2018", "06/16/2017", "05/09/2018", "99/99/9999"], columns=['Issue Date']).
You mean something like this:
pd.to_datetime(df["Issue Date"], errors="coerce")
output:
see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html
If you then want to drop the rows containing NaT, use:
df = df.dropna(axis=0, subset=['Issue Date'])

Numba / Numpy - Understanding Error Message

I'm experimenting with Numba to try and speed up a union-find algorithm I'm working on. Here's some example code. When I experiment with some sample data I cannot understand the type complaint that Numba appears to be raising.
from numba import jit
import numpy as np
indices = np.arange(8806806, dtype=np.int64)
sizes = np.ones(8806806, dtype=np.int64)
connected_components = 8806806
#jit(npython=True)
def root(p: int) -> int:
    """Return the root index of the set that contains ``p``.

    Follows links in the module-level ``indices`` array until it reaches an
    entry that points to itself. Each node visited on the way is re-pointed
    at its grandparent, so ``indices`` is modified as a side effect.
    """
    while p != indices[p]:
        indices[p] = indices[indices[p]]  # skip one level: point at grandparent
        p = indices[p]
    return p
#jit(npython=True)
def connected(p: int, q: int) -> bool:
    """Return True when ``p`` and ``q`` resolve to the same root."""
    return root(p) == root(q)
#jit(npython=True)
def union(p: int, q: int) -> None:
    """Merge the sets containing ``p`` and ``q``.

    Does nothing when both already share a root. Otherwise the root with the
    smaller entry in the module-level ``sizes`` array is attached under the
    other one, the surviving root's size is increased accordingly, and the
    module-level ``connected_components`` counter is decremented.
    """
    # The counter is rebound below; without this declaration the augmented
    # assignment would make it a local name and raise UnboundLocalError.
    global connected_components

    root1 = root(p)
    root2 = root(q)
    if root1 == root2:
        return
    if sizes[root1] < sizes[root2]:
        indices[root1] = root2
        sizes[root2] += sizes[root1]
    else:
        indices[root2] = root1
        sizes[root1] += sizes[root2]
    connected_components -= 1
#jit(nopython=True)
def process_values(arr):
    """Union every pair of consecutive values within each row of ``arr``.

    Args:
        arr: An iterable of array-like rows; each row must support
            ``astype('int64')``. Rows may have different lengths.

    NOTE(review): the pasted code computed ``typed_arr`` but then zipped
    ``arr`` with ``arr[1:]``, pairing whole rows instead of the values inside
    a row. Pairing consecutive values of each row is assumed to be the
    intent — confirm against the original post.
    """
    for row in arr:
        typed_arr = row.astype('int64')
        for first, second in zip(typed_arr, typed_arr[1:]):
            union(first, second)
process_values(
np.array(
[np.array([8018361, 4645960]),
np.array([1137555, 7763897]),
np.array([7532943, 2248813]),
np.array([5352737, 71466, 3590473, 5352738, 2712260])], dtype='object'))
I cannot understand this error:
TypingError Traceback (most recent call last)
<ipython-input-45-62735e65f581> in <module>
44 np.array([1137555, 7763897]),
45 np.array([7532943, 2248813]),
---> 46 np.array([5352737, 71466, 3590473, 5352738, 2712260])], dtype='object'))
/opt/conda/lib/python3.7/site-packages/numba/core/dispatcher.py in _compile_for_args(self, *args, **kws)
399 e.patch_message(msg)
400
--> 401 error_rewrite(e, 'typing')
402 except errors.UnsupportedError as e:
403 # Something unsupported is present in the user code, add help info
/opt/conda/lib/python3.7/site-packages/numba/core/dispatcher.py in error_rewrite(e, issue_type)
342 raise e
343 else:
--> 344 reraise(type(e), e, None)
345
346 argtypes = []
/opt/conda/lib/python3.7/site-packages/numba/core/utils.py in reraise(tp, value, tb)
78 value = tp()
79 if value.__traceback__ is not tb:
---> 80 raise value.with_traceback(tb)
81 raise value
82
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
non-precise type array(pyobject, 1d, C)
[1] During: typing of argument at <ipython-input-45-62735e65f581> (36)
File "<ipython-input-45-62735e65f581>", line 36:
def process_values(arr):
for row in arr:
^
Does this have anything to do with process_values taking an array of irregularly shaped arrays? Any pointers? Thanks!
The problem is that Numba does not accept arrays of dtype 'object'. You seem to be placing arrays inside arrays; you will have to use lists inside lists instead. Look for the typed.List class in Numba: https://numba.pydata.org/numba-doc/dev/reference/pysupported.html#typed-list
Alternatively, you can use awkward arrays: https://github.com/scikit-hep/awkward-1.0

TypeError: "Set type is unordered" in OSMnx isochrones example

Running the OSMnx isochrones example, I get a TypeError: "Set type is unordered" on the last cell.
Any idea what's going wrong?
OSMnx 0.15.1 on Python 3.8.5, Pandas 1.1.1, GeoPandas 0.8.1.
It works as expected with Pandas 1.0.5, but fails with Pandas 1.1 or 1.1.1
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
in
26 return isochrone_polys
27
---> 28 isochrone_polys = make_iso_polys(G, edge_buff=25, node_buff=0, infill=True)
29 fig, ax = ox.plot_graph(G, show=False, close=False, edge_color='#999999', edge_alpha=0.2, node_size=0)
30 for polygon, fc in zip(isochrone_polys, iso_colors):
in make_iso_polys(G, edge_buff, node_buff, infill)
5
6 node_points = [Point((data['x'], data['y'])) for node, data in subgraph.nodes(data=True)]
----> 7 nodes_gdf = gpd.GeoDataFrame({'id': subgraph.nodes()}, geometry=node_points)
8 nodes_gdf = nodes_gdf.set_index('id')
9
~/miniconda3/envs/osmnx-examples/lib/python3.8/site-packages/geopandas/geodataframe.py in __init__(self, *args, **kwargs)
87 crs = kwargs.pop("crs", None)
88 geometry = kwargs.pop("geometry", None)
---> 89 super(GeoDataFrame, self).__init__(*args, **kwargs)
90
91 # need to set this before calling self['geometry'], because
~/miniconda3/envs/osmnx-examples/lib/python3.8/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
466
467 elif isinstance(data, dict):
--> 468 mgr = init_dict(data, index, columns, dtype=dtype)
469 elif isinstance(data, ma.MaskedArray):
470 import numpy.ma.mrecords as mrecords
~/miniconda3/envs/osmnx-examples/lib/python3.8/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype)
281 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
282 ]
--> 283 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
284
285
~/miniconda3/envs/osmnx-examples/lib/python3.8/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
81
82 # don't force copy because getting jammed in an ndarray anyway
---> 83 arrays = _homogenize(arrays, index, dtype)
84
85 columns = ensure_index(columns)
~/miniconda3/envs/osmnx-examples/lib/python3.8/site-packages/pandas/core/internals/construction.py in _homogenize(data, index, dtype)
349 val = dict(val)
350 val = lib.fast_multiget(val, oindex._values, default=np.nan)
--> 351 val = sanitize_array(
352 val, index, dtype=dtype, copy=False, raise_cast_failure=False
353 )
~/miniconda3/envs/osmnx-examples/lib/python3.8/site-packages/pandas/core/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure)
450 subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
451 elif isinstance(data, abc.Set):
--> 452 raise TypeError("Set type is unordered")
453 elif lib.is_scalar(data) and index is not None and dtype is not None:
454 data = maybe_cast_to_datetime(data, dtype)
TypeError: Set type is unordered
This is an issue in the example. It initializes a data frame with subgraph.nodes():
nodes_gdf = gpd.GeoDataFrame({'id': subgraph.nodes()}, geometry=node_points)
subgraph.nodes() is a NodeView, which behaves both like a dictionary and a set. These are unordered types, but Pandas needs an ordered collection such as a numpy array or list. Pandas 1.1 introduced a type check to catch this in issue 32582.
A workaround is to explicitly convert the NodeView to a list:
nodes_gdf = gpd.GeoDataFrame({'id': list(subgraph.nodes())}, geometry=node_points)
I submitted a bug and a PR, which has already been accepted, so this is no longer an issue.

How to write seed_features that include a conditional statement

I'm trying to write a seed feature that produces reward if place == 1 else 0.
place and reward are both ft.variable_types.Numeric:
Entity: results
Variables:
id (dtype: index)
place (dtype: numeric)
reward (dtype: numeric)
I've tried the following alternatives with no luck:
Alternative 1
roi = (ft.Feature(es['results']['reward'])
if (ft.Feature(es['results']['place']) == 1)
else 0).rename('roi')
produces AssertionError: Column "roi" missing frome dataframe
when generating the features.
Alternative 2
roi = ((ft.Feature(es['results']['place']) == 1) *
ft.Feature(es['results']['reward'])).rename('roi')
produces AssertionError: Provided inputs don't match input type requirements when assigning the seed feature.
Alternative 2 should work since in Python:
>>> True * 3.14
3.14
>>> False * 3.14
0.0
The full stack trace:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-211-94dd07d98076> in <module>()
23
24
---> 25 roi = ((ft.Feature(es['results']['place']) == 1) * ft.Feature(es['results']['reward'])).rename('roi')
~/dev/venv/lib/python3.6/site-packages/featuretools/feature_base/feature_base.py in __mul__(self, other)
287 def __mul__(self, other):
288 """Multiply by other"""
--> 289 return self._handle_binary_comparision(other, primitives.MultiplyNumeric, primitives.MultiplyNumericScalar)
290
291 def __rmul__(self, other):
~/dev/venv/lib/python3.6/site-packages/featuretools/feature_base/feature_base.py in _handle_binary_comparision(self, other, Primitive, PrimitiveScalar)
230 def _handle_binary_comparision(self, other, Primitive, PrimitiveScalar):
231 if isinstance(other, FeatureBase):
--> 232 return Feature([self, other], primitive=Primitive)
233
234 return Feature([self], primitive=PrimitiveScalar(other))
~/dev/venv/lib/python3.6/site-packages/featuretools/feature_base/feature_base.py in __new__(self, base, entity, groupby, parent_entity, primitive, use_previous, where)
755 primitive=primitive,
756 groupby=groupby)
--> 757 return TransformFeature(base, primitive=primitive)
758
759 raise Exception("Unrecognized feature initialization")
~/dev/venv/lib/python3.6/site-packages/featuretools/feature_base/feature_base.py in __init__(self, base_features, primitive, name)
660 relationship_path=RelationshipPath([]),
661 primitive=primitive,
--> 662 name=name)
663
664 #classmethod
~/dev/venv/lib/python3.6/site-packages/featuretools/feature_base/feature_base.py in __init__(self, entity, base_features, relationship_path, primitive, name, names)
56 self._names = names
57
---> 58 assert self._check_input_types(), ("Provided inputs don't match input "
59 "type requirements")
60
AssertionError: Provided inputs don't match input type requirements
This should work on featuretools v0.11.0. Here is an example using a demo dataset. Both unit_price and total are numeric.
import featuretools as ft
es = ft.demo.load_retail(nrows=100)
es['order_products']
Entity: order_products
Variables:
...
unit_price (dtype: numeric)
total (dtype: numeric)
...
I create the seed feature.
unit_price = ft.Feature(es['order_products']['unit_price'])
total = ft.Feature(es['order_products']['total'])
seed = ((total == 1) * unit_price).rename('seed')
Then, calculate the feature matrix.
fm, fd = ft.dfs(target_entity='customers', entityset=es, seed_features=[seed])
fm.filter(regex='seed').columns.tolist()[:5]
['SUM(order_products.seed)',
'STD(order_products.seed)',
'MAX(order_products.seed)',
'SKEW(order_products.seed)',
'MIN(order_products.seed)']
In your case, this would be the seed feature.
place = ft.Feature(es['results']['place'])
reward = ft.Feature(es['results']['reward'])
# The comparison belongs on `place`: the boolean (place == 1) multiplied by
# `reward` gives reward where place is 1 and 0 everywhere else.
roi = ((place == 1) * reward).rename('roi')
Let me know if that helps.

Trimming column names is generating a ValueError

I have a table which I run through a function to trim its column names down to 128 characters (I know that's really long; there isn't anything I can do about that) so that I can use to_sql to create a database table from it.
def truncate_column_names(df, length):
    """Return a copy of ``df`` with over-long column names shortened.

    Any column name longer than ``length`` characters is cut to its first
    ``length - 3`` characters followed by "..."; shorter names are kept.

    Args:
        df: DataFrame whose column labels are strings.
        length: Maximum allowed length of a column name.

    Returns:
        A new DataFrame with the renamed columns; ``df`` is not modified.

    Note:
        Distinct names that share their first ``length - 3`` characters are
        mapped to the same shortened name, so the result can contain
        duplicate column labels.
    """
    rename = {
        col: col[:length - 3] + "..."
        for col in df.columns
        if len(col) > length
    }
    return df.rename(columns=rename)
This function works fine and I get a table out just fine but the problem comes when I tried to save the file I get the error
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
The method I have doing some housekeeping before saving to a file included dropping duplicates and that is where this error is being spit out. I tested this by saving the original dataFrame and then just loading it, running the truncate function, and then trying drop_duplicates on the result and I get the same error.
The headers for the file before I try truncating looks like this:
http://pastebin.com/WXmvwHDg
I trimmed the file down to 1 record and still have the problem.
This was a result of the truncating causing some columns to have non-unique names.
To confirm this was an issue I did a short test:
In [113]: df = pd.DataFrame(columns=["ab", "ac", "ad"])
In [114]: df
Out[114]:
Empty DataFrame
Columns: [ab, ac, ad]
Index: []
In [115]: df.drop_duplicates()
Out[115]:
Empty DataFrame
Columns: [ab, ac, ad]
Index: []
In [116]: df.columns
Out[116]: Index([u'ab', u'ac', u'ad'], dtype='object')
In [117]: df.columns = df.columns.str[:1]
In [118]: df
Out[118]:
Empty DataFrame
Columns: [a, a, a]
Index: []
In [119]: df.drop_duplicates()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-119-daf275b6788b> in <module>()
----> 1 df.drop_duplicates()
C:\Miniconda\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kw
args)
86 else:
87 kwargs[new_arg_name] = new_arg_value
---> 88 return func(*args, **kwargs)
89 return wrapper
90 return _deprecate_kwarg
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in drop_duplicates(self, su
bset, take_last, inplace)
2826 deduplicated : DataFrame
2827 """
-> 2828 duplicated = self.duplicated(subset, take_last=take_last)
2829
2830 if inplace:
C:\Miniconda\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kw
args)
86 else:
87 kwargs[new_arg_name] = new_arg_value
---> 88 return func(*args, **kwargs)
89 return wrapper
90 return _deprecate_kwarg
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in duplicated(self, subset,
take_last)
2871
2872 vals = (self[col].values for col in subset)
-> 2873 labels, shape = map(list, zip( * map(f, vals)))
2874
2875 ids = get_group_index(labels, shape, sort=False, xnull=False)
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in f(vals)
2860
2861 def f(vals):
-> 2862 labels, shape = factorize(vals, size_hint=min(len(self), _SI
ZE_HINT_LIMIT))
2863 return labels.astype('i8',copy=False), len(shape)
2864
C:\Miniconda\lib\site-packages\pandas\core\algorithms.pyc in factorize(values, s
ort, order, na_sentinel, size_hint)
133 table = hash_klass(size_hint or len(vals))
134 uniques = vec_klass()
--> 135 labels = table.get_labels(vals, uniques, 0, na_sentinel)
136
137 labels = com._ensure_platform_int(labels)
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_labels (pandas\ha
shtable.c:13946)()
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
and got the same result. Checking df.columns.unique() after the truncation showed that I had ~200 duplicate column names.