apply my created function to a data frame - pandas

I have the following function to calculate the Black-Scholes model, where I manually paste the necessary data for S0, K, T, r and market_price into the function. I would like to apply this same function to a pandas DataFrame that holds the values needed to perform the calculation.
Example DataFrame:
data = {'Name': ['BOVAE115', 'BOVAE119', 'BBDCE251', 'BBDCE246'],
        'Valor': [110.050003, 110.050003, 19.500000, 19.500000],
        'Strike Value': [15.00, 19.00, 24.67, 25.19],
        'Temp': [0.119048, 0.119048, 0.119048, 0.119048],
        'Taxa': [11.65, 11.65, 11.65, 11.65],
        'Market Price': [0.391968, 0.391968, 0.391968, 0.391968],
        'Order': ['c', 'c', 'c', 'c']}

# Create DataFrame
df = pd.DataFrame(data)
df
How do I apply the created function to this list of values?
Here is the function code:
S0 = df['Valor']
K = df['Strike Value']
T = df['Temp']
r = df['Taxa']
market_price = df['Market Price']
flag = df['Order']
from py_vollib.black_scholes import black_scholes as bs
from py_vollib.black_scholes.greeks.analytical import vega

def implied_vol(S0, K, T, r, market_price, flag='c', tol=0.00001):
    """Calculate the implied volatility of a European option.

    S0: stock price
    K: strike price
    T: time to maturity
    r: risk-free rate
    market_price: option price in market
    """
    max_iter = 500  # max no. of iterations
    vol_old = 0.3   # initial guess
    for k in range(max_iter):
        bs_price = bs(flag, S0, K, T, r, vol_old)
        Cprime = vega(flag, S0, K, T, r, vol_old) * 100
        C = bs_price - market_price
        vol_new = vol_old - C / Cprime
        new_bs_price = bs(flag, S0, K, T, r, vol_new)
        if abs(vol_old - vol_new) < tol or abs(new_bs_price - market_price) < tol:
            break
        vol_old = vol_new
    implied_vol = vol_new
    return implied_vol
S0 = 14.73
K = 15.04
T = 20/252
r = 0.1165
market_price = 0.41
print(implied_vol(S0, K, T, r, market_price)*100)
I want the implied volatility to be returned in a DataFrame column. How can I apply this function to my DataFrame?
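A minimal sketch of one way to do this, using DataFrame.apply row by row with the implied_vol function defined above. One assumption to flag: the 'Taxa' column appears to hold the rate in percent (11.65), while the scalar example uses 0.1165, so the sketch divides by 100; drop that if your column is already in decimal form.

df['Implied Vol'] = df.apply(
    lambda row: implied_vol(
        row['Valor'],
        row['Strike Value'],
        row['Temp'],
        row['Taxa'] / 100,       # assumption: 'Taxa' is in percent
        row['Market Price'],
        flag=row['Order'],
    ) * 100,                     # express the result in percent
    axis=1,
)

py_vollib's black_scholes and vega work on scalars, so a row-wise apply is the straightforward route (whether Newton converges still depends on your inputs). For large frames a vectorized port such as py_vollib_vectorized may be faster, but for a few rows apply is fine.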

Related

SMOTE adds many rows with 0 values to dataframe

Please help me; I cannot understand why X_synthetic_df returns hundreds of rows with zero values. All of the rows have normal values up to row 1745. From that row on, every row contains nothing but zeros.
def nearest_neighbors(nominal_columns, numeric_columns, df, row, k):
    def distance(row1, row2):
        distance = 0
        for col in nominal_columns:
            if row1[col] != row2[col]:
                distance += 1
        for col in numeric_columns:
            distance += (row1[col] - row2[col]) ** 2
        return distance ** 0.5

    distances = []
    for i in range(len(df)):
        r = df.iloc[i]
        if r.equals(row):
            continue
        d = distance(row, r)
        if d != 0:
            distances.append((d, i))
    distances.sort()
    nearest = [i for d, i in distances[:k]]
    return nearest
def smotenc(X, y, nominal_cols, numeric_cols, k=5, seed=None):
    minority_class = y[y == 1]
    majority_class = y[y == 0]
    minority_samples = X[y == 1]
    minority_target = y[y == 1]
    n_synthetic_samples = len(majority_class) - len(minority_class)
    synthetic_samples = np.zeros((n_synthetic_samples, X.shape[1]))
    if seed is not None:
        np.random.seed(seed)
    for i in range(len(minority_samples)):
        nn = nearest_neighbors(nominal_cols, numeric_cols, minority_samples,
                               minority_samples.iloc[i], k=k)
        for j in range(min(k, n_synthetic_samples - i * k)):
            nn_idx = int(np.random.choice(a=nn))
            diff = minority_samples.iloc[nn_idx] - minority_samples.iloc[i]
            print(diff)
            if (diff == 0).all():
                continue
            synthetic_sample = minority_samples.iloc[i] + np.random.rand() * diff
            synthetic_samples[(i * k) + j, :] = synthetic_sample
    X_resampled = pd.concat([X[y == 1], pd.DataFrame(synthetic_samples, columns=X.columns)], axis=0)
    y_resampled = np.concatenate((y[y == 1], [1] * n_synthetic_samples))
    return X_resampled, y_resampled

minority_features = df_nominal.columns.get_indexer(df_nominal.columns)
synthetic = smotenc(check_x.head(3000), check_y.head(3000), nominal_cols, numeric_cols, seed=None)
X_synthetic_df = synthetic[0]
X_synthetic_df = pd.DataFrame(X_synthetic_df, columns=X.columns)
I want a DataFrame with n synthetic samples, where n is the difference between the number of majority-class and minority-class samples.
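A likely cause, offered as a sketch rather than a certain diagnosis: synthetic_samples is pre-allocated with np.zeros((n_synthetic_samples, ...)), but the loops write at most len(minority_samples) * k rows (slots i*k + j), and the continue branch skips some of those. Every slot that is never written keeps its zeros. With k=5 the last writable slot is 5 * len(minority_samples) - 1, which is consistent with rows turning to zeros from row 1745 onward (349 minority samples * 5). A hedged rewrite that only keeps rows actually generated and keeps drawing base samples until the quota is met (note this changes the original fixed i*k slot layout; it reuses nearest_neighbors from above and assumes at least two distinct minority rows):

import numpy as np
import pandas as pd

def smotenc_fixed(X, y, nominal_cols, numeric_cols, k=5, seed=None):
    if seed is not None:
        np.random.seed(seed)
    minority_samples = X[y == 1]
    n_needed = (y == 0).sum() - (y == 1).sum()
    rows = []
    while len(rows) < n_needed:
        # draw a random base sample instead of a fixed i*k slot
        i = np.random.randint(len(minority_samples))
        nn = nearest_neighbors(nominal_cols, numeric_cols,
                               minority_samples, minority_samples.iloc[i], k=k)
        nn_idx = int(np.random.choice(a=nn))
        diff = minority_samples.iloc[nn_idx] - minority_samples.iloc[i]
        if (diff == 0).all():
            continue  # skip degenerate neighbors instead of leaving a zero row
        rows.append(minority_samples.iloc[i] + np.random.rand() * diff)
    synthetic = pd.DataFrame(rows, columns=X.columns)
    X_resampled = pd.concat([minority_samples, synthetic], axis=0)
    y_resampled = np.concatenate((np.asarray(y[y == 1]),
                                  np.ones(len(synthetic), dtype=int)))
    return X_resampled, y_resampled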

divide by zero encountered in true_divide error without having zeros in my data

This is my code, this is my data, and this is the output. I've tried adding one to the values on the x axis, thinking that such small values might be interpreted as zeros. I have no idea what true_divide could be, and I cannot explain this divide-by-zero error since there is not a single zero in my data; I checked all of my 2500 data points. Hoping that some of you can provide some clarification. Thanks in advance.
import pandas as pd
import matplotlib.pyplot as plt
from iminuit import cost, Minuit
import numpy as np

frame = pd.read_excel('/Users/lorenzotecchia/Desktop/Analisi Laboratorio/Analisi dati/Quinta Esperienza/500Hz/F0000CH2.xlsx', 'F0000CH2')
data = pd.read_excel('/Users/lorenzotecchia/Desktop/Analisi Laboratorio/Analisi dati/Quinta Esperienza/500Hz/F0000CH1.xlsx', 'F0000CH1')
# tempi_500Hz = pd.DataFrame(frame, columns=['x'])
# Vout_500Hz = pd.DataFrame(frame, columns=['y'])
tempi_500Hz = pd.DataFrame(frame, columns=['x1'])
Vout_500Hz = pd.DataFrame(frame, columns=['y1'])
# Vin_500Hz = pd.DataFrame(data, columns=['y'])

def fit_esponenziale(x, α, β):
    return α * (1 - np.exp(-x / β))

plt.xlabel('ω(Hz)')
plt.ylabel('Attenuazioni')
plt.title('Fit Parabolico')
plt.scatter(tempi_500Hz, Vout_500Hz)

least_squares = cost.LeastSquares(tempi_500Hz, Vout_500Hz, np.sqrt(Vout_500Hz), fit_esponenziale)
m = Minuit(least_squares, α=0, β=0)
m.migrad()
m.hesse()

plt.errorbar(tempi_500Hz, Vout_500Hz, fmt="o", label="data")
plt.plot(tempi_500Hz, fit_esponenziale(tempi_500Hz, *m.values), label="fit")
fit_info = [
    f"$\\chi^2$ / $n_\\mathrm{{dof}}$ = {m.fval:.1f} / {len(tempi_500Hz) - m.nfit}",
]
for p, v, e in zip(m.parameters, m.values, m.errors):
    fit_info.append(f"{p} = ${v:.3f} \\pm {e:.3f}$")
plt.legend()
plt.show()
(The input data and the output with an example of the data are shown as images in the original post.)
There is actually a way to fit this completely linearly; see e.g. here.
import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import cumtrapz

def fit_exp(x, a, b, c):
    return a * (1 - np.exp(-b * x)) + c

nn = 170
xl = np.linspace(0, 0.001, nn)
yl = fit_exp(xl, 15, 5300, -8.1) + np.random.normal(size=nn, scale=0.05)

"""
with y = a (1 - exp(-b x)) + c
we have Y = int y = -1/b y + d x + h ... try it out or see below
so we get a linear equation for b (actually 1/b) to optimize
this goes as:
"""
Yl = cumtrapz(yl, xl, initial=0)
ST = [xl, yl, np.ones(nn)]
S = np.transpose(ST)
eta = np.dot(ST, Yl)
A = np.dot(ST, S)
sol = np.linalg.solve(A, eta)
bFit = -1 / sol[1]
print(bFit)

"""
now we can do a normal linear fit
"""
ST = [fit_exp(xl, 1, bFit, 0), np.ones(nn)]
S = np.transpose(ST)
A = np.dot(ST, S)
eta = np.dot(ST, yl)
aFit, cFit = np.linalg.solve(A, eta)
print(aFit, cFit)
print(aFit + cFit, sol[0])  ### consistency check

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(xl, yl, marker='+', ls='')
## at best a sufficient fit, at worst a good start for a non-linear fit
ax.plot(xl, fit_exp(xl, aFit, bFit, cFit))
plt.show()

"""
a (1 - exp(-b x)) + c = a + c - a exp(-b x) = d - a exp(-b x)
int y = d x + a/b exp(-b x) + g
      = d x + a/b exp(-b x) + a/b - a/b + c/b - c/b + g
      = d x - 1/b (a - a exp(-b x) + c) + c/b + a/b + g
      = d x + k y + h
with k = -1/b and h = g + c/b + a/b.
d and h are fitted but not used, but as a + c = d we can check
for consistency.
"""
Here is a working Minuit vs. curve_fit example. I scaled the function such that the decay in the exponential is of order 1 (generally a good idea for non-linear fits). Eventually, both methods give very similar results.
Note: I leave it open whether the error makes sense like this or not. The starting values equal to zero were definitely a bad idea.
import pandas as pd
import matplotlib.pyplot as plt
from iminuit import cost, Minuit
from scipy.optimize import curve_fit
import numpy as np

def fit_exp(x, a, b, c):
    return a * (1 - np.exp(-1000 * b * x)) + c

nn = 170
xl = np.linspace(0, 0.001, nn)
yl = fit_exp(xl, 15, 5.3, -8.1) + np.random.normal(size=nn, scale=0.05)

#######################
### Minuit
#######################
least_squares = cost.LeastSquares(xl, yl, np.sqrt(np.abs(yl)), fit_exp)
print(least_squares)
m = Minuit(least_squares, a=1, b=5, c=-7)
print("grad:")
print(m.migrad())  ### needs to be called to get fit values
print(m.values)    ### gives slightly different output
print("Hesse:")
print(m.hesse())

#######################
### curve_fit
#######################
opt, cov = curve_fit(
    fit_exp, xl, yl, sigma=np.sqrt(np.abs(yl)),
    absolute_sigma=True
)
print("curve_fit:")
print(opt)
print("covariance:")
print(cov)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(xl, yl, marker='+', ls='')
ax.plot(xl, fit_exp(xl, *m.values), ls="--")
ax.plot(xl, fit_exp(xl, *opt), ls=":")
plt.show()
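Connecting this back to the original error, as a best guess rather than a certainty: the "divide by zero encountered in true_divide" warning most likely comes from the starting value β=0, because the model evaluates -x / β at the initial point; the zeros are in the parameters, not in the data. (Note also that the example above wraps the weights in np.abs, since np.sqrt of negative Vout values would produce NaNs.) A minimal sketch that reproduces the warning:

import numpy as np

x = np.linspace(0, 0.001, 5)
beta = 0.0
y = 1 - np.exp(-x / beta)  # RuntimeWarning: divide by zero encountered in true_divide
                           # (exact wording depends on the NumPy version)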

RGB to HSV in numpy

I'm trying to implement the RGB to HSV conversion from OpenCV in pure numpy, using the formula from here:
import cv2
import numpy as np

def rgb2hsv_opencv(img_rgb):
    img_hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
    return img_hsv

def rgb2hsv_np(img_rgb):
    assert img_rgb.dtype == np.float32
    height, width, c = img_rgb.shape
    r, g, b = img_rgb[:, :, 0], img_rgb[:, :, 1], img_rgb[:, :, 2]
    t = np.min(img_rgb, axis=-1)
    v = np.max(img_rgb, axis=-1)
    s = (v - t) / (v + 1e-6)
    s[v == 0] = 0
    # v == r
    hr = 60 * (g - b) / (v - t + 1e-6)
    # v == g
    hg = 120 + 60 * (b - r) / (v - t + 1e-6)
    # v == b
    hb = 240 + 60 * (r - g) / (v - t + 1e-6)
    h = np.zeros((height, width), np.float32)
    h = h.flatten()
    hr = hr.flatten()
    hg = hg.flatten()
    hb = hb.flatten()
    h[(v == r).flatten()] = hr[(v == r).flatten()]
    h[(v == g).flatten()] = hg[(v == g).flatten()]
    h[(v == b).flatten()] = hb[(v == b).flatten()]
    h[h < 0] += 360
    h = h.reshape((height, width))
    img_hsv = np.stack([h, s, v], axis=-1)
    return img_hsv
img_bgr = cv2.imread('00000.png')
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
img_rgb = img_rgb / 255.0
img_rgb = img_rgb.astype(np.float32)
img_hsv1 = rgb2hsv_np(img_rgb)
img_hsv2 = rgb2hsv_opencv(img_rgb)
print('max diff:', np.max(np.fabs(img_hsv1 - img_hsv2)))
print('min diff:', np.min(np.fabs(img_hsv1 - img_hsv2)))
print('mean diff:', np.mean(np.fabs(img_hsv1 - img_hsv2)))
But I get a big difference:
max diff: 240.0
min diff: 0.0
mean diff: 0.18085355
Am I missing something?
Also, is it possible to write the numpy code more efficiently, for example without flatten?
Also, I'm having a hard time finding the original C++ source for the cvtColor function; as I understand it, it should actually be the function cvCvtColor from the C code, but I can't find the actual source with the formula.
From the fact that the max difference is exactly 240, I'm pretty sure that what's happening is that v==r or v==g (or both) is true at the same time as v==b, whose assignment is executed last.
If you change the order from:
h[(v==r).flatten()] = hr[(v==r).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
h[(v==b).flatten()] = hb[(v==b).flatten()]
To:
h[(v==r).flatten()] = hr[(v==r).flatten()]
h[(v==b).flatten()] = hb[(v==b).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
The max difference may start showing up as 120, because of the 120 added in that equation. So ideally, you want to execute these three lines in the order b -> g -> r. The difference should then be negligible (I still notice a max difference of ~0.01, which I chalk up to round-off somewhere).
h[(v==b).flatten()] = hb[(v==b).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
h[(v==r).flatten()] = hr[(v==r).flatten()]
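On the asker's efficiency question: the flatten/reshape round-trip is not needed, because boolean masks index 2-D arrays directly, and np.select can express the r > g > b tie-breaking in a single call (it takes the first matching condition). A minimal self-contained sketch of the hue computation under that approach, reusing the same formulas as above:

import numpy as np

def hue_np(img_rgb):
    # Hue channel only, without flatten/reshape. Listing v == r first
    # in np.select reproduces the priority that assigning b -> g -> r
    # (last writer wins) gives above.
    r, g, b = img_rgb[..., 0], img_rgb[..., 1], img_rgb[..., 2]
    t = np.min(img_rgb, axis=-1)
    v = np.max(img_rgb, axis=-1)
    d = v - t + 1e-6
    h = np.select(
        [v == r, v == g, v == b],
        [60 * (g - b) / d, 120 + 60 * (b - r) / d, 240 + 60 * (r - g) / d],
    )
    h[h < 0] += 360
    return h.astype(np.float32)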

Numpy dot product with 3d array

I've got two arrays:
data of shape (2466, 2498, 9), where the dimensions are (asset, date, returns).
correlation_matrix of shape (2466, 2466) (with 0's on the diagonal)
I want to compute the dot product that gives the expected returns, i.e. the returns of each asset multiplied by the correlation_matrix. It should give a shape the same as data.
I've tried:
data.transpose([1, 2, 0]) @ correlation_matrix
but this just hangs my PC (been going 10 minutes and counting).
I also tried:
np.einsum('ijk,lm->ijk', data, correlation_matrix)
but I'm less familiar with einsum, and this also hangs.
What am I doing wrong?
With your .transpose((1, 2, 0)) data, the correct form is:
"ijs,sk" # -> ijk
Since for a tensor A and B, we can write:
C_{ijk} = Σ_s A_{ijs} * B_{sk}
If you want to avoid transposing your data beforehand, you can just permute the indices:
"sij,sk" # -> ijk
To verify:
p, q, r = 2466, 2498, 9
a = np.random.randint(255, size=(p, q, r))
b = np.random.randint(255, size=(p, p))
c1 = a.transpose((1, 2, 0)) @ b
c2 = np.einsum("sij,sk", a, b)
>>> np.all(c1 == c2)
True
The number of multiplications needed to compute this for (p, q, r)-shaped data is p * np.prod(c.shape) == p * (q * r * p) == p**2 * q * r. In your case, that is 136_716_549_192 multiplications. You also need approximately the same number of additions, so that puts us somewhere close to 270 billion operations. If you want more speed, you could consider running the computation on a GPU via cupy.
import numpy as np
import cupy as cp
from timeit import timeit

def with_np():
    p, q, r = 2466, 2498, 9
    a = np.random.randint(255, size=(p, q, r))
    b = np.random.randint(255, size=(p, p))
    c1 = a.transpose((1, 2, 0)) @ b
    c2 = np.einsum("sij,sk", a, b)

def with_cp():
    p, q, r = 2466, 2498, 9
    a = cp.random.randint(255, size=(p, q, r))
    b = cp.random.randint(255, size=(p, p))
    c1 = a.transpose((1, 2, 0)) @ b
    c2 = cp.einsum("sij,sk", a, b)
>>> timeit(with_np, number=1)
513.066
>>> timeit(with_cp, number=1)
0.197
That's a speedup of 2600, including memory allocation, initialization, and CPU/GPU copy times! (A more realistic benchmark would give an even larger speedup.)
There are different ways to do this product:
# as you already suggested:
data.transpose([1, 2, 0]) @ correlation_matrix

# using einsum
np.einsum('ijk,il', data, correlation_matrix)

# using tensordot to explicitly specify the axes to sum over
np.tensordot(data, correlation_matrix, axes=(0, 0))
All of them should give the same result. The timing for some small matrices was more or less the same for me. So your problem is the large amount of data, not an inefficient implementation.
A = np.arange(100 * 120 * 9).reshape((100, 120, 9))
B = np.arange(100**2).reshape((100, 100))

timeit('A.transpose([1,2,0]) @ B', globals=globals(), number=100)
# 0.747475513999234
timeit("np.einsum('ijk,il', A, B)", globals=globals(), number=100)
# 0.4993825999990804
timeit('np.tensordot(A, B, axes=(0,0))', globals=globals(), number=100)
# 0.5872082839996438
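For completeness, a quick sketch (with reduced shapes so it runs fast) verifying that the three variants agree:

import numpy as np

p, q, r = 100, 120, 9
A = np.random.rand(p, q, r)
B = np.random.rand(p, p)

c1 = A.transpose([1, 2, 0]) @ B        # (q, r, p)
c2 = np.einsum('ijk,il', A, B)         # same contraction over axis 0
c3 = np.tensordot(A, B, axes=(0, 0))   # same again

print(np.allclose(c1, c2), np.allclose(c2, c3))  # True True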

Shifting HSV pixel values in python using Numpy

I'm trying to convert (shift) the values of every pixel in an HSV image (taken from a frame of a video). The idea is to invert yellow and red colours into blue (to avoid using three thresholds later in the program, when I can use just one) by mapping the red and yellow hue values to blue hues with the following equation:
(Hue + 90) % 180 (in OpenCV 3, Hue is in the range [0, 180))
Here's what I came up with:
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV);
H = hsv[:,:,0]
mask= [H<75 and H>128]
print("orig",hsv[mask])
hsv[mask] = ((hsv[mask]+90) % 180)
Unfortunately it doesn't work: with this approach I'm selecting the whole hue channel, not the pixel values that meet the condition.
There are two different possibilities here, and I'm not sure which you want, but both are trivial to implement. You can invert (reverse may be a better word) the hue rainbow, which you can do by using 180 - hue. Or you can shift the colour by 180 degrees by using (hue + 90) % 180 as you mention.
Reversing the colors:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
rev_h = 180 - h
rev_hsv = cv2.merge([rev_h, s, v])
rev_img = cv2.cvtColor(rev_hsv, cv2.COLOR_HSV2BGR)
Shifting the colors:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
shift_h = (h + 90) % 180
shift_hsv = cv2.merge([shift_h, s, v])
shift_img = cv2.cvtColor(shift_hsv, cv2.COLOR_HSV2BGR)
Those are the idiomatic ways to do it in OpenCV.
Now you want to do the same thing as above but only for some masked subset of pixels that meet a condition. This is not too hard to do; if you want to shift some masked pixels:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
h_mask = (h < 75) | (h > 128)
h[h_mask] = (h[h_mask] + 90) % 180
shift_hsv = cv2.merge([h, s, v])
shift_img = cv2.cvtColor(shift_hsv, cv2.COLOR_HSV2BGR)
The hue channel is uint8 typed, with value range [0, 179]. Therefore, when you add a large number or a negative number, the arithmetic wraps around modulo 256 and you get a garbage value. Here is my solution, based on @alkasm's colour-shifting code:
img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(img_hsv)
shift_h = random.randint(-50, 50)
h = ((h.astype('int16') + shift_h) % 180).astype('uint8')
shift_hsv = cv2.merge([h, s, v])
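A quick sketch illustrating the wrap-around that the int16 cast above avoids (values chosen arbitrarily):

import numpy as np

h = np.array([170, 40], dtype=np.uint8)
print(h + 90)                          # [  4 130] -- 170 + 90 wraps modulo 256
print((h.astype('int16') + 90) % 180)  # [ 80 130] -- the intended hue shift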
For random hue, saturation, and value shifting, shift each channel based on @bill-grates' answer:
def shift_channel(c, amount):
    if amount > 0:
        lim = 255 - amount
        c[c >= lim] = 255
        c[c < lim] += amount
    elif amount < 0:
        amount = -amount
        lim = amount
        c[c <= lim] = 0
        c[c > lim] -= amount
    return c
rand_h, rand_s, rand_v = 50, 50, 50
img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(img_hsv)
# Random shift hue
shift_h = random.randint(-rand_h, rand_h)
h = ((h.astype('int16') + shift_h) % 180).astype('uint8')
# Random shift saturation
shift_s = random.randint(-rand_s, rand_s)
s = shift_channel(s, shift_s)
# Random shift value
shift_v = random.randint(-rand_v, rand_v)
v = shift_channel(v, shift_v)
shift_hsv = cv2.merge([h, s, v])
print(shift_h, shift_s, shift_v)
img_rgb = cv2.cvtColor(shift_hsv, cv2.COLOR_HSV2RGB)