I am trying to work out the derivative of the following function:
def f(x, item):
    """Return the selection ``x[item]``.

    ``item`` is handed unchanged to the subscript operator of ``x``.
    """
    selection = x[item]
    return selection
def df_dx(x, item):
    """Placeholder for the derivative of ``f(x, item) = x[item]`` w.r.t. ``x``.

    Not implemented yet: the arguments are ignored and ``None`` is returned.
    """
    return None
where item, as defined in the NumPy indexing documentation, can be an int, a slice, a list, or a list of slices.
There are many edge cases, but it feels like the answer should be a simple multiplication between two tensors. Can anyone help?
I tried something like this:
# Excerpt from the body of a method whose header is not shown: it reads `X` and
# `self.item`, builds a zero array and writes ones into it with np.fill_diagonal.
#
# to_length: an int is returned unchanged; anything else is treated as a slice
# and mapped to the number of indices in range(start, stop, step).
# NOTE(review): a missing stop is replaced by -1 and a missing start is passed
# to range() as None, so open-ended slices (x[2:], x[:3]) are not handled here
# — confirm which inputs are expected.
to_length = lambda i: i if type(i) == int else len(
range(i.start, (i.stop if i.stop is not None else -1), (i.step if i.step is not None else 1)))
input_shape = list(X.shape)
seg_shape = []
# One entry per index: a bare int/slice gives one entry, a list gives one entry
# per element; any other type is rejected.
if type(self.item) == int or type(self.item) == slice:
seg_shape = [to_length(self.item)]
elif type(self.item) == list:
seg_shape = [to_length(i) for i in self.item]
else:
raise Exception(f"Unknown type {type(self.item)}")
# The result has the selection axes first, followed by the axes of X.
v = np.zeros(seg_shape + input_shape)
if len(seg_shape) == 1:
# np.fill_diagonal(v[:, self.i:self.i + self.n], 1)
np.fill_diagonal(v[..., self.item], 1)
return v
elif len(seg_shape) == 2:
np.fill_diagonal(v[:, :, self.item[0], self.item[1]], 1)
return v
# NOTE(review): seg_shape lengths other than 1 or 2 fall through without a
# return statement — presumably unsupported; confirm.
But it didn't really work.
The function you defined isn't continuous. There is a NumPy gradient function, which may do what you want.
import numpy as np
# Sample data: the squares of 20 grid points running from -1.0 to 0.9 in steps of 0.1.
x = ( np.arange( 20 ) - 10 ) * .1
x = x*x
x
# array([1. , 0.81, 0.64, 0.49, 0.36, 0.25, 0.16, 0.09, 0.04, 0.01, 0. ,
# 0.01, 0.04, 0.09, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81])
def f(x, item):
    """Index ``x`` with ``item`` and return the result."""
    picked = x[item]
    return picked
def df_dx(x, item):
return np.gradient( x )[ item ]
# Indexing with None keeps every entry and adds a leading axis, hence the
# nested (2-D) array in the output below.
df_dx( x, None )
# array([[-0.19, -0.18, -0.16, -0.14, -0.12, -0.1 , -0.08, -0.06, -0.04,
# -0.02, 0. , 0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14,
# 0.16, 0.17]])
# A list of positions picks the gradient at those positions only.
df_dx( x, [ 2, 8, 10, 17 ] )
# array([-0.16, -0.04, 0. , 0.14])
Related
There is a fair number of questions on gradients out there, but I haven't been able to fix my problem. In a nutshell: I am trying to run a Monte Carlo simulation and get pathwise differentials. There are a few tutorials out there, but they run into the same problem as my own code. I have boiled it down to the toy example below.
The second order derivative called gamma is wrong however (I'll highlight it below). So something is wrong with my nested gradient tape. Having the tape watch the variable has no effect actually. There must be something I am not aware of here. Any hint much appreciated.
edit: I have cross checked this with a deterministic function and the code works just fine. Second derivatives are calculated correctly using gradient tape. So no idea why it doesn't work on a Monte Carlo.
import numpy as np
import pandas as pd
import tensorflow as tf
from pprint import pprint
# Floating-point type used for every tensor and the seed for the random draws.
DTYPE = tf.float32
SEED = 3232
# Pricing inputs held as tf.Variable objects.
S0 = tf.Variable(100, dtype=DTYPE)
strike = tf.Variable(110, dtype=DTYPE)
time_to_expiry = tf.Variable(1, dtype=DTYPE)
implied_vol = tf.Variable(0.3, dtype=DTYPE)
# Inputs bundled under the keyword names of pricer_montecarlo, so they can be
# passed as **v and used as the gradient sources.
v = dict(S0=S0,strike=strike,time_to_expiry=time_to_expiry,implied_vol=implied_vol)
#tf.function
def brownian(S0, dt, sigma, mu, dw):
    """Build simulated paths from normal draws ``dw``.

    Each draw is turned into a per-step growth factor
    ``exp((mu - sigma**2 / 2) * dt + sigma * sqrt(dt) * dw)``; the factors are
    accumulated with a cumulative product along axis 1 and scaled by ``S0``.

    Returns a tensor with the same shape as ``dw``.
    """
    root_dt = tf.math.sqrt(dt)
    log_drift = (mu - (sigma ** 2) / 2)
    log_shock = sigma * root_dt * dw
    growth = tf.math.exp(log_drift * dt + log_shock)
    return S0 * tf.math.cumprod(growth, axis=1)
#tf.function
def pricer_montecarlo(S0, strike, time_to_expiry, implied_vol, dw):
    """Monte Carlo price of the payoff ``max(S_T - strike, 0)``.

    ``dw`` holds one row of normal draws per simulated path; its second
    dimension is the number of time steps. The rate is fixed at zero here.

    Returns the discounted mean payoff as a scalar tensor.
    """
    rate = tf.constant(0.0,dtype=DTYPE)
    step = time_to_expiry / dw.shape[1]
    paths = brownian(S0, step, implied_vol, rate, dw)
    terminal = paths[:, -1]  # value of every path at the last step
    payoff = tf.math.maximum(terminal - strike, 0)
    discount = tf.exp(-rate * time_to_expiry)
    return discount * tf.reduce_mean(payoff)
def calculate_montecarlo(greeks=True, nsims=10**7, nobs=2):
    """Price by Monte Carlo and optionally differentiate the price.

    Draws a fixed-seed normal sample of shape ``(nsims, nobs)``, prices it with
    ``pricer_montecarlo`` using the module-level inputs ``v`` and converts the
    results to NumPy values.

    Args:
        greeks: when true, also compute the gradients of the price with respect
            to every entry of ``v`` (``"dv"``) and the gradients of
            ``dv["S0"]`` with respect to every entry of ``v`` (``"d2v"``).
        nsims: number of simulated paths.
        nobs: number of time steps per path.

    Returns:
        dict with key ``"npv"`` and, when ``greeks`` is true, ``"dv"`` and
        ``"d2v"`` (dicts keyed like ``v``).
    """
    dw = tf.random.normal((nsims, nobs), seed=SEED)
    out = dict()
    if greeks:
        # The inner tape must be recorded by the outer tape, so the first
        # gradient is taken while g2 is still active.
        with tf.GradientTape() as g2:
            g2.watch(v['S0'])
            with tf.GradientTape() as g1:
                g1.watch(v['S0'])
                npv = pricer_montecarlo(**v, dw=dw)
            dv = g1.gradient(npv, v)
            g2.watch(dv)
        # NOTE: the payoff max(S_T - K, 0) is piecewise linear in S0, so
        # differentiating dv['S0'] path by path yields 0 for S0 (as in the
        # sample output) rather than the Black-Scholes gamma.
        d2v = g2.gradient(dv['S0'], v)
        out["dv"] = {name: grad.numpy() for name, grad in dv.items()}
        out["d2v"] = {name: grad.numpy() for name, grad in d2v.items()}
    else:
        # Keep the tensor here; it is converted exactly once below. Converting
        # in this branch as well made the later .numpy() call fail.
        npv = pricer_montecarlo(**v, dw=dw)
    out["npv"] = npv.numpy()
    return out
# Run the Monte Carlo pricer with gradients and print the result dictionary.
out = calculate_montecarlo()
pprint(out)
# Closed-form reference values from py_vollib, called with the same numbers as
# the tf.Variables above (100, 110, 1, 0.3) and a rate of 0.
# NOTE(review): in the sample output vega differs from the autodiff value by a
# factor of 100 and theta by sign and roughly a factor of 365 — presumably
# py_vollib reports them per 1% vol and per day; confirm before comparing.
from py_vollib import black_scholes
from py_vollib.black_scholes.greeks import analytical
print('npv='+str(black_scholes.black_scholes('c', 100, 110, 1, 0, 0.3)))
print('dv S0='+str(analytical.delta('c', 100, 110, 1, 0, 0.3)))
print('d2v S0='+str(analytical.gamma('c', 100, 110, 1, 0, 0.3)))
print('dv implied_vol='+str(analytical.vega('c', 100, 110, 1, 0, 0.3)))
print('dv time_to_expiry='+str(analytical.theta('c', 100, 110, 1, 0, 0.3)))
Output: 'd2v': {'S0': 0.0..., should be close to 0.013112390443974165 (plus some stochastic noise).
{'d2v': {'S0': 0.0,
'implied_vol': 0.3933603,
'strike': 0.0,
'time_to_expiry': 0.059004053},
'dv': {'S0': 0.43342653,
'implied_vol': 39.336025,
'strike': -0.32001525,
'time_to_expiry': 5.9004045},
'npv': 8.140971}
npv=8.141012048964207
dv S0=0.4334094123285094
d2v S0=0.013112390443974165
dv implied_vol=0.39337171331922494
dv time_to_expiry=-0.01616596082133801
noob question here, please bear with me:
I'm trying to use numpy.polynomial.polynomial.Polynomial.fit(x,y,deg).convert().coef to get the coefficients for a zero-intersecting polynomial y = ax**2 + bx + c (where c=0) fit to a set of measured data. When I use deg=2 or deg=[0,1,2] I get nicely fitting coefficients for a,b,and c. However, when I use deg = [1,2] in order to force c=0, I still get three coefficients and they don't fit at all. What am I doing wrong?
Here is a code example with real data:
import numpy as np
from numpy.polynomial.polynomial import Polynomial as p
# Measured data: one shared x grid and nine y series sampled on it.
x = np.array([0, .1, .5, 1, 2])
y_series = np.array([[2, 319, 1693, 3713, 8695],
[3, 327, 1828, 4131, 10111],
[3, 304, 1653, 3617, 8678],
[4,300,1675,3745,8922],
[3, 298,1661,3653,8694],
[5, 304,1642,3686,8670],
[3, 313,1688,3724,8657],
[5, 315,1736,3821,8963],
[3, 247,1300,2767,6376]
])
# For every series, fit with three `deg` settings and print the coefficients
# after convert(). deg=2 and deg=[0,1,2] include the constant term; deg=[1,2]
# leaves it out of the fit.
# NOTE(review): fit() appears to work in a scaled window (default [-1, 1]), so
# the omitted constant is zero only in that window, not after convert() —
# confirm against the Polynomial.fit documentation.
for y in y_series:
print('x: ', x,'y: ', y)
print('deg=2: ', p.fit(x, y, deg=2).convert().coef)
print('deg=[0,1,2]:', p.fit(x, y, deg=[0,1,2]).convert().coef)
print('deg=[1,2]: ', p.fit(x, y, deg=[1,2]).convert().coef)
print('')
Similar to a previous post, you are having trouble with the window argument of the Polynomial class. The constant coefficient is actually zero in the window the fit is performed in, which is the default one, namely [ -1, 1 ]. If one prints the coefficients before calling convert(), the constant term is indeed zero. Providing the window solves the issue.
Have a look at this:
import numpy as np
from numpy.polynomial.polynomial import Polynomial
def parabola(x, a, b, c, s=0):
    """Evaluate ``a + b*x + c*x**2`` with additive Gaussian noise.

    Args:
        x: scalar (Python or NumPy) or array of any shape.
        a: constant coefficient.
        b: linear coefficient.
        c: quadratic coefficient.
        s: standard deviation of the noise; ``0`` gives the exact polynomial.

    Returns:
        A scalar for scalar ``x``, otherwise an array shaped like ``x``, with
        one independent noise sample per element.
    """
    shape = np.shape(x)
    # An empty shape means a scalar (including NumPy scalars and 0-d arrays):
    # draw a single value. Otherwise draw one sample per element, so N-d
    # arrays work too instead of only 1-D ones.
    noise = np.random.normal(scale=s, size=shape if shape else None)
    return a + b * x + c * x**2 + noise
# Noisy samples of a known parabola on 15 points between -2 and 3.
xl = np.linspace( -2, 3, 15 )
yl = parabola( xl, 0.01, 0.8, 0.21, s=0.1 )
# p1: all three terms fitted; coefficients printed before and after convert().
print("\n p1: ")
p1 = Polynomial.fit( xl, yl, deg=[0,1,2] )
print( p1.coef )
print( p1.convert().coef )
# p2: constant term left out, default window; domain and window are printed
# to show the coordinates the fit was performed in.
print("\n p2: ")
p2 = Polynomial.fit( xl, yl, deg=[1,2] )
print( p2.coef )
print( p2.convert().coef )
print( p2.domain )
print( p2.window )
# p3: constant term left out, with the window set to the data range.
print("\n p3: ")
p3 = Polynomial.fit( xl, yl, deg=[1,2], window=[ min( xl ), max( xl ) ] )
print( p3.coef )
I am trying to convert the R3Det model, which outputs rotated bounding boxes, to a TensorFlow Lite model for on-device inference on mobile devices. The problem I am facing is that part of the inference model uses Python code wrapped by tf.py_func, which is not serializable. I am trying to convert the function to TensorFlow, but it contains a for loop and some OpenCV function calls, and I have no idea how to convert these into TensorFlow code. I would appreciate it if anybody could help me out with this. The Python function is given below.
def nms_rotate_cpu(boxes, scores, iou_threshold, max_output_size):
    """
    Greedy non-maximum suppression for rotated boxes on the CPU.

    Boxes are visited in order of decreasing score. Every visited box that has
    not been suppressed is kept, and each lower-scored box whose IoU with it is
    at least ``iou_threshold`` is suppressed.

    :param boxes: array indexed as boxes[k, 0:5], format [x_c, y_c, w, h, theta]
    :param scores: scores of boxes
    :param iou_threshold: iou threshold (0.7 or 0.5)
    :param max_output_size: max number of output
    :return: int64 array with the remaining index of boxes, highest score first
    """
    keep = []
    order = scores.argsort()[::-1]
    num = boxes.shape[0]
    # The `np.int` alias no longer exists in current NumPy; a boolean mask
    # expresses the same flag.
    suppressed = np.zeros(num, dtype=bool)

    for _i in range(num):
        if len(keep) >= max_output_size:
            break

        i = order[_i]
        if suppressed[i]:
            continue
        keep.append(i)

        r1 = ((boxes[i, 0], boxes[i, 1]), (boxes[i, 2], boxes[i, 3]), boxes[i, 4])
        area_r1 = boxes[i, 2] * boxes[i, 3]

        for _j in range(_i + 1, num):
            j = order[_j]
            # Skip candidates that are already suppressed (the flag of box i
            # was tested here before, which never skipped anything).
            if suppressed[j]:
                continue

            # Cheap rejection: centres further apart than the summed side
            # lengths cannot overlap, so the OpenCV call is skipped.
            centre_dist = np.sqrt((boxes[i, 0] - boxes[j, 0])**2 + (boxes[i, 1] - boxes[j, 1])**2)
            if centre_dist > (boxes[i, 2] + boxes[j, 2] + boxes[i, 3] + boxes[j, 3]):
                inter = 0.0
            else:
                r2 = ((boxes[j, 0], boxes[j, 1]), (boxes[j, 2], boxes[j, 3]), boxes[j, 4])
                area_r2 = boxes[j, 2] * boxes[j, 3]
                inter = 0.0
                try:
                    int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1]
                    if int_pts is not None:
                        order_pts = cv2.convexHull(int_pts, returnPoints=True)
                        int_area = cv2.contourArea(order_pts)
                        inter = int_area * 1.0 / (area_r1 + area_r2 - int_area + cfgs.EPSILON)
                except cv2.error:
                    # cv2.error: /io/opencv/modules/imgproc/src/intersection.cpp:247:
                    # error: (-215) intersection.size() <= 8 in function
                    # rotatedRectangleIntersection
                    # The pair is then treated as almost fully overlapping.
                    inter = 0.9999

            if inter >= iou_threshold:
                suppressed[j] = True

    return np.array(keep, np.int64)
Given a numpy ndarray with dimensions m by n (where n>m), how can I find the linearly independent columns?
One way is to use the LU decomposition. The factor U will be of the same size as your matrix, but will be upper-triangular. In each row of U, pick the first nonzero element: these are pivot elements, which belong to linearly independent columns. A self-contained example:
import numpy as np
from scipy.linalg import lu
# Example matrix for testing.
A = np.array([[1, 2, 3], [2, 4, 2]])
# Only the upper-triangular factor is needed.
_, _, U = lu(A)
# The first nonzero entry of every row of U is a pivot; its column index
# identifies a linearly independent column of A.
lin_indep_columns = []
for row_idx in range(U.shape[0]):
    nonzero_cols = np.flatnonzero(U[row_idx, :])
    lin_indep_columns.append(nonzero_cols[0])
Output: [0, 2], which means the 0th and 2nd columns of A form a basis for its column space.
@user6655984's answer inspired this code, in which I wrote a function to replace the author's last line of code (finding the pivot columns of U) so that it can handle more diverse A's.
Here it is:
import numpy as np
from scipy import linalg as LA
np.set_printoptions(precision=1, suppress=True)
# 3x4 test matrix.
A = np.array([[1, 4, 1, -1],
[2, 5, 1, -2],
[3, 6, 1, -3]])
# LU factorisation: permutation matrix P and the factors L and U.
P, L, U = LA.lu(A)
# Print each factor under its name, with blank lines in between.
print('P', P, '', 'L', L, '', 'U', U, sep='\n')
Output:
P
[[0. 1. 0.]
[0. 0. 1.]
[1. 0. 0.]]
L
[[1. 0. 0. ]
[0.3 1. 0. ]
[0.7 0.5 1. ]]
U
[[ 3. 6. 1. -3. ]
[ 0. 2. 0.7 -0. ]
[ 0. 0. -0. -0. ]]
I came up with this function:
def get_indices_for_linearly_independent_columns_of_A(U: np.ndarray) -> list:
    """Return the sorted column indices of the first nonzero entry of each row of ``U``.

    Entries with absolute value below ``1.e-7`` count as zero, rows without any
    nonzero entry are skipped, and indices shared by several rows are reported
    once. ``U`` itself is left unmodified.
    """
    # Work on a copy and flush tiny values (including "-0.") to exact zero so
    # that nonzero() ignores them.
    cleaned = U.copy()
    near_zero = abs(cleaned) < 1.e-7
    cleaned[near_zero] = 0

    # A set removes the duplicates that arise when two or more rows share the
    # column of their first nonzero element.
    first_nonzero_cols = set()
    for row_idx in range(cleaned.shape[0]):
        nonzero_cols = cleaned[row_idx, :].nonzero()[0]
        if len(nonzero_cols) > 0:
            first_nonzero_cols.add(nonzero_cols[0])

    return sorted(first_nonzero_cols)
Finally:
# Select the columns of A whose indices the function reports for U.
col_sp_A = A[:, get_indices_for_linearly_independent_columns_of_A(U)]
print(col_sp_A)
Output:
[[1 4]
[2 5]
[3 6]]
Try this one
def LU_decomposition(A):
    """
    Perform LU decomposition of a given square matrix with row pivoting.

    The input is left untouched: elimination runs on a floating-point copy, so
    integer matrices are accepted as well.

    Args:
        A: the given matrix, shape (N, N)
    Returns: P, L and U, s.t. PA = LU, where P is a permutation matrix, L is
        unit lower triangular and U is upper triangular
    Raises:
        AssertionError: if A is not square
    """
    A = np.asarray(A)
    assert A.shape[0] == A.shape[1]
    N = A.shape[0]

    # astype() always copies, so the caller's array is never modified.
    work = A.astype(np.result_type(A.dtype, np.float64))
    # perm[i] is the original row that currently sits in row i.
    perm = np.arange(N)

    for i in range(N - 1):
        # Partial pivoting: bring the largest entry of column i (on or below
        # the diagonal) onto the diagonal.
        pivot_loc = np.argmax(np.abs(work[i:, i])) + i
        if pivot_loc != i:
            work[[i, pivot_loc], :] = work[[pivot_loc, i], :]
            perm[[i, pivot_loc]] = perm[[pivot_loc, i]]

        # A zero pivot after pivoting means the rest of the column is already
        # zero: nothing to eliminate, and dividing would only produce NaNs.
        if work[i, i] == 0:
            continue

        # Store the multipliers below the diagonal, then update the trailing
        # sub-matrix.
        work[i + 1:, i] /= work[i, i]
        work[i + 1:, i + 1:] -= work[i + 1:, [i]] * work[[i], i + 1:]

    # The strict lower triangle holds the multipliers (L), the rest is U.
    L = np.tril(work, k=-1) + np.identity(N, dtype=work.dtype)
    U = np.triu(work)
    P = np.zeros((N, N), dtype=np.float64)
    P[np.arange(N), perm] = 1.0
    return P, L, U
def get_bases(A):
    """Return the columns of ``A`` at the pivot positions found after elimination.

    ``A`` must be 2-D. It is reduced with ``gaussian_elimination``; for every
    row ``r`` of the result, the scan starts at column ``r`` and stops at the
    first entry whose magnitude is not below ``1e-5``. Rows without such an
    entry contribute nothing. Duplicate column indices are dropped before the
    columns are selected from ``A``.
    """
    assert A.ndim == 2
    reduced = gaussian_elimination(A)
    n_rows, n_cols = reduced.shape

    pivot_cols = []
    for row in range(n_rows):
        hit = next(
            (col for col in range(row, n_cols)
             if not abs(reduced[row, col]) < 1e-5),
            None,
        )
        if hit is not None:
            pivot_cols.append(hit)

    unique_cols = list(set(pivot_cols))
    return A[:, unique_cols]
I am looking for a TensorFlow way of implementing something similar to Python's list.index() function.
Given a matrix and a value to find, I want to know the first occurrence of the value in each row of the matrix.
For example,
m is a <batch_size, 100> matrix of integers
val = 23
result = [0] * batch_size
for i, row_elems in enumerate(m):
result[i] = row_elems.index(val)
I cannot assume that 'val' appears only once in each row, otherwise I would have implemented it using tf.argmax(m == val). In my case, it is important to get the index of the first occurrence of 'val' and not any.
It seems that tf.argmax works like np.argmax (according to the test), which will return the first index when there are multiple occurrences of the max value.
You can use tf.argmax(tf.cast(tf.equal(m, val), tf.int32), axis=1) to get what you want. However, currently the behavior of tf.argmax is undefined in case of multiple occurrences of the max value.
If you are worried about undefined behavior, you can apply tf.argmin on the return value of tf.where, as @Igor Tsvetkov suggested.
For example,
# test with tensorflow r1.0
import tensorflow as tf
# Value to search for, and a placeholder that is fed the example matrix.
val = 3
m = tf.placeholder(tf.int32)
m_feed = [[0 , 0, val, 0, val],
[val, 0, val, val, 0],
[0 , val, 0, 0, 0]]
# Coordinates of every match; column 0 is the row index, column 1 the column index.
tmp_indices = tf.where(tf.equal(m, val))
# Smallest matching column index per row, i.e. the first occurrence in each row.
result = tf.segment_min(tmp_indices[:, 1], tmp_indices[:, 0])
with tf.Session() as sess:
print(sess.run(result, feed_dict={m: m_feed})) # [2, 0, 1]
Note that tf.segment_min will raise InvalidArgumentError when some row contains no val. In your code, row_elems.index(val) will also raise an exception when row_elems doesn't contain val.
Looks a little ugly but works (assuming m and val are both tensors):
# Collect one scalar per row of m: the smallest index at which the row equals val.
# NOTE(review): tf.unpack / tf.pack look like pre-1.0 names (later tf.unstack /
# tf.stack) — confirm against the TensorFlow version in use.
idx = list()
for t in tf.unpack(m, axis=0):
idx.append(tf.reduce_min(tf.where(tf.equal(t, val))))
# Recombine the per-row scalars into a single tensor.
idx = tf.pack(idx, axis=0)
EDIT:
As Yaroslav Bulatov mentioned, you could achieve the same result with tf.map_fn:
def index1d(t):
    """Return the smallest index at which the 1-D tensor ``t`` equals ``val``."""
    hits = tf.where(tf.equal(t, val))
    return tf.reduce_min(hits)


# Apply the per-row search to every row of m.
idx = tf.map_fn(index1d, m, dtype=tf.int64)
Here is another solution to the problem, assuming there is a hit on every row.
import tensorflow as tf
# Value to search for and the example matrix.
val = 3
m = tf.constant([
[0 , 0, val, 0, val],
[val, 0, val, val, 0],
[0 , val, 0, 0, 0]])
# replace all entries in the matrix either with its column index, or out-of-index-number
# (the out-of-index number is the row length, so it never wins the minimum when a row has a match)
match_indices = tf.where( # [[5, 5, 2, 5, 4],
tf.equal(val, m), # [0, 5, 2, 3, 5],
x=tf.range(tf.shape(m)[1]) * tf.ones_like(m), # [5, 1, 5, 5, 5]]
y=(tf.shape(m)[1])*tf.ones_like(m))
# The row-wise minimum is the column of the first match.
result = tf.reduce_min(match_indices, axis=1)
with tf.Session() as sess:
print(sess.run(result)) # [2, 0, 1]
Here is a solution that also handles the case where the element is not contained in a row of the matrix (taken from a DeepMind GitHub repository):
def get_first_occurrence_indices(sequence, eos_idx):
    '''
    For every row, return the position just past the first occurrence of
    eos_idx, capped at the row length.

    args:
        sequence: [batch, length]
        eos_idx: scalar
    returns:
        int32 tensor of shape [batch] holding
        min(index of first occurrence + 1, length); rows that do not contain
        eos_idx yield length.
    '''
    batch_size, maxlen = sequence.get_shape().as_list()
    eos_idx = tf.convert_to_tensor(eos_idx)

    # Append one column filled with eos_idx so that every row has a match.
    sentinel_col = tf.tile(eos_idx[None, None], [batch_size, 1])
    padded = tf.concat([sequence, sentinel_col], axis = -1)

    # Coordinates of all matches: column 0 is the row, column 1 the position.
    hits = tf.cast(tf.where(tf.equal(padded, eos_idx)), tf.int32)

    # Smallest matching position per row.
    first_hit = tf.segment_min(hits[:, 1], hits[:, 0])
    first_hit.set_shape([batch_size])

    return tf.minimum(first_hit + 1, maxlen)
And:
import tensorflow as tf
# Example: value 3 sits at positions 2, 1 and 0 of the first three rows and is
# missing from the last one.
mat = tf.Variable([[1,2,3,4,5], [2,3,4,5,6], [3,4,5,6,7], [0,0,0,0,0]], dtype = tf.int32)
idx = 3
first_occurrences = get_first_occurrence_indices(mat, idx)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Evaluate the tensor defined above (the name was misspelled here before,
# which raised a NameError).
sess.run(first_occurrences) # [3, 2, 1, 5]