Tensorflow 2: Nested gradient tape produces wrong second pathwise derivative - tensorflow

there is a fair number of questions on gradients out there, but I haven't been able to fix my problem. In a nutshell: Trying to run a Monte Carlo simulation and get pathwise differentials. There are a few tutorials out there, but they do run into the same problem as my own code. I have boiled it down into the toy example below.
The second order derivative called gamma is wrong however (I'll highlight it below). So something is wrong with my nested gradient tape. Having the tape watch the variable has no effect actually. There must be something I am not aware of here. Any hint much appreciated.
edit: I have cross checked this with a deterministic function and the code works just fine. Second derivatives are calculated correctly using gradient tape. So no idea why it doesn't work on a Monte Carlo.
import numpy as np
import pandas as pd
import tensorflow as tf
from pprint import pprint
DTYPE = tf.float32
SEED = 3232
S0 = tf.Variable(100, dtype=DTYPE)
strike = tf.Variable(110, dtype=DTYPE)
time_to_expiry = tf.Variable(1, dtype=DTYPE)
implied_vol = tf.Variable(0.3, dtype=DTYPE)
v = dict(S0=S0,strike=strike,time_to_expiry=time_to_expiry,implied_vol=implied_vol)
#tf.function
def brownian(S0, dt, sigma, mu, dw):
dt_sqrt = tf.math.sqrt(dt)
shock = sigma * dt_sqrt * dw
drift = (mu - (sigma ** 2) / 2)
bm = tf.math.exp(drift * dt + shock)
out = S0 * tf.math.cumprod(bm, axis=1)
return out
#tf.function
def pricer_montecarlo(S0, strike, time_to_expiry, implied_vol, dw):
sigma = implied_vol
T = time_to_expiry
r = tf.constant(0.0,dtype=DTYPE)
K = strike
dt = T / dw.shape[1]
st = brownian(S0, dt, sigma, r, dw)
payout = tf.math.maximum(st[:, -1] - K, 0)
npv = tf.exp(-r * T) * tf.reduce_mean(payout)
return npv
def calculate_montecarlo(greeks=True):
nsims = 10**7
nobs = 2
dw = tf.random.normal((nsims, nobs), seed=SEED)
out = dict()
if greeks:
with tf.GradientTape() as g2:
g2.watch(v['S0'])
with tf.GradientTape() as g1:
g1.watch(v['S0'])
npv = pricer_montecarlo(**v, dw=dw)
dv = g1.gradient(npv, v)
g2.watch(dv)
d2v = g2.gradient(dv['S0'], v)
out["dv"] = {k: v.numpy() for k, v in dv.items()}
out["d2v"] = {k: v.numpy() for k, v in d2v.items()}
else:
npv = pricer_montecarlo(**v, dw=dw).numpy()
out["npv"] = npv.numpy()
return out
out = calculate_montecarlo()
pprint(out)
from py_vollib import black_scholes
from py_vollib.black_scholes.greeks import analytical
print('npv='+str(black_scholes.black_scholes('c', 100, 110, 1, 0, 0.3)))
print('dv S0='+str(analytical.delta('c', 100, 110, 1, 0, 0.3)))
print('d2v S0='+str(analytical.gamma('c', 100, 110, 1, 0, 0.3)))
print('dv implied_vol='+str(analytical.vega('c', 100, 110, 1, 0, 0.3)))
print('dv time_to_expiry='+str(analytical.theta('c', 100, 110, 1, 0, 0.3)))
Output: 'd2v': {'S0': 0.0..., should be close to 0.013112390443974165 (plus some stochastic noise).
{'d2v': {'S0': 0.0,
'implied_vol': 0.3933603,
'strike': 0.0,
'time_to_expiry': 0.059004053},
'dv': {'S0': 0.43342653,
'implied_vol': 39.336025,
'strike': -0.32001525,
'time_to_expiry': 5.9004045},
'npv': 8.140971}
npv=8.141012048964207
dv S0=0.4334094123285094
d2v S0=0.013112390443974165
dv implied_vol=0.39337171331922494
dv time_to_expiry=-0.01616596082133801

Related

ValueError: cannot reshape array of size 692224 into shape (1,3,416,416)-yolov5 cpu error

I was trying to run my yolov5 custom model on cpu and I got this error.
this is the github page I have used : https://github.com/Amelia0911/onnxruntime-for-yolov5
import onnxruntime
from models.utils import *
import time
IMAGE_SIZE = (416, 416)
CONF_TH = 0.3
NMS_TH = 0.45
CLASSES = 80
model = onnxruntime.InferenceSession("models_train/bestnone.onnx")
anchor_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
stride = [8, 16, 32]
def draw(img, boxinfo, dst, id):
for *xyxy, conf, cls in boxinfo:
label = '{}|{}'.format(int(cls), '%.2f' % conf)
plot_one_box(xyxy, img, label=label, color=[0, 0, 255])
cv2.imencode('.jpg', img)[1].tofile(dst)
def detect(image):
img = cv2.resize(image,IMAGE_SIZE)
img = img.transpose(2, 0, 1)
dataset = (img, image)
img = dataset[0].astype('float32')
img_size = [dataset[0].shape[1], dataset[0].shape[2]]
img /= 255.0
img = img.reshape(1, 3, img_size[0], img_size[1])
inputs = {model.get_inputs()[0].name: img}
pred = torch.tensor(model.run(None, inputs)[0])
anchor = torch.tensor(anchor_list).float().view(3, -1, 2)
area = img_size[0]*img_size[1]
size = [int(area/stride[0]**2), int(area/stride[1]**2), int(area/stride[2]**2)]
feature = [[int(j/stride[i]) for j in img_size] for i in range(3)]
y = []
y.append(pred[:, :size[0]*3, :])
y.append(pred[:, size[0]*3:size[0]*3+size[1]*3, :])
y.append(pred[:, size[0]*3+size[1]*3:, :])
grid = []
for k, f in enumerate(feature):
grid.append([[i, j] for j in range(f[0]) for i in range(f[1])])
z = []
for i in range(3):
src = y[i]
xy = src[..., 0:2] * 2. - 0.5
wh = (src[..., 2:4] * 2) ** 2
dst_xy = []
dst_wh = []
for j in range(3):
dst_xy.append((xy[:, j*size[i]:(j+1)*size[i], :] + torch.tensor(grid[i])) * stride[i])
dst_wh.append(wh[:, j*size[i]:(j+1)*size[i], :] * anchor[i][j])
src[..., 0:2] = torch.from_numpy(np.concatenate((dst_xy[0], dst_xy[1], dst_xy[2]), axis=1))
src[..., 2:4] = torch.from_numpy(np.concatenate((dst_wh[0], dst_wh[1], dst_wh[2]), axis=1))
z.append(src.view(1, -1, CLASSES+5)) #85
pred = torch.cat(z, 1)
pred = nms(pred, CONF_TH, NMS_TH)
for det in pred:
if det is not None and len(det):
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], dataset[1].shape).round()
if det == None:
return np.array([])
return det
if __name__ == '__main__':
import time
src = 'Temp-640x640.jpg'
t1 = time.time()
img = cv2.imdecode(np.fromfile(src, dtype=np.uint8), -1)
print(IMAGE_SIZE)
results = detect(img)
t2 = time.time()
print(results)
print("onnxruntime time = ", t2 - t1)
if results is not None and len(results):
draw(img, results, 'dst3.jpg', str(id))
print('Down!')
when I run this code I got the following error:
File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 85, in <module>
results = detect(img)
File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 30, in detect
img = img.reshape(1, 3, img_size[0], img_size[1])
ValueError: cannot reshape array of size 692224 into shape (1,3,416,416)
I think it is a color channel issue. I have tried to fix it .But it doesn't work .If someone know how to fix it please inform me.Thanks in advance

draw a line in tensorflow

I want to create a human pose skeleton estimation network and for this, I have a two-part network, first part generates 16 heatmaps as output(each heatmap for different joint and hence a key point can be extracted), using these 16 key points I wish to create a human skeleton and feed it to second half of my network. My problem is, how do I draw lines between the key points to create the skeleton? I couldn't find a way to do it on a tensor object using tensorflow or keras.
I know i'm a bit late but here is some code that I think does what you're after (in TFv2.3). Hopefully it will save someone time in the future!
It uses solely tensorflow ops, so you can use it in data loaders etc. The real pain here is that Tensorflow doesn't allow Eager Assignment, so you can't just update tensors by index. This works around that by creating two sparse tensors, one for the mask (where to apply the line) and another for the new_values (what value to apply at the line). The code for simply designing the line might not be applicable in your case (based on https://stackoverflow.com/a/47381058) but ported away from numpy.
import tensorflow as tf
def trapez(y, y0, w):
return tf.clip_by_value(tf.minimum(y + 1 + w/2 - y0, -y + 1 + w/2 + y0), 0, 1)
def apply_output(img, yy, xx, val):
stack = tf.stack([yy, xx], axis=1)
stack = tf.cast(stack, tf.int64)
values = tf.ones(stack.shape[0], tf.float32)
mask = tf.sparse.SparseTensor(indices=stack, values=values, dense_shape=img.shape)
mask = tf.sparse.reorder(mask)
mask = tf.sparse.to_dense(mask)
mask = tf.cast(mask, tf.float32)
new_values = tf.sparse.SparseTensor(indices=stack, values=val, dense_shape=img.shape)
new_values = tf.sparse.reorder(new_values)
new_values = tf.sparse.to_dense(new_values)
img = img * (1 - mask) + new_values * mask
img = tf.cast(tf.expand_dims(img * 255, axis=-1), tf.uint8)
return img
def weighted_line(img, r0, c0, r1, c1, w):
output = img
x = tf.range(c0, c1 + 1, dtype=tf.float32)
slope = (r1-r0) / (c1-c0)
w *= tf.sqrt(1 + tf.abs(slope)) / 2
y = x * slope + (c1*r0-c0*r1) / (c1-c0)
thickness = tf.math.ceil(w/2)
yy = (tf.reshape(tf.math.floor(y), [-1, 1]) + tf.reshape(tf.range(-thickness-1, thickness+2), [1, -1]))
xx = tf.repeat(x, yy.shape[1])
values = tf.reshape(trapez(yy, tf.reshape(y, [-1, 1]), w), [-1])
yy = tf.reshape(yy, [-1])
limits_y = tf.math.logical_and(yy >= 0, yy < img.shape[0])
limits_x = tf.math.logical_and(xx >= 0, xx < img.shape[1])
limits = tf.math.logical_and(limits_y, limits_x)
limits = tf.math.logical_and(limits, values > 0)
yy = tf.cast(yy[limits], tf.float32)
xx = tf.cast(xx[limits], tf.float32)
return yy, xx, values[limits], apply_output(output, yy, xx, values[limits])
Just for a sanity check, you can call it with the following, and display it using opencv
if __name__ == "__main__":
IMG = tf.zeros((500, 500), tf.float32)
yy, xx, vals, FINAL_IMG = weighted_line(IMG, 10, 20, 100, 200, 5)
jpeg_string = tf.io.encode_jpeg(FINAL_IMG)
tf.io.write_file("output.jpg", jpeg_string)
import cv2
img = cv2.imread("output.jpg")
cv2.imshow("Output", img)
cv2.waitKey(0)

Pairwise distance between a set of Matrices in Keras/Tensorflow

I want to calculate pairwise distance between a set of Tensor (e.g 4 Tensor). Each matrix is 2D Tensor. I don't know how to do this in vectorize format. I wrote following sudo-code to determine what I need:
E.shape => [4,30,30]
sum = 0
for i in range(4):
for j in range(4):
res = calculate_distance(E[i],E[j]) # E[i] is one the 30*30 Tensor
sum = sum + reduce_sum(res)
Here is my last try:
x_ = tf.expand_dims(E, 0)
y_ = tf.expand_dims(E, 1)
s = x_ - y_
P = tf.reduce_sum(tf.norm(s, axis=[-2, -1]))
This code works But I don't know how do this in a Batch. For instance when E.shape is [BATCH_SIZE * 4 * 30 * 30] my code doesn't work and Out Of Memory will happen. How can I do this efficiently?
Edit: After a day, I find a solution. it's not perfect but works:
res = tf.map_fn(lambda x: tf.map_fn(lambda y: tf.map_fn(lambda z: tf.norm(z - x), x), x), E)
res = tf.reduce_mean(tf.square(res))
Your solution with expand_dims should be okay if your batch size is not too large. However, given that your original pseudo code loops over range(4), you should probably expand axes 1 and 2, instead of 0 and 1.
You can check the shape of the tensors to ensure that you're specifying the correct axes. For example,
batch_size = 8
E_np = np.random.rand(batch_size, 4, 30, 30)
E = K.variable(E_np) # shape=(8, 4, 30, 30)
x_ = K.expand_dims(E, 1)
y_ = K.expand_dims(E, 2)
s = x_ - y_ # shape=(8, 4, 4, 30, 30)
distances = tf.norm(s, axis=[-2, -1]) # shape=(8, 4, 4)
P = K.sum(distances, axis=[-2, -1]) # shape=(8,)
Now P will be the sum of pairwise distances between the 4 matrices for each of the 8 samples.
You can also verify that the values in P is the same as what would be computed in your pseudo code:
answer = []
for batch_idx in range(batch_size):
s = 0
for i in range(4):
for j in range(4):
a = E_np[batch_idx, i]
b = E_np[batch_idx, j]
s += np.sqrt(np.trace(np.dot(a - b, (a - b).T)))
answer.append(s)
print(answer)
[149.45960605637578, 147.2815068236368, 144.97487402393705, 146.04866735065312, 144.25537059201062, 148.9300986019226, 146.61229889228133, 149.34259789169045]
print(K.eval(P).tolist())
[149.4595947265625, 147.281494140625, 144.97488403320312, 146.04867553710938, 144.25537109375, 148.9300994873047, 146.6123046875, 149.34259033203125]
Tensorflow allows to compute the Frobenius norm via tf.norm function. In case of 2D matrices, it's equivalent to 1-norm.
The following solution isn't vectorized and assumes that the first dimension in E is known statically:
E = tf.random_normal(shape=[5, 3, 3], dtype=tf.float32)
F = tf.split(E, E.shape[0])
total = tf.reduce_sum([tf.norm(tensor=(lhs-rhs), ord=1, axis=(-2, -1)) for lhs in F for rhs in F])
Update:
An optimized vectorized version of the same code:
E = tf.random_normal(shape=[1024, 4, 30, 30], dtype=tf.float32)
lhs = tf.expand_dims(E, axis=1)
rhs = tf.expand_dims(E, axis=2)
total = tf.reduce_sum(tf.norm(tensor=(lhs - rhs), ord=1, axis=(-2, -1)))
Memory concerns: upon evaluating this code,
tf.contrib.memory_stats.MaxBytesInUse() reports that the peak memory consumption is 73729792 = 74Mb, which indicates relatively moderate overhead (the raw lhs-rhs tensor is 59Mb). Your OOM is most likely caused by the duplication of BATCH_SIZE dimension when you compute s = x_ - y_, because your batch size is much larger than the number of matrices (1024 vs 4).

Why does the cost in my implementation of a deep neural network increase after a few iterations?

I am a beginner in machine learning and neural networks. Recently, after watching Andrew Ng's lectures on deep learning, I tried to implement a binary classifier using deep neural networks on my own.
However, the cost of the function is expected to decrease after each iteration.
In my program, it decreases slightly in the beginning, but rapidly increases later. I tried to make changes in learning rate and number of iterations, but to no avail. I am very confused.
Here is my code
1. Neural network classifier class
class NeuralNetwork:
def __init__(self, X, Y, dimensions, alpha=1.2, iter=3000):
self.X = X
self.Y = Y
self.dimensions = dimensions # Including input layer and output layer. Let example be dimensions=4
self.alpha = alpha # Learning rate
self.iter = iter # Number of iterations
self.length = len(self.dimensions)-1
self.params = {} # To store parameters W and b for each layer
self.cache = {} # To store cache Z and A for each layer
self.grads = {} # To store dA, dZ, dW, db
self.cost = 1 # Initial value does not matter
def initialize(self):
np.random.seed(3)
# If dimensions is 4, then layer 0 and 3 are input and output layers
# So we only need to initialize w1, w2 and w3
# There is no need of w0 for input layer
for l in range(1, len(self.dimensions)):
self.params['W'+str(l)] = np.random.randn(self.dimensions[l], self.dimensions[l-1])*0.01
self.params['b'+str(l)] = np.zeros((self.dimensions[l], 1))
def forward_propagation(self):
self.cache['A0'] = self.X
# For last layer, ie, the output layer 3, we need to activate using sigmoid
# For layer 1 and 2, we need to use relu
for l in range(1, len(self.dimensions)-1):
self.cache['Z'+str(l)] = np.dot(self.params['W'+str(l)], self.cache['A'+str(l-1)]) + self.params['b'+str(l)]
self.cache['A'+str(l)] = relu(self.cache['Z'+str(l)])
l = len(self.dimensions)-1
self.cache['Z'+str(l)] = np.dot(self.params['W'+str(l)], self.cache['A'+str(l-1)]) + self.params['b'+str(l)]
self.cache['A'+str(l)] = sigmoid(self.cache['Z'+str(l)])
def compute_cost(self):
m = self.Y.shape[1]
A = self.cache['A'+str(len(self.dimensions)-1)]
self.cost = -1/m*np.sum(np.multiply(self.Y, np.log(A)) + np.multiply(1-self.Y, np.log(1-A)))
self.cost = np.squeeze(self.cost)
def backward_propagation(self):
A = self.cache['A' + str(len(self.dimensions) - 1)]
m = self.X.shape[1]
self.grads['dA'+str(len(self.dimensions)-1)] = -(np.divide(self.Y, A) - np.divide(1-self.Y, 1-A))
# Sigmoid derivative for final layer
l = len(self.dimensions)-1
self.grads['dZ' + str(l)] = self.grads['dA' + str(l)] * sigmoid_prime(self.cache['Z' + str(l)])
self.grads['dW' + str(l)] = 1 / m * np.dot(self.grads['dZ' + str(l)], self.cache['A' + str(l - 1)].T)
self.grads['db' + str(l)] = 1 / m * np.sum(self.grads['dZ' + str(l)], axis=1, keepdims=True)
self.grads['dA' + str(l - 1)] = np.dot(self.params['W' + str(l)].T, self.grads['dZ' + str(l)])
# Relu derivative for previous layers
for l in range(len(self.dimensions)-2, 0, -1):
self.grads['dZ'+str(l)] = self.grads['dA'+str(l)] * relu_prime(self.cache['Z'+str(l)])
self.grads['dW'+str(l)] = 1/m*np.dot(self.grads['dZ'+str(l)], self.cache['A'+str(l-1)].T)
self.grads['db'+str(l)] = 1/m*np.sum(self.grads['dZ'+str(l)], axis=1, keepdims=True)
self.grads['dA'+str(l-1)] = np.dot(self.params['W'+str(l)].T, self.grads['dZ'+str(l)])
def update_parameters(self):
for l in range(1, len(self.dimensions)):
self.params['W'+str(l)] = self.params['W'+str(l)] - self.alpha*self.grads['dW'+str(l)]
self.params['b'+str(l)] = self.params['b'+str(l)] - self.alpha*self.grads['db'+str(l)]
def train(self):
np.random.seed(1)
self.initialize()
for i in range(self.iter):
#print(self.params)
self.forward_propagation()
self.compute_cost()
self.backward_propagation()
self.update_parameters()
if i % 100 == 0:
print('Cost after {} iterations is {}'.format(i, self.cost))
2. Testing code for odd or even number classifier
import numpy as np
from main import NeuralNetwork
X = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
Y = np.array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]])
clf = NeuralNetwork(X, Y, [1, 1, 1], alpha=0.003, iter=7000)
clf.train()
3. Helper Code
import math
import numpy as np
def sigmoid_scalar(x):
return 1/(1+math.exp(-x))
def sigmoid_prime_scalar(x):
return sigmoid_scalar(x)*(1-sigmoid_scalar(x))
def relu_scalar(x):
if x > 0:
return x
else:
return 0
def relu_prime_scalar(x):
if x > 0:
return 1
else:
return 0
sigmoid = np.vectorize(sigmoid_scalar)
sigmoid_prime = np.vectorize(sigmoid_prime_scalar)
relu = np.vectorize(relu_scalar)
relu_prime = np.vectorize(relu_prime_scalar)
Output
I believe your cross-entropy derivative is wrong. Instead of this:
# WRONG!
self.grads['dA'+str(len(self.dimensions)-1)] = -(np.divide(self.Y, A) - np.divide(1-self.Y, A))
... do this:
# CORRECT
self.grads['dA'+str(len(self.dimensions)-1)] = np.divide(A - self.Y, (1 - A) * A)
See these lecture notes for the details. I think you meant the formula (5), but forgot 1-A. Anyway, use formula (6).

Avoiding optimization pitfalls when modeling an ordinal predicted variable in PyMC3

I am trying to model an ordinal predicted variable using PyMC3 based on the approach in chapter 23 of Doing Bayesian Data Analysis. I would like to determine a good starting value using find_MAP, but am receiving an optimization error.
The model:
import pymc3 as pm
import numpy as np
import theano
import theano.tensor as tt
# Some helper functions
def cdf(x, location=0, scale=1):
epsilon = np.array(1e-32, dtype=theano.config.floatX)
location = tt.cast(location, theano.config.floatX)
scale = tt.cast(scale, theano.config.floatX)
div = tt.sqrt(2 * scale ** 2 + epsilon)
div = tt.cast(div, theano.config.floatX)
erf_arg = (x - location) / div
return .5 * (1 + tt.erf(erf_arg + epsilon))
def percent_to_thresh(idx, vect):
return 5 * tt.sum(vect[:idx + 1]) + 1.5
def full_thresh(thresh):
idxs = tt.arange(thresh.shape[0] - 1)
thresh_mod, updates = theano.scan(fn=percent_to_thresh,
sequences=[idxs],
non_sequences=[thresh])
return tt.concatenate([[-1 * np.inf, 1.5], thresh_mod, [6.5, np.inf]])
def compute_ps(thresh, location, scale):
f_thresh = full_thresh(thresh)
return cdf(f_thresh[1:], location, scale) - cdf(f_thresh[:-1], location, scale)
# Generate data
real_ps = [0.05, 0.05, 0.1, 0.1, 0.2, 0.3, 0.2]
data = np.random.choice(7, size=1000, p=real_ps)
# Run model
with pm.Model() as model:
mu = pm.Normal('mu', mu=4, sd=3)
sigma = pm.Uniform('sigma', lower=0.1, upper=70)
thresh = pm.Dirichlet('thresh', a=np.ones(5))
cat_p = compute_ps(thresh, mu, sigma)
results = pm.Categorical('results', p=cat_p, observed=data)
with model:
start = pm.find_MAP()
trace = pm.sample(2000, start=start)
When running this, I receive the following error:
Applied interval-transform to sigma and added transformed sigma_interval_ to model.
Applied stickbreaking-transform to thresh and added transformed thresh_stickbreaking_ to model.
Traceback (most recent call last):
File "cm_net_log.v1-for_so.py", line 53, in <module>
start = pm.find_MAP()
File "/usr/local/lib/python3.5/site-packages/pymc3/tuning/starting.py", line 133, in find_MAP
specific_errors)
ValueError: Optimization error: max, logp or dlogp at max have non-finite values. Some values may be outside of distribution support. max: {'thresh_stickbreaking_': array([-1.04298465, -0.48661088, -0.84326554, -0.44833646]), 'sigma_interval_': array(-2.220446049250313e-16), 'mu': array(7.68422528308479)} logp: array(-3506.530143064723) dlogp: array([ 1.61013190e-06, nan, -6.73994118e-06,
-6.93873894e-06, 6.03358122e-06, 3.18954680e-06])Check that 1) you don't have hierarchical parameters, these will lead to points with infinite density. 2) your distribution logp's are properly specified. Specific issues:
My questions:
How can I determine why dlogp is nan at certain points?
Is there a different way that I can express this model to avoid dlogp being nan?
Also worth noting:
This model runs fine if I don't find_MAP and use a Metropolis sampler. However, I'd like to have the flexibility of using other samplers as this model becomes more complex.
I have a suspicion that the issue is due to the relationship between the thresholds and the normal distribution, but I don't know how to disentangle them for the optimization.
Regarding question 2: I expressed the model for the ordinal predicted variable (single group) differently; I used the Theano #as_op decorator for a function that calculates probabilities for the outcomes. That also explains why I cannot use find_MAP() or gradient based samplers: Theano cannot calculate a gradient for the custom function. (http://pymc-devs.github.io/pymc3/notebooks/getting_started.html#Arbitrary-deterministics)
# Number of outcomes
nYlevels = df.Y.cat.categories.size
thresh = [k + .5 for k in range(1, nYlevels)]
thresh_obs = np.ma.asarray(thresh)
thresh_obs[1:-1] = np.ma.masked
#as_op(itypes=[tt.dvector, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def outcome_probabilities(theta, mu, sigma):
out = np.empty(nYlevels)
n = norm(loc=mu, scale=sigma)
out[0] = n.cdf(theta[0])
out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])])
out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])])
out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])])
out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])])
out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])])
out[6] = 1 - n.cdf(theta[5])
return out
with pm.Model() as ordinal_model_single:
theta = pm.Normal('theta', mu=thresh, tau=np.repeat(.5**2, len(thresh)),
shape=len(thresh), observed=thresh_obs, testval=thresh[1:-1])
mu = pm.Normal('mu', mu=nYlevels/2.0, tau=1.0/(nYlevels**2))
sigma = pm.Uniform('sigma', nYlevels/1000.0, nYlevels*10.0)
pr = outcome_probabilities(theta, mu, sigma)
y = pm.Categorical('y', pr, observed=df.Y.cat.codes.as_matrix())
http://nbviewer.jupyter.org/github/JWarmenhoven/DBDA-python/blob/master/Notebooks/Chapter%2023.ipynb