Sequential sampling from conditional multivariate normal - gaussian-process

I'm trying to sequentially sample from a Gaussian Process prior.
The problem is that the samples eventually converge to zero or diverge to infinity.
I'm using the basic conditionals described e.g. here
Note: the kernel(X,X) function returns the squared exponential kernel with isometric noise.
Here is my code:
n = 32
x_grid = np.linspace(-5,5,n)
x_all = []
y_all = []
for x in x_grid:
x_all = [x] + x_all
X = np.array(x_all).reshape(-1, 1)
# Mean and covariance of the prior
mu = np.zeros((X.shape), np.float)
cov = kernel(X, X)
if len(mu)==1: # first sample is not conditional
y = np.random.randn()*cov + mu
else:
# condition on all previous samples
u1 = mu[0]
u2 = mu[1:]
y2 = np.atleast_2d(np.array(y_all)).T
C11 = cov[:1,:1] # dependent sample
C12 = np.atleast_2d(cov[0,1:])
C21 = np.atleast_2d(cov[1:,0]).T
C22 = np.atleast_2d(cov[1:, 1:])
C22_ = la.inv(C22)
u = u1 + np.dot(C12, np.dot(C22_, (y2 - u2)))
C22_xC21 = np.dot(C22_, C21)
C_minus = np.dot(C12, C22_xC21) # this weirdly becomes larger than C!
C = C11 - C_minus
y = u + np.random.randn()*C
y_all = [y.flatten()[0]] + y_all
Here's an example with 32 samples, where it collapses:
enter image description here
Here's an example with 34 samples, where it explodes:
enter image description here
(for this particular kernel, 34 is the number of samples at which (or more) the samples start to diverge.

Related

Need to plot multiple values over each number of iterations (python help)

I'm trying to plot the multiple values one gets for 'f_12' over a certain number of iterations. It should look something like points with high oscillations when there is low iterations 'N' and then it converges to a rough value of 0.204. I'm getting the correct outputs for 'f_12' but I'm having a really hard time doing the plots. New to python here.
start = time.time()
# looking for F_12 via monte carlo method
# Inputs
# N = number of rays to generate
N = 1000
# w = width of plates
w = 1
# h = vertical seperation of plates
# L = horizontal offset of plates (L=w=h)
L = 1
h = 1
p_points = 100
# counter for number of rays and number of hits
rays = 0
hits = 0
while rays < N:
rays = rays + 1
# random origin of rays along w on surface 1
Rx = random.uniform(0, 1)
Rt = random.uniform(0, 1)
Rph = random.uniform(0, 1)
x1 = Rx * w
# polar and azimuth angles - random ray directions
theta = np.arcsin(np.sqrt(Rt))
phi = 2*np.pi*Rph
# theta = np.arcsin(Rt)
xi = x1 + h*np.tan(theta)*np.cos(phi)
if xi >= L and xi <= (L+w):
hit = 1
else:
hit = 0
hits = hits + hit
gap = N/ p_points
r = rays%gap
if r == 0:
F = hits/ rays
plt.figure(figsize=(8, 4))
plt.plot(N, F, linewidth=2)
plt.xlabel("N - Rays")
plt.ylabel("F_12")
plt.show()
f_12 = hits/ N
print(f"F_12 = {f_12} at N = {N} iterations")
# Grab Currrent Time After Running the Code
end = time.time()
#Subtract Start Time from The End Time
total_time = end - start
f_time = round(total_time)
print(f"Running time = {f_time} seconds")

How to implement a weighted product of likelihoods for multiple random variables in pyMC3?

I need to build a super-likelihood function from several random variables. The distribution of each variable is standard.
Using two random variables as an example, the target super-likelihood is like this:
S = F1^w1 * F2^w2 (s.t. w1 + w2 = 1)
or equivalently,
logS = w1 logF1 + w2 log F1 (s.t. w1 + w2 = 1).
Where F1 ~ Normal distribution and F2 ~ Bernoulli distribution
I use the following codes
data = <load my data>
[w1,w2] = [0.5,0.5]
with Model() as model:
mu = pm.Uniform('mu',lower=0,upper=1)
sd = pm.Uniform('sd',lower=0,upper=1)
p = pm.Uniform('p',lower=0,upper=1)
F1 = pm.Normal("F1", mu = mu, sigma = sd)
F1 = pm.Bernoulli("F2",p)
S = pm.Deterministic('S',F1**w1*F2**w2, observed=data)
step = Metropolis()
trace = pm.sample(2000, step=step)
But it does not work.
Please help to implement such a weighted likelihood model in pyMC3.
Seems like you are looking for the Mixture function: https://docs.pymc.io/api/distributions/mixture.html#pymc3.distributions.mixture.Mixture

Probabilistic Record Linkage in Pandas

I have two dataframes (X & Y). I would like to link them together and to predict the probability that each potential match is correct.
X = pd.DataFrame({'A': ["One", "Two", "Three"]})
Y = pd.DataFrame({'A': ["One", "To", "Free"]})
Method A
I have not yet fully understood the theory but there is an approach presented in:
Sayers, A., Ben-Shlomo, Y., Blom, A.W. and Steele, F., 2015. Probabilistic record linkage. International journal of epidemiology, 45(3), pp.954-964.
Here is my attempt to implementat it in Pandas:
# Probability that Matches are True Matches
m = 0.95
# Probability that non-Matches are True non-Matches
u = min(len(X), len(Y)) / (len(X) * len(Y))
# Priors
M_Pr = u
U_Pr = 1 - M_Pr
O_Pr = M_Pr / U_Pr # Prior odds of a match
# Combine the dataframes
X['key'] = 1
Y['key'] = 1
Z = pd.merge(X, Y, on='key')
Z = Z.drop('key',axis=1)
X = X.drop('key',axis=1)
Y = Y.drop('key',axis=1)
# Levenshtein distance
def Levenshtein_distance(s1, s2):
if len(s1) > len(s2):
s1, s2 = s2, s1
distances = range(len(s1) + 1)
for i2, c2 in enumerate(s2):
distances_ = [i2+1]
for i1, c1 in enumerate(s1):
if c1 == c2:
distances_.append(distances[i1])
else:
distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
distances = distances_
return distances[-1]
L_D = np.vectorize(Levenshtein_distance, otypes=[float])
Z["D"] = L_D(Z['A_x'], Z['A_y'])
# Max string length
def Max_string_length(X, Y):
return max(len(X), len(Y))
M_L = np.vectorize(Max_string_length, otypes=[float])
Z["L"] = M_L(Z['A_x'], Z['A_y'])
# Agreement weight
def Agreement_weight(D, L):
return 1 - ( D / L )
A_W = np.vectorize(Agreement_weight, otypes=[float])
Z["C"] = A_W(Z['D'], Z['L'])
# Likelihood ratio
def Likelihood_ratio(C):
return (m/u) - ((m/u) - ((1-m) / (1-u))) * (1-C)
L_R = np.vectorize(Likelihood_ratio, otypes=[float])
Z["G"] = L_R(Z['C'])
# Match weight
def Match_weight(G):
return math.log(G) * math.log(2)
M_W = np.vectorize(Match_weight, otypes=[float])
Z["R"] = M_W(Z['G'])
# Posterior odds
def Posterior_odds(R):
return math.exp( R / math.log(2)) * O_Pr
P_O = np.vectorize(Posterior_odds, otypes=[float])
Z["O"] = P_O(Z['R'])
# Probability
def Probability(O):
return O / (1 + O)
Pro = np.vectorize(Probability, otypes=[float])
Z["P"] = Pro(Z['O'])
I have verified that this gives the same results as in the paper. Here is a sensitivity check on m, showing that it doesn't make a lot of difference:
Method B
These assumptions won't apply to all applications but in some cases each row of X should match a row of Y. In that case:
The probabilities should sum to 1
If there are many credible candidates to match to then that should reduce the probability of getting the right one
then:
X["I"] = X.index
# Combine the dataframes
X['key'] = 1
Y['key'] = 1
Z = pd.merge(X, Y, on='key')
Z = Z.drop('key',axis=1)
X = X.drop('key',axis=1)
Y = Y.drop('key',axis=1)
# Levenshtein distance
def Levenshtein_distance(s1, s2):
if len(s1) > len(s2):
s1, s2 = s2, s1
distances = range(len(s1) + 1)
for i2, c2 in enumerate(s2):
distances_ = [i2+1]
for i1, c1 in enumerate(s1):
if c1 == c2:
distances_.append(distances[i1])
else:
distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
distances = distances_
return distances[-1]
L_D = np.vectorize(Levenshtein_distance, otypes=[float])
Z["D"] = L_D(Z['A_x'], Z['A_y'])
# Max string length
def Max_string_length(X, Y):
return max(len(X), len(Y))
M_L = np.vectorize(Max_string_length, otypes=[float])
Z["L"] = M_L(Z['A_x'], Z['A_y'])
# Agreement weight
def Agreement_weight(D, L):
return 1 - ( D / L )
A_W = np.vectorize(Agreement_weight, otypes=[float])
Z["C"] = A_W(Z['D'], Z['L'])
# Normalised Agreement Weight
T = Z .groupby('I') .agg({'C' : sum})
D = pd.DataFrame(T)
D.columns = ['T']
J = Z.set_index('I').join(D)
J['P1'] = J['C'] / J['T']
Comparing it against Method A:
Method C
This combines method A with method B:
# Normalised Probability
U = Z .groupby('I') .agg({'P' : sum})
E = pd.DataFrame(U)
E.columns = ['U']
K = Z.set_index('I').join(E)
K['P1'] = J['P1']
K['P2'] = K['P'] / K['U']
We can see that method B (P1) doesn't take account of uncertainty whereas method C (P2) does.

How to avoid dying weights/gradients in custom LSTM cell in tensorflow. What shall be ideal loss function?

I am trying to train a name generation LSTM network. I am not using pre-defined tensorflow cells (like tf.contrib.rnn.BasicLSTMCell, etc). I have created LSTM cell myself. But the error is not reducing beyond a limit. It only decreases 30% from what it is initially (when random weights were used in forward propagation) and then it starts increasing. Also, the gradients and weights become very small after few thousand training steps.
I think the reason for non-convergence can be one of two:
1. The design of tensorflow graph i have created OR
2. The loss function i used.
I am feeding one hot vectors of each character of the word for each time-step of the network. The code i have used for graph generation and loss function is as follows. Tx is the number of time steps in RNN, n_x,n_a,n_y are length of the input vectors, LSTM cell vector and output vector respectively.
Will be great if someone can help me in identifying what i am doing wrong here.
n_x = vocab_size
n_y = vocab_size
n_a = 100
Tx = 50
Ty = Tx
with open("trainingnames_file.txt") as f:
examples = f.readlines()
examples = [x.lower().strip() for x in examples]
X0 = [[char_to_ix[x1] for x1 in list(x)] for x in examples]
X1 = np.array([np.concatenate([np.array(x), np.zeros([Tx-len(x)])]) for x in X0], dtype=np.int32).T
Y0 = [(x[1:] + [char_to_ix["\n"]]) for x in X0]
Y1 = np.array([np.concatenate([np.array(y), np.zeros([Ty-len(y)])]) for y in Y0], dtype=np.int32).T
m = len(X0)
Wf = tf.get_variable(name="Wf", shape = [n_a,(n_a+n_x)])
Wu = tf.get_variable(name="Wu", shape = [n_a,(n_a+n_x)])
Wc = tf.get_variable(name="Wc", shape = [n_a,(n_a+n_x)])
Wo = tf.get_variable(name="Wo", shape = [n_a,(n_a+n_x)])
Wy = tf.get_variable(name="Wy", shape = [n_y,n_a])
bf = tf.get_variable(name="bf", shape = [n_a,1])
bu = tf.get_variable(name="bu", shape = [n_a,1])
bc = tf.get_variable(name="bc", shape = [n_a,1])
bo = tf.get_variable(name="bo", shape = [n_a,1])
by = tf.get_variable(name="by", shape = [n_y,1])
X_input = tf.placeholder(dtype = tf.int32, shape = [Tx,None])
Y_input = tf.placeholder(dtype = tf.int32, shape = [Ty,None])
X = tf.one_hot(X_input, axis = 0, depth = n_x)
Y = tf.one_hot(Y_input, axis = 0, depth = n_y)
X.shape
a_prev = tf.zeros(shape = [n_a,m])
c_prev = tf.zeros(shape = [n_a,m])
a_all = []
c_all = []
for i in range(Tx):
ac = tf.concat([a_prev,tf.squeeze(tf.slice(input_=X,begin=[0,i,0],size=[n_x,1,m]))], axis=0)
ct = tf.tanh(tf.matmul(Wc,ac) + bc)
tug = tf.sigmoid(tf.matmul(Wu,ac) + bu)
tfg = tf.sigmoid(tf.matmul(Wf,ac) + bf)
tog = tf.sigmoid(tf.matmul(Wo,ac) + bo)
c = tf.multiply(tug,ct) + tf.multiply(tfg,c_prev)
a = tf.multiply(tog,tf.tanh(c))
y = tf.nn.softmax(tf.matmul(Wy,a) + by, axis = 0)
a_all.append(a)
c_all.append(c)
a_prev = a
c_prev = c
y_ex = tf.expand_dims(y,axis=1)
if i == 0:
y_all = y_ex
else:
y_all = tf.concat([y_all,y_ex], axis=1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y,logits=y_all,dim=0))
opt = tf.train.AdamOptimizer()
train = opt.minimize(loss)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
o = sess.run(loss, feed_dict = {X_input:X1,Y_input:Y1})
print(o.shape)
print(o)
sess.run(train, feed_dict = {X_input:X1,Y_input:Y1})
o = sess.run(loss, feed_dict = {X_input:X1,Y_input:Y1})
print(o)

Explain np.polyfit and np.polyval for a scatter plot

I have to make a scatter plot and liner fit to my data. prediction_08.Dem_Adv and prediction_08.Dem_Win are two column of datas. I know that np.polyfit returns coefficients. But what is np.polyval doing here? I saw the documentation, but the explanation is confusing. can some one explain to me clearly
plt.plot(prediction_08.Dem_Adv, prediction_08.Dem_Win, 'o')
plt.xlabel("2008 Gallup Democrat Advantage")
plt.ylabel("2008 Election Democrat Win")
fit = np.polyfit(prediction_08.Dem_Adv, prediction_08.Dem_Win, 1)
x = np.linspace(-40, 80, 10)
y = np.polyval(fit, x)
plt.plot(x, y)
print fit
np.polyval is applying the polynomial function which you got using polyfit. If you get y = mx+ c relationship. The np.polyval function will multiply your x values with fit[0] and add fit[1]
Polyval according to Docs:
N = len(p)
y = p[0]*x**(N-1) + p[1]*x**(N-2) + ... + p[N-2]*x + p[N-1]
If the relationship is y = ax**2 + bx + c,
fit = np.polyfit(x,y,2)
a = fit[0]
b = fit[1]
c = fit[2]
If you do not want to use the polyval function:
y = a*(x**2) + b*(x) + c
This will create the same output as polyval.