How to use defined function in functional API keras - tensorflow

I'm very confused about how to use a Lambda layer with the functional API.
Please tell me which of the following is correct.
For example
def magic(x):
    """Return ``x + x`` (the input added to itself)."""
    x2 = x + x
    return x2
D = Input((n,))
E = Lambda(magic)(D)
d = Model(~~~~)
or
D = Input((n,))
E = magic(D)
d = Model(~~~~)
and another case
def magic():
    """Return a freshly sampled array of shape (3, 1) drawn with ``randn``."""
    # NumPy has no top-level ``randn``; the sampler lives in ``np.random``.
    x2 = np.random.randn(3, 1)
    return x2
D = Input((n,))
E = Lambda(magic)
d = Model(~~~~)
or
D = Input((n,))
E = magic()
d = Model(~~~~)
please save me !!

You can use the first way if you want to wrap your custom operation as a Layer, as follows:
def magic(x):
    """Custom operation to wrap in a Lambda layer: return ``x + x``."""
    x2 = x + x
    return x2
# Build a small functional-API model with the custom operation between two
# Dense layers.
inp = tf.keras.Input(shape=(2,))
x = tf.keras.layers.Dense(4, name="id_1")(inp)
# Ways to define the custom layer: wrap the function in a Lambda layer
x = tf.keras.layers.Lambda(lambda val: magic(val))(x)
# or
# x = tf.keras.layers.Lambda(magic)(x)
# or
# x = tf.keras.layers.Lambda(lambda val: (val + val))(x)
x = tf.keras.layers.Dense(4, name="id_2")(x)
model = tf.keras.Model(inputs=inp, outputs=x, name="inner_model")
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

Related

Probabilistic Record Linkage in Pandas

I have two dataframes (X & Y). I would like to link them together and to predict the probability that each potential match is correct.
X = pd.DataFrame({'A': ["One", "Two", "Three"]})
Y = pd.DataFrame({'A': ["One", "To", "Free"]})
Method A
I have not yet fully understood the theory but there is an approach presented in:
Sayers, A., Ben-Shlomo, Y., Blom, A.W. and Steele, F., 2015. Probabilistic record linkage. International journal of epidemiology, 45(3), pp.954-964.
Here is my attempt to implement it in Pandas:
# Probability that Matches are True Matches
m = 0.95
# Probability that non-Matches are True non-Matches
# Computed as min(|X|, |Y|) divided by the number of candidate pairs |X| * |Y|.
u = min(len(X), len(Y)) / (len(X) * len(Y))
# Priors
# The prior match probability reuses u; the non-match prior is its complement.
M_Pr = u
U_Pr = 1 - M_Pr
O_Pr = M_Pr / U_Pr # Prior odds of a match
# Combine the dataframes
# Cross join: a constant key on both sides makes merge pair every row of X
# with every row of Y. The shared column 'A' comes out as 'A_x' / 'A_y'.
X['key'] = 1
Y['key'] = 1
Z = pd.merge(X, Y, on='key')
# Remove the helper key again from the result and from both inputs.
Z = Z.drop('key',axis=1)
X = X.drop('key',axis=1)
Y = Y.drop('key',axis=1)
# Levenshtein distance
def Levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-item insertions, deletions
    and substitutions needed to turn ``s1`` into ``s2``. Comparison uses
    ``==`` on the items, so for strings it is case-sensitive.
    """
    # Keep the shorter sequence as s1 so each row of the table is as short
    # as possible.
    if len(s1) > len(s2):
        s1, s2 = s2, s1
    # Row for the empty prefix of s2: turning s1[:j] into "" costs j deletions.
    distances = list(range(len(s1) + 1))
    for i2, c2 in enumerate(s2):
        # First cell: turning "" into s2[:i2 + 1] costs i2 + 1 insertions.
        distances_ = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                # Matching items carry the diagonal cost over unchanged.
                distances_.append(distances[i1])
            else:
                # Cheapest of substitution, deletion and insertion, plus one.
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    return distances[-1]
L_D = np.vectorize(Levenshtein_distance, otypes=[float])
Z["D"] = L_D(Z['A_x'], Z['A_y'])
# Max string length
def Max_string_length(X, Y):
    """Return the length of the longer of the two inputs."""
    return max(len(X), len(Y))
M_L = np.vectorize(Max_string_length, otypes=[float])
Z["L"] = M_L(Z['A_x'], Z['A_y'])
# Agreement weight
def Agreement_weight(D, L):
    """Return the agreement score ``1 - D / L`` for a candidate pair.

    ``D`` is the edit distance and ``L`` the longer string length, so the
    score is 1 for identical strings and 0 when every position differs.
    """
    # Two empty strings have L == 0; they are identical, so report full
    # agreement instead of dividing by zero.
    if L == 0:
        return 1.0
    return 1 - (D / L)
A_W = np.vectorize(Agreement_weight, otypes=[float])
Z["C"] = A_W(Z['D'], Z['L'])
# Likelihood ratio
def Likelihood_ratio(C):
    """Return the likelihood ratio for an agreement score ``C``.

    Interpolates linearly between ``(1 - m) / (1 - u)`` at ``C == 0`` and
    ``m / u`` at ``C == 1``, using the module-level ``m`` and ``u``.
    """
    full_agreement = m / u
    full_disagreement = (1 - m) / (1 - u)
    return full_agreement - (full_agreement - full_disagreement) * (1 - C)
L_R = np.vectorize(Likelihood_ratio, otypes=[float])
Z["G"] = L_R(Z['C'])
# Match weight
def Match_weight(G):
    """Return the match weight ``ln(G) * ln(2)`` for a likelihood ratio ``G``.

    NOTE(review): a base-2 log weight would normally be ``ln(G) / ln(2)``.
    This function multiplies instead, and ``Posterior_odds`` undoes it with
    the matching division, so the resulting odds are unaffected. Confirm the
    intended definition against the paper before reusing the weight itself.
    """
    return math.log(G) * math.log(2)
M_W = np.vectorize(Match_weight, otypes=[float])
Z["R"] = M_W(Z['G'])
# Posterior odds
def Posterior_odds(R):
    """Return the posterior odds of a match for a match weight ``R``.

    ``exp(R / ln(2))`` recovers the likelihood ratio that ``Match_weight``
    encoded; it is then scaled by the module-level prior odds ``O_Pr``.
    """
    return math.exp(R / math.log(2)) * O_Pr
P_O = np.vectorize(Posterior_odds, otypes=[float])
Z["O"] = P_O(Z['R'])
# Probability
def Probability(O):
    """Convert odds ``O`` into the probability ``O / (1 + O)``."""
    return O / (1 + O)
Pro = np.vectorize(Probability, otypes=[float])
Z["P"] = Pro(Z['O'])
I have verified that this gives the same results as in the paper. Here is a sensitivity check on m, showing that it doesn't make a lot of difference:
Method B
These assumptions won't apply to all applications but in some cases each row of X should match a row of Y. In that case:
The probabilities should sum to 1
If there are many credible candidates to match to then that should reduce the probability of getting the right one
then:
X["I"] = X.index
# Combine the dataframes
X['key'] = 1
Y['key'] = 1
Z = pd.merge(X, Y, on='key')
Z = Z.drop('key',axis=1)
X = X.drop('key',axis=1)
Y = Y.drop('key',axis=1)
# Levenshtein distance
def Levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-item insertions, deletions
    and substitutions needed to turn ``s1`` into ``s2``. Comparison uses
    ``==`` on the items, so for strings it is case-sensitive.
    """
    # Keep the shorter sequence as s1 so each row of the table is as short
    # as possible.
    if len(s1) > len(s2):
        s1, s2 = s2, s1
    # Row for the empty prefix of s2: turning s1[:j] into "" costs j deletions.
    distances = list(range(len(s1) + 1))
    for i2, c2 in enumerate(s2):
        # First cell: turning "" into s2[:i2 + 1] costs i2 + 1 insertions.
        distances_ = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                # Matching items carry the diagonal cost over unchanged.
                distances_.append(distances[i1])
            else:
                # Cheapest of substitution, deletion and insertion, plus one.
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    return distances[-1]
L_D = np.vectorize(Levenshtein_distance, otypes=[float])
Z["D"] = L_D(Z['A_x'], Z['A_y'])
# Max string length
def Max_string_length(X, Y):
    """Return the length of the longer of the two inputs."""
    return max(len(X), len(Y))
M_L = np.vectorize(Max_string_length, otypes=[float])
Z["L"] = M_L(Z['A_x'], Z['A_y'])
# Agreement weight
def Agreement_weight(D, L):
    """Return the agreement score ``1 - D / L`` for a candidate pair.

    ``D`` is the edit distance and ``L`` the longer string length, so the
    score is 1 for identical strings and 0 when every position differs.
    """
    # Two empty strings have L == 0; they are identical, so report full
    # agreement instead of dividing by zero.
    if L == 0:
        return 1.0
    return 1 - (D / L)
A_W = np.vectorize(Agreement_weight, otypes=[float])
Z["C"] = A_W(Z['D'], Z['L'])
# Normalised Agreement Weight
# Sum the agreement weights of all candidates that share the same X row ('I').
T = Z .groupby('I') .agg({'C' : sum})
D = pd.DataFrame(T)
# Rename the summed column so it does not clash with 'C' in the join below.
D.columns = ['T']
# Attach each row's per-'I' total, then normalise so the candidates for one
# X row sum to 1.
J = Z.set_index('I').join(D)
J['P1'] = J['C'] / J['T']
Comparing it against Method A:
Method C
This combines method A with method B:
# Normalised Probability
# NOTE(review): this needs Z to carry both 'I' (added in Method B) and 'P'
# (computed in Method A); Method B rebuilds Z without the Method A columns,
# so presumably they were recomputed on the new Z first — confirm.
# Sum the Method A probabilities of all candidates sharing the same X row.
U = Z .groupby('I') .agg({'P' : sum})
E = pd.DataFrame(U)
E.columns = ['U']
K = Z.set_index('I').join(E)
# Carry over Method B's normalised weight for side-by-side comparison.
K['P1'] = J['P1']
# Normalise the probabilities so the candidates for one X row sum to 1.
K['P2'] = K['P'] / K['U']
We can see that method B (P1) doesn't take account of uncertainty whereas method C (P2) does.

Tensorflow: How to update only single variable at a time out of many variables based on conditions

k1 = tf.Variable(10.0)
k2 = tf.Variable(10.0)
pred = tf.pow(B, ?) / C
cost = tf.pow(pred_s1 - Y, 2)
optimizer = tf.train.AdamOptimizer(0.01).minimize(cost)
sess.run(optimizer, feed_dict{A:a, B:b, C:c})
Update:
pred = tf.pow(B, k1) / C if A == 0
pred = tf.pow(B, k2) / C if A == 1
Single prediction function which updates only one variable based on the value fed into placeholder 'A'
# Five independent variables, each of shape [1], randomly initialised.
s1 = tf.Variable(tf.random_normal([1]))
s2 = tf.Variable(tf.random_normal([1]))
s3 = tf.Variable(tf.random_normal([1]))
s4 = tf.Variable(tf.random_normal([1]))
s5 = tf.Variable(tf.random_normal([1]))
# Selector value, fed at run time.
D = tf.placeholder("float")
# Chain of tf.where switches: step k picks s_k when D lies in (k - 0.1, k + 0.1)
# and otherwise falls through to the previous selection, so s1 is the default.
s2_s = tf.where(tf.logical_and(1.9<D,D<2.1),x=s2,y=s1)
s3_s = tf.where(tf.logical_and(2.9<D,D<3.1),x=s3,y=s2_s)
s4_s = tf.where(tf.logical_and(3.9<D,D<4.1),x=s4,y=s3_s)
s5_s = tf.where(tf.logical_and(4.9<D,D<5.1),x=s5,y=s4_s)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Print the initial value of every variable for comparison with the selection.
print(sess.run([s1])[0], sess.run([s2])[0], sess.run([s3])[0], sess.run([s4])[0], sess.run([s5])[0])
# With D = 5 only the last condition holds, so this evaluates to s5.
print(sess.run(s5_s, feed_dict={D:5}))
sess.close()
Just use
pred = tf.pow(B, A*k2 + (1-A)* k1) / C
Which gives the switch. An alternative would be tf.where.

How to avoid dying weights/gradients in custom LSTM cell in tensorflow. What shall be ideal loss function?

I am trying to train a name-generation LSTM network. I am not using the pre-defined TensorFlow cells (like tf.contrib.rnn.BasicLSTMCell, etc.); I have created the LSTM cell myself. But the error does not reduce beyond a limit: it only decreases by about 30% from its initial value (when random weights were used in forward propagation) and then it starts increasing. Also, the gradients and weights become very small after a few thousand training steps.
I think the reason for non-convergence can be one of two:
1. The design of the TensorFlow graph I have created, OR
2. The loss function I used.
I am feeding one-hot vectors of each character of the word for each time step of the network. The code I have used for graph generation and the loss function is as follows. Tx is the number of time steps in the RNN; n_x, n_a, n_y are the lengths of the input vector, LSTM cell vector and output vector respectively.
It would be great if someone could help me identify what I am doing wrong here.
n_x = vocab_size
n_y = vocab_size
n_a = 100
Tx = 50
Ty = Tx
# Read the training names (one per line); the file is closed as soon as the
# lines are in memory.
with open("trainingnames_file.txt") as f:
    examples = f.readlines()
# Normalise: lower-case and strip surrounding whitespace / the newline.
examples = [x.lower().strip() for x in examples]
X0 = [[char_to_ix[x1] for x1 in list(x)] for x in examples]
X1 = np.array([np.concatenate([np.array(x), np.zeros([Tx-len(x)])]) for x in X0], dtype=np.int32).T
Y0 = [(x[1:] + [char_to_ix["\n"]]) for x in X0]
Y1 = np.array([np.concatenate([np.array(y), np.zeros([Ty-len(y)])]) for y in Y0], dtype=np.int32).T
m = len(X0)
Wf = tf.get_variable(name="Wf", shape = [n_a,(n_a+n_x)])
Wu = tf.get_variable(name="Wu", shape = [n_a,(n_a+n_x)])
Wc = tf.get_variable(name="Wc", shape = [n_a,(n_a+n_x)])
Wo = tf.get_variable(name="Wo", shape = [n_a,(n_a+n_x)])
Wy = tf.get_variable(name="Wy", shape = [n_y,n_a])
bf = tf.get_variable(name="bf", shape = [n_a,1])
bu = tf.get_variable(name="bu", shape = [n_a,1])
bc = tf.get_variable(name="bc", shape = [n_a,1])
bo = tf.get_variable(name="bo", shape = [n_a,1])
by = tf.get_variable(name="by", shape = [n_y,1])
X_input = tf.placeholder(dtype = tf.int32, shape = [Tx,None])
Y_input = tf.placeholder(dtype = tf.int32, shape = [Ty,None])
X = tf.one_hot(X_input, axis = 0, depth = n_x)
Y = tf.one_hot(Y_input, axis = 0, depth = n_y)
X.shape
a_prev = tf.zeros(shape = [n_a,m])
c_prev = tf.zeros(shape = [n_a,m])
a_all = []
c_all = []
# Unroll the hand-written LSTM cell over the Tx time steps, collecting the
# raw (pre-softmax) output scores of every step.
logits_steps = []
for i in range(Tx):
    # Slice out step i of the one-hot input and drop only the time axis, so a
    # batch of size 1 keeps its batch axis.
    x_t = tf.squeeze(tf.slice(input_=X, begin=[0, i, 0], size=[n_x, 1, m]), axis=[1])
    # Stack the previous activation on top of the current input.
    ac = tf.concat([a_prev, x_t], axis=0)
    ct = tf.tanh(tf.matmul(Wc, ac) + bc)      # candidate cell state
    tug = tf.sigmoid(tf.matmul(Wu, ac) + bu)  # update gate
    tfg = tf.sigmoid(tf.matmul(Wf, ac) + bf)  # forget gate
    tog = tf.sigmoid(tf.matmul(Wo, ac) + bo)  # output gate
    c = tf.multiply(tug, ct) + tf.multiply(tfg, c_prev)
    a = tf.multiply(tog, tf.tanh(c))
    logits = tf.matmul(Wy, a) + by
    # Per-step class probabilities, kept for inspection / sampling only.
    y = tf.nn.softmax(logits, axis=0)
    a_all.append(a)
    c_all.append(c)
    a_prev = a
    c_prev = c
    logits_steps.append(tf.expand_dims(logits, axis=1))
# Stack the per-step scores along a new time axis (axis 1).
logits_all = tf.concat(logits_steps, axis=1)
y_all = tf.nn.softmax(logits_all, axis=0)
# softmax_cross_entropy_with_logits_v2 applies softmax internally, so it must
# be given the raw logits. Feeding it y_all (already softmaxed) squashes the
# scores twice, which flattens the gradients and stalls training.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits_all, dim=0))
opt = tf.train.AdamOptimizer()
train = opt.minimize(loss)
init = tf.global_variables_initializer()
# Evaluate the loss, take a single optimiser step, and evaluate it again.
with tf.Session() as sess:
    sess.run(init)
    o = sess.run(loss, feed_dict={X_input: X1, Y_input: Y1})
    print(o.shape)
    print(o)
    sess.run(train, feed_dict={X_input: X1, Y_input: Y1})
    o = sess.run(loss, feed_dict={X_input: X1, Y_input: Y1})
    print(o)

How to deal with many columns in Tensorflow

I am studying Tensorflow, and I have a question.
The original code is:
Columns = ['size' , 'room', 'price']
x1 = tf.Variable(np.array(columns['size']).astype(np.float32))
x2 = tf.Variable(np.array(columns['room']).astype(np.float32))
y = tf.Variable(np.array(columns['price']).astype(np.float32))
train_X1 = np.asarray([i[1] for i in data.loc[:,['size']].to_records()],dtype="float")
train_X2 = np.asarray([i[1] for i in data.loc[:,['room']].to_records()],dtype="float")
train_X = np.asarray([i[1] for i in data.loc[:,'size':'room'].to_records()],dtype="float")
train_Y = np.asarray([i[1] for i in data.loc[:,['price']].to_records()],dtype="float")
n_samples = train_X.shape[0]
X1 = tf.placeholder("float")
X2 = tf.placeholder("float")
Y = tf.placeholder("float")
W1 = tf.Variable(rng.randn(), name="weight1")
W2 = tf.Variable(rng.randn(), name="weight2")
b = tf.Variable(rng.randn(), name="bias")
sum_list = [tf.multiply(X1,W1),tf.multiply(X2,W2)]
pred_X = tf.add_n(sum_list)
pred = tf.add(pred_X,b)
cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)
If I have many columns, like this:
Columns = ['price','lotsize','bedrooms','bathrms', 'stories', 'garagepl', 'driveway', 'recroom', \
'fullbase', 'gashw', 'airco', 'prefarea']
How do I deal with many columns in TensorFlow?
(Dependent variable = 'price'; independent variables = all the others)
Do I have to create a separate training set and weight W for each column?

NameError when running GMRes following FEniCS discretisation

I've discretised a diffusion equation with FEniCS as follows:
def DiscretiseEquation(h):
    """Discretise the diffusion problem on an ``h`` x ``h`` unit-square mesh.

    Assembles the stiffness matrix and right-hand side with FEniCS, applies a
    homogeneous Dirichlet boundary condition, converts the system to SciPy
    CSR form and solves it directly.

    Returns:
        A tuple ``(mat, rhs, u)``: the SciPy CSR system matrix, the
        right-hand-side array, and the FEniCS ``Function`` holding the direct
        solution. Everything built here is local to the function, so callers
        must take the system from the return value, e.g.
        ``A, b, u = DiscretiseEquation(100)``.
    """
    mesh = UnitSquareMesh(h, h)
    V = FunctionSpace(mesh, 'Lagrange', 1)

    def on_boundary(x, on_boundary):
        return on_boundary

    bc_value = Constant(0.0)
    boundary_condition = DirichletBC(V, bc_value, on_boundary)

    class RandomDiffusionField(Expression):
        """Piecewise-constant random field ``exp(-randn)`` on an m x n grid."""

        def __init__(self, m, n, element):
            self._rand_field = np.exp(-np.random.randn(m, n))
            self._m = m
            self._n = n
            self._ufl_element = element

        def eval(self, value, x):
            # np.int was removed from NumPy; the builtin int is equivalent.
            x_index = int(np.floor(self._m * x[0]))
            y_index = int(np.floor(self._n * x[1]))
            # Clamp so a coordinate of exactly 1.0 stays inside the grid.
            i = min(x_index, self._m - 1)
            j = min(y_index, self._n - 1)
            value[0] = self._rand_field[i, j]

        def value_shape(self):
            return (1, )

    class RandomRhs(Expression):
        """Piecewise-constant random source term ``randn`` on an m x n grid."""

        def __init__(self, m, n, element):
            self._rand_field = np.random.randn(m, n)
            self._m = m
            self._n = n
            self._ufl_element = element

        def eval(self, value, x):
            # np.int was removed from NumPy; the builtin int is equivalent.
            x_index = int(np.floor(self._m * x[0]))
            y_index = int(np.floor(self._n * x[1]))
            # Clamp so a coordinate of exactly 1.0 stays inside the grid.
            i = min(x_index, self._m - 1)
            j = min(y_index, self._n - 1)
            value[0] = self._rand_field[i, j]

        def value_shape(self):
            return (1, )

    u = TrialFunction(V)
    v = TestFunction(V)
    random_field = RandomDiffusionField(100, 100, element=V.ufl_element())
    zero = Expression("0", element=V.ufl_element())
    one = Expression("1", element=V.ufl_element())
    # Diagonal diffusion tensor: random in x, constant 1 in y.
    diffusion = as_matrix(((random_field, zero), (zero, one)))
    a = inner(diffusion * grad(u), grad(v)) * dx
    L = RandomRhs(h, h, element=V.ufl_element()) * v * dx
    A = assemble(a)
    b = assemble(L)
    boundary_condition.apply(A, b)
    # Convert the assembled backend matrix into a SciPy CSR matrix.
    A = as_backend_type(A).mat()
    (indptr, indices, data) = A.getValuesCSR()
    mat = csr_matrix((data, indices, indptr), shape=A.size)
    rhs = b.array()
    # Solving
    x = spsolve(mat, rhs)
    # Conversion to a FEniCS function
    u = Function(V)
    u.vector()[:] = x
    # Hand the system back to the caller; without this, mat and rhs vanish
    # when the function returns and are undefined at module level.
    return mat, rhs, u
I am running the GMRES solver as normal. The callback argument is a separate iteration counter I've defined.
DiscretiseEquation(100)
A = mat
b = rhs
x, info = gmres(A, b, callback = IterCount())
The routine raises a NameError, stating that 'mat' is not defined:
NameError Traceback (most recent call last)
<ipython-input-18-e096b2eea097> in <module>()
1 DiscretiseEquation(200)
----> 2 A = mat
3 b = rhs
4 x_200, info_200 = gmres(A, b, callback = IterCount())
5 gmres_res = closure_variables["residuals"]
NameError: name 'mat' is not defined
As far as I'm aware, it should be defined when I call the DiscretiseEquation function?