How to deal with many columns in Tensorflow - tensorflow

I am studying TensorFlow, and I have a question.
The original code is:
Columns = ['size' , 'room', 'price']
x1 = tf.Variable(np.array(columns['size']).astype(np.float32))
x2 = tf.Variable(np.array(columns['room']).astype(np.float32))
y = tf.Variable(np.array(columns['price']).astype(np.float32))
train_X1 = np.asarray([i[1] for i in data.loc[:,['size']].to_records()],dtype="float")
train_X2 = np.asarray([i[1] for i in data.loc[:,['room']].to_records()],dtype="float")
train_X = np.asarray([i[1] for i in data.loc[:,'size':'room'].to_records()],dtype="float")
train_Y = np.asarray([i[1] for i in data.loc[:,['price']].to_records()],dtype="float")
n_samples = train_X.shape[0]
X1 = tf.placeholder("float")
X2 = tf.placeholder("float")
Y = tf.placeholder("float")
W1 = tf.Variable(rng.randn(), name="weight1")
W2 = tf.Variable(rng.randn(), name="weight2")
b = tf.Variable(rng.randn(), name="bias")
sum_list = [tf.multiply(X1,W1),tf.multiply(X2,W2)]
pred_X = tf.add_n(sum_list)
pred = tf.add(pred_X,b)
cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)
If I have many columns, like this:
Columns = ['price','lotsize','bedrooms','bathrms', 'stories', 'garagepl', 'driveway', 'recroom', \
'fullbase', 'gashw', 'airco', 'prefarea']
How do I deal with many columns in TensorFlow?
(Independent variable = 'price'; dependent variables = all the others.)
Do I have to create a separate training set and a separate weight W for each column?

Related

Pytorch equivalent of TensorFlow

I am trying to reproduce this code in PyTorch. I have been trying for days, but reading the TensorFlow docs and then the PyTorch docs has left me totally confused.
# Keras CRNN: three convolution blocks, a reshape into a 64-step sequence,
# two bidirectional LSTMs, and a per-step softmax over the character set.
# The layer classes and `num_of_characters` are expected to be in scope
# already; the typographic quotes of the pasted version are replaced with
# plain ASCII quotes so the snippet parses.
input_data = Input(shape=(256, 64, 1), name='input')

# Conv block 1: 'same' padding keeps 256x64; the 2x2 pool halves it to 128x32.
inner = Conv2D(32, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(input_data)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)

# Conv block 2: 128x32 -> 64x16 after pooling.
inner = Conv2D(64, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)
inner = Dropout(0.3)(inner)

# Conv block 3: the (1, 2) pool only halves the second axis, 64x16 -> 64x8.
inner = Conv2D(128, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)
inner = Dropout(0.3)(inner)

# CNN to RNN: flatten (8, 128) into 1024 features for each of the 64 steps.
inner = Reshape(target_shape=((64, 1024)), name='reshape')(inner)
inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)

# RNN
inner = Bidirectional(LSTM(256, return_sequences=True), name='lstm1')(inner)
inner = Bidirectional(LSTM(256, return_sequences=True), name='lstm2')(inner)

# OUTPUT
inner = Dense(num_of_characters, kernel_initializer='he_normal', name='dense2')(inner)
y_pred = Activation('softmax', name='softmax')(inner)
model = Model(inputs=input_data, outputs=y_pred)
I tried to reproduce it in PyTorch, bit by bit:
class Net(nn.Module):
def init(self):
super(Net,self).init()
self.input_data = input_size
self.conv1 = nn.Conv2d(32,3,3)
self.conv2 = nn.Conv2d(64,3,3)
self.conv3 = nn.Conv2d(128,3,3)
self.dropout = nn.Dropout(0.3)
self.maxp = torch.nn.MaxPool2d((2,2))
#CNN to RNN
self.linear1 = nn.Linear(256*62*62,64)
#RNN
self.lstm = torch.nn.LSTM(256, 10,bidirectional = True)
#output
self.linear2 = nn.Linear(64,num_of_chars)
def forward(self,x,input_size):
x = self.conv1(input_size)
x = nn.BatchNorm2d(x)
x = F.relu(x)
x = self.maxp(x)
x = self.conv2(x)
x = nn.BatchNorm2d(x)
x = F.relu(x)
x = self.maxp(x)
x = self.dropout(x)
x = self.conv3(x)
x = nn.BatchNorm2d(x)
x = F.relu(x)
x = self.maxp(x)
x = self.dropout(x)
x = x.view((64,1024))
x = self.linear1(x)
x = self.lstm(x)
x = self.lstm(x)
x = self.linear2(x)
x = nn.Softmax(x,dim=1)
return x
But the model summary is not at all the same. I am very confused by the parameters. Any help would be appreciated. Tell me if you need anything. Thanks.

How to use defined function in functional API keras

I'm very confused about how to use the Lambda layer with the functional API.
Please tell me which of these is correct.
For example:
def magic(x):
x2=x+x
return x2
D = Input((n,))
E = Lambda(magic)(D)
d = Model(~~~~)
or
D = Input((n,))
E = magic(D)
d = Model(~~~~)
and another case
def magic():
x2=np.randn(3,1)
return x2
D = Input((n,))
E = Lambda(magic)
d = Model(~~~~)
or
D = Input((n,))
E = magic()
d = Model(~~~~)
please save me !!
You can use the first way if you want to wrap your custom operation as a Layer, as follows:
def magic(x):
    """Return ``x + x``.

    The only operation applied to ``x`` is ``+`` with itself, so any object
    that supports that (numbers, arrays, tensors) can be passed in.
    """
    x2 = x + x
    return x2
# Minimal functional-API model showing a custom function used as a layer:
# Dense -> Lambda(magic) -> Dense.
inp = tf.keras.Input(shape=(2,))
x = tf.keras.layers.Dense(4, name="id_1")(inp)
# Ways to Define Custom Layer
x = tf.keras.layers.Lambda(lambda val: magic(val))(x)
# or
# x = tf.keras.layers.Lambda(magic)(x)
# or
# x = tf.keras.layers.Lambda(lambda val: (val + val))(x)
x = tf.keras.layers.Dense(4, name="id_2")(x)
model = tf.keras.Model(inputs=inp, outputs=x, name="inner_model")
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

Tensorflow: How to update only single variable at a time out of many variables based on conditions

k1 = tf.Variable(10.0)
k2 = tf.Variable(10.0)
pred = tf.pow(B, ?) / C
cost = tf.pow(pred_s1 - Y, 2)
optimizer = tf.train.AdamOptimizer(0.01).minimize(cost)
sess.run(optimizer, feed_dict{A:a, B:b, C:c})
Update:
pred = tf.pow(B, k1) / C if A == 0
pred = tf.pow(B, k2) / C if A == 1
Single prediction function which updates only one variable based on the value fed into placeholder 'A'
s1 = tf.Variable(tf.random_normal([1]))
s2 = tf.Variable(tf.random_normal([1]))
s3 = tf.Variable(tf.random_normal([1]))
s4 = tf.Variable(tf.random_normal([1]))
s5 = tf.Variable(tf.random_normal([1]))
D = tf.placeholder("float")
s2_s = tf.where(tf.logical_and(1.9<D,D<2.1),x=s2,y=s1)
s3_s = tf.where(tf.logical_and(2.9<D,D<3.1),x=s3,y=s2_s)
s4_s = tf.where(tf.logical_and(3.9<D,D<4.1),x=s4,y=s3_s)
s5_s = tf.where(tf.logical_and(4.9<D,D<5.1),x=s5,y=s4_s)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run([s1])[0], sess.run([s2])[0], sess.run([s3])[0], sess.run([s4])[0], sess.run([s5])[0])
print(sess.run(s5_s, feed_dict={D:5}))
sess.close()
Just use
pred = tf.pow(B, A*k2 + (1-A)* k1) / C
This acts as the switch: the exponent is k1 when A is 0 and k2 when A is 1. An alternative would be tf.where.

How to avoid dying weights/gradients in custom LSTM cell in tensorflow. What shall be ideal loss function?

I am trying to train a name-generation LSTM network. I am not using the pre-defined TensorFlow cells (like tf.contrib.rnn.BasicLSTMCell, etc.); I have created the LSTM cell myself. But the error does not reduce beyond a limit. It only decreases by 30% from its initial value (when random weights were used in forward propagation) and then it starts increasing. Also, the gradients and weights become very small after a few thousand training steps.
I think the reason for the non-convergence is one of two things:
1. The design of the TensorFlow graph I have created, OR
2. The loss function I used.
I am feeding one-hot vectors of each character of the word at each time step of the network. The code I have used for graph generation and the loss function is as follows. Tx is the number of time steps in the RNN; n_x, n_a and n_y are the lengths of the input vectors, the LSTM cell vector and the output vector respectively.
It would be great if someone could help me identify what I am doing wrong here.
n_x = vocab_size
n_y = vocab_size
n_a = 100
Tx = 50
Ty = Tx
with open("trainingnames_file.txt") as f:
examples = f.readlines()
examples = [x.lower().strip() for x in examples]
X0 = [[char_to_ix[x1] for x1 in list(x)] for x in examples]
X1 = np.array([np.concatenate([np.array(x), np.zeros([Tx-len(x)])]) for x in X0], dtype=np.int32).T
Y0 = [(x[1:] + [char_to_ix["\n"]]) for x in X0]
Y1 = np.array([np.concatenate([np.array(y), np.zeros([Ty-len(y)])]) for y in Y0], dtype=np.int32).T
m = len(X0)
Wf = tf.get_variable(name="Wf", shape = [n_a,(n_a+n_x)])
Wu = tf.get_variable(name="Wu", shape = [n_a,(n_a+n_x)])
Wc = tf.get_variable(name="Wc", shape = [n_a,(n_a+n_x)])
Wo = tf.get_variable(name="Wo", shape = [n_a,(n_a+n_x)])
Wy = tf.get_variable(name="Wy", shape = [n_y,n_a])
bf = tf.get_variable(name="bf", shape = [n_a,1])
bu = tf.get_variable(name="bu", shape = [n_a,1])
bc = tf.get_variable(name="bc", shape = [n_a,1])
bo = tf.get_variable(name="bo", shape = [n_a,1])
by = tf.get_variable(name="by", shape = [n_y,1])
X_input = tf.placeholder(dtype = tf.int32, shape = [Tx,None])
Y_input = tf.placeholder(dtype = tf.int32, shape = [Ty,None])
X = tf.one_hot(X_input, axis = 0, depth = n_x)
Y = tf.one_hot(Y_input, axis = 0, depth = n_y)
X.shape
a_prev = tf.zeros(shape = [n_a,m])
c_prev = tf.zeros(shape = [n_a,m])
a_all = []
c_all = []
for i in range(Tx):
ac = tf.concat([a_prev,tf.squeeze(tf.slice(input_=X,begin=[0,i,0],size=[n_x,1,m]))], axis=0)
ct = tf.tanh(tf.matmul(Wc,ac) + bc)
tug = tf.sigmoid(tf.matmul(Wu,ac) + bu)
tfg = tf.sigmoid(tf.matmul(Wf,ac) + bf)
tog = tf.sigmoid(tf.matmul(Wo,ac) + bo)
c = tf.multiply(tug,ct) + tf.multiply(tfg,c_prev)
a = tf.multiply(tog,tf.tanh(c))
y = tf.nn.softmax(tf.matmul(Wy,a) + by, axis = 0)
a_all.append(a)
c_all.append(c)
a_prev = a
c_prev = c
y_ex = tf.expand_dims(y,axis=1)
if i == 0:
y_all = y_ex
else:
y_all = tf.concat([y_all,y_ex], axis=1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y,logits=y_all,dim=0))
opt = tf.train.AdamOptimizer()
train = opt.minimize(loss)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
o = sess.run(loss, feed_dict = {X_input:X1,Y_input:Y1})
print(o.shape)
print(o)
sess.run(train, feed_dict = {X_input:X1,Y_input:Y1})
o = sess.run(loss, feed_dict = {X_input:X1,Y_input:Y1})
print(o)

MNIST Classification: low accuracy (10%) and no change in loss

I'm learning TensorFlow and tried to apply it to the MNIST database.
My question is (see attached image):
What could cause such output for accuracy (improving and then degrading!) and loss (almost constant!)?
The accuracy isn't that great, just hovering around 10%,
despite:
a 5-layer network (incl. output layer), with 200/100/60/30/10 neurons respectively.
Is the network not learning, despite a 0.1 learning rate (which is quite high, I believe)?
Full code: https://github.com/vibhorj/tf > mnist-2.py
1) here's how the layers are defined:
K,L,M,N=200,100,60,30
""" Layer 1 """
with tf.name_scope('L1'):
w1 = tf.Variable(initial_value = tf.truncated_normal([28*28,K],mean=0,stddev=0.1), name = 'w1')
b1 = tf.Variable(initial_value = tf.truncated_normal([K],mean=0,stddev=0.1), name = 'b1')
""" Layer 2 """
with tf.name_scope('L2'):
w2 = tf.Variable(initial_value =tf.truncated_normal([K,L],mean=0,stddev=0.1), name = 'w2')
b2 = tf.Variable(initial_value = tf.truncated_normal([L],mean=0,stddev=0.1), name = 'b2')
""" Layer 3 """
with tf.name_scope('L3'):
w3 = tf.Variable(initial_value = tf.truncated_normal([L,M],mean=0,stddev=0.1), name = 'w3')
b3 = tf.Variable(initial_value = tf.truncated_normal([M],mean=0,stddev=0.1), name = 'b3')
""" Layer 4 """
with tf.name_scope('L4'):
w4 = tf.Variable(initial_value = tf.truncated_normal([M,N],mean=0,stddev=0.1), name = 'w4')
b4 = tf.Variable(initial_value = tf.truncated_normal([N],mean=0,stddev=0.1), name = 'b4')
""" Layer output """
with tf.name_scope('L_out'):
w_out = tf.Variable(initial_value = tf.truncated_normal([N,10],mean=0,stddev=0.1), name = 'w_out')
b_out = tf.Variable(initial_value = tf.truncated_normal([10],mean=0,stddev=0.1), name = 'b_out')
2) loss function
Y1 = tf.nn.sigmoid(tf.add(tf.matmul(X,w1),b1), name='Y1')
Y2 = tf.nn.sigmoid(tf.add(tf.matmul(Y1,w2),b2), name='Y2')
Y3 = tf.nn.sigmoid(tf.add(tf.matmul(Y2,w3),b3), name='Y3')
Y4 = tf.nn.sigmoid(tf.add(tf.matmul(Y3,w4),b4), name='Y4')
Y_pred_logits = tf.add(tf.matmul(Y4, w_out),b_out,name='logits')
Y_pred_prob = tf.nn.softmax(Y_pred_logits, name='probs')
error = -tf.matmul(Y
, tf.reshape(tf.log(Y_pred_prob),[10,-1]), name ='err')
loss = tf.reduce_mean(error, name = 'loss')
3) optimization function
opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss)
ctr = tf.Variable(0.0, name='ctr')
z = opt.apply_gradients(grads_and_vars, global_step=ctr)
4) Tensorboard code:
evt_file = tf.summary.FileWriter('/Users/vibhorj/python/-tf/g_mnist')
evt_file.add_graph(tf.get_default_graph())
s1 = tf.summary.scalar(name='accuracy', tensor=accuracy)
s2 = tf.summary.scalar(name='loss', tensor=loss)
m1 = tf.summary.merge([s1,s2])
5) run the session (test data is mnist.test.images & mnist.test.labels)
with tf.Session() as sess:
sess.run(tf.variables_initializer(tf.global_variables()))
for i in range(300):
""" calc. accuracy on test data - TENSORBOARD before iteration beings """
summary = sess.run(m1, feed_dict=test_data)
evt_file.add_summary(summary, sess.run(ctr))
evt_file.flush()
""" fetch train data """
a_train, b_train = mnist.train.next_batch(batch_size=100)
train_data = {X: a_train , Y: b_train}
""" train """
sess.run(z, feed_dict = train_data)
I appreciate your time in providing any insight into this. I'm completely clueless how to proceed further (I even tried initializing w & b with random_normal, and played with learning rates [0.1, 0.01, 0.001]).
Cheers!
Please consider
Initializing biases to zeros
Using ReLU units instead of sigmoid - avoid saturation
Using Adam optimizer - faster learning
I feel that your network is quite large. You could do with a smaller network.
# Revised MNIST network: ReLU hidden layers, zero-initialised biases and the
# built-in softmax cross-entropy loss. `tf`, the placeholders `X` / `Y` and
# the `mnist` dataset object are expected to be defined earlier in the script.

# Widths of the four hidden layers; the output layer has 10 units.
K, L, M, N = 200, 100, 60, 30

# Biases start at zero but are created as Variables: a bare tf.zeros(...) is
# a constant tensor, so it would never be updated by the optimizer.

# Layer 1
with tf.name_scope('L1'):
    w1 = tf.Variable(initial_value=tf.truncated_normal([28*28, K], mean=0, stddev=0.1), name='w1')
    b1 = tf.Variable(initial_value=tf.zeros([K]), name='b1')

# Layer 2
with tf.name_scope('L2'):
    w2 = tf.Variable(initial_value=tf.truncated_normal([K, L], mean=0, stddev=0.1), name='w2')
    b2 = tf.Variable(initial_value=tf.zeros([L]), name='b2')

# Layer 3
with tf.name_scope('L3'):
    w3 = tf.Variable(initial_value=tf.truncated_normal([L, M], mean=0, stddev=0.1), name='w3')
    b3 = tf.Variable(initial_value=tf.zeros([M]), name='b3')

# Layer 4
with tf.name_scope('L4'):
    w4 = tf.Variable(initial_value=tf.truncated_normal([M, N], mean=0, stddev=0.1), name='w4')
    b4 = tf.Variable(initial_value=tf.zeros([N]), name='b4')

# Output layer
with tf.name_scope('L_out'):
    w_out = tf.Variable(initial_value=tf.truncated_normal([N, 10], mean=0, stddev=0.1), name='w_out')
    b_out = tf.Variable(initial_value=tf.zeros([10]), name='b_out')

# Forward pass.
Y1 = tf.nn.relu(tf.add(tf.matmul(X, w1), b1), name='Y1')
Y2 = tf.nn.relu(tf.add(tf.matmul(Y1, w2), b2), name='Y2')
Y3 = tf.nn.relu(tf.add(tf.matmul(Y2, w3), b3), name='Y3')
Y4 = tf.nn.relu(tf.add(tf.matmul(Y3, w4), b4), name='Y4')
Y_pred_logits = tf.add(tf.matmul(Y4, w_out), b_out, name='logits')

# The fused op takes raw logits, so no separate softmax/log step is needed.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Y_pred_logits, name='xentropy'))

# Plain gradient descent is kept here; to follow the Adam suggestion, swap in
# tf.train.AdamOptimizer.
opt = tf.train.GradientDescentOptimizer(0.01)
# The step counter is marked non-trainable so the optimizer does not treat
# it as a model parameter.
ctr = tf.Variable(0.0, name='ctr', trainable=False)
train_op = opt.minimize(loss, global_step=ctr)

# List the variables that will actually be trained.
for v in tf.trainable_variables():
    print(v.op.name)

with tf.Session() as sess:
    sess.run(tf.variables_initializer(tf.global_variables()))
    for _ in range(3000):
        # Accuracy on test data for TensorBoard, before the iteration begins
        # (disabled):
        #summary = sess.run(m1, feed_dict=test_data)
        #evt_file.add_summary(summary, sess.run(ctr))
        #evt_file.flush()

        # Fetch train data.
        a_train, b_train = mnist.train.next_batch(batch_size=100)
        train_data = {X: a_train, Y: b_train}

        # Report the loss on this batch, then take one training step on it.
        batch_loss = sess.run(loss, feed_dict=train_data)
        print(batch_loss)
        sess.run(train_op, feed_dict=train_data)