Connecting a 1D CNN layer to a LSTM layer in keras - tensorflow

I have a model-building function, shown below:
def og_build_model_5layer(n_rows, n_cols):
    """Build the 5-stage Conv1D binary classifier and a feature extractor.

    Each stage is Conv1D(50 filters, kernel size 3, 'same' padding)
    followed by BatchNormalization and a ReLU activation. The head is
    Flatten -> Dense(30) -> Dropout(0.5) -> Dense(1, sigmoid).

    Args:
        n_rows: size of the last input axis; the input shape is
            ``(n_cols, n_rows)``.
        n_cols: size of the first (non-batch) input axis.

    Returns:
        A ``(model, extractor)`` tuple: the compiled classifier, and a
        second model sharing the same input whose output is the ``'fl'``
        (Flatten) layer of the classifier.
    """
    # Named `inp` rather than `input` so the builtin is not shadowed; the
    # same tensor is used everywhere the graph input is needed.
    inp = Input(shape=(n_cols, n_rows), name='INP')
    print('model_input shape:', inp.shape)

    x = inp
    for idx in range(1, 6):
        x = Conv1D(50, 3, name='conv_%d' % idx, padding='same',
                   kernel_initializer="glorot_uniform")(x)
        x = BatchNormalization(name='BN_%d' % idx)(x)
        x = Activation('relu')(x)
    a5 = x  # output of the fifth conv stage; the LSTM is meant to consume this

    ######## ADD one LSTM layer HERE ##################
    # NOTE(review): LSTM_OUTPUT is still a placeholder for the output of the
    # LSTM layer that has yet to be inserted; it is not defined in this
    # function, so the function cannot run until that layer is added.
    fl = Flatten(name='fl')(LSTM_OUTPUT)
    den = Dense(30, name='dense_1')(fl)
    drp = Dropout(0.5)(den)
    output = Dense(1, activation='sigmoid')(drp)

    opt = Adam(learning_rate=1e-4)
    model = Model(inputs=inp, outputs=output, name='model')
    extractor = Model(inputs=inp, outputs=model.get_layer('fl').output)
    model.compile(optimizer=opt, loss='binary_crossentropy',
                  metrics=['accuracy'])
    # summary() prints the table itself; it must be called, not printed.
    model.summary()
    return model, extractor
Here I have 5 Conv1D layers (each accepting one image at a time), and I want to add one LSTM layer that takes a sequence of 200 images together, so that the CNN+LSTM model is trained end to end. I am confused about how to add the LSTM layer, because it needs a sequence (of 200 processed inputs), whereas the previous 5 layers accept one input at a time. Any help here is appreciated. I know the concept of a TimeDistributed Conv1D; however, I do not want to use it. Can this end-to-end training be done?

Related

Masking Layer and Mask_Zero in Embedding in Keras

I have deployed a model whose input is the sequence of pages visited by a customer, together with numerical features such as the time spent on each page. To pass this to the model in Keras, I encode the page information as embeddings, concatenate them with the numerical features, and feed the result to an LSTM layer. To ignore the padded values in the embeddings, I use the mask_zero=True argument of the Embedding layer; to ignore the numerical features at padded timesteps, I use a Masking layer that skips values equal to -99 (I used -99 as the padding value for the numerical features). Below is the model definition.
from keras.layers import (Input, Embedding, Dot, Reshape, Dense, Dropout,
                          Concatenate, Masking, LSTM, Activation)

# Four parallel branches, one per page sequence. Each branch embeds the page
# ids, appends the time spent on each page, and summarises the sequence with
# an LSTM. The four summaries are concatenated and fed to a small dense
# classifier with a sigmoid output.

##Input for the Sequential Data
input0 = Input(name=str(inputs[0]), shape=[max_len])
input1 = Input(name=str(inputs[1]), shape=[max_len])
input2 = Input(name=str(inputs[2]), shape=[max_len])
input3 = Input(name=str(inputs[3]), shape=[max_len])

##Input Profiles for the Timespent on each page
input_ts0 = Input(name=str(inputs_ts[0]), shape=[max_len, 1])
input_ts1 = Input(name=str(inputs_ts[1]), shape=[max_len, 1])
input_ts2 = Input(name=str(inputs_ts[2]), shape=[max_len, 1])
input_ts3 = Input(name=str(inputs_ts[3]), shape=[max_len, 1])

##Embedding Layer (mask_zero=True marks timesteps whose page id is 0 as padding)
embed0 = Embedding(def_val+1, 50, input_length=max_len, mask_zero=True)(input0)
embed1 = Embedding(def_val+1, 50, input_length=max_len, mask_zero=True)(input1)
embed2 = Embedding(def_val+1, 50, input_length=max_len, mask_zero=True)(input2)
embed3 = Embedding(def_val+1, 50, input_length=max_len, mask_zero=True)(input3)

##Masking the TS input where there is no information
# Masking only skips a timestep when *every* feature at that timestep equals
# mask_value. It is therefore applied to the single-feature time-spent input
# BEFORE concatenation: on the 51-feature concatenated tensor the embedding
# values are never all -99, so nothing would be masked, and the Masking layer
# would also replace the mask produced by mask_zero=True.
masking_0 = Masking(mask_value=-99)(input_ts0)
masking_1 = Masking(mask_value=-99)(input_ts1)
masking_2 = Masking(mask_value=-99)(input_ts2)
masking_3 = Masking(mask_value=-99)(input_ts3)

##concatenate the embedding and time spent on each page
# Concatenate is mask-aware: per the Keras masking documentation it combines
# the masks of its inputs, so a timestep reaches the LSTM only when neither
# the page id nor the time-spent value is padding.
ts_eve_concat0 = Concatenate(name='Concatenated_eve_ts0')([embed0, masking_0])
ts_eve_concat1 = Concatenate(name='Concatenated_eve_ts1')([embed1, masking_1])
ts_eve_concat2 = Concatenate(name='Concatenated_eve_ts2')([embed2, masking_2])
ts_eve_concat3 = Concatenate(name='Concatenated_eve_ts3')([embed3, masking_3])

#LSTM on all the individual layers
lstm0 = LSTM(32)(ts_eve_concat0)
lstm1 = LSTM(32)(ts_eve_concat1)
lstm2 = LSTM(32)(ts_eve_concat2)
lstm3 = LSTM(32)(ts_eve_concat3)

##Concatenate all the LSTM Layers
concat_lstm = Concatenate(name='Concatenated_lstm')([lstm0, lstm1, lstm2, lstm3])

# Dense classifier head
layer = Dense(64, name='FC1')(concat_lstm)
layer = Activation('relu')(layer)
layer = Dropout(0.3)(layer)
layer = Dense(32, name='FC2', activation='relu')(layer)
layer = Dropout(0.3)(layer)
layer = Dense(1, name='out_layer')(layer)
layer = Activation('sigmoid')(layer)
Is this approach correct, or do I need to pass the information in some other manner?

For anyone familiar with Andrew Ng's "Planar data classification with one hidden layer"

How does X_assess get the dimensions (2, 3) in the code below? I am trying to apply this to my own problem and got stuck at this point.
def forward_propagation_test_case():
    """Return the fixed inputs used to check a forward-propagation step.

    The random generator is seeded with 1, so every call yields the same
    values. ``X_assess`` has shape (2, 3) simply because it is drawn with
    ``np.random.randn(2, 3)``; its 2 rows match the 2 columns of ``W1``.

    Returns:
        A tuple ``(X_assess, parameters)`` where ``parameters`` is a dict
        with keys ``'W1'`` (4x2), ``'W2'`` (1x4), ``'b1'`` (4x1) and
        ``'b2'`` (1x1).
    """
    np.random.seed(1)

    # The two draws must stay in this order: both consume the seeded stream.
    X_assess = np.random.randn(2, 3)
    hidden_bias = np.random.randn(4, 1)
    output_bias = np.array([[-1.3]])

    hidden_weights = np.array([
        [-0.00416758, -0.00056267],
        [-0.02136196, 0.01640271],
        [-0.01793436, -0.00841747],
        [0.00502881, -0.01245288],
    ])
    output_weights = np.array(
        [[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]
    )

    parameters = {
        'W1': hidden_weights,
        'W2': output_weights,
        'b1': hidden_bias,
        'b2': output_bias,
    }
    return X_assess, parameters

Keras Functional API Multiple Input Shape Errors

My goal is to use a CNN to go through a picture, then add an array of extra data before the dense layers.
# ---- Image branch: four Conv2D/BatchNorm/LeakyReLU stages, dropout after
# every second stage, then flatten. ----
picIn = keras.Input(shape=x[0].shape)
conv1 = layers.Conv2D(32, kernel_size=3, padding='same', use_bias=False)(picIn)
batch1 = layers.BatchNormalization()(conv1)
leaky1 = layers.LeakyReLU(alpha=.3)(batch1)
conv2 = layers.Conv2D(32, kernel_size=3, padding='same', use_bias=False)(leaky1)
batch2 = layers.BatchNormalization()(conv2)
leaky2 = layers.LeakyReLU(alpha=.3)(batch2)
cdrop1 = layers.Dropout(.20)(leaky2)
conv3 = layers.Conv2D(64, kernel_size=3, padding='same', use_bias=False)(cdrop1)
batch3 = layers.BatchNormalization()(conv3)
leaky3 = layers.LeakyReLU(alpha=.3)(batch3)
conv4 = layers.Conv2D(64, kernel_size=3, padding='same', use_bias=False)(leaky3)
batch4 = layers.BatchNormalization()(conv4)
leaky4 = layers.LeakyReLU(alpha=.3)(batch4)
cdrop2 = layers.Dropout(.20)(leaky4)
flat1 = layers.Flatten()(cdrop2)

# ---- Extra-data branch: one value per sample. ----
# reshape(-1, 1) infers the row count instead of hard-coding the split sizes.
rtheta1 = rtheta[trainCut].reshape(-1, 1)
rtheta2 = rtheta[testCut].reshape(-1, 1)
# keras.Input takes the shape of ONE sample (no batch axis). Passing the full
# array shape (n_samples, 1) made every sample look like an (n_samples, 1)
# matrix, which is what produced the shape errors.
ip2 = keras.Input(shape=rtheta1.shape[1:])
flat2 = layers.Flatten()(ip2)

merge = layers.Concatenate()([flat1, flat2])

# ---- Dense head: three Dense/BatchNorm/LeakyReLU/Dropout stages chained
# one after another (each stage consumes the previous stage's output). ----
hidden1 = layers.Dense(512, use_bias=False)(merge)
batch5 = layers.BatchNormalization()(hidden1)
leaky5 = layers.LeakyReLU(alpha=.3)(batch5)
ddrop1 = layers.Dropout(.20)(leaky5)
hidden2 = layers.Dense(512, use_bias=False)(ddrop1)
batch6 = layers.BatchNormalization()(hidden2)
leaky6 = layers.LeakyReLU(alpha=.3)(batch6)
ddrop2 = layers.Dropout(.20)(leaky6)
hidden3 = layers.Dense(512, use_bias=False)(ddrop2)
batch7 = layers.BatchNormalization()(hidden3)
leaky7 = layers.LeakyReLU(alpha=.3)(batch7)
ddrop3 = layers.Dropout(.20)(leaky7)
output = layers.Dense(1)(ddrop3)

model = keras.Model(inputs=[picIn, ip2], outputs=output)
# Feed the reshaped (n_samples, 1) arrays so they match ip2's per-sample shape.
H = model.fit(
    x=[x[trainCut], rtheta1],
    y=y[trainCut],
    batch_size=args.bsize,
    validation_data=([x[testCut], rtheta2], y[testCut]),
    epochs=args.epochs,
)
I always get an error related to the shape of the inputs
Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 473926 but received input with shape [None, 6401]
Model was constructed with shape (None, 467526, 1) for input Tensor("input_2:0", shape=(None, 467526, 1), dtype=float32), but it was called on an input with incompatible shape (None, 1, 1).
I'm confused about what exactly to do here.
x[traincut] is a matrix of size (467526,10,10,2)
rtheta1 is (467526,1) and so is y[traincut]
The validation data is the same except it is 82247 instead of 467526.
I have tried it without flattening after ip2 and I get a different error but I think the core issue is still the same.
Any help would be appreciated. Thanks!
Edit: The data was not the right shape, obviously, but I figured out how to fix it.
Are you ensuring that all of your training data's shape is uniform before you put it through and into the first tensor?

Understanding torch and implement on tensorflow

I am implementing a TensorFlow version of LBCNN, whose original code is here: https://github.com/juefeix/lbcnn.torch. The problem is that when I rewrite it in TensorFlow, the cost stays high and keeps fluctuating. I have been stuck on this for 2 weeks; even after debugging everything, I still don't know where I went wrong.
Torch code:
-- resnet.lua
-- Builds the network: a Convolution/SBatchNorm/ReLU stem, opt.depth
-- residual blocks, average pooling, and a two-layer classifier.
-- Reads opt.convSize, opt.nInputPlane, opt.numChannels, opt.numWeights,
-- opt.depth, opt.view, opt.nClasses and opt.full.
-- NOTE(review): Convolution, SBatchNorm, ReLU and Avg are not defined in this
-- excerpt; presumably local aliases of nn/cudnn modules -- confirm.
local function createModel(opt)
-- One residual block: SBatchNorm -> Convolution(nChIn->nChOut, sz x sz)
-- -> ReLU -> Convolution(nChOut->nChIn, 1 x 1), whose output is added to
-- the block's own input.
local function basicblock(nChIn,nChOut,sz)
local s = nn.Sequential()
-- The conv module is constructed before the batch norm but added after it.
-- NOTE(review): the trailing arguments (1, 1, (sz-1)/2, (sz-1)/2) are
-- presumably stride and padding -- confirm against Convolution's signature.
local shareConv = Convolution(nChIn,nChOut,sz,sz,1,
1,(sz-1)/2,(sz-1)/2)
s:add(SBatchNorm(nChIn))
s:add(shareConv)
s:add(ReLU())
-- 1x1 convolution maps back to nChIn channels so the sum below is possible.
s:add(Convolution(nChOut,nChIn,1,1))
local identity = nn.Identity()
-- ConcatTable feeds the same input to both branches; CAddTable sums them.
-- NOTE(review): the `true` argument presumably requests an in-place add.
local output= nn.Sequential(): add(nn.ConcatTable()
:add(s):add(identity)):add(nn.CAddTable(true))
return output
end
local sz = opt.convSize
local nInputPlane = opt.nInputPlane
local nChIn = opt.numChannels
local nChOut = opt.numWeights
-- define model to train
-- NOTE: `model` is assigned without `local`, so it is a global variable.
model = nn.Sequential()
-- Stem: convolution, batch norm, ReLU.
model:add(Convolution(nInputPlane,nChIn,sz,sz,1,1,1,1))
model:add(SBatchNorm(nChIn))
model:add(ReLU(true))
-- Stack opt.depth identical residual blocks.
for stages = 1,opt.depth do
model:add(basicblock(nChIn,nChOut,sz))
end
-- NOTE(review): presumably 5x5 average pooling with stride 5 -- confirm.
model:add(Avg(5,5,5,5))
-- stage 3 : standard 2-layer neural network
-- Flatten to nChIn*opt.view features before the linear layers.
model:add(nn.Reshape(nChIn*opt.view))
model:add(nn.Dropout(0.5))
-- Hidden width is the larger of opt.nClasses and opt.full.
model:add(nn.Linear(nChIn*opt.view,
math.max(opt.nClasses,opt.full)))
model:add(cudnn.ReLU())
model:add(nn.Dropout(0.5))
model: add(nn.Linear (math.max(opt.full,opt.nClasses), opt.nClasses))
-- Move the model to the GPU.
model:cuda()
return model
end
return createModel
Tensorflow code:
def cnn(prev_input, lbc_size, lbc_channels, output_channels):
    """Build one residual block and return its output tensor.

    The block is batch norm -> conv (``lbc_size`` kernel, ``lbc_channels``
    filters) -> ReLU -> 1x1 conv (``output_channels`` filters), added to the
    block input.

    Args:
        prev_input: input tensor of the block.
        lbc_size: kernel size of the first convolution.
        lbc_channels: number of filters of the first convolution.
        output_channels: number of filters of the 1x1 convolution. The
            result is added to ``prev_input``, so the two shapes must be
            compatible for ``tf.add``.

    Returns:
        The tensor ``conv_1x1(relu(conv(bn(prev_input)))) + prev_input``.
    """
    shortcut = tf.identity(prev_input)
    normed = tf.contrib.layers.batch_norm(prev_input)
    # Use the lbc_size argument rather than a hard-coded 3, so the kernel
    # size follows the caller's setting (as `sz` does in the Torch version).
    conv_out = tf.contrib.layers.conv2d(
        inputs=normed, num_outputs=lbc_channels,
        kernel_size=lbc_size, stride=1, padding="SAME", activation_fn=None)
    activated = tf.nn.relu(conv_out)
    projected = tf.contrib.layers.conv2d(
        inputs=activated, num_outputs=output_channels,
        kernel_size=1, stride=1, padding="SAME", activation_fn=None)
    return tf.add(projected, shortcut)
def model(X, Keep_probability):
    """Build the full network and return the class logits.

    Structure: conv -> batch norm -> ReLU stem, ``conv_layers`` residual
    blocks built by ``cnn``, 5x5 average pooling with stride 5, flatten,
    then dropout -> fully connected (ReLU) -> dropout -> fully connected.

    Args:
        X: input image tensor.
        Keep_probability: keep probability used by both dropout layers
            (pass a value < 1 while training and 1.0 for evaluation).

    Returns:
        The un-activated output of the last fully connected layer.

    NOTE(review): this function reads ``output_channels``, ``out_channels``,
    ``lbc_size``, ``lbc_channels``, ``conv_layers``, ``fc_hidden_units`` and
    ``output_classes`` from the enclosing scope. Both ``output_channels`` and
    ``out_channels`` are used; confirm they are meant to be two names.
    """
    with tf.name_scope("Pre-Conv"):
        X1 = tf.contrib.layers.conv2d(
            inputs=X, num_outputs=output_channels, kernel_size=lbc_size,
            stride=1, padding="SAME", activation_fn=None)
        X2 = tf.contrib.layers.batch_norm(X1)
        X3 = tf.nn.relu(X2)

    X_in = X3
    for i in range(conv_layers):
        with tf.name_scope("conv"):
            # cnn() returns a single tensor, so bind it directly instead of
            # unpacking five values.
            X_in = cnn(X_in, lbc_size, lbc_channels, out_channels)

    with tf.name_scope("AvgPool"):
        Z = tf.nn.avg_pool(value=X_in, ksize=[1, 5, 5, 1],
                           strides=[1, 5, 5, 1], padding="VALID")
    with tf.name_scope("Flatter"):
        P = tf.contrib.layers.flatten(Z)
    with tf.name_scope("Dropout"):
        # Honour the caller's keep probability so dropout can be switched
        # off at evaluation time instead of always dropping half the units.
        F1 = tf.nn.dropout(x=P, keep_prob=Keep_probability)
    with tf.name_scope("Fully"):
        F2 = tf.contrib.layers.fully_connected(
            inputs=F1, num_outputs=fc_hidden_units, activation_fn=tf.nn.relu)
    with tf.name_scope("Dropout"):
        F3 = tf.nn.dropout(x=F2, keep_prob=Keep_probability)
    with tf.name_scope("Fully"):
        F4 = tf.contrib.layers.fully_connected(
            inputs=F3, num_outputs=output_classes, activation_fn=None)
    return F4
Assume I passed all the parameters correctly. I just want to ask whether the two architectures are the same or not. One more thing: their code uses SGD with momentum and weight decay, while I use AdamOptimizer; does this make a difference? Thank you so much.
I did not look over your entire code to check that it is indeed the same, but there is usually a difference in the number of examples needed to converge when you change the optimizer from momentum to Adam. You will also need to retune the hyperparameters to get good performance.

How can I print the cost function?

I'd like to ask how to print the individual cost values when the model has two stages. Here is my code.
# Two stacked fully connected networks:
#   * SE network:   X -> 4 hidden ReLU layers with dropout -> SE_hypothesis
#   * STOI network: [SE_hypothesis, Y] regrouped per `frames` -> 4 hidden
#                   ReLU layers with dropout -> STOI_hypothesis
# All weights and biases are initialised from the constants *_SE / *_STOI.
# Shape arithmetic uses `//` so the dimensions stay integers on Python 3.

## SE_1st Hidden layer
W1 = tf.get_variable(
    "W1", shape=[(2*spl+1)*feature_dim, layer_width_SE],
    initializer=tf.constant_initializer(value=W1_SE))
Variable = tf.get_variable(
    "b1", shape=[layer_width_SE],
    initializer=tf.constant_initializer(value=b1_SE))
L11 = tf.nn.relu(tf.matmul(X, W1) + Variable)
L11 = tf.nn.dropout(L11, keep_prob=keep_prob)

## SE_2nd Hidden layer
W2 = tf.get_variable(
    "W2", shape=[layer_width_SE, layer_width_SE],
    initializer=tf.constant_initializer(value=W2_SE))
Variable_1 = tf.get_variable(
    "b2", shape=[layer_width_SE],
    initializer=tf.constant_initializer(value=b2_SE))
L12 = tf.nn.relu(tf.matmul(L11, W2) + Variable_1)
L12 = tf.nn.dropout(L12, keep_prob=keep_prob)

## SE_3rd Hidden layer
W3 = tf.get_variable(
    "W3", shape=[layer_width_SE, layer_width_SE],
    initializer=tf.constant_initializer(value=W3_SE))
Variable_2 = tf.get_variable(
    "b3", shape=[layer_width_SE],
    initializer=tf.constant_initializer(value=b3_SE))
L13 = tf.nn.relu(tf.matmul(L12, W3) + Variable_2)
L13 = tf.nn.dropout(L13, keep_prob=keep_prob)

## SE_4th Hidden layer
W4 = tf.get_variable(
    "W4", shape=[layer_width_SE, layer_width_SE],
    initializer=tf.constant_initializer(value=W4_SE))
Variable_3 = tf.get_variable(
    "b4", shape=[layer_width_SE],
    initializer=tf.constant_initializer(value=b4_SE))
L14 = tf.nn.relu(tf.matmul(L13, W4) + Variable_3)
L14 = tf.nn.dropout(L14, keep_prob=keep_prob)

## enhanced_speech_output layer
W5 = tf.get_variable(
    "W5", shape=[layer_width_SE, feature_dim],
    initializer=tf.constant_initializer(value=W5_SE))
Variable_4 = tf.get_variable(
    "b5", shape=[feature_dim],
    initializer=tf.constant_initializer(value=b5_SE))
SE_hypothesis = tf.matmul(L14, W5) + Variable_4

########################STOI DNN#########################
# Regroup every `frames` consecutive rows into one row, for both the
# enhanced output and the target, then place them side by side.
SE_hypothesis_append = tf.reshape(
    SE_hypothesis, [batch_size_SE // frames, feature_dim * frames])
Y_append = tf.reshape(Y, [batch_size_SE // frames, feature_dim * frames])
feature = tf.concat([SE_hypothesis_append, Y_append], axis=1)

## STOI_1st Hidden layer
W21 = tf.get_variable(
    "W21", shape=[feature_dim*frames*2, layer_width_STOI],
    initializer=tf.constant_initializer(value=W1_STOI))
b21 = tf.get_variable(
    "b21", shape=[layer_width_STOI],
    initializer=tf.constant_initializer(value=b1_STOI))
L21 = tf.nn.relu(tf.matmul(feature, W21) + b21)
L21 = tf.nn.dropout(L21, keep_prob=keep_prob)

## STOI_2nd Hidden layer
W22 = tf.get_variable(
    "W22", shape=[layer_width_STOI, layer_width_STOI // 2],
    initializer=tf.constant_initializer(value=W2_STOI))
b22 = tf.get_variable(
    "b22", shape=[layer_width_STOI // 2],
    initializer=tf.constant_initializer(value=b2_STOI))
L22 = tf.nn.relu(tf.matmul(L21, W22) + b22)
L22 = tf.nn.dropout(L22, keep_prob=keep_prob)

## STOI_3rd Hidden layer
W23 = tf.get_variable(
    "W23", shape=[layer_width_STOI // 2, layer_width_STOI // 4],
    initializer=tf.constant_initializer(value=W3_STOI))
b23 = tf.get_variable(
    "b23", shape=[layer_width_STOI // 4],
    initializer=tf.constant_initializer(value=b3_STOI))
L23 = tf.nn.relu(tf.matmul(L22, W23) + b23)
L23 = tf.nn.dropout(L23, keep_prob=keep_prob)

## STOI_4th Hidden layer
W24 = tf.get_variable(
    "W24", shape=[layer_width_STOI // 4, layer_width_STOI // 8],
    initializer=tf.constant_initializer(value=W4_STOI))
b24 = tf.get_variable(
    "b24", shape=[layer_width_STOI // 8],
    initializer=tf.constant_initializer(value=b4_STOI))
L24 = tf.nn.relu(tf.matmul(L23, W24) + b24)
L24 = tf.nn.dropout(L24, keep_prob=keep_prob)

## STOI output layer
W25 = tf.get_variable(
    "W25", shape=[layer_width_STOI // 8, 1],
    initializer=tf.constant_initializer(value=W5_STOI))
b25 = tf.get_variable(
    "b25", shape=[1],
    initializer=tf.constant_initializer(value=b5_STOI))
STOI_hypothesis = tf.matmul(L24, W25) + b25

########################Cost function and optimizer#########################
# Only the SE network's variables are passed to the optimizer; the STOI
# network's variables are not in var_list, so minimize() does not update them.
SE_var_list = [W1, W2, W3, W4, W5,
               Variable, Variable_1, Variable_2, Variable_3, Variable_4]
cost_SE = tf.reduce_mean(tf.square(Y - SE_hypothesis))
cost_STOI = tf.reduce_mean(tf.square(STOI_target - STOI_hypothesis))
# Weighted sum of the two mean-squared-error terms.
cost = (1-lamda)*cost_SE + lamda*cost_STOI
optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate).minimize(cost, var_list=SE_var_list)
saver = tf.train.Saver()
So what I want to know is the values of cost_SE and cost_STOI, so that I can set lamda to maximize the efficiency of the model. I tried several ways, but none of them worked.
# Values fed to the graph placeholders for one training step.
feed_dict = {
    X: batch_con_x,
    Y: batch_con_y,
    STOI_target: STOI_maximum,
    keep_prob: 0.5,
}
# Run one optimizer step and fetch the combined cost; the optimizer op's
# result is discarded.
c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
The code above shows only the sum of the two cost values, but what I want to know is each cost value separately. Is there any solution?
You can specify what you want in the first sess.run parameter:
# Fetch the combined cost and both individual cost terms in the same run as
# the optimizer step; results come back in the order the fetches are listed.
fetches = [cost, cost_SE, cost_STOI, optimizer]
c, cost_SE_eval, cost_STOI_eval, _ = sess.run(fetches, feed_dict=feed_dict)