How to add regularisation (L1/L2) to TensorFlow's Adam optimiser? - tensorflow

I am currently going through Google's Machine Learning Crash Course and I am experimenting with the DNNClassifier estimator on a binary classification problem. I am trying to add regularisation (L1/L2) to the Adam optimiser, since it is not exposed as an argument of AdamOptimizer. Any ideas how to implement it? Below is my code:
steps = 1000
periods = 10
steps_per_period = steps / periods

my_optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate)
my_optimiser = tf.contrib.estimator.clip_gradients_by_norm(my_optimiser, 5.0)

dnn_classifier = tf.estimator.DNNClassifier(
    feature_columns=construct_feature_columns(training_features),
    n_classes=2,
    hidden_units=hidden_units,
    optimizer=my_optimiser)

training_input_fn = lambda: my_input_fn(
    training_features,
    training_targets,
    batch_size=batch_size)

predict_training_input_fn = lambda: my_input_fn(
    training_features,
    training_targets,
    num_epochs=1,
    shuffle=False)

predict_validation_input_fn = lambda: my_input_fn(
    validation_features,
    validation_targets,
    num_epochs=1,
    shuffle=False)

training_log_losses = []
validation_log_losses = []

for period in range(0, periods):
    dnn_classifier.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )
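For reference, AdamOptimizer itself takes no regularisation arguments. Two common workarounds in the TF 1.x estimator API are switching to an optimiser that exposes L1/L2 strengths directly, or using the contrib AdamW variant, which adds decoupled weight decay (related to, though not identical to, L2 regularisation). A minimal sketch, assuming TF 1.x and the same variables as above; the strength values are placeholders to tune:

# Option 1: ProximalAdagradOptimizer exposes explicit L1/L2 strengths
# (a different optimiser, but it plugs into DNNClassifier the same way).
my_optimiser = tf.train.ProximalAdagradOptimizer(
    learning_rate=learning_rate,
    l1_regularization_strength=0.001,  # assumed value, tune for your data
    l2_regularization_strength=0.001)  # assumed value, tune for your data

# Option 2: the contrib AdamW optimiser keeps Adam and adds decoupled weight decay.
my_optimiser = tf.contrib.opt.AdamWOptimizer(
    weight_decay=0.01,                 # assumed value, tune for your data
    learning_rate=learning_rate)

# Either optimiser can still be wrapped with gradient clipping as before.
my_optimiser = tf.contrib.estimator.clip_gradients_by_norm(my_optimiser, 5.0)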

Related

The val_loss is nan, but loss is printing. Both train and validation losses are nan in model.evaluate(), and the acc improves during training

This is a 2-class classification problem, and my loss function is custom. The labels are categorical, and the final activation function is Softmax. During training the loss is printed, but val_loss is nan (inf). Using model.evaluate(X_train, Y_train) at the end of training, the train loss is the same as the validation loss, and both are nan.
This is my custom loss function.
def custom_loss(y_true, y_pred):
    import tensorflow as tf
    bce = tf.keras.losses.BinaryCrossentropy(
        from_logits=False,
        label_smoothing=0.0,
        axis=-1,
        reduction="none",
        name="binary_crossentropy",
    )
    intra = tf.constant(1, dtype=tf.float64)
    inter = tf.constant(0.01, dtype=tf.float64)
    zeros = tf.gather_nd(y_pred, tf.where(tf.argmin(y_true, axis=1)))
    ones = tf.gather_nd(y_pred, tf.where(tf.argmax(y_true, axis=1)))
    centroid_zero = tf.reduce_mean(zeros, 0)
    centroid_one = tf.reduce_mean(ones, 0)
    loss_zero_intra = tf.math.squared_difference(zeros, centroid_zero)
    loss_one_intra = tf.math.squared_difference(ones, centroid_zero)
    loss_zero_intra = tf.cast(loss_zero_intra, tf.float64)
    loss_one_intra = tf.cast(loss_one_intra, tf.float64)
    loss_intra = tf.zeros_like(y_pred, tf.float64)
    loss_intra = tf.tensor_scatter_nd_update(loss_intra, tf.where(tf.argmin(y_true, axis=1)), loss_zero_intra)
    loss_intra = tf.tensor_scatter_nd_update(loss_intra, tf.where(tf.argmax(y_true, axis=1)), loss_one_intra)
    loss_inter_value = tf.math.sqrt(tf.math.squared_difference(centroid_zero[0], centroid_one[0]) +
                                    tf.math.squared_difference(centroid_zero[1], centroid_one[1]))
    loss_inter = tf.fill(tf.shape(y_pred), loss_inter_value)
    binary_cross_entropy = tf.tile(tf.expand_dims(bce(y_true, y_pred), axis=1),
                                   tf.constant([1, 2], tf.int32))
    loss_intra = tf.cast(loss_intra, tf.float64)
    loss_inter = tf.cast(loss_inter, tf.float64)
    binary_cross_entropy = tf.cast(binary_cross_entropy, tf.float64)
    loss = tf.math.multiply(intra, loss_intra) - tf.math.multiply(inter, loss_inter) + binary_cross_entropy
    return loss
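One thing worth ruling out with a loss built this way: if a batch contains samples of only one class, zeros or ones is empty, tf.reduce_mean over an empty tensor is nan, and that nan propagates into the whole batch loss. A minimal sketch of a guarded centroid (a hypothetical helper, not part of the original code):

def safe_centroid(selected):
    # Count-based mean: if `selected` is empty, divide_no_nan returns 0
    # instead of the nan produced by tf.reduce_mean on an empty tensor.
    total = tf.reduce_sum(selected, axis=0)
    count = tf.cast(tf.shape(selected)[0], selected.dtype)
    return tf.math.divide_no_nan(total, count)

Whether this is the actual cause depends on how the evaluation batches are formed; it is only one candidate.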
You can also see my model code here:
def create_model(kernelLength=32, nb_classes=2, Chans=19, Samples=512,
                 dropoutRate=0.5, F1=8, D=2, F2=16, norm_rate=0.25,
                 dropoutType='Dropout', optimizer_type='Adam', lr=0.0005, **kwargs):
    K.clear_session()
    gc.collect()
    if dropoutType == 'SpatialDropout2D':
        dropoutType = SpatialDropout2D
    elif dropoutType == 'Dropout':
        dropoutType = Dropout
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')

    input1 = Input(shape=(1, Chans, Samples))
    block1 = Conv2D(F1, (1, kernelLength), padding='same',
                    input_shape=(1, Chans, Samples),
                    use_bias=False)(input1)
    block1 = BatchNormalization(axis=1)(block1)
    block1 = DepthwiseConv2D((Chans, 1), use_bias=False,
                             depth_multiplier=D,
                             depthwise_constraint=max_norm(1.))(block1)
    block1 = BatchNormalization(axis=1)(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4))(block1)
    block1 = dropoutType(dropoutRate)(block1)

    block2 = SeparableConv2D(F2, (1, 16),
                             use_bias=False, padding='same')(block1)
    block2 = BatchNormalization(axis=1)(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8))(block2)
    block2 = dropoutType(dropoutRate)(block2)

    flatten = Flatten(name='flatten')(block2)
    dense = Dense(nb_classes, name='dense',
                  kernel_constraint=max_norm(norm_rate))(flatten)
    softmax = Activation('softmax', name='softmax')(dense)

    model = Model(inputs=input1, outputs=softmax)

    if optimizer_type == 'Adam':
        optimizer = Adam(learning_rate=lr)
    if optimizer_type == 'Adamax':
        optimizer = Adamax(learning_rate=lr)
    if optimizer_type == 'AdamW':
        optimizer = AdamW(learning_rate=lr)

    model.compile(loss=custom_loss, optimizer=optimizer, metrics=['accuracy'])
    return model
The custom_loss function combines three distinct terms. One of them is the binary_cross_entropy. The model works fine with this term alone, which behaves the same as binary cross-entropy in Keras, so there is no problem with the data. The train and validation accuracy improve throughout training, and the train loss decreases. The number of validation samples is the same as the number of training samples.
After training finished, model.evaluate(X, Y) reported the loss as nan; however, computing the loss by calling the custom loss function directly gives a number, not nan.
Increasing the batch size, scaling the data, and using clipnorm or clipvalue in the optimizer all had no effect. Furthermore, no nan appears in the model predictions (y_pred). I suspect that the problem is caused by an extreme value inside the model predictions. An example of a model prediction with extremes:
Can anyone suggest a solution to this problem?
Thanks in advance.
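If the nan only shows up through model.evaluate(), one way to localise it (a sketch, assuming TF 2.x) is to wrap each term of custom_loss in tf.debugging.check_numerics, so the first non-finite value raises an error naming the offending term:

import tensorflow as tf

def checked(t, name):
    # Raises InvalidArgumentError as soon as t contains nan or inf,
    # which pinpoints the term that breaks during evaluate().
    return tf.debugging.check_numerics(t, message=name)

# Inside custom_loss, just before the terms are combined:
# loss_intra = checked(loss_intra, "loss_intra")
# loss_inter = checked(loss_inter, "loss_inter")
# binary_cross_entropy = checked(binary_cross_entropy, "binary_cross_entropy")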

HuggingFace Pytorch trainer giving worse results than tensorflow

I'm trying to make the switch from TensorFlow to PyTorch, but I'm getting noticeably worse results when running the model in PyTorch using Trainer.
I'm using bert-base-uncased and, as far as I can tell, primarily the same settings across both (batch size, epochs, learning rate, etc.). However, I am getting an F1 score of 0.9967 from TensorFlow and 0.9446 from PyTorch. The loss also seems to fluctuate a lot more in PyTorch. I'm still pretty new to machine learning and Python in general, so I feel like it has to be something obvious, but I've yet to find it. Here are my scripts. Thanks in advance.
Tensorflow
SEQ_LEN = 256
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def train():
    def preprocess_function(examples):
        return tokenizer(examples["text"], max_length=SEQ_LEN, truncation=True, padding='max_length',
                         add_special_tokens=True, return_attention_mask=True,
                         return_token_type_ids=False, return_tensors='tf')

    dataset = load_dataset('json', data_files={"train": "full-items.json", "test": "validation-2.json"})
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

    batch_size = 8
    num_epochs = 4
    batches_per_epoch = len(tokenized["train"]) // batch_size
    total_train_steps = int(batches_per_epoch * num_epochs)
    optimizer, schedule = create_optimizer(init_lr=4e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = TFAutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )

    tf_train_set = model.prepare_tf_dataset(
        tokenized["train"],
        shuffle=True,
        batch_size=batch_size,
        collate_fn=data_collator,
    )
    tf_validation_set = model.prepare_tf_dataset(
        tokenized["test"],
        shuffle=False,
        batch_size=batch_size,
        collate_fn=data_collator,
    )

    eval_metrics = evaluate.load("f1")

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        return eval_metrics.compute(predictions=predictions, references=labels)

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    METRICS = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True, name='sparse_crossentropy'),
    ]

    metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_train_set)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)

    class_weights = dict(enumerate(sklearn.utils.class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(tokenized["train"]["label"]),
        y=tokenized["train"]["label"])))

    model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)
    model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=num_epochs,
              class_weight=class_weights, callbacks=[early_stop, metric_callback])
    model.save_pretrained('lease_to_own_model', save_format="tf")
Pytorch
def pyTorch():
    def preprocess_function(examples):
        return tokenizer(examples["text"], max_length=SEQ_LEN, truncation=True, padding='max_length',
                         add_special_tokens=True, return_attention_mask=True,
                         return_token_type_ids=False)

    dataset = load_dataset('json', data_files={"train": "full-items.json", "test": "validation-2.json"})
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    eval_f1 = evaluate.load("f1")
    eval_accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        f1 = eval_f1.compute(predictions=predictions, references=labels)
        accuracy = eval_accuracy.compute(predictions=predictions, references=labels)
        return {"accuracy": accuracy["accuracy"], "f1": f1["f1"]}

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = AutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )
    device = torch.device("cuda")
    model.to(device)

    batch_size = 8
    training_args = TrainingArguments(
        num_train_epochs=4,
        output_dir="pytorch",
        learning_rate=4e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        metric_for_best_model='f1',
        load_best_model_at_end=True,
        logging_strategy="epoch",
        warmup_steps=0,
    )

    class_weights = sklearn.utils.class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(tokenized["train"]["label"]),
        y=tokenized["train"]["label"])
    weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    class CustomTrainer(Trainer):
        def compute_loss(self, model, inputs, return_outputs=False):
            labels = inputs.get("labels")
            outputs = model(**inputs)
            logits = outputs.get("logits")
            loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
            return (loss, outputs) if return_outputs else loss

    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    trainer.save_model("pytorch")
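A gap like this often comes from defaults that differ between the two stacks rather than from the settings set explicitly. As a sketch (the seed value and the extra arguments below are assumptions, not taken from the original script), pinning the seed, scheduler, and weight decay on the PyTorch side makes the two runs easier to compare:

from transformers import TrainingArguments, set_seed

set_seed(42)  # assumed seed; keeps weight init and shuffling comparable across runs

training_args = TrainingArguments(
    output_dir="pytorch",
    num_train_epochs=4,
    learning_rate=4e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    lr_scheduler_type="linear",  # mirrors create_optimizer's linear decay on the TF side
    warmup_steps=0,
    weight_decay=0.0,            # create_optimizer also defaults to no weight decay
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,
    seed=42,
)

Note also that the TF script applies class_weight in model.fit on top of plain sparse cross-entropy, while the PyTorch side weights CrossEntropyLoss directly, so the effective loss scaling is not guaranteed to be identical between the two.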

class_weight giving worse results in Keras model. What could be the reason?

I'm working on an NLP classification task with imbalanced data. Here is the code:
df['target'] = le.fit_transform(df['CHAPTER'])
Y = df['target'].ravel()
classes = df['target'].nunique()
train_X, val_X, train_y, val_y = train_test_split(X,Y, test_size=0.1, stratify = Y, random_state = SEED)
class_weights = class_weight.compute_class_weight(class_weight = 'balanced',classes = np.unique(train_y),y = train_y)
class_weight_dict = dict(enumerate(class_weights))
vocab_size = 25000
tokenizer = Tokenizer(num_words=vocab_size, filters = ' ')
tokenizer.fit_on_texts(list(train_X))
train_X = tokenizer.texts_to_sequences(train_X)
val_X = tokenizer.texts_to_sequences(val_X)
train_X = pad_sequences(train_X, maxlen=maxlen)
val_X = pad_sequences(val_X, maxlen=maxlen)
It works fine and gives me an accuracy of around 70% when I do:
history = model.fit(train_X, train_y, batch_size=64, epochs=30,
                    validation_split=0.1, verbose=1)
But the moment I pass class_weight=class_weight_dict to fit, my accuracy drops from 70% to 30%. What could be the possible reason? Am I doing something wrong in the code?
When you build the dictionary with dict(enumerate(class_weights)), its keys start from zero. If your class labels do not start at zero (or are not contiguous), the weights end up assigned to the wrong classes, or some classes get no weight at all. Below is a demonstration:
train_y = [1, 1, 1, 2, 2]  # training labels: 1 and 2
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_y),
    y=train_y
)
print(class_weights)
# array([0.83333333, 1.25      ])
Creating the dictionary as you've done it:
class_weight_dict = dict(enumerate(class_weights))
print(class_weight_dict)
# {0: 0.8333333333333334, 1: 1.25}
There is no class 0 in the data, and the class weight for class 2 is missing.
Instead, you should do:
class_weight_dict = {label: weight for label, weight in zip(np.unique(train_y), class_weights)}
print(class_weight_dict)
# {1: 0.8333333333333334, 2: 1.25}

ValueError during tensorflow graph construction

I'm trying to train a GAN using tensorflow, but during graph construction I'm getting this error:
ValueError: Input 0 of layer conv1_1 is incompatible with the layer: its rank is undefined, but the layer requires a defined rank.
I'm guessing this is because my discriminator and generator are defined inside separate functions that I call. I would ideally like to avoid pulling my entire architecture out of those methods, as this would lead to a tediously long Python file.
I've tried giving the placeholders a default value before the first training examples are pushed through a sess.run() call, but this led to the same example being run through the graph at every stage of training (probably because that's how TensorFlow constructed the graph).
My training loop code is below. Please let me know if seeing the generator and discriminator functions themselves would help.
img1 = tf.placeholder(dtype=tf.float32)
img2 = tf.placeholder(dtype=tf.float32)
lr = tf.placeholder(dtype=tf.float32)

synthetic_imgs, synthetic_logits, mse, _ = self.gen(img1)
fake_result, fake_logits_hr, fake_feature_2 = self.disc(img1, synthetic_imgs)
ground_truth_result, ground_truth_logits_hr, truth_feature_2 = self.disc(img1, img2)
_, fake_logits_lr = self.disc_two(img1, synthetic_imgs)
_, ground_truth_logits_lr = self.disc_two(img1, img2)

a = tf.nn.sigmoid_cross_entropy_with_logits
dis_labels = tf.random.uniform((self.batch_size, 1), minval=-0.2, maxval=0.3)
gen_labels = tf.random.uniform((self.batch_size, 1), minval=0.75, maxval=1.2)
dis_loss = ...  # Discriminator loss (omitted)
gen_loss = ...  # Generator loss (omitted)
# May want to change to -log(MSE)

d_vars = [var for var in tf.trainable_variables() if 'disc' in var.name]
g_vars = [var for var in tf.trainable_variables() if 'g_' in var.name]

dis_loss = tf.reduce_mean(dis_loss)
gen_loss = tf.reduce_mean(gen_loss)

with tf.variable_scope('optimizers', reuse=tf.AUTO_REUSE) as scope:
    gen_opt = tf.train.AdamOptimizer(learning_rate=lr, name='gen_opt')
    disc_opt = tf.train.AdamOptimizer(learning_rate=lr, name='dis_opt')
    gen1 = gen_opt.minimize(gen_loss, var_list=g_vars)
    disc1 = disc_opt.minimize(dis_loss, var_list=d_vars)

    # Tensorboard code for visualizing gradients
    # global_step = variable_scope.get_variable("global_step", [], trainable=False, dtype=dtypes.int64, initializer=init_ops.constant_initializer(0, dtype=dtypes.int64))
    # gen_training = tf.contrib.layers.optimize_loss(gen_loss, global_step, learning_rate=lr, optimizer=gen_opt, summaries=["gradients"], variables=g_vars)
    # disc_training = tf.contrib.layers.optimize_loss(dis_loss, global_step, learning_rate=lr, optimizer=disc_opt, summaries=["gradients"], variables=d_vars)

# summary = tf.summary.merge_all()

with tf.Session() as sess:
    print('start session')
    sess.run(tf.global_variables_initializer())
    # print(tf.trainable_variables())  # Find the variable corresponding to the conv filter weights for tensorboard visualization

    # Code to load each training example
    gen = self.pairs()
    for i in range(self.num_epochs):
        print(str(i + 1) + 'th epoch')
        for j in range(self.num_batches):
            i_1 = None
            i_2 = None
            # Creates a batch
            for k in range(self.batch_size):
                p = next(gen)
                try:
                    i_1 = np.concatenate((i_1, self.load_img(p[0])), axis=0)
                    i_2 = np.concatenate((i_2, self.load_img(p[1])), axis=0)
                except Exception:
                    i_1 = self.load_img(p[0])
                    i_2 = self.load_img(p[1])

            l_r = 8e-4 * (0.5) ** (i // 100)  # Play around with this value
            test, gLoss, _ = sess.run([img1, gen_loss, gen1],
                                      feed_dict={img1: i_1, img2: i_2, lr: l_r})
            dLoss, _ = sess.run([dis_loss, disc1],
                                feed_dict={img1: i_1, img2: i_2, lr: l_r})
            print(test.shape)
            cv2.imwrite('./saved_imgs/gan_test' + str(j) + '.png', np.squeeze(test, axis=3)[0])

            # Code to display gradients and other relevant stats on tensorboard
            # (will be under the histogram tab, labelled OptimizeLoss)
            # if j % 500 == 0:
            #     writer = tf.summary.FileWriter(sess.graph, logdir='./tensorboard/1')  # Change logdir for each run
            #     summary_str = sess.run(summary, feed_dict={img1: i_1, img2: i_2, lr: l_r})
            #     writer.add_summary(summary_str, str(i) + ': ' + str(j))  # Can change to one epoch only if necessary
            #     writer.flush()

            if j % 12 == 0:  # Prints loss statistics every 12th batch
                # print('Epoch: ' + str(i))
                print('Generator Loss: ' + str(gLoss))
                print('Discriminator Loss: ' + str(dLoss))
        self.save_model(sess, i)
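As for the ValueError itself: convolution layers need the input rank at graph-construction time, and a tf.placeholder created without a shape argument has an undefined rank, regardless of which function builds the layers. A minimal sketch (the concrete dimensions below are assumptions; None is fine for unknown sizes as long as the rank is fixed):

# Give the placeholders a rank so conv layers can be built; unknown
# dimensions can stay None, only the number of dimensions must be known.
img1 = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 1], name='img1')  # NHWC, assumed 1 channel
img2 = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 1], name='img2')  # NHWC, assumed 1 channel
lr = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')              # scalar learning rate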

Reusing Variables of LSTM in Tensorflow

I'm trying to build an RNN using LSTM.
I made the LSTM model, followed by two DNN layers and one regression output layer.
I trained my data, and the final training loss became about 0.009. However, when I applied the model to test data, the loss became about 0.5.
The first-epoch training loss is about 0.5, so I think the trained variables are not being used in the test model.
The only difference between the training and test models is the batch size: training batch = 100~200, test batch size = 1.
In the main function I create the LSTM instances.
The model is built in the LSTM initializer.
def __init__(self, config, train_model=None):
    self.sess = sess = tf.Session()

    self.num_steps = num_steps = config.num_steps
    self.lstm_size = lstm_size = config.lstm_size
    self.num_features = num_features = config.num_features
    self.num_layers = num_layers = config.num_layers
    self.num_hiddens = num_hiddens = config.num_hiddens
    self.batch_size = batch_size = config.batch_size
    self.train = train = config.train
    self.epoch = config.epoch
    self.learning_rate = learning_rate = config.learning_rate

    with tf.variable_scope('model') as scope:
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.LSTMCell(lstm_size, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

    with tf.name_scope('placeholders'):
        self.x = tf.placeholder(tf.float32, [self.batch_size, num_steps, num_features],
                                name='input-x')
        self.y = tf.placeholder(tf.float32, [self.batch_size, num_features], name='input-y')
        self.init_state = cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('model'):
        self.W1 = tf.Variable(tf.truncated_normal([lstm_size * num_steps, num_hiddens], stddev=0.1), name='W1')
        self.b1 = tf.Variable(tf.truncated_normal([num_hiddens], stddev=0.1), name='b1')
        self.W2 = tf.Variable(tf.truncated_normal([num_hiddens, num_hiddens], stddev=0.1), name='W2')
        self.b2 = tf.Variable(tf.truncated_normal([num_hiddens], stddev=0.1), name='b2')
        self.W3 = tf.Variable(tf.truncated_normal([num_hiddens, num_features], stddev=0.1), name='W3')
        self.b3 = tf.Variable(tf.truncated_normal([num_features], stddev=0.1), name='b3')

    self.output, self.loss = self.inference()
    tf.initialize_all_variables().run(session=sess)
    tf.initialize_variables([self.b2]).run(session=sess)

    if train_model == None:
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
Using the LSTM __init__ above, the two LSTM instances below are created.
with tf.variable_scope("model",reuse=None):
train_model = LSTM(main_config)
with tf.variable_scope("model", reuse=True):
predict_model = LSTM(predict_config)
After making the two LSTM instances, I trained train_model and then fed the test set into predict_model.
Why are the variables not reused?
The problem is that you should be using tf.get_variable() to create your variables, instead of tf.Variable(), if you are reusing a scope.
Take a look at this tutorial on sharing variables; it will make this clearer.
Also, you don't need a session here: the variables don't have to be initialized while you are defining the model; they should be initialized when you are about to train it.
The code to reuse the variables is the following:
def __init__(self, config, train_model=None):
    self.num_steps = num_steps = config.num_steps
    self.lstm_size = lstm_size = config.lstm_size
    self.num_features = num_features = config.num_features
    self.num_layers = num_layers = config.num_layers
    self.num_hiddens = num_hiddens = config.num_hiddens
    self.batch_size = batch_size = config.batch_size
    self.train = train = config.train
    self.epoch = config.epoch
    self.learning_rate = learning_rate = config.learning_rate

    with tf.variable_scope('model') as scope:
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.LSTMCell(lstm_size, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

    with tf.name_scope('placeholders'):
        self.x = tf.placeholder(tf.float32, [self.batch_size, num_steps, num_features],
                                name='input-x')
        self.y = tf.placeholder(tf.float32, [self.batch_size, num_features], name='input-y')
        self.init_state = cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('model'):
        self.W1 = tf.get_variable(initializer=tf.truncated_normal([lstm_size * num_steps, num_hiddens], stddev=0.1), name='W1')
        self.b1 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens], stddev=0.1), name='b1')
        self.W2 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens, num_hiddens], stddev=0.1), name='W2')
        self.b2 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens], stddev=0.1), name='b2')
        self.W3 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens, num_features], stddev=0.1), name='W3')
        self.b3 = tf.get_variable(initializer=tf.truncated_normal([num_features], stddev=0.1), name='b3')

    self.output, self.loss = self.inference()

    if train_model == None:
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
To see which variables are created after you create train_model and predict_model use the following code:
for v in tf.all_variables():
    print(v.name)
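A quick sanity check along the same lines (a sketch, assuming the LSTM class and configs from the question): once reuse works, constructing predict_model should not add any new variables under the 'model' scope.

before = {v.name for v in tf.global_variables()}
with tf.variable_scope("model", reuse=True):
    predict_model = LSTM(predict_config)
after = {v.name for v in tf.global_variables()}
# With tf.get_variable() and reuse=True, no new 'model/...' variables appear.
assert before == after, "predict_model created new variables instead of reusing them"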