I've built a model and tried to compile it, but ended up with the following warning message.
import tensorflow as tf
from transformers import TFXLNetModel

def create_model_xlnet(xlnet_model):
    word_inputs = tf.keras.Input(shape=(4096,), name='word_inputs', dtype='int32')
    xlnet = TFXLNetModel.from_pretrained(xlnet_model)
    xlnet.classifier = None
    xlnet_encodings = xlnet(word_inputs)[0]
    # Collect the last step from the last hidden state (CLS)
    doc_encoding = tf.squeeze(xlnet_encodings[:, -1:, :], axis=1)
    doc_encoding = tf.keras.layers.Dropout(.1)(doc_encoding)
    outputs = tf.keras.layers.Dense(10, activation='softmax', name='outputs')(doc_encoding)
    model = tf.keras.Model(inputs=[word_inputs], outputs=[outputs])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
WARNING:tensorflow:Gradients do not exist for variables ['tfxl_net_model/transformer/mask_emb:0', 'tfxl_net_model/transformer/layer_._0/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._0/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._1/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._1/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._2/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._2/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._3/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._3/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._4/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._4/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._5/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._5/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._6/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._6/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._7/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._7/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._8/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._8/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._9/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._9/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._10/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._10/rel_attn/seg_embed:0', 'tfxl_net_model/transformer/layer_._11/rel_attn/r_s_bias:0', 'tfxl_net_model/transformer/layer_._11/rel_attn/seg_embed:0'] when minimizing the loss. If you're using model.compile(), did you forget to provide a loss argument?
I need to set a breakpoint in an old Keras model:
import tensorflow as tf
inputs = tf.keras.Input(shape=(3,))
x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
x1 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x1)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile()
The actual model is a lot more complicated and I am just providing a snippet. Is there a way for me to set a breakpoint in the forward pass? I'm just trying to see the intermediate model output.
It might depend a bit on your actual setting, but you could split your model via its layers, similar to how you would set up an autoencoder.
Then you forward pass through the backbone, inspect the intermediate output, pass it through the head, and get the final result.
import tensorflow as tf
inputs = tf.keras.Input(shape=(3,))
x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
x1 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x1)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile()
back = tf.keras.Sequential(model.layers[:2])  # InputLayer + first Dense
head = tf.keras.Sequential(model.layers[2:])  # the remaining Dense layers
# Instead of doing model(x_batch) you can now do
x_batch = tf.random.uniform((1, 3))  # some sample input
inter = back(x_batch)
print(inter)
result = head(inter)
Alternatively, you could also define multiple outputs. These are a bit uglier to train, but for testing purposes you can pull the trained weights into this cloned model:
inputs = tf.keras.Input(shape=(3,))
x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
x1 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x1)
model = tf.keras.Model(inputs=inputs, outputs=[outputs, x1]) #<-- adding your intermediate layer as a second output
model.compile()
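With this variant a single forward pass returns both tensors. A minimal usage sketch, assuming some random sample input:
x_batch = tf.random.uniform((1, 3))  # hypothetical sample input
final_out, intermediate = model(x_batch)  # the second element is the x1 activation
print(intermediate)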
I am trying to train a graph convolutional neural network using the StellarGraph library. I would like to run this example https://stellargraph.readthedocs.io/en/stable/demos/graph-classification/gcn-supervised-graph-classification.html
but without the N-fold cross-validation, providing my own training, validation and test sets instead. This is the code I am using (taken from this post):
from stellargraph.mapper import PaddedGraphGenerator
from stellargraph.layer import GCNSupervisedGraphClassification
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam

generator = PaddedGraphGenerator(graphs=graphs)
train_gen = generator.flow([x for x in range(0, len(graphs_train))],
                           targets=graphs_train_labels,
                           batch_size=35)
test_gen = generator.flow([x for x in range(len(graphs_train), len(graphs_train) + len(graphs_test))],
                          targets=graphs_test_labels,
                          batch_size=35)
# Stopping criterion
es = EarlyStopping(monitor="val_loss",
                   min_delta=0,
                   patience=20,
                   restore_best_weights=True)
# Model definition
gc_model = GCNSupervisedGraphClassification(layer_sizes=[64, 64],
                                            activations=["relu", "relu"],
                                            generator=generator,
                                            dropout=0.5)
x_inp, x_out = gc_model.in_out_tensors()
predictions = Dense(units=32, activation="relu")(x_out)
predictions = Dense(units=16, activation="relu")(predictions)
predictions = Dense(units=1, activation="sigmoid")(predictions)
# Creating the Keras model and preparing it for training
model = Model(inputs=x_inp, outputs=predictions)
model.compile(optimizer=Adam(0.001), loss=binary_crossentropy, metrics=["acc"])
# GNN training
history = model.fit(train_gen, epochs=10, validation_data=test_gen, verbose=1)
model.fit(x=graphs_train,
          y=graphs_train_labels,
          epochs=10,
          verbose=1,
          callbacks=[es])
# Calculate performance on the validation data
test_metrics = model.evaluate(valid_gen, verbose=1)
valid_acc = test_metrics[model.metrics_names.index("acc")]
print(f"Test Accuracy model = {valid_acc}")
But at the end I am getting this error:
ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'stellargraph.core.graph.StellarGraph'>"}), <class 'numpy.ndarray'>
What am I missing here? Is it because of the way I have created the graphs? In my case, graphs is a list containing the StellarGraph objects.
Problem solved. I was calling
model.fit(x=graphs_train,
          y=graphs_train_labels,
          epochs=10,
          verbose=1,
          callbacks=[es])
after the line
history = model.fit(train_gen, epochs=10, validation_data=test_gen, verbose=1)
model.fit cannot consume a raw list of StellarGraph objects directly, which is what triggered the data adapter error; the data has to go through the generator, so the second fit call must be removed.
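A single generator-based call that also keeps the early-stopping callback could look like this (a sketch based on the code above):
history = model.fit(train_gen,
                    epochs=10,
                    validation_data=test_gen,
                    verbose=1,
                    callbacks=[es])
test_metrics = model.evaluate(test_gen, verbose=1)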
I am trying to predict uncertainty in a regression problem using Dropout during testing, as per Yarin Gal's article. I created a class using Keras's backend function, as provided by this Stack Overflow question's answer. The class takes an NN model as input and randomly drops neurons during testing to give a stochastic estimate rather than a deterministic output for time-series forecasting.
I create a simple encoder-decoder model as shown below for the forecasting with 0.1 dropout during training:
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense
from tensorflow.keras.models import Model

input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec)
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
# optimiser = optimizers.Adam(clipnorm=1)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(loss="mean_squared_error",
                      optimizer="adam",
                      metrics=['mean_squared_error'])
enc_dec_model.summary()
After that, I define and call the DropoutPrediction class.
# Define the class:
class KerasDropoutPrediction(object):
    def __init__(self, model):
        self.f = K.function(
            [model.layers[0].input,
             K.learning_phase()],
            [model.layers[-1].output])

    def predict(self, x, n_iter=10):
        result = []
        for _ in range(n_iter):
            result.append(self.f([x, 1]))
        result = np.array(result).reshape(n_iter, x.shape[0], x.shape[1]).T
        return result

# Call the object:
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(x_test, n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=1)
However, at the line kdp = KerasDropoutPrediction(enc_dec_model), where I pass the LSTM model to the class, I get the following error message, which says the input has to be a Keras tensor. Can anyone help me with this error?
Error Message:
ValueError: Found unexpected instance while processing input tensors for keras functional model. Expecting KerasTensor which is from tf.keras.Input() or output from keras layer call(). Got: 0
To activate Dropout at inference time, you simply have to specify training=True (TF > 2.0) in the layer of interest (the last LSTM layer in your case).
With training=False:
import numpy as np
from tensorflow.keras.layers import Input, LSTM
from tensorflow.keras.models import Model

inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=False)
m = Model(inp, x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0, 1, (1, 10, 1))
output = []
for i in range(0, 100):
    output.append(m.predict(X))  # always the same
With training=True:
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=True)
m = Model(inp, x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0, 1, (1, 10, 1))
output = []
for i in range(0, 100):
    output.append(m.predict(X))  # always different
In your example, this becomes:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec, training=True)
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(
    loss="mean_squared_error",
    optimizer="adam",
    metrics=['mean_squared_error']
)
enc_dec_model.fit(train_x, train_y, epochs=10, batch_size=32)
and the KerasDropoutPrediction:
class KerasDropoutPrediction(object):
    def __init__(self, model):
        self.model = model

    def predict(self, X, n_iter=10):
        result = []
        for _ in range(n_iter):
            result.append(self.model.predict(X))
        result = np.array(result)
        return result
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(test_x, n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=0)
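Beyond the mean, the spread of the stacked predictions across the stochastic passes is the usual uncertainty estimate in this setup; a minimal sketch, assuming y_pred_do stacks the n_iter passes along axis 0 as returned above:
y_pred_do_std = y_pred_do.std(axis=0)  # per-point spread over the 100 stochastic passes
lower = y_pred_do_mean - 2 * y_pred_do_std  # rough ~95% band under a Gaussian assumption
upper = y_pred_do_mean + 2 * y_pred_do_std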
I am running an MNIST model in distributed TensorFlow. I would like to monitor "manually" the evolution of the global_step for debugging purposes. What is the best and cleanest way to get the global step in a distributed TensorFlow setting?
My code is below:
...
with tf.device(device):
    images = tf.placeholder(tf.float32, [None, 784], name='image_input')
    labels = tf.placeholder(tf.float32, [None], name='label_input')
    data = read_data_sets(FLAGS.data_dir,
                          one_hot=False,
                          fake_data=False)
    logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)
    loss = mnist.loss(logits, labels)
    loss = tf.Print(loss, [loss], message="Loss = ")
    train_op = mnist.training(loss, FLAGS.learning_rate)

hooks = [tf.train.StopAtStepHook(last_step=FLAGS.nb_steps)]

with tf.train.MonitoredTrainingSession(
        master=target,
        is_chief=(FLAGS.task_index == 0),
        checkpoint_dir=FLAGS.log_dir,
        hooks=hooks) as sess:
    while not sess.should_stop():
        xs, ys = data.train.next_batch(FLAGS.batch_size, fake_data=False)
        sess.run([train_op], feed_dict={images: xs, labels: ys})
        global_step_value = # ... what is the clean way to get this variable?
Normally, good practice is to create your global step variable when defining the graph, e.g. global_step = tf.Variable(0, trainable=False, name='global_step'). Then you can use graph.get_tensor_by_name("global_step:0") to retrieve it easily.
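A minimal sketch of how that fits into the training loop above (assuming tf.train.get_or_create_global_step() either finds the step variable created inside mnist.training or creates a fresh one, which is worth verifying in your setup):
global_step = tf.train.get_or_create_global_step()
with tf.train.MonitoredTrainingSession(master=target,
                                       is_chief=(FLAGS.task_index == 0),
                                       checkpoint_dir=FLAGS.log_dir,
                                       hooks=hooks) as sess:
    while not sess.should_stop():
        xs, ys = data.train.next_batch(FLAGS.batch_size, fake_data=False)
        # Fetch the step tensor in the same run call as the train op
        _, global_step_value = sess.run([train_op, global_step],
                                        feed_dict={images: xs, labels: ys})
        print(global_step_value)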
I'm building an RNN model to do image classification. I used a pipeline to feed in the data. However it returns
ValueError: Variable rnn/rnn/basic_rnn_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
I wonder what I can do to fix this, since there are not many examples of implementing an RNN with an input pipeline. I know it would work if I used a placeholder, but my data is already in the form of tensors. Unless I can feed the placeholder with tensors, I prefer to just use the pipeline.
def RNN(inputs):
    with tf.variable_scope('cells', reuse=True):
        basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size)
    with tf.variable_scope('rnn'):
        outputs, states = tf.nn.dynamic_rnn(basic_cell, inputs, dtype=tf.float32)
    fc_drop = tf.nn.dropout(states, keep_prob)
    logits = tf.contrib.layers.fully_connected(fc_drop, batch_size, activation_fn=None)
    return logits

# Training
with tf.name_scope("cost_function") as scope:
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch)))
    train_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost)

# Accuracy
with tf.name_scope("accuracy") as scope:
    correct_prediction = tf.equal(tf.argmax(RNN(test_image), 1), tf.argmax(test_image_label, 0))
    accuracy = tf.cast(correct_prediction, tf.float32)
You need to use the reuse option correctly; the following changes would solve it. For prediction, you need to reuse the already existing variables in the graph.
def RNN(inputs, reuse):
    with tf.variable_scope('cells', reuse=reuse):
        basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size, reuse=reuse)
    ...

...
# Training
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch, reuse=None)))

# Accuracy
...
correct_prediction = tf.equal(tf.argmax(RNN(test_image, reuse=True), 1), tf.argmax(test_image_label, 0))
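An alternative that avoids threading the reuse flag through manually is tf.AUTO_REUSE (TF 1.4+), which creates variables on the first call and reuses them on subsequent calls; a sketch of the same function under that approach:
def RNN(inputs):
    with tf.variable_scope('rnn_model', reuse=tf.AUTO_REUSE):
        basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size)
        outputs, states = tf.nn.dynamic_rnn(basic_cell, inputs, dtype=tf.float32)
        fc_drop = tf.nn.dropout(states, keep_prob)
        logits = tf.contrib.layers.fully_connected(fc_drop, batch_size, activation_fn=None)
    return logits

# Both calls now share one set of weights without an explicit reuse argument
train_logits = RNN(train_batch)
test_logits = RNN(test_image)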