use mediapipe with tensorflow model(DTLN denoise)? - tensorflow

I want to use MediaPipe to denoise real-time audio (I already have a model trained with TensorFlow). There is no video input, just audio. I didn't find a relevant use case, but is this theoretically possible?

There are many ways to do this, but the concept stays the same: when you transform the signal and then transform it back again, the noise is removed along the way and the function values are returned.
[ Sample ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Logicals
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Open a single-channel PyAudio input stream; FORMAT, RATE and CHUNK are
# defined outside this snippet.
# NOTE(review): `stream` is not read from or closed anywhere in this snippet.
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
# Read a WAV file from disk and decode it into int16 samples.
file = tf.io.read_file("F:\\temp\\Python\\Speech\\Piano\\Berklee44v4\\prep_pianoE0_1.wav")
decoded_wav = tfio.audio.decode_wav( file, shape=None, dtype=tf.int16, name="decoded_wav" )
# Drop the last axis (presumably a single audio channel - TODO confirm).
decoded_wav = tf.squeeze(decoded_wav, axis=-1)
# The STFT below is computed on a float32 copy of the samples.
decoded_wav_float = tf.cast( decoded_wav, dtype=tf.float32 )
print( decoded_wav )
# Short-time Fourier transform with 256-sample frames advanced by 64 samples;
# tf.abs keeps only the magnitude of the result.
stft = tf.signal.stft(decoded_wav_float, frame_length=256, frame_step=64)
stft = tf.abs(stft)
# First panel: the decoded samples.
# NOTE(review): this panel is labelled "Spectrogram" although it plots the
# decoded samples themselves.
plt.subplot(2, 2, 1)
plt.plot(decoded_wav, lw=0.5)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.xlabel( "Spectrogram", fontsize=22 )
# Second panel: the STFT magnitudes.
plt.subplot(2, 2, 2)
plt.plot(stft, lw=0.5)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.xlabel( "stft", fontsize=22 )
plt.show()
plt.close()
[ Output ]:

Related

how to add text preprocessing tokenization step into Tensorflow model

I have a TensorFlow model SavedModel which includes saved_model.pb and variables folder. The preprocessing step has not been incorporated into this model that's why I need to do preprocessing(Tokenization etc) before feeding the data to the model for the prediction aspect.
I am looking for an approach that I can incorporate the preprocessing step into the model. I have seen examples here and here however they are image data.
Just to get an idea how the training part has been done, this is a portion of the code that we did training (if you need the implementation of the function I have used here, please let me know(I did not include it to make my question more understandable ))
Training:
# --- Setup: data processor, BERT config, tokenizer and a default run config ---
processor = IntentProcessor(
    FLAGS.data_path, FLAGS.test_data_path,
    FLAGS.test_proportion, FLAGS.seed, FLAGS.do_early_stopping)
bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
tokenizer = tokenization.FullTokenizer(
    vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
run_config = tf.estimator.RunConfig(
    model_dir=FLAGS.output_dir,
    save_checkpoints_steps=FLAGS.save_checkpoints_steps)

# The schedule values stay None unless training was requested.
train_examples = None
num_train_steps = None
num_warmup_steps = None
if FLAGS.do_train:
    train_examples = processor.get_train_examples()
    num_iter_per_epoch = int(len(train_examples) / FLAGS.train_batch_size)
    num_train_steps = num_iter_per_epoch * FLAGS.num_train_epochs
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    # When training, replace the run config so that a checkpoint is saved
    # once per epoch rather than every FLAGS.save_checkpoints_steps steps.
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=num_iter_per_epoch)

# The best temperature T starts at 1.0; it is meant to be updated during
# training.
best_temperature = 1.0

model_fn = model_fn_builder(
    bert_config=bert_config,
    num_labels=len(processor.le.classes_),
    init_checkpoint=FLAGS.init_checkpoint,
    learning_rate=FLAGS.learning_rate,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    best_temperature=best_temperature,
    seed=FLAGS.seed)

# Extra parameters could be handed to the estimator through a `params` value.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    config=run_config)

if FLAGS.do_train:
    # Tokenize the training texts, wrap them in an input_fn and train.
    train_features = convert_examples_to_features(
        train_examples, FLAGS.max_seq_length, tokenizer)
    train_labels = processor.get_train_labels()
    train_input_fn = input_fn_builder(
        features=train_features,
        is_training=True,
        batch_size=FLAGS.train_batch_size,
        seed=FLAGS.seed,
        labels=train_labels,
    )
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
And this is the preprocessing that I use for the training:
# Sentiment class names; a label's position in this list is its integer id.
LABEL_LIST = ['negative', 'neutral', 'positive']
# Lookup table from integer class id to label name.
INTENT_MAP = dict(enumerate(LABEL_LIST))
BATCH_SIZE = 1
# Sequence length passed to convert_examples_to_features at inference time.
MAX_SEQ_LEN = 70
def convert_examples_to_features(texts, max_seq_length, tokenizer):
    """Convert raw texts into fixed-length BERT-style input arrays.

    Each text is tokenized, truncated so that it fits together with the
    surrounding "[CLS]" and "[SEP]" markers, converted to vocabulary ids and
    zero-padded on the right up to ``max_seq_length``.

    Args:
        texts: Iterable of input texts; every item is passed through ``str()``
            before tokenization.
        max_seq_length: Length of every output row, including the two marker
            tokens. Must be at least 2.
        tokenizer: Object providing ``tokenize(text)`` (returning a list of
            tokens) and ``convert_tokens_to_ids(tokens)``.

    Returns:
        A dict with the keys ``'input_ids'``, ``'input_mask'`` and
        ``'segment_ids'``. Each value is a NumPy array holding one row of
        ``max_seq_length`` entries per input text. The mask is 1 for real
        tokens and 0 for padding; the segment ids are all 0.

    Raises:
        ValueError: If ``max_seq_length`` is smaller than 2, because there
            would be no room for the "[CLS]" and "[SEP]" markers.
    """
    if max_seq_length < 2:
        raise ValueError(
            "max_seq_length must be at least 2 to hold [CLS] and [SEP], "
            "got %r" % (max_seq_length,))

    # Account for [CLS] and [SEP] with "- 2".
    max_tokens = max_seq_length - 2

    input_ids_list = []
    input_mask_list = []
    segment_ids_list = []
    for text in texts:
        tokens = ["[CLS]", *tokenizer.tokenize(str(text))[:max_tokens], "[SEP]"]
        input_ids = list(tokenizer.convert_tokens_to_ids(tokens))

        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        pad = max_seq_length - len(input_ids)
        input_ids_list.append(input_ids + [0] * pad)
        input_mask_list.append([1] * len(input_ids) + [0] * pad)
        # Single-sentence input: every position belongs to segment 0.
        segment_ids_list.append([0] * max_seq_length)

    # tf.data.Dataset.from_tensor_slices needs NumPy arrays rather than
    # tensors (or the tensor graph shape has to match).
    return {
        'input_ids': np.asanyarray(input_ids_list),
        'input_mask': np.asanyarray(input_mask_list),
        'segment_ids': np.asanyarray(segment_ids_list),
    }
and inferencing would be like this:
def inference(texts,MODEL_DIR, VOCAB_FILE):
    """Predict sentiment for one text or a list of texts.

    A single non-list input is wrapped into a one-element list. The texts are
    tokenized with a ``FullTokenizer`` built from ``VOCAB_FILE``, converted to
    features of length ``MAX_SEQ_LEN`` and passed to the predictor loaded
    from ``MODEL_DIR``. The response is turned into the final result by
    ``get_sentiment``.
    """
    batch = texts if isinstance(texts, list) else [texts]
    features = convert_examples_to_features(
        batch,
        MAX_SEQ_LEN,
        FullTokenizer(vocab_file=VOCAB_FILE, do_lower_case=False),
    )
    response = predictor.from_saved_model(MODEL_DIR)(features)
    return get_sentiment(response)
def preprocess(texts, vocab_file=None):
    """Tokenize one text or a list of texts into model input features.

    Args:
        texts: A single text or a list of texts. A non-list value is wrapped
            into a one-element list.
        vocab_file: Path of the vocabulary file for the tokenizer. When
            omitted, the module-level ``VOCAB_FILE`` is used, which keeps
            existing ``preprocess(texts)`` calls working.

    Returns:
        The feature dict produced by ``convert_examples_to_features`` for a
        sequence length of ``MAX_SEQ_LEN``.
    """
    if not isinstance(texts, list):
        texts = [texts]
    if vocab_file is None:
        # Only fall back to the global when the caller gave no path, so the
        # function no longer depends on the global being defined.
        vocab_file = VOCAB_FILE
    tokenizer = FullTokenizer(vocab_file=vocab_file, do_lower_case=False)
    return convert_examples_to_features(texts, MAX_SEQ_LEN, tokenizer)
def get_sentiment(response):
    """Turn a prediction response into a list of sentiment results.

    Args:
        response: Mapping with an ``'intent'`` array of predicted class ids
            (converted with ``.tolist()``) and a ``'prob'`` value indexed as
            ``response['prob'][row][class_id]``.

    Returns:
        A list with one dict per prediction. Each dict holds the label name
        from ``INTENT_MAP`` under ``"sentiment"`` and the probability of the
        predicted class under ``"confidence"``. An empty response yields an
        empty list.
    """
    idx = response['intent'].tolist()
    print(idx)
    # Guard the debug print: an empty batch has no first element.
    if idx:
        print(INTENT_MAP.get(idx[0]))
    return [
        {
            "sentiment": INTENT_MAP.get(class_id),
            "confidence": response['prob'][row][class_id],
        }
        for row, class_id in enumerate(idx)
    ]
# Example call: classify a single sentence.
# NOTE(review): `args` is not defined in this snippet; presumably parsed
# command-line arguments providing the model and vocabulary paths - confirm.
sentence = 'The movie is ok'
inference(sentence, args.model_path, args.vocab_path)
And this is the implementation of model_fn_builder:
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, best_temperature, seed):
    """Returns multi-intents `model_fn` closure for Estimator.

    Args:
        bert_config: BERT configuration forwarded to `create_intent_model`.
        num_labels: Number of output classes.
        init_checkpoint: Checkpoint used to initialise the trainable
            variables; initialisation is skipped when this is falsy.
        learning_rate: Learning rate handed to the optimizer.
        num_train_steps: Total number of training steps for the optimizer.
        num_warmup_steps: Number of warm-up steps for the optimizer.
        best_temperature: Value the logits are divided by before the softmax
            in PREDICT mode.
        seed: Random seed forwarded to `create_intent_model`.

    Returns:
        A `model_fn(features, labels, mode, params)` function that builds a
        `tf.estimator.EstimatorSpec` for the TRAIN, EVAL and PREDICT modes.
    """

    def model_fn(features, labels, mode,
                 params):  # pylint: disable=unused-argument
        """The `model_fn` for Estimator."""
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info(
                " name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits) = create_intent_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            labels, num_labels, mode, seed)

        tvars = tf.trainable_variables()
        # Must be an empty container rather than None: the logging loop
        # below runs a membership test even when no checkpoint is given.
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map,
             initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(
                    tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, labels, logits):
                """Build the accuracy and mean-loss evaluation metrics."""
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels, predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss
                }

            eval_metrics = metric_fn(per_example_loss, labels, logits)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=eval_metrics)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                'intent': tf.argmax(logits, axis=-1, output_type=tf.int32),
                # Temperature-scaled softmax over the logits.
                'prob': tf.nn.softmax(logits / tf.constant(best_temperature)),
                'logits': logits
            }
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions)
        return output_spec

    return model_fn
And this is the implementation of create_intent_model
def create_intent_model(bert_config, is_training, input_ids, input_mask,
                        segment_ids,
                        labels, num_labels, mode, seed):
    """Build a BERT model with a linear classification head.

    The pooled BERT output is projected to ``num_labels`` logits; dropout
    with ``keep_prob=0.9`` is applied first when ``is_training`` is true.

    Returns:
        A ``(loss, per_example_loss, logits)`` tuple. The two loss values
        are only computed in TRAIN and EVAL mode and are ``None`` otherwise.
    """
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
        seed=seed,
    )
    pooled = model.get_pooled_output()
    hidden_size = pooled.shape[-1].value

    with tf.variable_scope("loss"):
        weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02, seed=seed))
        bias = tf.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())

        if is_training:
            # keep_prob=0.9 corresponds to 0.1 dropout.
            pooled = tf.nn.dropout(pooled, keep_prob=0.9, seed=seed)

        logits = tf.nn.bias_add(
            tf.matmul(pooled, weights, transpose_b=True), bias)

        loss = None
        per_example_loss = None
        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            # Cross-entropy against the one-hot encoded labels.
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            one_hot_labels = tf.one_hot(labels, depth=num_labels,
                                        dtype=tf.float32)
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
            loss = tf.reduce_mean(per_example_loss)
        return loss, per_example_loss, logits
This is the list tensorflow related libraries:
tensorboard==1.15.0
tensorflow-estimator==1.15.1
tensorflow-gpu==1.15.0
There is good documentation here, however, it uses Keras API. Plus, I don't know how can I incorporate preprocessing layer here even with the Keras API.
Again, my final goal is to incorporate the preprocessing step into the model-building phase, so that when I later load the model I can pass raw text such as "The movie is ok" directly to the model.
I just need the idea on how to incorporate a preprocessing layer into this code which is function based.
Thanks in advance~
You can use the TextVectorization layer as follows. But to answer your question fully, I'd need to know what's in model_fn_builder() function. I'll show how you can do this with Keras model building API.
class BertTextProcessor(tf.keras.layers.Layer):
    """Keras layer that turns raw strings into BERT-style input tensors.

    "[CLS] " is prepended and " [SEP]" appended to every input string before
    the strings are vectorized by a ``TextVectorization`` layer. Call
    ``adapt`` on representative data first so the vocabulary is learned.
    """

    def __init__(self, max_length, **kwargs):
        """Create the layer.

        Args:
            max_length: Output sequence length of the vectorizer.
            **kwargs: Standard Keras layer arguments (for example ``name``),
                forwarded to the base class so that ``from_config`` can
                rebuild the layer from ``get_config``.
        """
        super().__init__(**kwargs)
        self.max_length = max_length
        # Here I'm setting any preprocessing to none
        # by default this layer lowers case and remove punctuation
        # i.e. tokens like [CLS] would become cls
        self.vectorizer = tf.keras.layers.TextVectorization(
            output_sequence_length=max_length, standardize=None)

    def call(self, inputs):
        """Return input ids, input mask and segment ids for ``inputs``."""
        inputs = "[CLS] " + inputs + " [SEP]"
        tok_inputs = self.vectorizer(inputs)
        return {
            "input_ids": tok_inputs,
            # Every non-zero token id counts as a real token; id 0 is padding.
            "input_mask": tf.cast(tok_inputs != 0, 'int32'),
            "segment_ids": tf.zeros_like(tok_inputs)
        }

    def adapt(self, data):
        """Learn the vocabulary from ``data``, including the marker tokens."""
        data = "[CLS] " + data + " [SEP]"
        self.vectorizer.adapt(data)

    def get_config(self):
        """Return the layer config, including the base-layer settings."""
        config = super().get_config()
        config.update({"max_length": self.max_length})
        return config
Usage,
# Two example sentences, used both to learn the vocabulary and as input.
input_str = tf.constant(["movie is okay good plot very nice", "terrible movie bad actors not good"])
# The argument is the layer's max_length (the vectorizer's output length).
proc = BertTextProcessor(8)
# You need to call this so that the vectorizer layer learns the vocabulary
proc.adapt(input_str)
print(proc(input_str))
which outputs,
{'input_ids': <tf.Tensor: shape=(2, 10), dtype=int64, numpy=
array([[ 5, 2, 12, 9, 3, 8, 6, 11, 4, 0],
[ 5, 7, 2, 13, 14, 10, 3, 4, 0, 0]])>, 'input_mask': <tf.Tensor: shape=(2, 10), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], dtype=int32)>, 'segment_ids': <tf.Tensor: shape=(2, 10), dtype=int64, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])>}
You can use this layer as an input for a Keras model as you would use any layer.
You can also get the vocabulary using, proc.vectorizer.get_vocabulary() which returns,
['',
'[UNK]',
'movie',
'good',
'[SEP]',
'[CLS]',
'very',
'terrible',
'plot',
'okay',
'not',
'nice',
'is',
'bad',
'actors']
Alternative with tf-models-official
To get data in a format accepted by BERT, you can also use the tf-models-official library. Specifically, you can use the BertPackInputs object.
I recently updated code for one of my books and in Chapter 13/13.1_Spam_Classification you can see how it is used. The section Generating the correct input format for BERT shows how this could be done.
Edit: How to do this in tensorflow==1.15.0
To do this in TensorFlow 1.x you will need some reworking, as a lot of the functionality used in the original answer is missing. Here's an example of how you can do it; you will need to adapt this code to your specific use case or method.
# Vocabulary table built from vocab.txt: each whole line is a key and its
# line number is the id. 100 is the default value for unknown keys.
lookup_layer = tf.lookup.StaticHashTable(
    tf.lookup.TextFileInitializer(
        "vocab.txt", tf.string, tf.lookup.TextFileIndex.WHOLE_LINE,
        tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER, delimiter=" "),
    100
)
text = tf.constant(["bad film", "movie is okay good plot very nice", "terrible movie bad actors not good"])
# The markers need a separating space: the split below works on whitespace,
# so "[CLS]bad" / "film[SEP]" would otherwise be looked up as single tokens.
text = "[CLS] " + text + " [SEP]"
text = tf.strings.split(text, result_type="RaggedTensor")
# Pad the ragged token rows to a rectangular tensor with "[PAD]".
text_dense = text.to_tensor("[PAD]")
out = lookup_layer.lookup(text_dense)
with tf.Session() as sess:
    # Lookup tables have to be initialised before they can be queried.
    sess.run(tf.tables_initializer())
    print(sess.run(out))

Stateful RNN (LSTM) in keras

Imagine the following data:
X = [x1, x2, x3, x4, x5, x6, ...]
and
Y = [y1, y2, y3, y4, ...]
the label represent the input in the following manner:
[x1,x2] -> y1
[x2,x3] -> y2
.
.
.
I am trying to build a model using Keras so that, when classification takes place, the model remembers what it predicted at the previous stage. I want it to be causal, in the sense that the next prediction depends directly on the previous one, somewhat similar to methods like HMMs. So something like this:
Y2 = f( [x2,x3] , y1)
I have read this page, where they divide each batch into sub-batches (if that's the correct term?) and reset state between each main batch, but what I want to do is not shuffle the batches and introduce that causality into the model.
My question is how can you do this with stateful LSTMs?
One way is to write a custom layer that inherits from the LSTM class.
[ Sample ]:
import tensorflow as tf
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyLSTMLayer( tf.keras.layers.LSTM ):
    """LSTM subclass whose forward pass is a single matrix multiplication.

    ``build`` creates one ``kernel`` weight of shape ``(input_dim, units)``
    and ``call`` returns ``inputs @ kernel``.
    """

    def __init__(self, units, return_sequences, return_state):
        """Create the layer.

        Args:
            units: Number of output units (second dimension of the kernel).
            return_sequences: Forwarded to the ``LSTM`` constructor.
            return_state: Forwarded to the ``LSTM`` constructor.
        """
        # Forward the caller's flags instead of hard-coding True / False.
        super(MyLSTMLayer, self).__init__(
            units, return_sequences=return_sequences, return_state=return_state )
        self.num_units = units

    def build(self, input_shape):
        """Create the kernel, sized by the last input dimension."""
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]),
                                             self.num_units])

    def call(self, inputs):
        """Multiply the inputs by the kernel."""
        return tf.matmul(inputs, self.kernel)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Build the sample input: tf.range(3, 12, 3) gives the values 3, 6, 9.
start = 3
limit = 12
delta = 3
sample = tf.range( start, limit, delta )
sample = tf.cast( sample, dtype=tf.float32 )
# Reshape to (number_of_values, 1, 1).
sample = tf.constant( sample, shape=( sample.shape[0], 1, 1 ) )
# The number of units is set to the number of sample values.
layer = MyLSTMLayer( sample.shape[0], True, False )
print( sample )
print( layer(sample) )
[ Output ]:
tf.Tensor(
[[[3.]]
[[6.]]
[[9.]]], shape=(3, 1, 1), dtype=float32)
tf.Tensor(
[[[-1.8635211 2.6157026 -1.6650987]]
[[-3.7270422 5.2314053 -3.3301973]]
[[-5.5905633 7.8471084 -4.995296 ]]], shape=(3, 1, 3), dtype=float32)

Need help in object detection YOLO or tensorflow on how to compare two images and send alert for missing object like through SNS

I'm working on a project for empty-shelf detection in a store that should send an alert through SNS, and I haven't been able to find any resource on how to complete it. I'm coding on Google Colab.
I trained on my images with YOLO and TensorFlow, and I have a working live feed showing the object detection. Now I want the detector to recognise an empty shelf when items are removed, and then send an alert to a given number or account.
Can anyone help me with how to achieve this? Or is there a way to compare two images, for example the planogram and the captured feed, and then send an alert about the missing item?
Thanks.
I can answer the image-classification part of the task; for SNS you will need to request access to the interface or check its specification.
You can find code for it on the Internet, but the specification is important: in two-sided communication both sides expect the same definitions.
Some message servers may suffer delays or even crash when forwarding communication if you do not send correctly formed messages.
We will help with the image-classification task.
[ Sample ]: The communication targets expect you to create folders or queues for the target server; you may use this code sample.
import tensorflow as tf
import tensorflow_io as tfio
import pandas as pd
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Display names of the classes; indexed by the predicted class id when the
# plots are labelled.
list_label_actual = [ 'Candidt Kibt', 'Pikaploy' ]
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Every spreadsheet row provides an index, an image path and a label.
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 7.xlsx', index_col=None, header=[0])
list_label = [ ]
list_Image = [ ]
list_file_actual = [ ]
for Index, Image, Label in variables.values:
    print( Label )
    list_label.append( Label )
    image = tf.io.read_file( Image )
    image = tfio.experimental.image.decode_tiff(image, index=0)
    # Keep the full-size image for display; the model gets a 32x32 copy.
    list_file_actual.append(image)
    image = tf.image.resize(image, [32,32], method='nearest')
    list_Image.append(image)

# Take the sample count from the data instead of hard-coding it, so the
# script works for a spreadsheet with any number of rows.
num_samples = len( list_label )
list_label = tf.cast( list_label, dtype=tf.int32 )
list_label = tf.constant( list_label, shape=( num_samples, 1, 1 ) )
list_Image = tf.cast( list_Image, dtype=tf.int32 )
# Each dataset element carries a leading axis of size 1.
list_Image = tf.constant( list_Image, shape=( num_samples, 1, 32, 32, 4 ) )
dataset = tf.data.Dataset.from_tensor_slices(( list_Image, list_label ))
# Plain NumPy copy without the extra axis, used later for plotting.
list_Image = tf.constant( list_Image, shape=( num_samples, 32, 32, 4) ).numpy()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Classifier for 32x32 inputs with 4 channels: two normalization layers, a
# dense layer, a reshape into a sequence for two bidirectional LSTM layers,
# and a dense head.
# The final Dense(2) is given no activation argument.
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Reshape((256, 32 * 32)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(196, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(196)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(2),
])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class custom_callback(tf.keras.callbacks.Callback):
    """Stop training once the epoch accuracy reaches 0.97."""

    def on_epoch_end(self, epoch, logs=None):
        """Request early stopping when ``logs['accuracy']`` is >= 0.97.

        Nothing happens when ``logs`` is missing or has no 'accuracy' entry.
        """
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy >= 0.97:
            self.model.stop_training = True


# The class name is rebound to an instance, which is what model.fit receives.
custom_callback = custom_callback()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Nadam optimizer with a learning rate of 1e-6.
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.000001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# The model's final Dense(2) layer has no activation, so it outputs raw
# logits (the script applies tf.nn.softmax to the predictions later). The
# loss must therefore be told to expect logits.
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction=tf.keras.losses.Reduction.AUTO,
    name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Compile the model with the optimizer and loss and track accuracy, which the
# early-stopping callback reads from the epoch logs.
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'] )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit( dataset, batch_size=100, epochs=50, callbacks=[custom_callback] )

# Show every original image with its predicted label and softmax score.
plt.figure(figsize=(6,6))
plt.title("Actors recognitions")
for i, pixels in enumerate(list_Image):
    # Round-trip through a PIL image, then add a leading batch axis.
    img = tf.keras.preprocessing.image.array_to_img(
        pixels,
        data_format=None,
        scale=True
    )
    img_array = tf.expand_dims(tf.keras.preprocessing.image.img_to_array(img), 0)
    predictions = model.predict(img_array)
    score = tf.nn.softmax(predictions[0])
    best = tf.math.argmax(score)
    caption = str(round(score[best.numpy()].numpy(), 2)) + ":" + str(list_label_actual[best])

    plt.subplot(6, 6, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(list_file_actual[i])
    plt.xlabel(caption)
plt.show()
input('...')
[ Output ]:

How to feed my network with the correct array size in tensorflow

I have the following code, with which I am trying to train a network that I built on the Belgian traffic signs dataset. Here is the code:
import tensorflow as tf
import os
import skimage.io
from skimage import transform
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
config=tf.ConfigProto(log_device_placement=True)
#config_soft = tf.ConfigProto(allow_soft_placement =True)
def load_data(data_directory):
    """Load all ``.ppm`` images below ``data_directory`` with their labels.

    Every sub-directory of ``data_directory`` is one class, and its name is
    converted with ``int()`` to give the label. Directories and files are
    visited in sorted order so the result is reproducible between runs
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        data_directory: Path of the directory holding the class folders.

    Returns:
        A ``(images, labels)`` tuple of two equally long lists: the images as
        returned by ``skimage.io.imread`` and their integer labels.
    """
    directories = sorted(
        d for d in os.listdir(data_directory)
        if os.path.isdir(os.path.join(data_directory, d)))
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = sorted(
            os.path.join(label_directory, f)
            for f in os.listdir(label_directory)
            if f.endswith(".ppm"))
        for f in file_names:
            images.append(skimage.io.imread(f))
            labels.append(int(d))
    return images, labels
# Locate the training and testing folders and load the training images.
Root_Path = "/home/raed/Dropbox/Thesis/Codes/Tensorflow"
training_Directory = os.path.join(Root_Path,"Training")
testing_Directory = os.path.join(Root_Path,"Testing")
images, labels = load_data(training_Directory)
# Convert the lists to arrays to make information retrieval easier
images_array = np.asarray(images)
labels_array = np.asanyarray(labels)
# Print some information about the datasets
print(images_array.ndim)
print(images_array.size)
print(labels_array.ndim)
print(labels_array.nbytes)
print(len(labels_array))
# Plot the distribution of the different signs (histogram with 62 bins)
sns.set(palette="deep")
plt.hist(labels,62)
# Select a couple of images based on their indices
traffic_signs = [300,2250,3650,4000]
for i in range(len(traffic_signs)):
plt.subplot(1, 4, i+1)
plt.imshow(images_array[traffic_signs[i]])
plt.show()
# Fill out the subplots with the selected images and print shape, max and min values
for i in range(len(traffic_signs)):
plt.subplot(1,4,i+1)
plt.imshow(images_array[traffic_signs[i]])
plt.axis('off')
plt.show()
print("Shape:{0},max:{1}, min:{2}".format(images_array[traffic_signs[i]].shape,
images_array[traffic_signs[i]].max(),
images_array[traffic_signs[i]].min()))
# Get unique labels
unique_labels = set(labels_array)
# Initialize the figure
plt.figure(figsize=(15,15))
i=1
# Show the first image of every label, titled with the label and its count
for label in unique_labels:
image = images_array[labels.index(label)]
plt.subplot(8,8,i)
plt.axis('off')
plt.title('label:{0} ({1})'.format(label, labels.count(label)))
i=i+1
plt.imshow(image)
plt.show()
# Resize every image to 28x28
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
# NOTE(review): this loop still displays images_array (the original images)
# and prints images28_array[i] rather than images28_array[traffic_signs[i]].
for i in range(len(traffic_signs)):
plt.subplot(1,4,i+1)
plt.imshow(images_array[traffic_signs[i]])
plt.axis('off')
plt.show()
print("Shape:{0},max:{1}, min:{2}".format(images28_array[i].shape,
images28_array[i].max(),
images28_array[i].min()))
# Convert to grayscale
# NOTE(review): only skimage.io and skimage.transform are imported in this
# snippet; skimage.color may need an explicit import - confirm.
gray_images = skimage.color.rgb2gray(images28_array)
for i in range(len(traffic_signs)):
plt.subplot(1, 4, i+1)
plt.axis('off')
plt.imshow(gray_images[traffic_signs[i]], cmap="gray")
plt.subplots_adjust(wspace=0.5)
# Show the plot
plt.show()
# Placeholders: a batch of 28x28 grayscale images and their integer class ids
x = tf.placeholder(dtype=tf.float32, shape =[None, 28,28])
y = tf.placeholder(dtype= tf.int32, shape=[None])
# Flatten the input data
images_flat = tf.layers.flatten(x)
# Fully connected layer with 62 outputs, one per class
logits = tf.contrib.layers.fully_connected(images_flat,62, tf.nn.relu)
# Loss: `y` holds integer class ids, not one-hot vectors, so the sparse
# variant of softmax cross-entropy is required
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
# Optimizer (Adam)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# Convert logits to predicted label indices
correct_prediction = tf.argmax(logits,1)
# Accuracy: fraction of predictions that equal the true label
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(correct_prediction, tf.int32), y), tf.float32))
#########################################
print('######### Main Program #########')
#########################################
print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss)
print("Optimizer:",optimizer)
print("predicted_labels: ", correct_prediction)
tf.set_random_seed(1235)
#images28 = np.asanyarray(images28).reshape(-1, 28, 28,1)
#with tf.Session() as training_session:
# training_session.run(tf.global_variables_initializer())
# for i in range(201):
# print('Epoch', i)
# _,accuracy_value = training_session([optimizer, accuracy],feed_dict={x:images28, y:labels})
# if i%10 ==0:
# print("Loss", loss)
# print('Epochs Done!!')
# Train for 201 steps on the grayscale images and report the loss every
# tenth step.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(201):
        _, loss_value = sess.run([optimizer, loss], feed_dict={x: gray_images, y: labels})
        if i % 10 == 0:
            # Print the fetched value, not the symbolic `loss` tensor.
            print("Loss: ", loss_value)
I also applied a series of transformations before feeding the network, as follows:
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
But on execution I am getting the following error:
ValueError: Cannot feed value of shape (4575, 28, 28, 3) for Tensor 'Placeholder_189:0', which has shape '(?, 28, 28)'
Could you please help me find what I am doing wrong in training this network? Please refer to the following link for more information:
https://www.datacamp.com/community/tutorials/tensorflow-tutorial

Masking zero-padding embedding (and return zero gradients) in Tensorflow as in Pytorch

I'm trying to recreate the PoolNet from Spotlight with the BPR loss in Tensorflow but I can't get the same results. Below is the model I'm using (it's an estimator model_fn).
def _pooling_model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` for a pooling sequence model with BPR loss.

    Items of each input sequence are embedded, and a running average of the
    embeddings is used as the user representation. In TRAIN and EVAL mode the
    BPR loss between the positive items and sampled negatives is computed; in
    PREDICT mode every item is scored against the final user representation.

    Args:
        features: Dict with ``'item_seqs'`` and, for TRAIN/EVAL,
            ``'neg_samp'``.
        labels: Not used.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict with ``"num_items"``, ``"item_emb_size"`` and ``"lr"``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the given mode.
    """
    with tf.name_scope('inputs'):
        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            users_prev_items_inputs_train = features['item_seqs']
        elif mode == tf.estimator.ModeKeys.PREDICT:
            # Prediction works on one sequence, reshaped to a batch of 1.
            users_prev_items_inputs_train = tf.reshape(features['item_seqs'], [1, -1])

    with tf.device('/cpu:0'):
        prod_embeddings = tf.keras.layers.Embedding(params["num_items"], params["item_emb_size"], mask_zero=True)
        item_biases = tf.keras.layers.Embedding(params["num_items"], 1, mask_zero=True, embeddings_initializer=tf.keras.initializers.Zeros())
        prod_embed = prod_embeddings(users_prev_items_inputs_train)

    # Presumably (batch, seq, emb) -> (batch, emb, seq) - TODO confirm.
    targets = tf.transpose(prod_embed, [0, 2, 1])
    sequence_embeddings = tf.expand_dims(targets, axis=3)
    # Pad one zero step at the front of axis 2, so position t of the running
    # sum below only covers items before t.
    sequence_embeddings = tf.pad(sequence_embeddings, paddings=tf.constant([[0, 0], [0, 0], [1, 0], [0, 0]]))
    sequence_embedding_sum = tf.cumsum(sequence_embeddings, 2)
    # Running count of non-zero entries. Entries are treated as padding only
    # when their embedding value is exactly 0.0.
    non_padding_entries = tf.cumsum(tf.cast(tf.not_equal(sequence_embeddings, tf.constant(0.0)), tf.float32), 2)  # .expand_as(sequence_embedding_sum)
    user_representations = tf.squeeze((sequence_embedding_sum / (non_padding_entries + 1)), [3])
    user_representations_so_far = user_representations[:, :, :-1]
    user_representations_new = user_representations[:, :, -1]

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        global_step = tf.contrib.framework.get_or_create_global_step()
        with tf.name_scope('loss'):
            negative_samples = features['neg_samp']

            with tf.device('/cpu:0'):
                prod_embed_pos = prod_embeddings(users_prev_items_inputs_train)
                target_embedding_positive = tf.squeeze(tf.transpose(prod_embed_pos, [0, 2, 1]))
                prod_bias_pos = item_biases(users_prev_items_inputs_train)
                target_bias_positive = tf.squeeze(prod_bias_pos)
            dot_positive = tf.reduce_sum(user_representations_so_far * target_embedding_positive, 1) + target_bias_positive

            with tf.device('/cpu:0'):
                prod_embed_neg = prod_embeddings(negative_samples)
                target_embedding_negative = tf.squeeze(tf.transpose(prod_embed_neg, [0, 2, 1]))
                prod_bias_neg = item_biases(negative_samples)
                target_bias_negative = tf.squeeze(prod_bias_neg)
            dot_negative = tf.reduce_sum(user_representations_so_far * target_embedding_negative, 1) + target_bias_negative

            # Positions holding item id 0 are excluded from the loss.
            mask = tf.not_equal(users_prev_items_inputs_train, 0)
            loss = bpr_loss(dot_positive, dot_negative, mask)

        if mode == tf.estimator.ModeKeys.TRAIN:
            with tf.name_scope('optimizer'):
                optimizer = tf.train.AdamOptimizer(learning_rate=params["lr"])
                train_op = optimizer.minimize(loss, global_step=global_step)
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

        # EVAL previously fell through and returned None; an Estimator needs
        # an EstimatorSpec carrying the loss in this mode.
        return tf.estimator.EstimatorSpec(mode, loss=loss)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Score every item id against the latest user representation.
        item_ids = np.arange(params['num_items']).reshape(-1, 1)
        item_ids_tensor = tf.convert_to_tensor(item_ids, dtype=tf.int64)
        with tf.device('/cpu:0'):
            prod_embed_pos = prod_embeddings(item_ids_tensor)  # tf.nn.embedding_lookup(prod_embeddings, item_ids_tensor)
            target_embedding_positive = tf.squeeze(tf.transpose(prod_embed_pos, [0, 2, 1]))
            prod_bias_pos = item_biases(item_ids_tensor)  # tf.nn.embedding_lookup(item_biases, item_ids_tensor)
            target_bias_positive = tf.squeeze(prod_bias_pos)
        dot_positive = tf.reduce_sum(user_representations_new * target_embedding_positive, 1) + target_bias_positive
        predictions = {
            'products': tf.reshape(dot_positive, [1, -1])
        }
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)
and the loss function
def bpr_loss(positive_predictions, negative_predictions, mask):
    """Bayesian Personalised Ranking loss.

    The per-element loss is ``1 - sigmoid(positive - negative)``. Without a
    mask its plain mean is returned. With a mask the loss is multiplied by
    the mask (cast to the loss dtype), and the sum is divided by the sum of
    the mask.
    """
    pairwise = 1.0 - tf.nn.sigmoid(positive_predictions - negative_predictions)
    if mask is None:
        return tf.reduce_mean(pairwise)
    weights = tf.cast(mask, pairwise.dtype)
    return tf.reduce_sum(pairwise * weights) / tf.reduce_sum(weights)
With the above model, I can't get the same predictions on the exact same dataset (and with the same random seed) as I do with Spotlight. I eventually found that the problem is with the zero-padding. The data is generated as follows:
[[0,0,0,5,6,98],
[0,62,15,4,8,47],
[0,0,5,9,6,3,41],
[78,21,2,56,1,3]]
they have leading zero-padding so every input sample has the same length.
Based on my code, I believed I had done everything needed to mask these zeros out of the loss, the embedding layer (using the mask_zero parameter from Keras) and the averaging of the embeddings that I'm computing (using the cumsum). Still, after training, the zero-indexed embedding keeps changing, meaning that instead of being excluded it is taken into consideration, which influences the remaining gradients and adds noise to my results.
Pytorch seems to have a nice feature in their implementation of the Embedding layer where you can set the padding_idx with the id of the pad and this will be initialized with zeros. Also, it keeps the gradient of this index always zero. So basically, I'm trying to do the same thing with Tensorflow.
Any help would be appreciated.
I solved it using the following solution posted on Tensorflow's Github. It seems to work now.
mask_padding_zero_op = tf.scatter_update(lookup_table,
PADDING_ID,
tf.zeros([EMBEDDING_DIM,], dtype=DTYPE))
with tf.control_dependencies([mask_padding_zero_op]):
# do embedding lookup...