How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task? - tensorflow

I am working on an NLP sequence classification problem (3 classes) using Hugging Face Transformers (funnel-transformer/large) and TensorFlow.
First, I created the LASER embeddings like this:
from laserembeddings import Laser
laser = Laser()
df = pd.read_csv("mycsv.csv")
embeds = laser.embed_sentences(df['text'].values, lang='en')
write_pickle_to_file('train.pkl', embeds )
part 1 : Tensorflow version
for data preparation i use code like below :
df['text']=temp['column1']+tokenizer.sep_token+temp['column2']+tokenizer.sep_token+temp['column3']
def encode_text(texts):
    """Tokenize ``texts`` into fixed-length arrays for the transformer.

    Every sequence is truncated and padded to ``cfg.max_len`` tokens.

    Args:
        texts: list of strings to encode.

    Returns:
        A list ``[input_ids, attention_mask, token_type_ids]`` of int64
        NumPy arrays, in that order.
    """
    # `padding='max_length'` already pads every row up to `max_length`; the
    # deprecated `pad_to_max_length=True` flag asked for the same thing a
    # second time, so it is no longer passed.
    enc_di = tokenizer.batch_encode_plus(
        texts,
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
        max_length=cfg.max_len,
    )
    return [
        np.asarray(enc_di[key], dtype=np.int64)
        for key in ('input_ids', 'attention_mask', 'token_type_ids')
    ]
then inside training function :
x_train = encode_text(df.text.to_list())
train_ds = (
tf.data.Dataset
.from_tensor_slices((
{
"input_ids": x_train[0],
"input_masks": x_train[1],
"input_segments": x_train[2],
"lasers": np.array( train[laser_columns].values, dtype=np.float32 ) #laser_columns contains all the laser embedded columns
},
tf.one_hot(df["label"].to_list(), 3) #3 class
))
.repeat()
.shuffle(2048)
.batch(cfg.batch_size)
.prefetch(AUTO)
)
i add laser embedding in my model like this :
def create_model():
    """Build and compile the 3-class classifier.

    The first-token vector of the transformer's sequence output is
    concatenated with a dense projection of the LASER features and fed
    through a small classification head.

    Returns:
        A compiled Keras ``Model`` taking the inputs ``input_ids``,
        ``input_masks``, ``input_segments`` and ``lasers``.
    """
    layers = tf.keras.layers
    transformer = transformers.TFAutoModel.from_pretrained(
        cfg.pretrained, config=config, from_pt=True
    )
    max_len = 512

    # Transformer branch: three integer inputs of length `max_len`.
    input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
    input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
    transformer_outputs = transformer(
        input_ids, attention_mask=input_masks, token_type_ids=input_segments
    )
    # Keep only position 0 of the sequence output.
    cls_token = transformer_outputs[0][:, 0, :]

    # LASER branch: one dense projection with the same width as its input.
    lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers")  # n_lasers = 1024
    lasers_output = layers.Dense(n_lasers, activation='tanh')(lasers)

    # Classification head over the fused features.
    fused = layers.Concatenate()([cls_token, lasers_output])
    fused = layers.Dropout(0.1)(fused)
    hidden = layers.Dense(2048, activation='tanh')(fused)
    hidden = layers.Dropout(0.1)(hidden)
    out = layers.Dense(3, activation='softmax')(hidden)

    model = Model(
        inputs=[input_ids, input_masks, input_segments, lasers],
        outputs=out,
    )
    model.compile(
        Adam(lr=1e-5),
        loss=losses.CategoricalCrossentropy(),
        metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')],
    )
    return model
now my question is, how do we do the same with pytorch for exact same problem and same dataset?
part 2 : pytorch version
df = pd.read_csv("mytrain.csv")
class myDataset(Dataset):
    """Dataset that tokenizes one row of ``df`` per item.

    ``column1``..``column3`` are joined with single spaces into the first
    text segment and ``column4`` is passed as the second segment. When
    ``training`` is true the row's ``label`` is returned as ``target``.
    """

    def __init__(self, df, max_length, tokenizer, training=True):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.column1 = df['column1'].values
        self.column2 = df['column2'].values
        self.column3 = df['column3'].values
        self.column4 = df['column4'].values
        self.training = training
        # Targets are only read when training, so `label` may be absent otherwise.
        if training:
            self.targets = df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        second_segment = self.column4[index]
        first_segment = (
            self.column1[index] + ' ' + self.column2[index] + ' ' + self.column3[index]
        )
        encoded = self.tokenizer.encode_plus(
            first_segment,
            second_segment,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=True,
            is_split_into_words=False,
            max_length=self.max_len,
        )

        item = {key: encoded[key] for key in ('input_ids', 'attention_mask')}
        if 'token_type_ids' in encoded:
            item['token_type_ids'] = encoded['token_type_ids']
        if self.training:
            item['target'] = self.targets[index]
        return item
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
class myModel(nn.Module):
    """Pretrained backbone followed by a linear 3-class head.

    The head reads the hidden state at sequence position 0 of the
    backbone's first output.
    """

    def __init__(self, model_name):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        # Gradient checkpointing is always switched on.
        print("using gradient_checkpoint...")
        self.model.gradient_checkpointing_enable()
        # NOTE(review): this config is created after the backbone has been
        # loaded and is never passed to it; in this class it is only read
        # for `hidden_size` below.
        self.config = AutoConfig.from_pretrained(model_name)
        overrides = {
            "output_hidden_states": True,
            "hidden_dropout_prob": 0.0,
            "layer_norm_eps": 1e-7,
            "add_pooling_layer": False,
            "attention_probs_dropout_prob": 0.0,
        }
        self.config.update(overrides)
        self.fc = nn.Linear(self.config.hidden_size, 3)

    def forward(self, ids, mask):
        """Return the head's output for the first token of each sequence."""
        backbone_out = self.model(
            input_ids=ids, attention_mask=mask, output_hidden_states=False
        )
        first_token = backbone_out[0][:, 0, :]
        return self.fc(first_token)
and in train and validation loop i have code like this :
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
ids = data['input_ids'].to(device, dtype = torch.long)
mask = data['attention_mask'].to(device, dtype = torch.long)
targets = data['target'].to(device, dtype=torch.long)
batch_size = ids.size(0)
optimizer.zero_grad()
# forward pass with `autocast` context manager
with autocast(enabled=True):
outputs = model(ids, mask)
loss = loss_fct(outputs, targets)
i would like to know where and how in my huggingface pytorch pipeline i can use the laserembedding that i created earlier and used in tensorflow huggingface model?
I would like to concatenate the LASER embeddings with the Funnel Transformer's simple CLS token output and train the transformer model with the LASER embeddings as an extra feature in the PyTorch implementation, exactly like I did in the TensorFlow example. Do you know how to modify my PyTorch code to make this work? The TensorFlow implementation with the concatenated LASER embeddings that I posted above works well; I just want to do the same in PyTorch. Your help is highly appreciated, thanks in advance.

Related

How to build a custom question-answering head when using hugginface transformers?

Using the TFBertForQuestionAnswering.from_pretrained() function, we get a predefined head on top of BERT together with a loss function that are suitable for this task.
My question is how to create a custom head without relying on TFAutoModelForQuestionAnswering.from_pretrained().
I want to do this because there is no place where the architecture of the head is explained clearly. By reading the code here we can see the architecture they are using, but I can't be sure I understand their code 100%.
Starting from How to Fine-tune HuggingFace BERT model for Text Classification is good. However, it covers only the classification task, which is much simpler.
'start_positions' and 'end_positions' are created following this tutorial.
So far, I've got the following:
train_dataset
# Dataset({
# features: ['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'],
# num_rows: 99205
# })
train_dataset.set_format(type='tensorflow', columns=['input_ids', 'token_type_ids', 'attention_mask'])
features = {x: train_dataset[x] for x in ['input_ids', 'token_type_ids', 'attention_mask']}
labels = [train_dataset[x] for x in ['start_positions', 'end_positions']]
labels = np.array(labels).T
tfdataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(16)
input_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='input_ids')
token_type_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='token_type_ids')
attention_mask = tf.keras.layers.Input((256,), dtype=tf.int32, name='attention_mask')
bert = TFAutoModel.from_pretrained("bert-base-multilingual-cased")
output = bert([input_ids, token_type_ids, attention_mask]).last_hidden_state
output = tf.keras.layers.Dense(2, name="qa_outputs")(output)
model = tf.keras.models.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=output)
num_train_epochs = 3
num_train_steps = len(tfdataset) * num_train_epochs
optimizer, schedule = create_optimizer(
init_lr=2e-5,
num_warmup_steps=0,
num_train_steps=num_train_steps,
weight_decay_rate=0.01
)
def qa_loss(labels, logits):
    """Average the start- and end-position cross-entropy losses.

    Args:
        labels: integer tensor of shape ``(batch, 2)``; column 0 holds the
            start position and column 1 the end position.
        logits: float tensor of shape ``(batch, seq_len, 2)``; channel 0
            holds the start logits and channel 1 the end logits.

    Returns:
        A tensor of shape ``(batch,)`` with the per-example mean of the two
        losses.
    """
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE
    )
    # Slice the LAST axis, not the batch axis: `labels[0]` / `logits[0]`
    # picked the first example of the batch, giving shapes (2,) and
    # (seq_len, 2) that the loss cannot match up.
    start_loss = loss_fn(labels[:, 0], logits[:, :, 0])
    end_loss = loss_fn(labels[:, 1], logits[:, :, 1])
    return (start_loss + end_loss) / 2.0
# Use the custom `qa_loss` function defined above. `loss_fn` exists only as a
# local variable inside `qa_loss`, so referencing it here is a NameError.
model.compile(
    loss=qa_loss,
    optimizer=optimizer
)
model.fit(tfdataset, epochs=num_train_epochs)
And I am getting the following error:
ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(2,) and logits.shape=(256, 2)
It is complaining about the shape of the labels. This should not happen since I am using SparseCategoricalCrossentropy loss.
For future reference, I actually found a solution, which is just editing the TFBertForQuestionAnswering class itself. For example, I added an additional layer in the following code and trained the model as usual and it worked.
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from transformers.modeling_tf_utils import TFQuestionAnsweringLoss, get_initializer, input_processing
from transformers.modeling_tf_outputs import TFQuestionAnsweringModelOutput
from transformers import BertConfig
# BERT question-answering model with one additional Dense layer (`my_dense`)
# applied to the sequence output before the `qa_outputs` projection.
class MY_TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss):
# names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
_keys_to_ignore_on_load_unexpected = [
r"pooler",
r"mlm___cls",
r"nsp___cls",
r"cls.predictions",
r"cls.seq_relationship",
]
# Builds the BERT main layer (without pooling layer) and the two Dense layers of the head.
def __init__(self, config: BertConfig, *inputs, **kwargs):
super().__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert")
# This is the dense layer I added
# It keeps the hidden width (`config.hidden_size`) and has no explicit activation argument.
self.my_dense = tf.keras.layers.Dense(
units=config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
name="my_dense",
)
# Final projection to `config.num_labels` channels; `call` splits it into two (start / end).
self.qa_outputs = tf.keras.layers.Dense(
units=config.num_labels,
kernel_initializer=get_initializer(config.initializer_range),
name="qa_outputs",
)
# Forward pass: BERT -> my_dense -> qa_outputs -> split into start/end logits,
# plus the loss when both `start_positions` and `end_positions` are given.
def call(
self,
input_ids = None,
attention_mask = None,
token_type_ids = None,
position_ids = None,
head_mask = None,
inputs_embeds = None,
output_attentions = None,
output_hidden_states = None,
return_dict = None,
start_positions = None,
end_positions= None,
training = False,
**kwargs,
):
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
are not taken into account for computing the loss.
end_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the end of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
are not taken into account for computing the loss.
"""
# Collect all call arguments into one `inputs` mapping via the library helper.
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
start_positions=start_positions,
end_positions=end_positions,
training=training,
kwargs_call=kwargs,
)
outputs = self.bert(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
token_type_ids=inputs["token_type_ids"],
position_ids=inputs["position_ids"],
head_mask=inputs["head_mask"],
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
# You also have to add it here
my_logits = self.my_dense(inputs=sequence_output)
logits = self.qa_outputs(inputs=my_logits)
# Split the last axis into two single-channel tensors and drop that axis.
start_logits, end_logits = tf.split(value=logits, num_or_size_splits=2, axis=-1)
start_logits = tf.squeeze(input=start_logits, axis=-1)
end_logits = tf.squeeze(input=end_logits, axis=-1)
loss = None
# The loss is only computed when both label tensors were supplied.
if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
labels = {"start_position": inputs["start_positions"]}
labels["end_position"] = inputs["end_positions"]
loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))
# Tuple-style return when `return_dict` is falsy; the loss is prepended if present.
if not inputs["return_dict"]:
output = (start_logits, end_logits) + outputs[2:]
return ((loss,) + output) if loss is not None else output
return TFQuestionAnsweringModelOutput(
loss=loss,
start_logits=start_logits,
end_logits=end_logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
# Rebuilds the output object for serving; hidden states / attentions are converted to
# tensors only when the corresponding config flag is set, otherwise they are None.
def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns
)

GoogleNet fails to classify images

I built Keras Google Net from here:
https://www.analyticsvidhya.com/blog/2018/10/understanding-inception-network-from-scratch/
The only difference is that I replaced 1000 classes in output layers with 3. data is prepared this way :
def grey_preprocessor(xarray):
    """Return ``xarray / 127.5 - 1`` (so 0 maps to -1 and 255 maps to 1)."""
    return xarray / 127.5 - 1
img_resol = (224,224)
train_batches = ImageDataGenerator(horizontal_flip = True, preprocessing_function = grey_preprocessor).flow_from_directory(
directory = train_path, target_size=img_resol, classes = ['bacterial', 'healthy', 'viral'], batch_size = 10)
valid_batches = ImageDataGenerator(horizontal_flip = True, preprocessing_function = grey_preprocessor).flow_from_directory(
directory = valid_path, target_size=img_resol, classes = ['bacterial', 'healthy', 'viral'], batch_size = 10)
test_batches = ImageDataGenerator(horizontal_flip = True, preprocessing_function = grey_preprocessor).flow_from_directory(
directory = test_path, target_size=img_resol, classes = ['bacterial', 'healthy', 'viral'], batch_size = 10, shuffle = False)
assert train_batches.n == 4222
assert valid_batches.n == 300
assert test_batches.n == 150
assert train_batches.num_classes == valid_batches.num_classes == test_batches.num_classes == 3
I train it like this:
history = model.fit(train_batches, validation_data=valid_batches, epochs=epochs, batch_size=256, callbacks=[lr_sc])
However, all the accuracies on every batch are 0.3333, which means it doesn't classify at all. I understand that it can be anything. What is a good way to troubleshoot it?
If you want to normalize your grayscale image to the [0, 1] range, use this:
def gray_preprocessor(xarray):
    """Return ``xarray`` divided by 255.0 (so 255 maps to 1.0)."""
    return xarray / 255.0
or you can also use lambda function:
gray_preprocessor = lambda xarray : xarray / 255.0

Triplet-Loss using pre-trained network

I am trying to use the Triple-Loss technique to fine-tune an EfficientNet network for human Re-ID using Keras. Here is the code I am using:
This is the generator:
class SampleGen(object):
    """Random triplet sampler over a ``{file: class}`` mapping.

    A class is drawn with probability proportional to its number of files,
    two files of that class become the positive pair, and one file of any
    other class becomes the negative.
    """

    def __init__(self, file_class_mapping):
        """Index the files by class and precompute the class sampling weights.

        Args:
            file_class_mapping: dict mapping each file name to its class id.
        """
        self.file_class_mapping = file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))
        for file, class_ in file_class_mapping.items():
            self.class_to_list_files[class_].append(file)
        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes = range(len(self.list_classes))
        # Classes are sampled proportionally to how many files they contain.
        self.class_weight = np.array(
            [len(self.class_to_list_files[class_]) for class_ in self.list_classes]
        )
        self.class_weight = self.class_weight / np.sum(self.class_weight)

    def get_sample(self):
        """Draw one triplet.

        Returns:
            ``(positive_example_1, negative_example, positive_example_2)``
            as file names. The two positives are distinct files whenever
            the drawn class has at least two files.

        Raises:
            ValueError: if the mapping holds fewer than two classes, because
                no negative example can exist then.
        """
        if len(self.list_classes) < 2:
            # Without this guard the negative search below would never end.
            raise ValueError("at least two classes are needed to sample a negative example")

        class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
        positive_class = self.list_classes[class_idx]
        class_files = self.class_to_list_files[positive_class]

        # Sample the pair WITHOUT replacement so the two positives are not
        # the same file (a zero-distance pair carries no training signal).
        # Only a single-file class falls back to using that file twice.
        examples_class_idx = np.random.choice(
            len(class_files), 2, replace=len(class_files) < 2
        )
        positive_example_1 = class_files[examples_class_idx[0]]
        positive_example_2 = class_files[examples_class_idx[1]]

        # Rejection-sample a file whose class differs from the positive class.
        negative_example = None
        while (negative_example is None
               or self.file_class_mapping[negative_example] == positive_class):
            negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
            negative_example = self.list_all_files[negative_example_idx]
        return positive_example_1, negative_example, positive_example_2
def read_and_resize(filepath):
    """Open ``filepath`` as RGB, resize it to ``image_size`` x ``image_size``
    and return the pixels as a float32 array."""
    rgb_image = Image.open(filepath).convert('RGB')
    resized = rgb_image.resize((image_size, image_size))
    return np.array(resized, dtype="float32")
def augment(im_array):
    """Return ``im_array`` flipped left-right when a uniform draw from
    [0, 1) exceeds 0.9, otherwise return it unchanged."""
    should_flip = np.random.uniform(0, 1) > 0.9
    return np.fliplr(im_array) if should_flip else im_array
def gen(triplet_gen):
    """Endlessly yield ``(inputs, None)`` batches of ``batch_size`` triplets.

    ``inputs`` is a dict with the keys ``anchor_input``, ``positive_input``
    and ``negative_input``; the label slot is always ``None``.
    """
    while True:
        first_positives, negatives, second_positives = [], [], []
        for _ in range(batch_size):
            pos1_name, neg_name, pos2_name = triplet_gen.get_sample()

            # Load all three images first, then augment them, both in the
            # order positive-1, negative, positive-2.
            loaded = [
                read_and_resize(join(path_train, name))
                for name in (pos1_name, neg_name, pos2_name)
            ]
            pos1_img, neg_img, pos2_img = [augment(image) for image in loaded]

            first_positives.append(pos1_img)
            negatives.append(neg_img)
            second_positives.append(pos2_img)

        A = preprocess_input(np.array(first_positives))
        B = preprocess_input(np.array(second_positives))
        C = preprocess_input(np.array(negatives))
        label = None
        yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, label
This is how I create the model:
def get_model():
    """Create the shared embedding network and the triplet training model.

    Returns:
        ``(embedding_model, triplet_model)``. ``triplet_model`` applies
        ``embedding_model`` to the anchor, positive and negative inputs and
        carries the mean triplet loss via ``add_loss``.
    """
    base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
    # Freeze every backbone layer.
    for backbone_layer in base_model.layers:
        backbone_layer.trainable = False

    # NOTE(review): the backbone is built with include_top=False and no
    # `pooling` argument, and no pooling layer is added here, so the Dense
    # and l2_normalize(axis=1) below presumably run on an unpooled feature
    # map. Confirm this is intended.
    features = Dropout(0.6)(base_model.output)
    features = Dense(embedding_dim)(features)
    normalized = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(features)
    embedding_model = Model(base_model.input, normalized, name="embedding")

    input_shape = (image_size, image_size, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')
    inputs = [anchor_input, positive_input, negative_input]

    # The same embedding network is applied to all three branches.
    outputs = [embedding_model(branch_input) for branch_input in inputs]

    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))
    return embedding_model, triplet_model
And this is how I'm trying to run the training:
if __name__ == '__main__':
    # Split the (Image, Id) table 70/30 and build one file->id mapping per split.
    data = pd.read_csv(path_csv)
    train, test = train_test_split(data, train_size=0.7, random_state=1337)
    file_id_mapping_train = dict(zip(train.Image.values, train.Id.values))
    file_id_mapping_test = dict(zip(test.Image.values, test.Id.values))

    gen_tr = gen(SampleGen(file_id_mapping_train))
    gen_te = gen(SampleGen(file_id_mapping_test))

    embedding_model, triplet_model = get_model()
    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)

    # Only layers from index 379 onward stay trainable; everything before is frozen.
    for layer_index, layer in enumerate(embedding_model.layers):
        layer.trainable = layer_index >= 379

    # The loss is attached inside get_model() via add_loss, hence loss=None.
    triplet_model.compile(loss=None, optimizer=Adam(0.0001))
    history = triplet_model.fit(
        x=gen_tr,
        validation_data=gen_te,
        epochs=10,
        verbose=1,
        steps_per_epoch=200,
        validation_steps=20,
        callbacks=create_callbacks(),
    )
The csv contains two columns (Image, Id) and I am generating triplets on the go using a generator. The layer 379 is the last layer of the network so I just leave that as trainable. I let it run for some epochs and it seems like it doesn't converge, it stays around 2.30. On epochs like 20, the loss is even higher than what I've started with. Here you can see what I mean: train example Is there anything wrong with the way I think about the problem?
Thank you!

Tensorflow Estimator gives different prediction result on each call of predict

I trained a classifier for my own dataset using TF Estimators, but each prediction call gives me different prediction results. I checked the dataset and the order of the examples is fine (the same for every prediction call), but the model still gives different classification results. I'm confused and wonder if I'm doing something wrong.
This is my code for reading input:
def parse_predict_record(example):
    """Parse one serialized example into ``{"user_id", "ad_info"}``.

    ``ad_info`` is read as variable-length strings, each string is split
    on ``#`` and the pieces are converted to int32; missing entries are
    filled with ``"0"`` before conversion.
    """
    schema = {
        "user_id": tf.FixedLenFeature([], tf.int64),
        "ad_info": tf.VarLenFeature(tf.string),
    }
    parsed = tf.parse_single_example(example, schema)

    ad_strings = tf.sparse_tensor_to_dense(parsed['ad_info'], default_value='0')
    ad_pieces = tf.string_split(ad_strings, "#")
    ad_dense = tf.sparse_tensor_to_dense(ad_pieces, default_value="0")
    ad_numbers = tf.string_to_number(ad_dense, out_type=tf.int32)

    feature_dict = {"user_id": parsed['user_id'], "ad_info": ad_numbers}
    print("feature_dict=", feature_dict)
    return feature_dict
files, cnt = get_files(FLAGS.predict_path)
predict_dataset = tf.data.TFRecordDataset(files).map(lambda x: parse_predict_record(x)) \
.padded_batch(FLAGS.batch_size, padded_shapes={'user_id': [], 'ad_info': [None, None]}) \
.prefetch(32)
iterator = predict_dataset.make_one_shot_iterator()
return iterator.get_next()
code for prediction:
print("start predict")
result = model.predict(input_fn=predict_input_fn,
hooks=[tf.train.LoggingTensorHook([ 'user_id', 'ad_info', 'predict_id'], every_n_iter=500)])
prediction_res = []
for prediction in result:
# print("predictions=", prediction)
user_id = prediction['user_id']
predict_label = prediction['predict_label']
print("usre_id=", user_id)
print("predict_label=", predict_label)
I've got the answer,
this worked for me:
checkpoint_path = model.latest_checkpoint()
print("checkpoint_path=", checkpoint_path)
result = model.predict(input_fn=predict_input_fn,
hooks=[tf.train.LoggingTensorHook([ 'user_id', 'ad_info',
'predict_id'], every_n_iter=1000)],
checkpoint_path=checkpoint_path)
prediction_res = []
for prediction in result:
# print("predictions=", prediction)
user_id = prediction['user_id']
predict_label = prediction['predict_label']

Expected to see 3 array(s), but instead got the following list of 1 arrays:

I am trying to train a triplet-loss model using fit_generator. It requires three inputs and no output, so I have a function that generates hard triplets. The output from the triplet generator has a shape of (3, 5, 279): 3 inputs (anchor, positive and negative), a batch of 5, and a total of 279 features. When I run fit_generator it throws this error: "The list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 3 array(s), but instead got the following list of 1 arrays", even though I have passed a list of three arrays. The code is below. It works when I use fit; however, I want to always call the generator function to generate my triplets as my batches. Thanks in advance, this has taken me three days.
def load_data():
    """Load ``arrhythmia_data.txt`` and group its rows by class.

    Each line is a comma-separated record whose last field is the class
    label; ``?`` entries are replaced by ``0``. Features are min-max
    scaled, the first 320 rows form the training split and the remaining
    rows the validation split.

    Returns:
        ``(train_in, val_in, Y_train_e, Y_val_e, nb_classes)`` where
        ``train_in`` / ``val_in`` are lists with one 2-D feature array per
        class (shape ``(0, n_features)`` for a class without rows) and
        ``Y_train_e`` / ``Y_val_e`` are the binarized labels of each split.
    """
    path = "arrhythmia_data.txt"
    data = []
    # The context manager closes the file even if a line fails to parse.
    with open(path, "r") as f:
        for line in f:
            line = line.rstrip('\n').replace('?', '0')
            if not line.strip():
                # A blank (e.g. trailing) line would break the float conversion.
                continue
            data.append(line.split(","))
    data = np.array(data).astype(np.float64)

    # Last column is the class label, everything before it is a feature.
    Y_train = data[:, -1:]
    train = data[:, :-1]
    # NOTE(review): the scaler is fitted on all rows before the train/val
    # split below. Confirm that is intended.
    normaliser = preprocessing.MinMaxScaler()
    train = normaliser.fit_transform(train)
    val = train[320:, :]
    train = train[:320, :]

    # Binarize the labels and split them the same way as the features.
    lb = LabelBinarizer()
    encode = lb.fit_transform(Y_train)
    nb_classes = int(len(encode[0]))
    Y_val_e = encode[320:, :]
    Y_train_e = encode[:320, :]
    print(Y_train_e[0])
    print(np.argmax(Y_train_e[0]))

    # Group rows by class index with one argmax per split instead of one
    # argmax per row per class.
    train_labels = np.argmax(Y_train_e, axis=1)
    val_labels = np.argmax(Y_val_e, axis=1)
    train_in = [train[train_labels == n] for n in range(nb_classes)]
    val_in = [val[val_labels == n] for n in range(nb_classes)]
    return train_in, val_in, Y_train_e, Y_val_e, nb_classes
train_in,val,Y_train,Y_val,nb_classes = load_data()
input_shape = (train_in[0].shape[1],)
def build_network(input_shape, embeddingsize):
    '''
    Build the embedding network.

    Input :
        input_shape : shape of one input sample
        embeddingsize : length of the output embedding vector

    Returns a Sequential model of five Dense layers whose output is
    L2-normalised along the last axis.
    '''
    return Sequential([
        Dense(128, activation='relu', input_shape=input_shape),
        Dense(128, activation='relu'),
        Dense(256, activation='relu'),
        Dense(4096, activation='sigmoid'),
        Dense(embeddingsize, activation=None),
        # Force the encoding to live on the d-dimensional hypersphere
        Lambda(lambda x: K.l2_normalize(x, axis=-1)),
    ])
class TripletLossLayer(Layer):
    """Layer that computes the triplet loss of ``[anchor, positive, negative]``
    embeddings, registers it with ``add_loss`` and returns it."""

    def __init__(self, alpha, **kwargs):
        # `alpha` is the margin added to d(A, P) - d(A, N).
        self.alpha = alpha
        super().__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Sum over the batch of ``max(d(A, P) - d(A, N) + alpha, 0)`` using
        squared Euclidean distances."""
        anchor, positive, negative = inputs
        positive_distance = K.sum(K.square(anchor - positive), axis=-1)
        negative_distance = K.sum(K.square(anchor - negative), axis=-1)
        hinge = K.maximum(positive_distance - negative_distance + self.alpha, 0)
        return K.sum(hinge, axis=0)

    def call(self, inputs):
        batch_loss = self.triplet_loss(inputs)
        self.add_loss(batch_loss)
        return batch_loss
def build_model(input_shape, network, margin=0.2):
    '''
    Wrap ``network`` in a three-input Keras model for triplet training.

    Input :
        input_shape : shape of one input sample
        network : embedding network applied to each of the three inputs
        margin : margin between the Anchor-Positive and Anchor-Negative
                 distances used by the loss (alpha)

    Returns a Model whose inputs are anchor_input, positive_input and
    negative_input and whose output is the TripletLossLayer result.
    '''
    # One input tensor per triplet member, created in A, P, N order.
    branch_inputs = [
        Input(input_shape, name=branch_name)
        for branch_name in ("anchor_input", "positive_input", "negative_input")
    ]

    # The same network encodes all three inputs.
    encodings = [network(branch_input) for branch_input in branch_inputs]

    # The loss layer consumes the three encodings and is the model output.
    triplet_layer = TripletLossLayer(alpha=margin, name='triplet_loss_layer')
    loss_output = triplet_layer(encodings)

    return Model(inputs=branch_inputs, outputs=loss_output)
def get_batch_random(batch_size, s="train"):
    """Draw ``batch_size`` random (anchor, positive, negative) triplets.

    Args:
        batch_size: number of triplets to draw.
        s: either the split name (``"train"`` selects the module-level
            ``train_in``, any other string selects ``val``) or the dataset
            itself: a list with one 2-D array ``(n_samples, n_features)``
            per class. Accepting the dataset directly matches how
            ``get_batch_hard`` calls this function.

    Returns:
        Array of shape ``(3, batch_size, n_features)`` holding anchors,
        positives and negatives. A row stays all-zero when the drawn anchor
        class has fewer than two samples or the drawn negative class is
        empty.

    Raises:
        ValueError: if no class in the dataset contains any sample.
    """
    # The original body read undefined names `X` and `m`; resolve both here.
    if isinstance(s, str):
        X = train_in if s == "train" else val
    else:
        X = s
    n_classes = len(X)

    # Feature width comes from the first class that has a 2-D sample array.
    m = next((np.shape(c)[1] for c in X if np.ndim(c) == 2), None)
    if m is None:
        raise ValueError("dataset contains no samples")

    triplets = [np.zeros((batch_size, m)) for _ in range(3)]
    for i in range(batch_size):
        # Pick one random class for the anchor.
        anchor_class = np.random.randint(0, n_classes)
        nb_sample_available_for_class_AP = X[anchor_class].shape[0]
        # Two distinct samples are required for anchor and positive.
        if nb_sample_available_for_class_AP <= 1:
            continue
        idx_A, idx_P = np.random.choice(
            nb_sample_available_for_class_AP, size=2, replace=False
        )
        # Pick another class for the negative; the non-zero offset
        # guarantees it differs from anchor_class.
        negative_class = (anchor_class + np.random.randint(1, n_classes)) % n_classes
        nb_sample_available_for_class_N = X[negative_class].shape[0]
        if nb_sample_available_for_class_N == 0:
            # No negative available; leave the row at zero like the case above.
            continue
        idx_N = np.random.randint(0, nb_sample_available_for_class_N)
        triplets[0][i, :] = X[anchor_class][idx_A, :]
        triplets[1][i, :] = X[anchor_class][idx_P, :]
        triplets[2][i, :] = X[negative_class][idx_N, :]
    return np.array(triplets)
def get_batch_hard(draw_batch_size, hard_batchs_size, norm_batchs_size, network, s="train"):
    """Endlessly yield batches mixing the hardest and some random triplets.

    For every batch, ``draw_batch_size`` random triplets are drawn, scored
    with the current ``network`` and reduced to the ``hard_batchs_size``
    hardest ones plus ``norm_batchs_size`` random others.

    Args:
        draw_batch_size: number of candidate triplets drawn per batch.
        hard_batchs_size: number of hardest triplets kept.
        norm_batchs_size: number of additional random triplets kept.
        network: embedding model used to score the candidates.
        s: ``"train"`` reads ``train_in``; anything else reads ``val``.

    Yields:
        ``([anchors, positives, negatives], None)``. The three input arrays
        are wrapped in a 2-tuple with a ``None`` target so Keras does not
        unpack a bare 3-element list as ``(x, y, sample_weight)``, which
        is what leads to "Expected to see 3 array(s) ... got 1".
    """
    if s == 'train':
        X = train_in
    else:
        X = val

    # Loop forever: fit_generator pulls steps_per_epoch * epochs batches,
    # and a generator that yields once is exhausted after the first step.
    while True:
        # Step 1: pick a random batch to study.
        studybatch = get_batch_random(draw_batch_size, X)

        # Step 2: embed anchors, positives and negatives with the current
        # network and compute d(A,P) - d(A,N). The margin is omitted
        # because the value is only used for ordering.
        A = network.predict(studybatch[0])
        P = network.predict(studybatch[1])
        N = network.predict(studybatch[2])
        studybatchloss = np.sum(np.square(A - P), axis=1) - np.sum(np.square(A - N), axis=1)

        # Hardest triplets first (largest value).
        selection = np.argsort(studybatchloss)[::-1][:hard_batchs_size]
        # Fill up with random triplets that were not already selected.
        selection2 = np.random.choice(
            np.delete(np.arange(draw_batch_size), selection),
            norm_batchs_size,
            replace=False,
        )
        selection = np.append(selection, selection2)

        # The former `triplets.reshape(...)` call was removed: `triplets`
        # is a Python list, which has no `reshape`, and each array already
        # has the shape the model expects.
        triplets = [
            studybatch[0][selection, :],
            studybatch[1][selection, :],
            studybatch[2][selection, :],
        ]
        yield triplets, None
network = build_network(input_shape,embeddingsize=10)
hard = get_batch_hard(5,4,1,network,s="train")
network_train = build_model(input_shape,network)
optimizer = Adam(lr = 0.00006)
network_train.compile(loss=None,optimizer=optimizer)
#this works
#history = network_train.fit(hard,epochs=100,steps_per_epoch=1, verbose=2)
history = network_train.fit_generator(hard,epochs=10,steps_per_epoch=16, verbose=2)
# error:: the list of Numpy arrays that you are passing to your model is not the size the model
expected. Expected to see 3 array(s), but instead got the following list of 1 arrays:
I think that's because in your generator you are yielding the 3 input arrays in one list; you need to yield the 3 arrays independently:
triplet_1 = studybatch[0][selection,:]
triplet_2 = studybatch[1][selection,:]
triplet_3 = studybatch[2][selection,:]
yield [triplet_1, triplet_2, triplet_3]