I created a custom image dataset like this:
from torch.utils.data.dataset import Dataset
from PIL import Image
import torchvision
from torchvision import datasets, models, transforms
import numpy as np
class MyCustomDataset(Dataset):
    """Image dataset backed by a DataFrame of ``(image path, label)`` rows."""

    def __init__(self, df, transforms=None, class_to_idx=None):
        """
        Args:
            df (pandas.DataFrame): first column holds the image paths, second
                column holds the labels.
            transforms: optional callable applied to every opened PIL image
                (e.g. a torchvision ``Compose`` ending in ``ToTensor``). When
                ``None`` the PIL image is returned unchanged.
            class_to_idx (dict, optional): mapping from raw label to class
                index. Supply it when the raw labels are not already the
                contiguous indices ``0 .. num_classes - 1``: a loss such as
                ``nn.CrossEntropyLoss`` rejects targets outside that range
                ("Target 40 is out of bounds"). When ``None`` the labels are
                passed through untouched.

        Raises:
            KeyError: if ``class_to_idx`` is given and a label is missing
                from it.
        """
        # Transforms
        self.transforms = transforms
        # Keep the dataframe itself
        self.data_info = df
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels, optionally remapped to class indices
        labels = np.asarray(self.data_info.iloc[:, 1])
        self.class_to_idx = class_to_idx
        if class_to_idx is not None:
            labels = np.asarray([class_to_idx[label] for label in labels])
        self.label_arr = labels
        # Number of samples
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``."""
        # Open the image named in the dataframe
        img = Image.open(self.image_arr[index])
        # Only transform when a transform was supplied; the default of
        # ``None`` used to raise "'NoneType' object is not callable" here.
        if self.transforms is not None:
            img = self.transforms(img)
        return (img, self.label_arr[index])

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return self.data_len
The `df` passed to MyCustomDataset(df, transforms) is a pandas DataFrame that stores the image paths and their labels. It looks like this:
file_name label
0 M:\RealModels\images\001\001001.png 0
1 M:\RealModels\images\001\002001.png 0
2 M:\RealModels\images\001\003001.png 0
3 M:\RealModels\images\001\004001.png 0
4 M:\RealModels\images\001\006001.png 0
... ... ...
3197 M:\RenderedModels\images_rgb\450\116450.png 45
3198 M:\RenderedModels\images_rgb\450\117450.png 45
3199 M:\RenderedModels\images_rgb\450\118450.png 45
3200 M:\RenderedModels\images_rgb\450\119450.png 45
3201 M:\RenderedModels\images_rgb\450\120450.png 45
3202 rows × 2 columns
There are 16 classes in my dataset. The class labels look like: ['00', '01', '12', '34', '35', ..., '45']
My whole program is:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import time
import os
import copy
import scipy
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data.dataset import Dataset
import torchvision
from torchvision import datasets, models, transforms
# In[2]:
import sys
sys.path.append(r"M:\program\pytorch\Scripts")
import custom_fun
from custom_fun import custom_dataset
import create_folder
from create_folder import create_tb_folder
import dataset_from_image
from dataset_from_image import MyCustomDataset
# In[3]:
# Create the TensorBoard log folders. `path` is indexed 0..2 further down, so
# create_tb_folder presumably returns at least three directories -- TODO confirm.
tb_dir = r'../'
path = create_tb_folder(tb_dir)
# In[4]:
# Per-channel statistics handed to transforms.Normalize below
# (presumably the ImageNet values used by the pretrained ResNet -- verify).
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
# In[5]:
# 'train' adds random rotation and colour jitter; 'val' only resizes,
# converts to a tensor and normalises.
data_transforms = {
'train': transforms.Compose([
transforms.RandomRotation(degrees=15),
transforms.Resize((224,224)), # 299 for Inception v3
transforms.ColorJitter(),
transforms.ToTensor(),
transforms.Normalize(mean, std)
]),
'val': transforms.Compose([
transforms.Resize((224,224)),
transforms.ToTensor(),
transforms.Normalize(mean, std)
]),
}
# In[6]:
data_dir = r'M:\dataset\first_att'
# In[7]:
batch_size = 4
lr = 0.003
# In[8]:
# `data` is indexed with 'train' and 'val' below; 0.8 is presumably the
# training fraction -- confirm in custom_fun.custom_dataset.
data = custom_dataset(data_dir,0.8)
# In[37]:
df = data['train']
df.head()
# In[9]:
# One dataset per split, each paired with its own transform pipeline.
image_datasets = {x: MyCustomDataset(data[x], data_transforms[x]) for x in ['train', 'val']}
# In[24]:
# Both loaders shuffle and load in the main process (num_workers=0).
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
shuffle=True, num_workers=0)
for x in ['train', 'val']}
# In[25]:
# Sample counts, used by train_model to average loss and accuracy per epoch.
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
device = torch.device("cpu")
# In[26]:
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
print(inputs.shape,classes.shape)
print(classes)
# Make a grid from batch
out = torchvision.utils.make_grid(inputs)
# In[27]:
# Separate writers: train_model logs training scalars to tb1 and
# validation scalars to tb2.
tb1 = SummaryWriter(path[1])
tb2 = SummaryWriter(path[2])
# In[28]:
def train_model(model, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass and then a validation pass over the
    module-level ``dataloaders``. Per-epoch loss and accuracy are printed and
    written to the module-level TensorBoard writers ``tb1`` (train) and
    ``tb2`` (val); parameter and gradient histograms go to ``tb``.

    Args:
        model: network to optimise; moved tensors use the module-level ``device``.
        criterion: loss callable ``criterion(outputs, labels)``. With
            ``nn.CrossEntropyLoss`` the labels must be class indices in
            ``[0, number_of_outputs)``.
        optimizer: optimiser stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training pass.
        num_epochs: number of epochs to run.

    Returns:
        ``model`` with the weights of the epoch that scored the highest
        validation accuracy.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # forward; track history only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                # statistics, accumulated as plain Python numbers so an empty
                # loader cannot leave `running_corrects` as a tensor-less int
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).item()
            if is_train:
                scheduler.step()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            # Tensorboard works here
            if is_train:
                tb1.add_scalar('Loss', epoch_loss, epoch)
                tb1.add_scalar('Accuracy', epoch_acc, epoch)
                # NOTE(review): histograms use the module-level `tb` writer,
                # not `tb1` -- confirm that this is intended.
                for name, weight in model.named_parameters():
                    tb.add_histogram(name, weight, epoch)
                    # Frozen parameters have no gradient; skip them instead
                    # of passing None to add_histogram.
                    if weight.grad is not None:
                        tb.add_histogram(f'{name}.grad', weight.grad, epoch)
            else:
                tb2.add_scalar('Loss', epoch_loss, epoch)
                tb2.add_scalar('Accuracy', epoch_acc, epoch)
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
# In[29]:
# Start from a ResNet-18 with pretrained weights.
model = models.resnet18(pretrained=True)
# The way below is feature extraction.
#for param in model.parameters():
#param.requires_grad = False
# In[30]:
num_ftrs = model.fc.in_features
# In[31]:
# Replace the classifier head with a 16-way output layer.
# NOTE: with 16 outputs, nn.CrossEntropyLoss only accepts targets 0..15;
# raw labels such as 40 or 45 must be remapped to class indices first.
model.fc = nn.Linear(num_ftrs, 16)
# In[32]:
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# In[33]:
# Observe that all parameters are being optimized
optimizer = optim.Adam(model.parameters(), lr=lr)
# In[34]:
# Multiply the learning rate by 0.1 every 5 scheduler steps.
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
# In[35]:
# Log one batch of images and the model graph, then close this writer.
tb = SummaryWriter(path[0])
grid = torchvision.utils.make_grid(inputs)
tb.add_image('images', grid)
tb.add_graph(model, inputs)
tb.close()
# In[36]:
model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)
# In[ ]:
# Persist only the weights (state dict), not the whole module.
torch.save(model.state_dict(), 'first_att_02.pth')
# In[ ]:
But I got the error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-36-edba91d1cb93> in <module>
----> 1 model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)
<ipython-input-28-662f652902cf> in train_model(model, criterion, optimizer, scheduler, num_epochs)
29 outputs = model(inputs)
30 _, preds = torch.max(outputs, 1)
---> 31 loss = criterion(outputs, labels)
32
33 # backward + optimize only if in training phase
M:\program\pytorch\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
M:\program\pytorch\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
914 def forward(self, input, target):
915 return F.cross_entropy(input, target, weight=self.weight,
--> 916 ignore_index=self.ignore_index, reduction=self.reduction)
917
918
M:\program\pytorch\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2019 if size_average is not None or reduce is not None:
2020 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2021 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2022
2023
M:\program\pytorch\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1836 .format(input.size(0), target.size(0)))
1837 if dim == 2:
-> 1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
1839 elif dim == 4:
1840 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
IndexError: Target 40 is out of bounds.
Could you let me know where I am wrong? Thanks very much.
Related
I am following TensorFlow’s tutorial on time series forecasting. I created and saved the model like in this tutorial. There are many examples in the manual for learning, but few uses of it.
How can I use the saved model in another script? How can I predict temperature, e.g., “01.01.2017 00:10:00”? How can I get the temperature value in a normal format?
Example code from the tutorial:
from datetime import datetime
import time
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam, RMSprop
import os
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# Download (and extract) the Jena climate CSV archive.
zip_path = tf.keras.utils.get_file(
origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
fname='jena_climate_2009_2016.csv.zip',
extract=True)
# Dropping the ".zip" extension gives the path of the CSV file.
csv_path, _ = os.path.splitext(zip_path)
df = pd.read_csv(csv_path)
# Slice [start:stop:step], starting from index 5 take every 6th record.
df = df[5::6]
# Remove the timestamp column from the features and keep it as datetimes.
date_time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')
# Replace the -9999.0 placeholder in both wind-velocity columns with 0.0.
wv = df['wv (m/s)']
bad_wv = wv == -9999.0
wv[bad_wv] = 0.0
max_wv = df['max. wv (m/s)']
bad_max_wv = max_wv == -9999.0
max_wv[bad_max_wv] = 0.0
# The above inplace edits are reflected in the DataFrame.
df['wv (m/s)'].min()
wv = df.pop('wv (m/s)')
max_wv = df.pop('max. wv (m/s)')
# Convert to radians.
wd_rad = df.pop('wd (deg)')*np.pi / 180
# Calculate the wind x and y components.
df['Wx'] = wv*np.cos(wd_rad)
df['Wy'] = wv*np.sin(wd_rad)
# Calculate the max wind x and y components.
df['max Wx'] = max_wv*np.cos(wd_rad)
df['max Wy'] = max_wv*np.sin(wd_rad)
# Encode time of day and time of year as sine/cosine pairs of the
# timestamp in seconds.
timestamp_s = date_time.map(pd.Timestamp.timestamp)
day = 24*60*60
year = (365.2425)*day
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))
# Frequency analysis of the temperature column; only used by the
# commented-out plot below.
fft = tf.signal.rfft(df['T (degC)'])
f_per_dataset = np.arange(0, len(fft))
n_samples_h = len(df['T (degC)'])
# NOTE(review): 365.2524 here differs from the 365.2425 used for `year`
# above -- looks like a digit transposition; confirm.
hours_per_year = 24*365.2524
years_per_dataset = n_samples_h/(hours_per_year)
# f_per_year = f_per_dataset/years_per_dataset
# plt.step(f_per_year, np.abs(fft))
# plt.xscale('log')
# plt.ylim(0, 400000)
# plt.xlim([0.1, max(plt.xlim())])
# plt.xticks([1, 365.2524], labels=['1/Year', '1/day'])
# _ = plt.xlabel('Frequency (log scale)')
column_indices = {name: i for i, name in enumerate(df.columns)}
# Chronological 70% / 20% / 10% split into train / validation / test.
n = len(df)
train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]
num_features = df.shape[1]
# Standardise every split with the mean and std of the training split only.
train_mean = train_df.mean()
train_std = train_df.std()
train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std
df_std = (df - train_mean) / train_std
# df_std = df_std.melt(var_name='Column', value_name='Normalized')
# plt.figure(figsize=(12, 6))
# ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
# _ = ax.set_xticklabels(df.keys(), rotation=90)
# //////////////////////////////////////////////////////////////////////////////////////////////
class WindowGenerator():
    """Describes how the series is cut into (input, label) windows.

    Holds the three data splits and precomputes the slices and index arrays
    that locate the input part and the label part inside one window of
    ``input_width + shift`` time steps.
    """

    def __init__(self, input_width, label_width, shift,
                 train_df=train_df, val_df=val_df, test_df=test_df,
                 label_columns=None):
        # Keep references to the data splits.
        self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

        # Name -> position lookups; the label lookup only exists when
        # label columns were requested.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {
                col: pos for pos, col in enumerate(label_columns)}
        self.column_indices = {
            col: pos for pos, col in enumerate(train_df.columns)}

        # Window geometry.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift

        # Inputs occupy the first `input_width` steps of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels occupy the last `label_width` steps of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a four-line summary of the window layout."""
        summary = (
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}',
        )
        return '\n'.join(summary)
# //////////////////////////////////////////////////////////////////////////////////////////////
def split_window(self, features):
    """Split a batch of windows into its input part and its label part.

    ``features`` is indexed as ``[batch, time, feature]``. When label columns
    were configured, only those feature columns are kept in the labels.
    """
    window_inputs = features[:, self.input_slice, :]
    window_labels = features[:, self.labels_slice, :]

    wanted = self.label_columns
    if wanted is not None:
        selected = [window_labels[:, :, self.column_indices[col]] for col in wanted]
        window_labels = tf.stack(selected, axis=-1)

    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    window_inputs.set_shape([None, self.input_width, None])
    window_labels.set_shape([None, self.label_width, None])
    return window_inputs, window_labels


WindowGenerator.split_window = split_window
def plot(self, model=None, plot_col='T (degC)', max_subplots=3, num=None):
    """Plot inputs, labels and (optionally) predictions of the example batch.

    Draws up to ``max_subplots`` windows of ``self.example``, one subplot per
    window, for the column ``plot_col``. Predictions are added when ``model``
    is given.
    """
    inputs, labels = self.example
    plt.figure(figsize=(12, 8), num=num)
    plot_col_index = self.column_indices[plot_col]
    panel_count = min(max_subplots, len(inputs))

    for row in range(panel_count):
        plt.subplot(panel_count, 1, row + 1)
        plt.ylabel(f'{plot_col} [normed]')
        plt.plot(self.input_indices, inputs[row, :, plot_col_index],
                 label='Inputs', marker='.', zorder=-10)

        # With explicit label columns the plotted column may not be a label.
        label_col_index = (
            self.label_columns_indices.get(plot_col, None)
            if self.label_columns
            else plot_col_index
        )
        if label_col_index is None:
            continue

        plt.scatter(self.label_indices, labels[row, :, label_col_index],
                    edgecolors='k', label='Labels', c='#2ca02c', s=64)
        if model is not None:
            predictions = model(inputs)
            plt.scatter(self.label_indices, predictions[row, :, label_col_index],
                        marker='X', edgecolors='k', label='Predictions',
                        c='#ff7f0e', s=64)

        if row == 0:
            plt.legend()

    plt.xlabel('Time [h]')


WindowGenerator.plot = plot
def make_dataset(self, data):
    """Turn a frame of features into a dataset of (inputs, labels) windows.

    The data is cast to float32, cut into shuffled windows of
    ``total_window_size`` steps in batches of 32, and every batch is split
    with ``split_window``.
    """
    values = np.array(data, dtype=np.float32)
    windows = tf.keras.utils.timeseries_dataset_from_array(
        data=values,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32,)
    return windows.map(self.split_window)


WindowGenerator.make_dataset = make_dataset
# These four accessors must be properties: `plot` unpacks `self.example`,
# `example` iterates `self.train`, and callers pass `window.train` /
# `window.val` straight to `model.fit`. The decorators had been reduced to
# `#property` comments, which left them as plain methods.
@property
def train(self):
    """Windowed dataset built from the training frame."""
    return self.make_dataset(self.train_df)


@property
def val(self):
    """Windowed dataset built from the validation frame."""
    return self.make_dataset(self.val_df)


@property
def test(self):
    """Windowed dataset built from the test frame."""
    return self.make_dataset(self.test_df)


@property
def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
        # No example batch was found, so get one from the `.train` dataset
        result = next(iter(self.train))
        # And cache it for next time
        self._example = result
    return result


WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example
# One input step, one label step, label taken one step after the input;
# only the temperature column is used as label.
single_step_window = WindowGenerator(
input_width=1, label_width=1, shift=1,
label_columns=['T (degC)'])
# Upper bound on training epochs used by compile_and_fit.
MAX_EPOCHS = 20
def compile_and_fit(model, window, patience=2, save=False,
                    save_path='./saved_model'):
    """Compile ``model``, train it on ``window`` and optionally save it.

    Training uses mean-squared-error loss, the Adam optimiser and a
    mean-absolute-error metric, runs for at most ``MAX_EPOCHS`` epochs and
    stops early when ``val_loss`` has not improved for ``patience`` epochs.

    Args:
        model: Keras model to compile and fit.
        window: object exposing ``train`` and ``val`` datasets.
        patience: early-stopping patience in epochs.
        save: when truthy, save the trained model to ``save_path``.
        save_path: destination passed to ``model.save``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')
    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[tf.keras.metrics.MeanAbsoluteError()])
    history = model.fit(window.train, epochs=MAX_EPOCHS,
                        validation_data=window.val,
                        callbacks=[early_stopping])
    if save:
        model.save(save_path)
    return history
# Two hidden layers of 64 ReLU units followed by a single linear output.
dense = tf.keras.Sequential([
tf.keras.layers.Dense(units=64, activation='relu'),
tf.keras.layers.Dense(units=64, activation='relu'),
tf.keras.layers.Dense(units=1),
])
# Train on single-step windows and save the result to './saved_model'.
history = compile_and_fit(dense, single_step_window, save=True)
val_performance = {}
performance = {}
# Record the evaluation results on the validation and test windows.
val_performance['Dense'] = dense.evaluate(single_step_window.val)
performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)
dense.summary()
Next I need to load the model and use:
from datetime import datetime
import time
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam, RMSprop
import os
import matplotlib.pyplot as plt
import seaborn as sns
# Restore the model from the directory written by model.save('./saved_model').
loaded_model = tf.keras.models.load_model('./saved_model')
loaded_model.summary()
What's next?
There are many ways you may consider.
model.save() and model.load()
model.save_weights() and model.load_weights()
Export parameters as arrays. We discussed here no need for target API install. You can use any random function from the target machine with the parameter.
Transform and copy the working directory
etc.
Time conversation. You know that the database has unique time units, Oracle, Informix, and SQL. They can display as unix_time or binary sequences as designed in many applications. The unix_time in the communications response with a clock or response time to convert per security method if any. You need to synchronize the clock and response to the correct binary format or time format for extracting data when request or response information extraction, number as binary, and then string exports.
TAI (17 September 2004) UTC (16 to 17 September 2004) [Unix time][1]
2004-09-17T00:00:30.75 2004-09-16T23:59:58.75 1095379198.75
Sample: Export trained parameters from the model. It is easy to print out at any of the steps, but working coefficients respond to the target environment with significant feedback.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class custom_callback(tf.keras.callbacks.Callback):
    """Print the first two weight shapes each epoch; stop at 95% accuracy."""

    def on_epoch_end(self, epoch, logs=None):
        # The class previously defined `on_epoch_end` twice, so the second
        # definition silently replaced the early-stopping one. Both
        # behaviours now live in this single hook.
        logs = logs or {}
        weights = self.model.get_weights()
        print( weights[0].shape ) # e.g. (3, 3, 4, 32)
        print( weights[1].shape ) # e.g. (32,)
        accuracy = logs.get('accuracy')
        # 'accuracy' is only reported when the model was compiled with it.
        if accuracy is not None and accuracy >= 0.95:
            self.model.stop_training = True


# Rebinds the class name to an instance, which is what `model.fit` receives.
custom_callback = custom_callback()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Train with the callback instance created above; `model` and `dataset`
# are not defined in this snippet.
history = model.fit( dataset, batch_size=100, epochs=10000, callbacks=[custom_callback] )
I am running the following code twice: once on CPU (without the "disable_eager_execution" part) and once on GPU.
With CPU training, one epoch takes 12 hours, but the loss changes from batch to batch and I can see that training is taking place.
With the GPU version, nothing happens: one epoch takes around 1 hour, but the loss and accuracy stay the same.
PLEASE HELP ME UNDERSTAND WHAT DO I DO WRONG...
I am running this code using aws sage maker (ml.g4dn.4xlarge)
Code:
import numpy as np
import pandas as pd
import os
import datetime
import tensorflow as tf
import re
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.python.framework.ops import disable_eager_execution
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')), '\n')
# Turn off eager execution for everything that follows.
disable_eager_execution()
# Read data
# read dictionaries:
# company_dict:
# Tab-separated, header-less file with two columns: company id and index.
company_df = pd.read_csv("/home/ec2-user/SageMaker/company_similarity/data/company_dict.csv", sep='\t', header=None)
company_df.columns = ['company_id', 'idx']
# payee dict
# Tab-separated, header-less file with two columns: payee and index.
payee_df = pd.read_csv("/home/ec2-user/SageMaker/company_similarity/data/cleaned_up_payee_dict.csv", sep='\t', header=None)
payee_df.columns = ['payee', 'idx']
# Read raw data
BATCH_SIZE = 32
# Stream the training CSV as batches of (features dict, label); the file is
# read once (num_epochs=1) and malformed rows are skipped (ignore_errors=True).
raw_data = tf.data.experimental.make_csv_dataset(
"/home/ec2-user/SageMaker/company_similarity/data/training_data.csv",
column_names=['company_id', 'payee', 'label'],
select_columns=['company_id', 'payee', 'label'],
field_delim='\t',
column_defaults=[tf.string, tf.string, tf.int32],
batch_size=BATCH_SIZE,
label_name='label',
na_value="?",
num_epochs=1,
ignore_errors=True,
)
class PreprocessingFeatures(object):
    """Map raw string features to integer ids with two ``StringLookup`` layers.

    Instances are callable as ``(features, labels) -> ((company, payee), labels)``
    so they can be passed to ``Dataset.map``.
    """

    def __init__(self, company_idx, payee_idx):
        """
        Args:
            company_idx: vocabulary for the company lookup.
            payee_idx: vocabulary for the payee lookup.
        """
        self.payee_idx = payee_idx
        self.company_idx = company_idx
        # The backslash before '?' is now written as '\\?': the former '\?'
        # was an invalid escape sequence. The runtime value is unchanged.
        self.symbols = '!"$%&\'\\?()*+,-./:;<=>?[\\]^_`{|}~a-zA-Z0-9 '
        # No mask token and a single out-of-vocabulary bucket per lookup.
        self.payee_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(
            vocabulary=payee_idx,
            mask_token=None,
            num_oov_indices=1
        )
        self.company_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(
            vocabulary=company_idx,
            mask_token=None,
            num_oov_indices=1
        )

    def __call__(self, features, labels):
        """Look up ``features['payee']`` and ``features['company_id']``.

        Returns:
            ``((company, payee), labels)`` with ``labels`` passed through.
        """
        payee = self.payee_lookup(features['payee'])
        company = self.company_lookup(features['company_id'])
        return (company, payee), labels
# Vocabularies for the lookups; company ids are converted to strings.
payee_list = list(payee_df['payee'])
company_list = [str(si) for si in list(company_df['company_id'])]
# ************ START TRAINING ************ #
# One timestamped TensorBoard folder per run.
log_dir = '/home/ec2-user/SageMaker/company_similarity/models/logs/fit/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_dir = '/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints'
# Default summary writer, used by the tf.summary.text calls in the callback.
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
def present_topK(model, listA, item_title='eBay', topK=10):
    '''
    Show the topK items most similar to ``item_title`` in embedding space.

    :param model: the trained model; the embedding matrix is read from
        ``model.layers[2]``
    :param listA: vocabulary list; item ``k`` of the list corresponds to
        embedding row ``k + 1``
    :param item_title: title text of the query item (must be in ``listA``)
    :param topK: number of neighbours to report
    :return: two-line string, ``payee<TAB>[names]`` then ``score<TAB>[scores]``
    '''
    assert item_title in listA, "Item not in Vocabulary"
    # NOTE(review): layers[2] is assumed to be the embedding that matches
    # `listA` -- confirm against the model's layer order.
    emb = model.layers[2].get_weights()[0]
    # we started from 1 not zero on dictionary
    query = emb[listA.index(item_title) + 1].reshape(1, -1)
    score = cosine_similarity(query, emb)[0]
    similar_items = np.argsort(score)[::-1][:topK]
    res = {'payee': [], 'score': []}
    for i in similar_items:
        # Row 0 has no vocabulary entry (out-of-vocabulary slot). It used to
        # be reported as listA[-1], i.e. the wrong item.
        res['payee'] += ['[UNK]' if i == 0 else listA[i - 1]]
        res['score'] += [score[i]]
    return "\n".join("{}\t{}".format(k, v) for k, v in res.items())
class GenerateExamplesCallback(tf.keras.callbacks.Callback):
    """Save the model each epoch and log a similarity sanity check."""

    def __init__(self):
        # Let the Keras base class initialise its own attributes first.
        super().__init__()
        self.step = 0

    def _log_similarity(self, step):
        """Print the similarity table and write it as a text summary."""
        sim_table = present_topK(self.model, payee_list)
        print("\nSimilar Items to 'eBay': ", sim_table)
        with file_writer.as_default():
            tf.summary.text('Similarity sanity check', data=tf.convert_to_tensor(sim_table), step=step)

    def on_epoch_end(self, epoch, logs=None):
        """Count the epoch, save the full model and log the table."""
        self.step += 1
        self.model.save('/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints/model_{}'.format(epoch))
        self._log_similarity(epoch)

    def on_batch_end(self, batch, logs=None):
        """Log the table on every 1000th batch."""
        if batch % 1000 == 0:
            self._log_similarity(batch)
print('TensorBoard logging folder: ', log_dir)
print("checkpoint_dir:", checkpoint_dir)
# "{epoch}" is left as a placeholder inside the checkpoint file name.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True, save_freq=200000)
# NOTE(review): early_stopping_callback is created but is not part of the
# `callbacks` list below -- confirm whether that is intended.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, update_freq=10000)
callbacks = [checkpoint_callback, tensorboard_callback, GenerateExamplesCallback()]
# read the data
# train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).shuffle(buffer_size=10000).repeat()
# Convert the string features to ids and repeat the dataset indefinitely;
# the shuffling variant above is disabled.
train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).repeat()
# examples
# next(iter(raw_data.take(1)))
# next(iter(train_data))
# wc -l <filename> on terminal
fileLen = 5851184
# Number of batches needed to cover `fileLen` rows once.
STEPS_PER_EPOCH = (fileLen // BATCH_SIZE) + 1
# STEPS_PER_EPOCH = 1000
def build_model(company_embedding=128, payee_embedding=128, loss=tf.keras.losses.binary_crossentropy):
    """Build and compile the company/payee embedding model.

    Each id input is embedded and flattened, the two vectors are combined
    with a normalised dot product, and a single sigmoid unit produces the
    output. The model is compiled with ``loss``, the Adam optimiser and an
    accuracy metric.
    """
    layers = tf.keras.layers

    # Layers are created in the same order as before so that auto-generated
    # layer names and `model.layers` positions stay unchanged.
    company_input = layers.Input(name='company_input', shape=(1,))
    payee_input = layers.Input(name='payee_input', shape=(1,))

    # One extra embedding row on top of the vocabulary size.
    company_vec = layers.Embedding(
        name='company_embedding',
        input_dim=len(company_list)+1,
        output_dim=company_embedding,
    )(company_input)
    company_vec = layers.Flatten()(company_vec)

    payee_vec = layers.Embedding(
        name='payee_embedding',
        input_dim=len(payee_list)+1,
        output_dim=payee_embedding,
    )(payee_input)
    payee_vec = layers.Flatten()(payee_vec)

    similarity = layers.Dot(name='dot', normalize=True, axes=1)([payee_vec, company_vec])
    similarity = layers.Reshape(target_shape = [1])(similarity)
    output = layers.Dense(1, activation='sigmoid')(similarity)

    model = tf.keras.Model(inputs=(company_input, payee_input), outputs=output)
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model
# Build the model and train it for EPOCHS passes of STEPS_PER_EPOCH batches.
model = build_model()
EPOCHS = 5
r = model.fit(train_data,
              epochs=EPOCHS,
              steps_per_epoch=STEPS_PER_EPOCH,
              callbacks=callbacks
              )
# The path previously read "/home/ec2user/..." (missing hyphen), unlike every
# other path in this script, so the final model was written outside the
# project tree or the save failed.
model.save("/home/ec2-user/SageMaker/company_similarity/models/models/embedding_model_final.h5")
print("Training is completed")
Dataset.py
import os
import random
from skimage import io
import cv2
from skimage.transform import resize
import numpy as np
import tensorflow as tf
import keras
import Augmentor
def iter_sequence_infinite(seq):
    """Cycle over a Sequence forever.

    # Arguments
        seq: Sequence object

    # Returns
        Generator yielding batches; after the last item of `seq` it starts
        a fresh iteration over `seq` again.
    """
    while True:
        yield from seq
# data generator class
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence yielding batches of augmented (image, mask) pairs."""

    def __init__(self, ids, imgs_dir, masks_dir, batch_size=10, img_size=128, n_classes=1, n_channels=3, shuffle=True):
        """
        Args:
            ids: file names; the same name is looked up in both directories.
            imgs_dir: directory holding the images.
            masks_dir: directory holding the masks.
            batch_size: number of samples per batch.
            img_size: width and height every sample is resized to.
            n_classes: stored on the instance; not used by this class.
            n_channels: stored on the instance; not used by this class.
            shuffle: reshuffle the sample order at the end of every epoch.
        """
        super().__init__()
        self.id_names = ids
        self.indexes = np.arange(len(self.id_names))
        self.imgs_dir = imgs_dir
        self.masks_dir = masks_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.n_classes = n_classes
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.id_names))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation__(self, id_name):
        """Load, augment and normalise one (image, mask) pair.

        Raises:
            FileNotFoundError: if the mask file cannot be read.
        """
        img_path = os.path.join(self.imgs_dir, id_name)
        mask_path = os.path.join(self.masks_dir, id_name)
        img = io.imread(img_path)
        mask = cv2.imread(mask_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside Augmentor.
        if mask is None:
            raise FileNotFoundError('Could not read mask: {}'.format(mask_path))

        # Geometric augmentation applied jointly to image and mask.
        p = Augmentor.DataPipeline([[img, mask]])
        p.resize(probability=1.0, width=self.img_size, height=self.img_size)
        p.rotate_without_crop(probability=0.3, max_left_rotation=10, max_right_rotation=10)
        #p.random_distortion(probability=0.3, grid_height=10, grid_width=10, magnitude=1)
        p.shear(probability=0.3, max_shear_left=1, max_shear_right=1)
        #p.skew_tilt(probability=0.3, magnitude=0.1)
        p.flip_random(probability=0.3)
        sample_p = np.array(p.sample(1)).squeeze()
        p_img = sample_p[0]
        p_mask = sample_p[1]
        # Snap mask values back to {0, 255} after interpolation (denoising).
        augmented_mask = (p_mask // 255) * 255

        # Photometric augmentation applied to the image only.
        q = Augmentor.DataPipeline([[p_img]])
        q.random_contrast(probability=0.3, min_factor=0.2, max_factor=1.0)  # low to High
        q.random_brightness(probability=0.3, min_factor=0.2, max_factor=1.0)  # dark to bright
        image = np.array(q.sample(1)).squeeze()
        # Keep a single channel of the mask.
        mask = augmented_mask[::, ::, 0]

        # image normalization
        image = image / 255.0
        mask = mask / 255.0
        return image, mask

    def __len__(self):
        "Denotes the number of batches per epoch"
        return int(np.floor(len(self.id_names) / self.batch_size))

    def __getitem__(self, index):  # index : batch no.
        """Return batch ``index`` as ``(imgs, masks)`` arrays."""
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_ids = [self.id_names[k] for k in indexes]
        imgs = list()
        masks = list()
        for id_name in batch_ids:
            img, mask = self.__data_generation__(id_name)
            imgs.append(img)
            # Add a trailing channel axis to the mask.
            masks.append(np.expand_dims(mask, -1))
        imgs = np.array(imgs)
        masks = np.array(masks)
        return imgs, masks  # return batch
train.py
import argparse
import logging
import os
import sys
from tqdm import tqdm # progress bar
import numpy as np
import matplotlib.pyplot as plt
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import segmentation_models as sm
from segmentation_models.utils import set_trainable
from dataset import DataGenerator, iter_sequence_infinite
def train_model(model, train_gen, valid_gen, epochs, save_cp=True):
    """Train ``model`` batch by batch, validating after every epoch.

    Args:
        model: compiled model whose ``train_on_batch`` returns ``(loss, iou)``.
        train_gen: training generator (exposes ``id_names``, ``__len__`` and
            ``on_epoch_end``).
        valid_gen: validation generator with the same interface.
        epochs: number of epochs to run.
        save_cp: when true, save the weights after every epoch into the
            module-level ``checkpoint_dir``.
    """
    train_img_num = len(train_gen.id_names)
    train_batch_num = len(train_gen)
    train_gen_out = iter_sequence_infinite(train_gen)
    valid_batch_num = len(valid_gen)
    valid_img_num = len(valid_gen.id_names)
    valid_gen_out = iter_sequence_infinite(valid_gen)
    for epoch in range(epochs):  # iterate over the epochs
        set_trainable(model)
        epoch_loss = 0  # loss in this epoch
        epoch_iou = 0
        count = 0
        with tqdm(total=train_img_num, desc=f'Epoch {epoch + 1}/{epochs}', position=0, leave=True, unit='img') as pbar:  # make progress bar
            for _ in range(train_batch_num):
                batch = next(train_gen_out)
                imgs = batch[0]
                true_masks = batch[1]
                loss, iou = model.train_on_batch(imgs, true_masks)  # loss and IoU of this batch
                epoch_loss += loss
                epoch_iou += iou
                pbar.set_postfix(**{'Batch loss': loss, 'Batch IoU': iou})  # show the batch metrics in the bar
                pbar.update(imgs.shape[0])  # update progress
                count += 1
        train_gen.on_epoch_end()
        # max(count, 1) avoids a ZeroDivisionError when there are no batches.
        batches = max(count, 1)
        print( "Epoch : loss: {}, IoU : {}".format(epoch_loss/batches, epoch_iou/batches))
        # Do validation
        validation_model(model, valid_gen_out, valid_batch_num, valid_img_num)
        valid_gen.on_epoch_end()
        if save_cp:
            # Directory-creation errors are no longer swallowed: hiding them
            # only postponed the failure to save_weights below.
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir, exist_ok=True)
                logging.info('Created checkpoint directory')
            model.save_weights(os.path.join(checkpoint_dir , f'CP_epoch{epoch + 1}.h5'))
            logging.info(f'Checkpoint {epoch + 1} saved !')
def validation_model(model, valid_gen_out, valid_batch_num, valid_img_num):
    """Evaluate ``model`` on the validation batches and show one prediction.

    Args:
        model: compiled model whose ``test_on_batch`` returns ``(loss, iou)``.
        valid_gen_out: iterator yielding ``(imgs, true_masks)`` batches.
        valid_batch_num: number of batches to pull from the iterator.
        valid_img_num: total number of validation images (progress-bar total).
    """
    # Without batches there is nothing to average or plot; previously this
    # ended in a ZeroDivisionError.
    if valid_batch_num <= 0:
        logging.warning('Validation skipped: no validation batches')
        return
    epoch_loss = 0  # loss in this epoch
    epoch_iou = 0
    count = 0
    with tqdm(total=valid_img_num, desc='Validation round', position=0, leave=True, unit='img') as pbar:  # make progress bar
        for _ in range(valid_batch_num):
            batch = next(valid_gen_out)
            imgs = batch[0]
            true_masks = batch[1]
            loss, iou = model.test_on_batch(imgs, true_masks)  # loss and IoU of this batch
            epoch_loss += loss
            epoch_iou += iou
            # Key spelled like the training progress bar ('Batch loss').
            pbar.set_postfix(**{'Batch loss': loss, 'Batch IoU': iou})
            pbar.update(imgs.shape[0])  # update progress
            count += 1
    print("Validation loss: {}, IoU: {}".format(epoch_loss / count, epoch_iou / count))
    # Show image, ground-truth mask and predicted mask for the first sample
    # of the last batch.
    pred_mask = model.predict(np.expand_dims(imgs[0],0))
    plt.subplot(131)
    plt.imshow(imgs[0])
    plt.subplot(132)
    plt.imshow(true_masks[0].squeeze(), cmap="gray")
    plt.subplot(133)
    plt.imshow(pred_mask.squeeze(), cmap="gray")
    plt.show()
    print()
def get_args(argv=None):
    """Parse the command-line options for UNet training.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, exactly as the zero-argument
            call always did.

    Returns:
        argparse.Namespace with the attributes ``epochs``, ``batch_size``,
        ``lr``, ``backbone``, ``load``, ``resizing`` and ``val``.
    """
    parser = argparse.ArgumentParser(description='Train the UNet on images and target masks',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-e', '--epochs', metavar='E', type=int, default=50,
                        help='Number of epochs', dest='epochs')
    parser.add_argument('-b', '--batch_size', metavar='B', type=int, nargs='?', default=2,
                        help='Batch size', dest='batch_size')
    parser.add_argument('-l', '--learning-rate', metavar='LR', type=float, nargs='?', default=1e-5,
                        help='Learning rate', dest='lr')
    # The value is a backbone name (e.g. resnet50), not a file path.
    parser.add_argument('-bb', '--backbone', default='resnet50', metavar='NAME',
                        help='Backbone name')
    # default=False is kept so existing truthiness checks on args.load still work.
    parser.add_argument('-w', '--weight', dest='load', type=str, default=False,
                        help='Load model from a .h5 file')
    # This value is handed to the data generators as img_size.
    parser.add_argument('-s', '--resizing', dest='resizing', type=int, default=384,
                        help='Image size passed to the data generators')
    parser.add_argument('-v', '--validation', dest='val', type=float, default=20.0,
                        help='Percent of the data that is used as validation (0-100)')
    return parser.parse_args(argv)
if __name__ == '__main__':
    # Script entry point: split the image ids into train/validation sets,
    # build the data generators and the UNet, then run the custom train loop.
    img_dir = './data/train/imgs/'  # ./data/train/imgs/CVC_Original/'
    mask_dir = './data/train/masks/'  # ./data/train/masks/CVC_Ground Truth/'
    # NOTE(review): the checkpoint-saving code references `checkpoint_dir`
    # without receiving it as an argument, so it presumably reads this
    # module-level name - keep it defined here.
    checkpoint_dir = './checkpoints'
    args = get_args()

    # Every file name in img_dir is treated as one sample id.
    train_ids = os.listdir(img_dir)
    # args.val is a percentage (0-100) of the samples held out for validation.
    n_val = int(len(train_ids) * args.val/100)
    valid_ids = train_ids[:n_val]  # first n_val ids -> validation
    train_ids = train_ids[n_val:]  # remaining ids -> training
    print("training_size: ", len(train_ids), "validation_size: ", len(valid_ids))

    train_gen = DataGenerator(train_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
    valid_gen = DataGenerator(valid_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
    print("total training batches: ", len(train_gen))
    print("total validaton batches: ", len(valid_gen))
    train_steps = len(train_ids) // args.batch_size
    valid_steps = len(valid_ids) // args.batch_size

    # define model
    model = sm.Unet(args.backbone, encoder_weights='imagenet')
    optimizer = optimizers.Adam(lr=args.lr, decay=1e-4)
    model.compile(
        optimizer=optimizer,
        loss=sm.losses.bce_dice_loss,  # sm.losses.bce_jaccard_loss, # sm.losses.binary_crossentropy,
        metrics=[sm.metrics.iou_score],
    )
    # model.summary()

    # The model is compiled with iou_score only, so no 'val_accuracy' value is
    # ever logged; monitor the IoU that the file-name template already uses.
    # IoU is better when higher, hence mode='max'.
    # NOTE(review): this list is not passed to train_model() below, so these
    # callbacks currently have no effect on the custom training loop.
    callbacks = [
        EarlyStopping(patience=6, verbose=1),
        ReduceLROnPlateau(factor=0.1, patience=3, min_lr=1e-7, verbose=1),
        ModelCheckpoint('./weights.Epoch{epoch:02d}-Loss{loss:.3f}-VIou{val_iou_score:.3f}.h5', verbose=1,
                        monitor='val_iou_score', mode='max', save_best_only=True, save_weights_only=True)
    ]
    train_model(model=model, train_gen=train_gen, valid_gen=valid_gen, epochs=args.epochs)
When I run this code, the first several epochs complete normally, but around epoch 20 a GPU out-of-memory error occurs, as shown below:
(0) Resource exhausted: OOM when allocating tensor with shape[2,64,96,96] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node decoder_stage2b_bn/FusedBatchNorm}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
So I suspect the problem is caused by data generation.
The code generates batches in the following order.
In train.py, the DataGenerator class (a Sequence implemented in Dataset.py) is instantiated:
train_gen = DataGenerator(train_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
valid_gen = DataGenerator(valid_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
At the start of the function 'train_model', each DataGenerator (Sequence) is converted to a generator using the function 'iter_sequence_infinite':
train_gen_out = iter_sequence_infinite(train_gen)
valid_gen_out = iter_sequence_infinite(valid_gen)
Each batch is then fetched with the built-in function 'next':
batch = next(train_gen_out)
I expected this to cause no memory problem, but it does.
What is the problem, and how can I solve it?
Thanks.
I have developed Multivariate Input Multi-Step LSTM Time Series Forecasting Models for my dataset according to the tutorial (https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/).
However, I ran into a very odd problem: when I run the code with a small number of samples (50 for training, 10 for testing), the predictions are correct, but when I run the experiment with the full data (4000 samples for training, 1000 for testing), the predictions contain NaN values, which lead to errors.
Then, when I add scaling, relu activation functions and regularization as in the following code, I do get predictions with the full data (4000 samples for training, 1000 for testing), but they are still not correct: I want to forecast 96 steps, but every predicted step has the same value.
Can you give a useful suggestion to deal with the forecast accuracy issues?
import time
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import csv
import numpy
from sklearn.preprocessing import MinMaxScaler
from numpy import save
from timeit import default_timer as timer
def scale(train, test):
    """Scale train and test data to [-1, 1] with a scaler fitted on train only.

    Args:
        train: Array-like with ``astype``; converted to float and used to fit
            the ``MinMaxScaler``.
        test: Array-like with ``astype``; converted to float and transformed
            with the scaler fitted on ``train``.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)``.
    """
    train_f = train.astype(float)
    test_f = test.astype(float)
    # Fit on the training data only so the test data does not influence the
    # feature ranges.
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_f)
    # Both arrays are explicitly shaped as (rows, columns) before transforming.
    train_f = train_f.reshape(train_f.shape[0], train_f.shape[1])
    train_scaled = scaler.transform(train_f)
    test_f = test_f.reshape(test_f.shape[0], test_f.shape[1])
    test_scaled = scaler.transform(test_f)
    return scaler, train_scaled, test_scaled
# split a multivariate dataset (columns 10-25) into scaled train/test windows
def split_dataset(data):
    """Cut the raw table into scaled train/test sets of 97-row windows.

    Rows ``0..387029`` form the training set and rows ``387030..433880`` the
    test set; only columns ``10..25`` are kept. Both parts are scaled with
    ``scale`` and then split into consecutive windows of 97 rows.

    Args:
        data: 2-D array supporting ``data[rows, cols]`` slicing.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)`` where the two arrays are
        stacked windows of 97 rows each.
    """
    feature_cols = slice(10, 26)
    train_raw = data[0:387030, feature_cols]
    test_raw = data[387030:433881, feature_cols]
    scaler, train_flat, test_flat = scale(train_raw, test_raw)
    # numpy.split requires an equal division, so each row count must be a
    # multiple of 97.
    train_windows = array(split(train_flat, len(train_flat) / 97))
    test_windows = array(split(test_flat, len(test_flat) / 97))
    return scaler, train_windows, test_windows
# create a list of configs to try
def model_configs():
    """Build the hyper-parameter grid to evaluate.

    Returns:
        List of ``[n_nodes, n_epochs, n_batch]`` lists, one per combination,
        ordered with ``n_nodes`` varying slowest and ``n_batch`` fastest.
        The number of combinations is also printed.
    """
    node_options = [100, 200, 300]
    epoch_options = [50, 100]
    batch_options = [64]
    grid = [
        [nodes, epochs, batch]
        for nodes in node_options
        for epochs in epoch_options
        for batch in batch_options
    ]
    print('Total configs: %d' % len(grid))
    return grid
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    """Score forecasts against the expected values with RMSE.

    Args:
        actual: 3-D numpy array of expected values, indexed as
            ``[sample, step, feature]``.
        predicted: numpy array of forecasts with the same shape as ``actual``.

    Returns:
        Tuple ``(score, scores)``: ``score`` is the RMSE over every element
        and ``scores`` is a list with the RMSE of each evaluated step.
    """
    scores = list()
    # NOTE(review): with a stride of 97 only steps 0, 97, 194, ... are scored;
    # for 97-step windows that is step 0 alone. Confirm this is intended.
    for i in range(0, actual.shape[1], 97):
        mse = mean_squared_error(actual[:, i, :], predicted[:, i, :])
        scores.append(sqrt(mse))
    # Overall RMSE. The squared error is summed in one vectorised numpy call
    # instead of a Python-level loop over every element.
    squared_error = numpy.sum((actual - predicted) ** 2)
    n_values = actual.shape[0] * actual.shape[1] * actual.shape[2]
    score = sqrt(squared_error / n_values)
    return score, scores
# convert history into inputs and outputs
def to_supervised(train, n_steps_in, n_steps_out=97, overlop=97):
    """Turn windowed history into (input, output) training pairs.

    The windows are flattened back into one sequence of rows. Starting at row
    0 and advancing ``overlop`` rows at a time, each sample takes
    ``n_steps_in`` rows as input and the following ``n_steps_out`` rows as
    output. Generation stops at the first start position whose output would
    run past the end of the sequence.

    Args:
        train: 3-D array shaped ``(windows, rows_per_window, features)``.
        n_steps_in: Number of rows in each input sample.
        n_steps_out: Number of rows in each output sample.
        overlop: Stride, in rows, between consecutive sample starts.

    Returns:
        Tuple ``(X, y)`` of numpy arrays built from the collected samples.
    """
    n_rows = train.shape[0] * train.shape[1]
    flat = train.reshape((n_rows, train.shape[2]))
    total = len(flat)
    span = n_steps_in + n_steps_out
    last_start = total - span  # largest start that still fits a full sample
    inputs, targets = [], []
    for start in range(0, total, overlop):
        if start > last_start:
            break
        split_at = start + n_steps_in
        inputs.append(flat[start:split_at, :])
        targets.append(flat[split_at:start + span, :])
    return array(inputs), array(targets)
# train the model
def build_model(train, n_input, config):
    """Build and fit the encoder-decoder LSTM on the training windows.

    Args:
        train: Windowed training data passed to ``to_supervised``.
        n_input: Number of input time steps per sample.
        config: Sequence ``(n_nodes, n_epochs, n_batch)``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    n_nodes, n_epochs, n_batch = config
    train_x, train_y = to_supervised(train, n_input)
    n_timesteps = train_x.shape[1]
    n_features = train_x.shape[2]
    n_outputs = train_y.shape[1]
    # Targets are shaped [samples, timesteps, features].
    train_y = train_y.reshape((train_y.shape[0], n_outputs, n_features))

    # NOTE(review): relu inside the LSTM cells is unbounded and may be related
    # to the NaN / constant predictions described in the question - confirm
    # against the default tanh activation.
    layers = (
        # Encoder: summarises the input window into one vector.
        LSTM(n_nodes, activation='relu', input_shape=(n_timesteps, n_features),
             recurrent_dropout=0.6),
        # The vector is repeated once per output step for the decoder.
        RepeatVector(n_outputs),
        LSTM(n_nodes, activation='relu', return_sequences=True, recurrent_dropout=0.6),
        TimeDistributed(Dense(n_nodes, activation='relu')),
        # One value per feature at every output step.
        TimeDistributed(Dense(n_features)),
    )
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='mse', optimizer='adam')
    model.fit(train_x, train_y, epochs=n_epochs, batch_size=n_batch, verbose=0)
    return model
# make a forecast
def forecast(model, history, n_input):
    """Predict the next output window from the most recent observations.

    Args:
        model: Object exposing ``predict(x, verbose=0)``.
        history: Sequence of equally shaped 2-D windows ``(rows, features)``.
        n_input: Number of trailing rows of the flattened history to feed in.

    Returns:
        The first (and only) element of the model's prediction batch.
    """
    stacked = array(history)
    n_rows = stacked.shape[0] * stacked.shape[1]
    flat = stacked.reshape((n_rows, stacked.shape[2]))
    # Keep only the last n_input rows and add a leading batch axis of size 1.
    recent = flat[-n_input:, :]
    batch = recent.reshape((1, recent.shape[0], recent.shape[1]))
    return model.predict(batch, verbose=0)[0]
# evaluate a single model
def evaluate_model(train, test, n_input, cfg):
    """Fit one configuration and score it with walk-forward validation.

    For each test window a forecast is made from the history so far, then the
    real window is appended to the history. Predictions and test data are
    converted back to the original scale before scoring.

    Note: the scaling is inverted with the module-level ``scaler``, which
    must be defined before this function is called.

    Args:
        train: Scaled training windows.
        test: Scaled test windows (3-D array).
        n_input: Number of input time steps per forecast.
        cfg: Sequence ``(n_nodes, n_epochs, n_batch)``.

    Returns:
        Tuple ``(cfg[0], cfg[1], cfg[2], score, scores, run_time)``.
    """
    started = timer()
    model = build_model(train, n_input, cfg)

    # Walk forward: predict a window, then reveal the true window.
    history = list(train)
    predictions = list()
    for observed in test:
        predictions.append(forecast(model, history, n_input))
        history.append(observed)
    predictions = array(predictions)

    # Flatten to 2-D rows so the scaler can invert the scaling.
    flat_pred = predictions.reshape(
        (predictions.shape[0] * predictions.shape[1], predictions.shape[2]))
    flat_pred = scaler.inverse_transform(flat_pred)
    flat_test = test.reshape((test.shape[0] * test.shape[1], test.shape[2]))
    flat_test = scaler.inverse_transform(flat_test)

    # Restore the 97-row windows for scoring.
    pred_windows = array(split(flat_pred, len(flat_pred) / 97))
    test_windows = array(split(flat_test, len(flat_test) / 97))
    score, scores = evaluate_forecasts(test_windows, pred_windows)
    run_time = timer() - started
    return cfg[0], cfg[1], cfg[2], score, scores, run_time
# load the new file
# Load the preprocessed CSV: row 0 is the header and column 0 the index.
dataset = read_csv(
'data_preproccess_5.csv',
header=0,
index_col=0)
# Split into scaled train/test windows. `scaler` must remain a module-level
# name: evaluate_model() reads it as a global to invert the scaling.
scaler, train_scaled, test_scaled = split_dataset(dataset.values)
# Number of past rows fed to the model per forecast: 7 windows of 97 rows.
n_input = 7 * 97
# Hyper-parameter grid to evaluate.
cfg_list = model_configs()
# One result tuple per configuration; each call trains a model from scratch.
scores = [
evaluate_model(
train_scaled,
test_scaled,
n_input,
cfg) for cfg in cfg_list]
Please provide some sample data.
sample data
If you have multi-step output, you can simply flatten both the predictions and the true values and compute the error over them.
My split datasets
`trainX, trainY, testX, testY`
Get Prediction Results
`trainPredict = model.predict(trainX)`
`testPredict = model.predict(testX)`
Reshape the Predictions and Real Values
`trainY = trainY.reshape(-1, )`
`trainPredict = trainPredict.reshape(-1, )`
`testY = testY.reshape(-1, )`
`testPredict = testPredict.reshape(-1, )`
Calculate root mean squared error
`print('Train Root mean squared error: {}'.format(math.sqrt(mean_squared_error(trainY, trainPredict))))`
`print('Test Root mean squared error: {}'.format(math.sqrt(mean_squared_error(testY, testPredict))))`
I've run into a really strange problem: my squared loss becomes negative. Here's my code.
#!/usr/bin/python
# -*- coding:utf8 -*-
from __future__ import print_function
from models.vgg16 import VGG16_fixed
from keras.backend.tensorflow_backend import set_session
from scipy.misc import imsave
from models.generative_model_v2 import gen_model_v2
from scripts.image_process import *
from scripts.utils_func import *
from tensorflow.python import debug as tf_debug
import tensorflow as tf
import os
import time
# Configure GPU usage. The environment variables are set before any session
# is created; the session built from `config` is registered with Keras.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
set_session(tf.Session(config=config)) # pass gpu setting to Keras
# set learning phase, or batch norm won't work
# NOTE(review): `K` is not imported explicitly in this script; presumably it
# arrives through one of the star imports above - confirm.
K.set_learning_phase(1)
# dataset setting
width, height = 256, 256
coco_img_path = '../../dataset/coco/images/train2014/'
sl_img_path = './images/style/'
# a trade-off coefficient between content loss and style loss, which is multiplied with style loss
alpha = 1
# Per-image shapes (without the batch axis) for the input placeholders,
# laid out according to the Keras image data format.
if K.image_data_format() == 'channels_last':
content_img_shape = [width, height, 3]
style_img_shape = [width, height, 3]
else:
content_img_shape = [3, width, height]
style_img_shape = [3, width, height]
# Placeholders for the content and style image batches (batch size unknown).
with tf.name_scope('input'):
content_img = tf.placeholder(dtype='float32',
shape=(None, content_img_shape[0], content_img_shape[1], content_img_shape[2]),
name='content_img')
style_img = tf.placeholder(dtype='float32',
shape=(None, style_img_shape[0], style_img_shape[1], style_img_shape[2]),
name='style_img')
# Build the generator; `outputs` is the tensor it produces.
main_model, outputs = gen_model_v2(input_content_tensor=content_img, input_style_tensor=style_img)
# Stack [content, generated, style] along axis 0 so one VGG16 pass yields the
# features of all three; the loss code indexes them as 0, 1 and 2.
concact_input = K.concatenate([content_img,
outputs,
style_img], axis=0)
vgg16_model = VGG16_fixed(input_tensor=concact_input,
weights='imagenet', include_top=False)
# get the symbolic outputs of each "key" layer (we gave them unique names).
vgg16_outputs_dict = dict([(layer.name, layer.output) for layer in vgg16_model.layers])
# Layers used by the losses: one layer for content, four for style.
content_feature_layers = 'block3_conv3'
style_feature_layers = ['block1_conv2', 'block2_conv2',
'block3_conv3', 'block4_conv3']
# content loss
# The losses are built from plain tensors. Accumulating into K.variable(0.)
# creates variables that the optimizer treats as trainable, so minimize()
# can drive them below zero and make a sum of squares appear negative.
layer_features = vgg16_outputs_dict[content_feature_layers]
content_img_features = layer_features[0, :, :, :]  # index 0: content image
outputs_img_features = layer_features[1, :, :, :]  # index 1: generated image
ct_loss = content_loss(content_img_features, outputs_img_features)

# style loss: equally weighted average over the style layers, scaled by alpha
# float() keeps the weight fractional even where `/` is integer division.
layer_weight = float(alpha) / len(style_feature_layers)
sl_loss = 0.
for layer_name in style_feature_layers:
    layer_features = vgg16_outputs_dict[layer_name]
    outputs_img_features = layer_features[1, :, :, :]  # index 1: generated image
    style_img_features = layer_features[2, :, :, :]  # index 2: style image
    sl = style_loss(style_img_features, outputs_img_features)
    sl_loss = sl_loss + layer_weight * sl

# combine loss
loss = ct_loss + sl_loss

# write in summary
tf.summary.scalar('content_loss', ct_loss)
tf.summary.scalar("style_loss", sl_loss)
tf.summary.scalar("loss", loss)

# optimization
# NOTE(review): minimize() without var_list updates every trainable variable
# in the graph; confirm that VGG16_fixed keeps its weights out of that set.
train_op = tf.train.AdamOptimizer(learning_rate=0.001,
                                  beta1=0.9,
                                  beta2=0.999,
                                  epsilon=1e-08).minimize(loss)
with tf.Session(config=config) as sess:
    # Train on a small sample of content images, save the generator, then run
    # one content/style test pair through it and write the result to disk.
    # NOTE(review): this is a new session, separate from the one registered
    # with Keras via set_session(); confirm that the pretrained VGG weights
    # and main_model.save() refer to the variables held by this session.

    # Merge all the summaries and write them out to ./logs/gen_model_v2
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./logs/gen_model_v2',
                                         sess.graph)
    # initialize all variables
    tf.global_variables_initializer().run()

    # get training image
    ct_img_name = [x for x in os.listdir(coco_img_path) if x.endswith(".jpg")]
    ct_img_num = len(ct_img_name)
    print("content image number: ", ct_img_num)
    sl_img_name = [x for x in os.listdir(sl_img_path) if x.endswith(".jpg")]
    sl_img_num = len(sl_img_name)
    print("style image number: ", sl_img_num)

    # start training
    start_time = time.time()
    for i in range(1):
        itr = 0
        for ct_name in ct_img_name:
            if itr > 10:  # used to train a small sample of ms coco
                break
            # Cycle through the style images.
            sl_name = sl_img_name[itr % sl_img_num]
            _, loss_val, summary = sess.run(
                [train_op, loss, merged],
                feed_dict={content_img: preprocess_image(coco_img_path + ct_name, height, width),
                           style_img: preprocess_image(sl_img_path + sl_name, height, width)})
            train_writer.add_summary(summary, itr * (i+1))
            print('iteration', itr, 'loss =', loss_val)
            itr += 1
    end_time = time.time()
    print('Training completed in %ds' % (end_time - start_time))
    # Flush pending summaries and release the event file.
    train_writer.close()

    # save model
    main_model.save('./models/gen_model_v2_1.h5')

    # use images to test
    test_ct_img_path = './images/content/train-1.jpg'
    test_ct_img = preprocess_image(test_ct_img_path, height, width)
    test_sl_img_path = './images/style/starry_night.jpg'
    # Load the style image from its own path (the content path was used here
    # before, so the test fed the content image as the style).
    test_sl_img = preprocess_image(test_sl_img_path, height, width)

    # feed test images into model
    output = sess.run(outputs, feed_dict={content_img: test_ct_img, style_img: test_sl_img})
    output = deprocess_image(output)
    print('Output image shape:', output.shape[1:4])
    imsave('./images/autoencoder/test_v2_1.png', output[0])
My loss functions are defined below:
#!/usr/bin/python
# -*- coding:utf8 -*-
import numpy as np
from keras import backend as K
import tensorflow as tf
# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    """Return the Gram matrix (feature-wise outer product) of a 3-D tensor.

    Args:
        x: Rank-3 feature-map tensor in the current Keras image data format.

    Returns:
        Tensor holding the dot product of the flattened per-channel features
        with their own transpose.
    """
    assert K.ndim(x) == 3
    if K.image_data_format() != 'channels_first':
        # Move the channel axis to the front so each row of the flattened
        # matrix holds one channel.
        x = K.permute_dimensions(x, (2, 0, 1))
    features = K.batch_flatten(x)
    return K.dot(features, K.transpose(features))
def style_loss(featuremap_1, featuremap_2):
    """Return the style loss between two rank-3 feature maps.

    The loss is the sum of squared differences between the two Gram matrices,
    divided by ``4 * channels**2 * size**2``, where ``size`` is the product of
    the two spatial dimensions of ``featuremap_1``.

    Args:
        featuremap_1: Rank-3 feature-map tensor (also supplies ``size``).
        featuremap_2: Rank-3 feature-map tensor.

    Returns:
        Scalar tensor with the style loss.
    """
    assert K.ndim(featuremap_1) == 3
    assert K.ndim(featuremap_2) == 3
    g1 = gram_matrix(featuremap_1)
    g2 = gram_matrix(featuremap_2)
    # NOTE(review): fixed normalisation constant; it is not derived from the
    # channel count of the feature maps - confirm this is intended.
    channels = 3
    # Read both spatial dimensions from the dynamic shape in either layout.
    # The channels_first branch used to multiply by featuremap_1[2], a slice
    # of the tensor itself, instead of the size of dimension 2.
    dims = K.shape(featuremap_1)
    if K.image_data_format() == 'channels_first':
        size = dims[1] * dims[2]
    else:
        size = dims[0] * dims[1]
    # Cast to float so the division below is done in floating point.
    size = K.cast(size, tf.float32)
    return K.sum(K.square(g1 - g2)) / (4. * (channels ** 2) * (size ** 2))
def content_loss(base, combination):
    """Return the sum of squared differences between two feature tensors.

    Args:
        base: Feature tensor of the reference (content) image.
        combination: Feature tensor of the generated image.

    Returns:
        Scalar tensor with the summed squared difference.
    """
    difference = combination - base
    return K.sum(K.square(difference))
As you can see, my loss value is squared using K.square(). How can it become negative?
This is the output of my code: the loss decreases sharply and goes below zero, which seems impossible.
You initialize ct_loss as a K.variable, which the optimizer treats as a trainable variable and can push below zero. Just assign the content loss to it directly:
ct_loss = content_loss(content_img_features, outputs_img_features)