PyTorch LSTM not using GPU

I'm trying to train a PyTorch LSTM model connected to a couple of MLP layers. The model is coded as follows:
class RNNBlock(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_layer=1, dropout=0):
        super(RNNBlock, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layer = num_layer
        self.lstm = nn.LSTM(in_dim, hidden_dim, num_layer, dropout)

    def forward(self, onehot, length):
        batch_size = onehot.shape[0]
        h_in = nn.Parameter(torch.randn(self.num_layer, batch_size, self.hidden_dim))
        c_in = nn.Parameter(torch.randn(self.num_layer, batch_size, self.hidden_dim))
        packed = nn.utils.rnn.pack_padded_sequence(onehot, length, batch_first=True)
        output, (h_out, c_out) = self.lstm(packed, (h_in, c_in))
        unpacked, unpacked_length = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        vectors = list()
        for i, vector in enumerate(unpacked):
            vectors.append(unpacked[i, unpacked_length[i]-1, :].view(1, -1))
        out = torch.cat(vectors, 0)
        return out

class Predictor(nn.Module):
    def __init__(self, in_dim, out_dim, act=None):
        super(Predictor, self).__init__()
        self.linear = nn.Linear(in_dim, out_dim)
        nn.init.xavier_normal_(self.linear.weight)
        self.activation = act

    def forward(self, x):
        out = self.linear(x)
        if self.activation != None:
            out = self.activation(out)
        return out

class RNNNet(nn.Module):
    def __init__(self, args):
        super(RNNNet, self).__init__()
        self.rnnBlock = RNNBlock(args.in_dim, args.hidden_dim, args.num_layer, args.dropout)
        self.pred1 = Predictor(args.hidden_dim, args.pred_dim1, act=nn.ReLU())
        self.pred2 = Predictor(args.pred_dim1, args.pred_dim2, act=nn.ReLU())
        self.pred3 = Predictor(args.pred_dim2, args.out_dim)

    def forward(self, onehot, length):
        out = self.rnnBlock(onehot, length)
        out = self.pred1(out)
        out = self.pred2(out)
        out = self.pred3(out)
        return out
and these are my train and experiment functions:
def train(model, device, optimizer, criterion, data_train, bar, args):
    epoch_train_loss = 0
    epoch_train_mae = 0
    for i, batch in enumerate(data_train):
        list_onehot = torch.tensor(batch[0]).cuda().float()
        list_length = torch.tensor(batch[1]).cuda()
        list_logP = torch.tensor(batch[2]).cuda().float()
        # Sort onehot tensor with respect to the sequence length.
        list_length, list_index = torch.sort(list_length, descending=True)
        list_length.cuda()
        list_index.cuda()
        list_onehot = torch.Tensor([list_onehot.tolist()[i] for i in list_index]).cuda().float()
        model.train()
        optimizer.zero_grad()
        list_pred_logP = model(list_onehot, list_length).squeeze().cuda()
        list_pred_logP.require_grad = False
        train_loss = criterion(list_pred_logP, list_logP)
        train_mae = mean_absolute_error(list_pred_logP.tolist(), list_logP.tolist())
        epoch_train_loss += train_loss.item()
        epoch_train_mae += train_mae
        train_loss.backward()
        optimizer.step()
        bar.update(len(list_onehot))
    epoch_train_loss /= len(data_train)
    epoch_train_mae /= len(data_train)
    return model, epoch_train_loss, epoch_train_mae

def experiment(dict_partition, device, bar, args):
    time_start = time.time()
    model = RNNNet(args)
    model.cuda()
    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.l2_coef)
    elif args.optim == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.l2_coef)
    elif args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.l2_coef)
    else:
        assert False, 'Undefined Optimizer Type'
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    list_train_loss = list()
    list_val_loss = list()
    list_train_mae = list()
    list_val_mae = list()
    data_train = DataLoader(dict_partition['train'], batch_size=args.batch_size, shuffle=args.shuffle)
    data_val = DataLoader(dict_partition['val'], batch_size=args.batch_size, shuffle=args.shuffle)
    for epoch in range(args.epoch):
        scheduler.step()
        model, train_loss, train_mae = train(model, device, optimizer, criterion, data_train, bar, args)
        list_train_loss.append(train_loss)
        list_train_mae.append(train_mae)
        mode, val_loss, val_mae = validate(model, device, criterion, data_val, bar, args)
        list_val_loss.append(val_loss)
        list_val_mae.append(val_mae)
    data_test = DataLoader(dict_partition['test'], batch_size=args.batch_size, shuffle=args.shuffle)
    mae, std, logP_total, pred_logP_total = test(model, device, data_test, args)
    time_end = time.time()
    time_required = time_end - time_start
    args.list_train_loss = list_train_loss
    args.list_val_loss = list_val_loss
    args.list_train_mae = list_train_mae
    args.list_val_mae = list_val_mae
    args.logP_total = logP_total
    args.pred_logP_total = pred_logP_total
    args.mae = mae
    args.std = std
    args.time_required = time_required
    return args
The list_onehot and list_length tensors are loaded from the DataLoader and moved to the GPU. Then, to use a packed sequence as input, I sorted both list_onehot and list_length and moved them to the GPU. The model was moved to the GPU, and the h_in and c_in tensors and the packed sequence object were also on the GPU. However, when I run this code, only the CPU is used, not the GPU. What should I do to train this model on the GPU?
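For reference, a minimal sketch of device-agnostic state creation (an illustrative rewrite of RNNBlock.forward, not the original code): tensors created inside forward stay on whatever device they were created on, so one common pattern is to derive the device from the input batch.

    # Illustrative variant of RNNBlock.forward: initial states follow the input's device.
    def forward(self, onehot, length):
        batch_size = onehot.shape[0]
        device = onehot.device  # CPU or GPU, same as the input batch
        h_in = torch.zeros(self.num_layer, batch_size, self.hidden_dim, device=device)
        c_in = torch.zeros(self.num_layer, batch_size, self.hidden_dim, device=device)
        packed = nn.utils.rnn.pack_padded_sequence(onehot, length, batch_first=True)
        output, (h_out, c_out) = self.lstm(packed, (h_in, c_in))
        unpacked, unpacked_length = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        # Gather the last valid time step of every sequence in the batch
        idx = (unpacked_length - 1).to(device)
        out = unpacked[torch.arange(batch_size, device=device), idx, :]
        return out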

Related

BERT with bidirectional LSTM: loss is not decreasing

I am trying to port the Keras Semantic Similarity example to PyTorch Lightning. I followed all the necessary steps from the Keras example, but the loss is not decreasing at all. I am just curious where I made a mistake.
Here is the colab link
PyTorch Model
MAX_LENGTH = 128

from transformers import BertModel, AdamW
import pytorch_lightning as pl

class BertWithLSTM(pl.LightningModule):
    def __init__(self, train_path, test_path, val_path, num_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased", output_attentions=True)
        self.bert.trainable = False
        self.lstm = nn.LSTM(input_size=768, hidden_size=64, batch_first=True, bidirectional=True, dropout=0.3)
        self.dropout = nn.Dropout(p=0.3)
        self.output = nn.Linear(MAX_LENGTH*2, 3)
        self.num_classes = num_classes
        self.train_path = train_path
        self.test_path = test_path
        self.val_path = val_path
        self.criterion = nn.CrossEntropyLoss()

    def train_dataloader(self):
        train_dataset = CustomDataset(csv_path=self.train_path)
        return DataLoader(dataset=train_dataset, batch_size=32, shuffle=False)

    def test_dataloader(self):
        test_dataset = CustomDataset(csv_path=self.test_path)
        return DataLoader(dataset=test_dataset)

    def val_dataloader(self):
        val_dataset = CustomDataset(csv_path=self.val_path)
        return DataLoader(dataset=val_dataset, batch_size=16, shuffle=True)

    def training_step(self, train_batch, batch_idx):
        input_ids, attention_mask, token_type_ids, labels = train_batch
        output = self(input_ids, attention_mask, token_type_ids, labels)
        loss = F.cross_entropy(output, labels)
        self.log("training_loss", loss, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        input_ids, attention_mask, token_type_ids, labels = val_batch
        output = self(input_ids, attention_mask, token_type_ids, labels)
        loss = F.cross_entropy(output, labels)
        self.log("val_loss", loss, on_epoch=True, prog_bar=True, logger=True)

    def forward(self, input_ids, attention_mask, token_type_ids, label):
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        sequence_output = bert_output.last_hidden_state
        output, (hidden_state, c_n) = self.lstm(sequence_output)
        avg_pool = torch.mean(output, dim=1)
        max_pool, indices = torch.max(output, dim=1)
        output = torch.cat((avg_pool, max_pool), 1)
        output = self.output(output)
        return output

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters())
        return optimizer
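One detail worth noting about the code above (an observation, not a verified cause of the stalled loss): self.bert.trainable = False is a Keras idiom and has no effect on a PyTorch module. If the intent is to freeze BERT, a minimal PyTorch sketch for __init__ would be:

    # Hypothetical replacement for `self.bert.trainable = False`:
    for param in self.bert.parameters():
        param.requires_grad = False  # exclude BERT weights from gradient updates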

Training by parts in Keras

I need your help.
My project, broadly, is the development of an image-analysis algorithm for the quantification of ferritin in the sclera.
My code mixes segmentation and regression in a single sequential model with a single output (i.e., both are trained at the same time). I want the training to be done in parts: first the segmentation training runs, its result is recorded, and then the regression training starts.
The inputs for segmentation are the images plus their masks;
the inputs for regression are the images plus the ferritin values.
The segmentation and regression layers are renamed
because some of them share the same name when backbones are used:
def load_model(segmentation_model, regression_model, width, height, num_classes=1):
    # Rename segmentation model layers and weights
    for layer in segmentation_model.layers:
        rename(segmentation_model, layer, layer.name + '_seg')
    #for i, w in enumerate(segmentation_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_seg' + '/' + split_name[1]
    #    segmentation_model.weights[i]._handle_name = new_name
    # Rename regression model layers
    for layer in regression_model.layers:
        rename(regression_model, layer, layer.name + '_reg')
    #for i, w in enumerate(regression_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_reg' + '/' + split_name[1]
    #    regression_model.weights[i]._handle_name = new_name
    image = layers.Input(shape=(width, height, 3), name="img")
    mask_image = segmentation_model(image)
    if num_classes == 1:
        mask_image_categorical = K.cast(K.squeeze(mask_image, axis=3) + 0.5, dtype='int32')  # Threshold at 0.5
    else:
        mask_image_categorical = K.argmax(mask_image, axis=3)
    masked_layer = mylayers.CustomMasking(mask_value=0)
    masked_image = masked_layer.call([image, mask_image_categorical])
    value = regression_model(masked_image)
    m = models.Model(inputs=image, outputs=[mask_image, value])
    #m = models.Model(inputs=image, outputs=[mask_image, value, mask_image_categorical, masked_image])
    #for i, w in enumerate(m.weights): print(i, w.name)
    m.summary()
    return m

def make_train(model, regression_loss_weight, regression_loss_weight_max, train_generator, epochs, val_dataset, validation_steps, weights_path, logger_path, num_classes, focal, enable_plot, init_mode=False):
    optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    regression_loss_weight_variable = K.variable(regression_loss_weight, name='regression_loss_weight')
    mse = losses.MeanSquaredError()
    weighted_mse = weighted_loss(mse, regression_loss_weight_variable)
    weighted_mse.trainable = False
    if focal:
        if num_classes == 1:
            bce = mylosses.binary_focal_loss
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            fce = mylosses.categorical_focal_loss
            weighted_fce = weighted_loss(fce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_fce, 'model_regression': weighted_mse}
    else:
        if num_classes == 1:
            bce = losses.BinaryCrossentropy(from_logits=True)
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            cce = losses.CategoricalCrossentropy()
            weighted_cce = weighted_loss(cce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_cce, 'model_regression': weighted_mse}
    metric = metrics.BinaryAccuracy() if num_classes == 1 else metrics.CategoricalAccuracy()
    metric_array_segmentation = [metric, mymetrics.iou_score_threshold, mymetrics.f1_score_threshold]
    metric_array_regression = [metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError(), metrics.MeanAbsolutePercentageError()]
    metric_dict = {'model_segmentation': metric_array_segmentation, 'model_regression': metric_array_regression}
    loss_weights = [1.0, 1.0]  # Weight for regression is taken into account in weighted_mse loss function
    model.compile(optimizer, loss, metric_dict, loss_weights)
    loss_weight_callback = LossWeightsCallback(regression_loss_weight_variable, regression_loss_weight, regression_loss_weight_max, epochs, 'val_model_segmentation_f1-score')
    freezing_callback = SegmentationModelFreezingCallback(model, 'val_model_segmentation_f1-score', 0.95)
    checkpoint = ModelCheckpoint(weights_path, monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, save_best_only=True)
    csv_logger = CSVLogger(logger_path, append=True, separator=';')
    lr_reducer = ReduceLROnPlateau(monitor='val_model_regression_root_mean_squared_error', mode='min', factor=0.2, patience=10, min_lr=10e-7, min_delta=0.01, verbose=1)
    earlystopping = EarlyStopping(monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, patience=20, restore_best_weights=True)
    callbacks_list = [loss_weight_callback, freezing_callback, checkpoint, csv_logger, lr_reducer, earlystopping]
    # Test custom masking layer or global model
    #instance = train_generator[0]
    #imgs = np.squeeze(instance[0], axis=3) if instance[0].shape[3] == 1 else instance[0]
    #imsave("unmasked_img.png", imgs[0])
    #masks = np.squeeze(instance[1]['model_segmentation'], axis=3) if instance[1]['model_segmentation'].shape[3] == 1 else instance[1]['model_segmentation']
    #imsave("mask.png", masks[0] * 255)
    #masked_layer = mylayers.CustomMasking(mask_value=0)
    #masked_imgs = masked_layer.call([imgs, masks])
    #img = K.eval(masked_imgs[0,:,:,:])
    #imsave("masked_img.png", img)
    #y = model(imgs)
    #mask_image = y[0][0,:,:,:]
    #value = K.eval(y[1][0])
    if init_mode:
        instance = train_generator[0]
        model.train_on_batch(instance[0][:1], [instance[1]['model_segmentation'][:1], instance[1]['model_regression'][:1]])
    else:
        results = model.fit(train_generator, epochs=epochs, validation_data=val_dataset, validation_steps=validation_steps, callbacks=callbacks_list)
        model.save_weights(weights_path)
        # Display of metrics and loss vs epochs: metric names must match the metric functions
        if enable_plot:
            if num_classes == 1:
                plot_history(results,
                             metrics=['model_segmentation_binary_accuracy', 'val_model_segmentation_binary_accuracy', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
            else:
                plot_history(results,
                             metrics=['model_segmentation_categorical_accuracy', 'val_model_segmentation_categorical_accuracy', 'model_segmentation_mean_io_u', 'val_model_segmentation_mean_io_u', 'model_segmentation_f1-score', 'val_model_segmentation_f1-score', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
Defining the model and loading its weights is not enough when using a custom model and/or layer; the model must also be compiled and trained on a minimal set in order to be initialized:
def init_model(model, loss_weight, loss_weight_max, train_generator, epochs, weights_path, logger_path, num_classes, focal):
    make_train(model, loss_weight, loss_weight_max, train_generator, epochs, None, None, '', logger_path, num_classes, focal, False, True)
    model.load_weights(weights_path)
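For the "per part" training described at the top of this question, a rough sketch of the idea (with hypothetical names such as seg_generator, reg_generator, segmentation_epochs and regression_epochs, not the project's actual code) is to fit the segmentation model on images and masks first, freeze it, and only then fit the combined model so that effectively only the regression branch is updated:

    # Hedged sketch of two-stage training; generator and epoch names are assumptions.
    segmentation_model.compile(optimizer='adam', loss='binary_crossentropy')
    segmentation_model.fit(seg_generator, epochs=segmentation_epochs)
    segmentation_model.save_weights('segmentation_weights.h5')

    segmentation_model.trainable = False  # freeze the segmentation branch
    combined = load_model(segmentation_model, regression_model, width, height, num_classes=1)
    combined.compile(optimizer='adam',
                     loss={'model_segmentation': 'binary_crossentropy', 'model_regression': 'mse'},
                     loss_weights=[0.0, 1.0])  # only the regression output drives the updates
    combined.fit(reg_generator, epochs=regression_epochs)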

How to input CSV data into an autoencoder

I am using the code below, which implements an autoencoder. How can I feed the autoencoder data for training and testing?
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

class Autoencoder(object):
    def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus, optimizer=tf.train.AdamOptimizer()):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        network_weights = self._initialize_weights()
        self.weights = network_weights
        # model
        self.x = tf.placeholder(tf.float32, [None, self.n_input])
        self.hidden = self.transfer(tf.add(tf.matmul(self.x, self.weights['w1']), self.weights['b1']))
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
        # cost
        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def _initialize_weights(self):
        all_weights = dict()
        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
                                            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32))
        return all_weights

    def partial_fit(self, X):
        cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict={self.x: X})
        return cost

    def calc_total_cost(self, X):
        return self.sess.run(self.cost, feed_dict={self.x: X})

    def transform(self, X):
        return self.sess.run(self.hidden, feed_dict={self.x: X})

    def generate(self, hidden=None):
        if hidden is None:
            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
        return self.sess.run(self.reconstruction, feed_dict={self.hidden: hidden})

    def reconstruct(self, X):
        return self.sess.run(self.reconstruction, feed_dict={self.x: X})

    def getWeights(self):
        return self.sess.run(self.weights['w1'])

    def getBiases(self):
        return self.sess.run(self.weights['b1'])

# I instantiate the Autoencoder class; 5 is the dimension of a raw input,
# 2 is the dimension of the hidden layer
autoencoder = Autoencoder(5, 2, transfer_function=tf.nn.softplus,
                          optimizer=tf.train.AdamOptimizer())

# I prepare my data
IRIS_TRAINING = "C:\\Users\\Desktop\\iris_training.csv"

# Feeding data to the autoencoder ???
# Train and test ??
How can I train this model with data from a CSV file? I think I need to run something like _, c = sess.run([optimizer, cost], feed_dict={self.x: batch_of_data}) inside a loop over epochs, but I am struggling with it.
Check out Stanford CS20SI's tutorial.
https://github.com/chiphuyen/tf-stanford-tutorials/blob/master/examples/05_csv_reader.py
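As a minimal sketch (assuming the CSV has a one-line header and five numeric columns per row, matching the n_input=5 used above; the real iris file may differ), the data can be loaded into a NumPy array and fed to partial_fit in an epoch loop:

    import numpy as np

    # Load the whole CSV into a float32 array (header skipped; adjust to the real file layout).
    data = np.genfromtxt(IRIS_TRAINING, delimiter=',', skip_header=1, dtype=np.float32)

    batch_size = 32
    n_epochs = 50
    for epoch in range(n_epochs):
        np.random.shuffle(data)                            # reshuffle rows every epoch
        for start in range(0, len(data), batch_size):
            batch = data[start:start + batch_size]
            cost = autoencoder.partial_fit(batch)          # one optimizer step on this mini-batch
        print(epoch, autoencoder.calc_total_cost(data))    # reconstruction cost on the full set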

Tensorflow RNN: Perplexity per Epoch remains constant

I am training an RNN-based language model using TensorFlow. The model is very similar to the PTB model example in the TF tutorials section. However, when I attempt to train the model on my own data, the perplexity does not go down; it remains constant throughout multiple epochs. Could anyone let me know what I might be doing wrong?
I have a feeling that I am not handling the targets properly, but the gist of my code for the targets is:
def batcher(batch_size, unroll_steps, data, pad):
    print(len(data))
    batches = len(data) / batch_size
    inp = []
    target = []
    for i in range(batches):
        #print(len(data[i*batch_size:(i+1)*batch_size]))
        x = data[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        yield (x, y)
That is, I just shift the data by 1 and use that as the target for the next word in a sentence.
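As an illustration (made-up token ids, not from the actual dataset): for a line [7, 12, 4, 9] with pad = 0, the batcher above produces

    x = [7, 12, 4, 9]     # input line
    y = [12, 4, 9, 0]     # line[1:] + [pad]: the target at each position is the next word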
The training script and the model class are shown below.
Training script (excerpt):
def train(session, model, folder, batch_size, unroll_steps, epoch):
    word_to_id, id_to_word, train, val = build_inputs(folder, unroll_steps)
    pad = word_to_id['<pad>']
    costs = 0
    iters = 0
    train_size = len(train)
    batch_size = model.batch_size
    batches = train_size / batch_size
    state = session.run(model._initial_state)
    print("Running epoch %d" % epoch)
    for i in range(batches):
        fetches = [model.cost, model._final_state, model.logits]
        feed_dict = {}
        x = train[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        feed_dict[model.input] = x
        feed_dict[model.targets] = y
        feed_dict[model._initial_state] = state
        #print("Cell-state complete - Running")
        cost, state, logits = session.run(fetches, feed_dict)
        #print("Single Run complete")
        costs += cost
        iters += model.unroll_steps
    print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))
    return np.exp(costs/iters)
Model:
import tensorflow as tf

class LM(object):
    def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
        self.batch_size = batch_size
        self.max_gradient = max_gradient
        self.layers = layers
        self.learning_rate = learning_rate
        self.unroll_steps = unroll_steps
        self.init = init
        #with tf.name_scope("Paramters"):
        with tf.device('/gpu:0'), tf.name_scope("Input"):
            self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
            self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
            #self.init = tf.placeholder(tf.float32, shape=[], name="init")
        with tf.device('/gpu:0'), tf.name_scope("Embedding"):
            embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
            embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")
        with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
            if train and prob < 1.0:
                lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
            cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)
            self._initial_state = cell.zero_state(batch_size, tf.float32)
            outputs = []
            state = self._initial_state
            for step in range(unroll_steps):
                if step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(embedded_input[:, step, :], state)
                outputs.append(cell_output)
        with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            output = tf.reshape(tf.concat(outputs, 1), [-1, size])
            softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
            logits = tf.matmul(output, softmax_w) + softmax_b
            losses = []
            for logit, target in zip([logits], [tf.reshape(self.targets, [-1])]):
                target = tf.reshape(target, [-1])
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=target)
                losses.append(loss)
            self.cost = tf.reduce_sum(losses) / batch_size
            self._final_state = state
            self.logits = logits
            scope.reuse_variables()
        if not train:
            return
        with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
            train_variables = tf.trainable_variables()
            gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables), self.max_gradient)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.training = optimizer.apply_gradients(zip(gradients, train_variables))
            tf.get_variable_scope().reuse_variables()

Tensorflow GraphDef cannot be larger than 2GB

I'm trying to implement a deep autoencoder with TensorFlow. The RBM pretraining works just fine, but when it comes to fine-tuning, it raises the error 'ValueError: GraphDef cannot be larger than 2GB'. My input is an array of shape [12396, 8192], and these are my layers: [8192, 16384, 8192, 4096, 2048, 1024, 512, 256, 512, 1024, 2048, 4096, 8192, 16384, 8192].
I know where the problem is, but I have no idea how to fix it. I have thought about using multiple graphs, but what if my input is too big to even store one layer? Besides, I don't know how many graphs I should use. Set up a graph for every layer? That would be too slow and unnecessary.
Thank you!
def __init__(self, input_num, layers, rbm_learning_rate, deepnn_learning_rate, rbm_num_epoch,
             deepnn_num_epoch, momentum=0, batch_size=128, data_type='float32'):
    self.input_num = input_num
    self.layers = layers
    self.n_layers = len(self.layers)
    self.rbm_learning_rate = rbm_learning_rate
    self.deepnn_learning_rate = deepnn_learning_rate
    if momentum == 0:
        self.momentum = []
        for _ in range(self.n_layers):
            self.momentum.append(1)
    self.rbm_num_epoch = rbm_num_epoch
    self.deepnn_num_epoch = deepnn_num_epoch
    self.batch_size = batch_size
    self.data_type = data_type
    self.rbm_list = []
    self.rbm_list.append(RBM(self.input_num, self.layers[0], self.rbm_num_epoch,
                             self.momentum[0], self.rbm_learning_rate[0], self.batch_size, self.data_type))
    for i in range(self.n_layers-1):
        self.rbm_list.append(RBM(self.layers[i], self.layers[i+1], self.rbm_num_epoch,
                                 self.momentum[i], self.rbm_learning_rate[i], self.batch_size, self.data_type))

def pretrain(self, train_set):
    self.W_list = []
    self.b_list = []
    self.a_list = []
    if not cmp(train_set.dtype, self.data_type):
        train_set.dtype = self.data_type
    next_train = train_set
    for i, rboltz in enumerate(self.rbm_list):
        next_train = self._pretrain_and_get_para(rboltz, next_train)

def _pretrain_and_get_para(self, rboltz, next_train):
    output, W_out, a_out, b_out = rboltz.fit(next_train)
    self.W_list.append(W_out)
    self.a_list.append(a_out)
    self.b_list.append(b_out)
    return output

def fine_tune(self, train_set):
    m, _ = train_set.shape
    self.num_per_epoch = m / self.batch_size
    train_batch = tf.placeholder(self.data_type, [None, self.input_num])
    logits = self._build_model(train_batch)
    loss = self._loss(logits, train_batch)
    train_op = self._training(loss)
    init = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init)
        for _ in range(self.deepnn_num_epoch):
            for i in range(self.num_per_epoch):
                _, cost = sess.run([train_op, loss], feed_dict=self._feed_build(train_batch, train_set, i))
                print cost

def _feed_build(self, train_batch, train_set, i):
    batch = prepare_data.next_batch(train_set, i, self.batch_size)
    feed_dict = {train_batch: batch}
    return feed_dict

def _build_model(self, train_batch):
    middle_layer = self._make_encoder(train_batch)
    last_layer = self._make_decoder(middle_layer)
    return last_layer

def _make_encoder(self, train_batch):
    encoder = []
    encoder.append(train_batch)
    for i, layer in enumerate(self.layers):
        with tf.name_scope('encoder'+str(i)):
            W = tf.Variable(self.W_list[i], name='weights')
            b = tf.Variable(self.b_list[i], name='biases')
            encoder.append(tf.sigmoid(b + tf.matmul(encoder[i], W)))
    return encoder[self.n_layers]

def _make_decoder(self, middle_layer):
    decoder = []
    decoder.append(middle_layer)
    for i, layer in enumerate(self.layers):
        with tf.name_scope('decoder'+str(i)):
            W = tf.Variable(self.W_list[self.n_layers-i-1], name='weights')
            a = tf.Variable(self.a_list[self.n_layers-i-1], name='biases')
            decoder.append(tf.sigmoid(a + tf.matmul(decoder[i], W, transpose_b=True)))
    return decoder[self.n_layers]

def _loss(self, logits, labels):
    loss = tf.nn.l2_loss(logits-labels)
    return loss

def _training(self, loss):
    optimizer = tf.train.GradientDescentOptimizer(self.deepnn_learning_rate)
    train_op = optimizer.minimize(loss)
    return train_op
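One pattern that is commonly used to keep large pretrained weights out of the GraphDef (a hedged sketch of the general TF1 technique, not necessarily the fix the author ended up with): initialize each variable from a placeholder and feed the NumPy array once at initialization time, instead of embedding it in the graph as a constant via tf.Variable(numpy_array).

    import numpy as np
    import tensorflow as tf

    # Hypothetical large pretrained matrix, standing in for an entry of self.W_list.
    pretrained_w = np.random.randn(8192, 4096).astype(np.float32)

    # The placeholder is used only to initialize the variable, so the array itself
    # never becomes a constant inside the serialized GraphDef.
    w_init = tf.placeholder(tf.float32, shape=pretrained_w.shape)
    W = tf.Variable(w_init, name='weights')

    with tf.Session() as sess:
        sess.run(W.initializer, feed_dict={w_init: pretrained_w})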