Keras/Deepchem: epochs in data generator for prediction in graph convolutions affects prediction size - tensorflow

I am using graph convolutions in Deepchem/Keras for predicting molecular properties. Following the Deepchem tutorials, I created a data generator. While my code below runs without errors, I fail to understand why the size of pred changes with epochs and batch_size.
First we create some dummy data.
!pip install --pre deepchem
!pip install --pre rdkit
import deepchem as dc
import numpy as np
import tensorflow as tf
from deepchem.feat.mol_graphs import ConvMol
mol = ['C-C-O']*240
ftr = dc.feat.ConvMolFeaturizer(per_atom_fragmentation=False)
X=ftr.featurize(mol)
y = np.arange(0,240,1)
w = np.arange(0,240,1)
ids = np.arange(0,240,1)
ds = dc.data.NumpyDataset(X=X, y=y, ids=ids)
Edit: We use the following function as the generator:
def data_generator(dataset, epochs=1, batch_size=100, pad_batches=True):
    print(dataset)
    for ind, (X_b, y_b, w_b, ids_b) in enumerate(dataset.iterbatches(batch_size, epochs,
                                                                     deterministic=False,
                                                                     pad_batches=pad_batches)):
        multiConvMol = ConvMol.agglomerate_mols(X_b)
        inputs = [multiConvMol.get_atom_features(), multiConvMol.deg_slice,
                  np.array(multiConvMol.membership)]
        for i in range(1, len(multiConvMol.get_deg_adjacency_lists())):
            inputs.append(multiConvMol.get_deg_adjacency_lists()[i])
        labels = [y_b]
        weights = [w_b]
        yield (inputs, labels, weights)
(end edit)
Then we define the model and fit it to the dataset generated above:
# imports needed for the layers used below
from deepchem.models.layers import GraphConv, GraphGather
from tensorflow.keras import layers

batch_size = 100
n_tasks = 1

class TestModel(tf.keras.Model):

    def __init__(self, model=1):
        super(TestModel, self).__init__()
        self.model = model

        #____________Test Model 1___________
        if self.model == 1:
            self.gc1 = GraphConv(128, activation_fn=tf.nn.tanh)
            self.readout = GraphGather(batch_size=batch_size,
                                       activation_fn=tf.nn.tanh)
            self.dense2 = layers.Dense(1)

    def call(self, inputs):
        #____________Test Model 1___________
        if self.model == 1:
            gc1_output = self.gc1(inputs)
            readout_output = self.readout([gc1_output] + inputs[1:])
            dense2_output = self.dense2(readout_output)
        return dense2_output
#Fit_generator
print("_________\nFitting:")
testmodel = dc.models.KerasModel(TestModel(1), loss=dc.models.losses.L2Loss())
testmodel.fit_generator(data_generator(ds, epochs=1, batch_size = 100))
Finally, we try to predict the dataset labels, setting epochs = 2:
#Predict
print("_________\nPredicting:")
pred = testmodel.predict_on_generator(data_generator(ds, epochs = 2, batch_size = 100, pad_batches = True))
print(ds.y.shape, pred.shape)
Giving:
_________
Predicting:
<NumpyDataset X.shape: (240,), y.shape: (240,), w.shape: (240,), ids: [0 1 2 ... 237 238 239], task_names: [0]>
(240,) (600, 1)
However, if I change epochs to 1, the size of pred changes to (300, 1), i.e. half of what we had before. Similarly, changing batch_size affects the prediction size too.
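For concreteness, here is the arithmetic behind the shapes I'm seeing (a small sketch, assuming that with pad_batches = True the last partial batch is padded up to the full batch_size):

n_samples, batch_size = 240, 100
batches_per_epoch = -(-n_samples // batch_size)   # ceil(240 / 100) = 3 padded batches
rows_per_epoch = batches_per_epoch * batch_size   # 3 * 100 = 300 rows
print(rows_per_epoch * 1)  # 300 -> matches pred.shape with epochs = 1
print(rows_per_epoch * 2)  # 600 -> matches pred.shape with epochs = 2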
Can anyone explain what I'm doing wrong?

Related

Object localization MNIST Tensorflow to Pytorch : Losses doesn't decrease

I am trying to convert TensorFlow object localization code into PyTorch. In the original code, the author uses model.compile / model.fit to train the model, so I don't understand how the classification loss for the MNIST digits and the box-regression loss work together. Still, I'm trying to implement my own training loop in PyTorch.
The goal here is, after some preprocessing, to paste the MNIST digits at random positions onto a black square image and then classify and localize (bounding boxes) the digit.
I set two losses, nn.CrossEntropyLoss and nn.MSELoss, and I do (loss_1 + loss_2).backward() to compute the gradients. I know from here and here that this is the right way to compute gradients with two losses.
But still, my loss doesn't decrease, whereas it collapses almost immediately with the TensorFlow code. I checked the model with torchinfo.summary and it seems to behave the same as the TensorFlow implementation.
EDIT :
I looked at the predicted labels of my model and they don't seem to change at all.
This line of code, label_preds, bbox_coords_preds = model(digits), always returns the same values:
label_preds[0] = tensor([[0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156]], device='cuda:0', grad_fn=<SliceBackward0>)
Here are my questions :
Is my custom network set up correctly?
Are my losses set up correctly?
Why don't my label predictions change?
Does my training loop work as well as the .compile and .fit TensorFlow methods?
Thanks a lot !
PYTORCH CODE
class ConvNetwork(nn.Module):
    def __init__(self):
        super(ConvNetwork, self).__init__()
        self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2d_2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv2d_3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.avgPooling2D = nn.AvgPool2d((2,2))
        self.dense_1 = nn.Linear(in_features=3136, out_features=128)
        self.dense_classifier = nn.Linear(in_features=128, out_features=10)
        self.softmax = nn.Softmax(dim=0)
        self.dense_regression = nn.Linear(in_features=128, out_features=4)

    def forward(self, input):
        x = self.avgPooling2D(F.relu(self.conv2d_1(input)))
        x = self.avgPooling2D(F.relu(self.conv2d_2(x)))
        x = self.avgPooling2D(F.relu(self.conv2d_3(x)))
        x = nn.Flatten()(x)
        x = F.relu(self.dense_1(x))
        output_classifier = self.softmax(self.dense_classifier(x))
        output_regression = self.dense_regression(x)
        return [output_classifier, output_regression]
######################################################
learning_rate = 0.1
EPOCHS = 1
BATCH_SIZE = 64
model = ConvNetwork()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
classification_loss = nn.CrossEntropyLoss()
regression_loss = nn.MSELoss()
######################################################
begin_time = time.time()
for epoch in range(EPOCHS):
    tot_loss = 0
    train_start = time.time()
    training_losses = []
    print("-"*20)
    print(" "*5 + f"EPOCH {epoch+1}/{EPOCHS}")
    print("-"*20)
    model.train()
    for batch, (digits, labels, bbox_coords) in enumerate(training_dataset):
        digits, labels, bbox_coords = digits.to(device), labels.to(device), bbox_coords.to(device)
        optimizer.zero_grad()
        [label_preds, bbox_coords_preds] = model(digits)
        class_loss = classification_loss(label_preds, labels)
        box_loss = regression_loss(bbox_coords_preds, bbox_coords)
        training_loss = class_loss + box_loss
        training_loss.backward()
        optimizer.step()

        ######### print part #######################
        training_losses.append(training_loss.item())
        if batch+1 <= len_training_ds//BATCH_SIZE:
            current_training_sample = (batch+1)*BATCH_SIZE
        else:
            current_training_sample = (batch)*BATCH_SIZE + len_training_ds%BATCH_SIZE
        if (batch+1) == 1 or (batch+1)%100 == 0 or (batch+1) == len_training_ds//BATCH_SIZE + 1:
            print(f"Elapsed time : {(time.time()-train_start)/60:.3f}",
                  f" --- Digit : {current_training_sample}/{len_training_ds}",
                  f" : loss = {training_loss:.5f}")
        if batch+1 == (len_training_ds//BATCH_SIZE)+1:
            print(f"Total elapsed time for training : {(time.time()-begin_time)/60:.3f}")
ORIGINAL TENSORFLOW CODE
def feature_extractor(inputs):
    x = tf.keras.layers.Conv2D(16, activation='relu', kernel_size=3, input_shape=(75, 75, 1))(inputs)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(32, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    return x

def dense_layers(inputs):
    x = tf.keras.layers.Flatten()(inputs)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    return x

def classifier(inputs):
    classification_output = tf.keras.layers.Dense(10, activation='softmax', name='classification')(inputs)
    return classification_output

def bounding_box_regression(inputs):
    bounding_box_regression_output = tf.keras.layers.Dense(units='4', name='bounding_box')(inputs)
    return bounding_box_regression_output

def final_model(inputs):
    feature_cnn = feature_extractor(inputs)
    dense_output = dense_layers(feature_cnn)
    classification_output = classifier(dense_output)
    bounding_box_output = bounding_box_regression(dense_output)
    model = tf.keras.Model(inputs=inputs, outputs=[classification_output, bounding_box_output])
    return model

def define_and_compile_model(inputs):
    model = final_model(inputs)
    model.compile(optimizer='adam',
                  loss={'classification': 'categorical_crossentropy',
                        'bounding_box': 'mse'},
                  metrics={'classification': 'accuracy',
                           'bounding_box': 'mse'})
    return model

inputs = tf.keras.layers.Input(shape=(75, 75, 1,))
model = define_and_compile_model(inputs)
EPOCHS = 10 # 45
steps_per_epoch = 60000//BATCH_SIZE  # 60,000 items in this dataset
validation_steps = 1
history = model.fit(training_dataset,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=validation_dataset,
                    validation_steps=validation_steps, epochs=EPOCHS)
loss, classification_loss, bounding_box_loss, classification_accuracy, bounding_box_mse = model.evaluate(validation_dataset, steps=1)
print("Validation accuracy: ", classification_accuracy)
Answering my own question about this bug:
What I found:
I figured out that I was using a Softmax layer in my model while also using nn.CrossEntropyLoss() as the loss.
What this problem was causing:
This loss already applies a softmax internally (doc).
Applying a softmax twice adds noise to the loss and prevents convergence.
What I did:
One should leave a plain linear layer as the output of the classification head.
Another way is to use NLLLoss (doc) instead and keep a (log-)softmax layer in the model class; both options are sketched below.
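A minimal sketch of the two options above (layer names are illustrative, not from the original code):

import torch.nn as nn

# Option 1: keep a plain Linear output and let CrossEntropyLoss apply
# log-softmax internally (it expects raw logits).
logits_head = nn.Linear(128, 10)
ce_loss = nn.CrossEntropyLoss()

# Option 2: put an explicit LogSoftmax in the model and use NLLLoss,
# which expects log-probabilities instead of raw scores.
log_probs_head = nn.Sequential(nn.Linear(128, 10), nn.LogSoftmax(dim=1))
nll_loss = nn.NLLLoss()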
Also:
I don't fully understand how the .compile() and .fit() TensorFlow methods work, but I think they tune the training one way or another (I'm thinking of the learning rate), since I had to decrease the learning rate to 0.001 in PyTorch to "unstick" the loss and make it decrease.
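For what it's worth, the original Keras code calls compile(optimizer='adam') with default settings, and Keras' default Adam learning rate is 0.001, so matching it in PyTorch would look like this (a one-line sketch of the change described above):

# assumed fix: use the same learning rate as Keras' Adam default
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)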

Tensorflow 2 custom loss return nan

I have a model that I compile using binary_crossentropy; the training process goes well and the loss is printed.
model = MyModel()
model.compile(optimizer="adadelta", loss="binary_crossentropy")
data1, data2 = get_random_data(4, 3) # this method return data1:(1000,4),data2:(1000,3)
model.fit([data1, data2], y, batch_size=4)
Then I write a custom loss function, and the loss becomes nan:
import tensorflow.keras.backend as K

class MyModel():
    ...
    def batch_loss(self, y_true, y_pred_batch):
        bottom = K.sum(K.exp(y_pred_batch))
        batch_softmax = K.exp(y_pred_batch) / bottom
        batch_log_likelihood = K.log(batch_softmax)
        loss = K.sum(batch_log_likelihood)
        return loss

model.compile(optimizer="adadelta", loss=model.batch_loss)  # change above compile code to this
I used batch_loss(tf.ones((1,))) to test my loss function, and it seems to return the correct result.
But when it runs as part of training, the loss becomes nan. Where should I start debugging?
model and data code (for those who need it to reproduce):
class MyModel(tf.keras.models.Model):
    def __init__(self):
        super().__init__()
        self.t1A = tf.keras.layers.Dense(300, activation='relu', input_dim=1)
        self.t1B = tf.keras.layers.Dense(300, activation='relu', input_dim=1)
        self.t1v = tf.keras.layers.Dense(128, activation='relu')
        self.t2A = tf.keras.layers.Dense(300, activation='relu')
        self.t2B = tf.keras.layers.Dense(300, activation='relu')
        self.t2v = tf.keras.layers.Dense(128, activation='relu')
        self.out = tf.keras.layers.Dot(axes=1)

    def call(self, inputs, training=None, mask=None):
        u, i = inputs[0], inputs[1]
        u = self.t1A(u)
        u = self.t1B(u)
        u = self.t1v(u)
        i = self.t2A(i)
        i = self.t2B(i)
        i = self.t2v(i)
        out = self.out([u, i])
        return out

def get_random_data(user_feature_num, item_feature_num):
    def get_random_ndarray(data_size, dis_list, feature_num):
        data_list = []
        for i in range(feature_num):
            arr = np.random.randint(dis_list[i], size=data_size)
            data_list.append(arr)
        data = np.array(data_list)
        return np.transpose(data, axes=(1, 0))

    uf_dis, if_dis, data_size = [1000, 2, 10, 20], [10000, 50, 60], 1000
    y = np.zeros(data_size)
    for i in range(int(data_size/10)):
        y[i] = 1
    return get_random_ndarray(data_size, uf_dis, feature_num=user_feature_num), \
           get_random_ndarray(data_size, if_dis, feature_num=item_feature_num), y
The values output by your model are quite large. Combined with the call to exp in your loss function, they quickly grow to nan. You might consider applying an activation function such as a sigmoid to keep the values between 0 and 1.
I think your error is caused by calling exp(). This function grows quickly and returns nan for large inputs.
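As a minimal sketch of one way to keep the same loss while avoiding the overflow (my addition, not from the answers above): subtract the batch maximum before exponentiating. The softmax is unchanged by this shift, but exp stays in a safe range.

import tensorflow.keras.backend as K

def batch_loss_stable(y_true, y_pred_batch):
    # Softmax is invariant to subtracting a constant, so shift by the max
    # to keep K.exp from overflowing to inf (which then yields nan).
    shifted = y_pred_batch - K.max(y_pred_batch)
    batch_softmax = K.exp(shifted) / K.sum(K.exp(shifted))
    batch_log_likelihood = K.log(batch_softmax)
    return K.sum(batch_log_likelihood)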

How to multiply a layer by a constant vector element wise in Keras?

I want to make a weighted average ensemble of 3 of my trained models. So, I want first to multiply the softmax output of a model (element-wise) by a vector and then average the 3 weighted outputs of the 3 models.
I used the following code to multiply the output of the first model by its weight vector:
from keras.layers import Multiply, Average
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
sess = tf.InteractiveSession()
print(resnet_weight_tensor.eval())
sess.close()
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_tensor])
print(resnet_weighted)
new_model=Model(model.input, resnet_weighted)
However, I'm stuck with the following error:
What can I do?
Use Lambda instead of Multiply, and K.constant instead of tf.constant (it is backend-neutral):
resnet_weight_tensor=K.constant(resnet_weights, 'float32')
out = finetuned_model.layers[-1].output
resnet_weighted = Lambda(lambda x: x * resnet_weight_tensor)(out)
FULL EXAMPLE:
# imports assumed by the example below
import numpy as np
from keras.layers import Input, Dense, Lambda, Average
from keras.models import Model
from keras import backend as K

## BUILD MODELS
batch_size = 32
num_batches = 100
input_shape = (4,)
num_classes = 3
model_1 = make_model(input_shape, 8, num_classes)
model_2 = make_model(input_shape, 10, num_classes)
model_3 = make_model(input_shape, 12, num_classes)
## BUILD ENSEMBLE
models = (model_1, model_2, model_3)
models_ins = [model.input for model in models]
models_outs = [model.output for model in models]
outputs_weights = [np.random.random((batch_size, num_classes)),
np.random.random((batch_size, num_classes)),
np.random.random((batch_size, num_classes))]
outs_avg = model_outputs_average(models, outputs_weights)
final_out = Dense(num_classes, activation='softmax')(outs_avg)
model_ensemble = Model(inputs=models_ins, outputs=final_out)
model_ensemble.compile('adam', loss='categorical_crossentropy')
### TEST ENSEMBLE
x1 = np.random.randn(batch_size, *input_shape) # toy data
x2 = np.random.randn(batch_size, *input_shape)
x3 = np.random.randn(batch_size, *input_shape)
y = np.random.randint(0,2,(batch_size, num_classes)) # toy labels
model_ensemble.fit([x1,x2,x3], y)
Verify averaging:
[print(layer.name) for layer in model_ensemble.layers] # show layer names
preouts1 = get_layer_outputs(model_ensemble, 'lambda_1', [x1,x2,x3])
preouts2 = get_layer_outputs(model_ensemble, 'lambda_2', [x1,x2,x3])
preouts3 = get_layer_outputs(model_ensemble, 'lambda_3', [x1,x2,x3])
preouts_avg = get_layer_outputs(model_ensemble, 'average_1',[x1,x2,x3])
preouts = np.asarray([preouts1, preouts2, preouts3])
sum_of_diff_of_means = np.sum(np.mean(preouts, axis=0) - preouts_avg)
print(np.sum(np.mean([preouts1, preouts2, preouts3],axis=0) - preouts_avg))
# 4.69e-07
Functions used:
def make_model(input_shape, dense_dim, num_classes=3):
    ipt = Input(shape=input_shape)
    x = Dense(dense_dim, activation='relu')(ipt)
    out = Dense(num_classes, activation='softmax')(x)
    model = Model(ipt, out)
    model.compile('adam', loss='categorical_crossentropy')
    return model

def model_outputs_average(models, outputs_weights):
    outs = [model.output for model in models]
    out_shape = K.int_shape(outs[0])[1:]  # ignore batch dim
    assert all([(K.int_shape(out)[1:] == out_shape) for out in outs]), \
        "All model output shapes must match"

    outs_weights = [K.constant(w, 'float32') for w in outputs_weights]
    ow_shape = K.int_shape(outs_weights[0])
    assert all([(K.int_shape(w) == ow_shape) for w in outs_weights]), \
        "All outputs_weights and model.output shapes must match"

    weights_layers = [Lambda(lambda x: x * ow)(out) for ow, out
                      in zip(outs_weights, outs)]
    return Average()(weights_layers)

def get_layer_outputs(model, layer_name, input_data, train_mode=False):
    outputs = [layer.output for layer in model.layers if layer_name in layer.name]
    layers_fn = K.function([model.input, K.learning_phase()], outputs)
    return [layers_fn([input_data, int(train_mode)])][0][0]
The bug is possibly caused by mixing the Keras API and the TensorFlow API, since your resnet_weight_tensor is a tensor from the TensorFlow API, while finetuned_model.layers[-1].output is the output of a Keras layer. Some discussion can be seen here: issue 7362.
One workaround is to wrap resnet_weight_tensor in a Keras Input layer.
from keras.layers import Multiply, Average, Input
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
resnet_weight_input = Input(tensor=resnet_weight_tensor)
sess = tf.InteractiveSession()
print(resnet_weight_tensor.eval())
sess.close()
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_input])
print(resnet_weighted)
new_model=Model([model.input, resnet_weight_input], resnet_weighted)

Finetuning DNN with continuous outputs in the last layer

Greatly appreciate it if someone could help me out here:
I'm trying to do some finetuning on a regression task --- my inputs are 200x200 RGB images and my prediction output/label is a set of real values (let's say, within [0,10], though scaling is not a big deal here...?) --- on top of the InceptionV3 architecture. Here are my functions that take a pretrained Inception model, remove the last layer, add a new layer, and set it up for finetuning...
"""
Fine-tuning functions
"""
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
NB_EPOCHS = 3
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172
def eucl_dist(inputs):
    x, y = inputs
    return ((x - y)**2).sum(axis=-1)

def add_new_last_continuous_layer(base_model):
    """Add last layer to the convnet
    Args:
        base_model: keras model excluding top, for instance:
            base_model = InceptionV3(weights='imagenet', include_top=False)
    Returns:
        new keras model with last layer
    """
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(FC_SIZE, activation='relu')(x)
    predictions = Lambda(eucl_dist, output_shape=(1,))(x)
    model = Model(input=base_model.input, output=predictions)
    return model

def setup_to_finetune_continuous(model):
    """Freeze the bottom NB_IV3_LAYERS and retrain the remaining top layers.

    note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in
          the inceptionv3 architecture
    Args:
        model: keras model
    """
    for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
        layer.trainable = False
    for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
        layer.trainable = True
    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                  loss='eucl_dist')
Here are my implementations:
base_model = InceptionV3(weights = "imagenet",
include_top=False, input_shape=(3,200,200))
model0 = add_new_last_continuous_layer(base_model)
setup_to_finetune_continuous(model0)
history=model0.fit(train_x, train_y, validation_data = (test_x, test_y), nb_epoch=epochs, batch_size=32)
scores = model0.evaluate(test_x, test_y, verbose = 0)
features = model0.predict(X_train)
where train_x is a (168435, 3, 200, 200) numpy array and train_y is a (168435,) numpy array. The same goes for test_x and test_y except the number of observations is 42509.
I got a TypeError: 'Tensor' object is not iterable error, which occurred at predictions = Lambda(eucl_dist, output_shape=(1,))(x) when going through the add_new_last_continuous_layer() function. Could anyone kindly give me some guidance on what the problem is and how to get around it? Greatly appreciated, and happy holidays!
EDIT:
Changed the functions to:
def eucl_dist(inputs):
    x, y = inputs
    return ((x - y)**2).sum(axis=-1)

def add_new_last_continuous_layer(base_model):
    """Add last layer to the convnet
    Args:
        base_model: keras model excluding top, for instance:
            base_model = InceptionV3(weights='imagenet', include_top=False)
    Returns:
        new keras model with last layer
    """
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x1 = Dense(FC_SIZE, activation='relu')(x)
    x2 = Dense(FC_SIZE, activation='relu')(x)
    predictions = Lambda(eucl_dist, output_shape=eucl_dist_shape)([x1, x2])
    model = Model(input=base_model.input, output=predictions)
    return model
Your output shape for the lambda layer is wrong. Define your functions like this:
from keras import backend as K
def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))

def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

predictions = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([input1, input2])

why am I getting a 100% error rate (RNN for spam)

I am learning TensorFlow by modifying some examples I've found. To start off, I have taken an RNN example and tried it against the "Spam" data set from UCI.
My code and the sample data set can be found in full here:
https://trinket.io/python/c7d6b95452
When I run the code I get a 100% error rate. I figure that even if this data set is not well suited to this particular model, I'd at least get something better than that, so I don't think the problem is my choice of sample data set.
Below is my Python code. If anyone can suggest how to modify it to get the model working properly, I would appreciate it! I'd also appreciate any general TensorFlow advice.
# Example for my blog post at:
# https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools
import os
import sets
import random
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
def lazy_property(function):
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper


class SequenceClassification:

    def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self.dropout = dropout
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def prediction(self):
        # Recurrent network.
        network = rnn_cell.GRUCell(self._num_hidden)
        network = rnn_cell.DropoutWrapper(
            network, output_keep_prob=self.dropout)
        network = rnn_cell.MultiRNNCell([network] * self._num_layers)
        output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
        # Select last output.
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction

    @lazy_property
    def cost(self):
        cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
        return cross_entropy

    @lazy_property
    def optimize(self):
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)
def main():
    sample_size = 10
    num_classes = 2  # spam or ham

    ##
    # import spam data
    ##
    spam_data = []
    spam_data_train = []
    spam_data_test = []
    data_dir = "."
    data_file = "spam.csv"
    with open(os.path.join(data_dir, data_file), "r") as file_handle:
        for row in file_handle:
            spam_data.append(row)
    spam_data = [line.rstrip().split(",") for line in spam_data if len(line) >= 1]
    random.shuffle(spam_data)
    spam_data_train = spam_data[0:int(len(spam_data)*.8)]
    spam_data_test = spam_data[int(len(spam_data)*.8):int(len(spam_data))]

    def next_train_batch(batch_size):
        a = random.sample(spam_data_train, batch_size)
        return [np.array([line[:-1] for line in a]), np.array([line[len(line)-1] for line in a])]

    def train_batch():
        return [np.array([line[:-1] for line in spam_data_train]),
                np.array([line[len(line)-1] for line in spam_data_train])]

    def next_test_batch(batch_size):
        a = random.sample(spam_data_test, batch_size)
        return [np.array([line[:-1] for line in a]), np.array([line[len(line)-1] for line in a])]

    def test_batch():
        return [np.array([line[:-1] for line in spam_data_test]),
                np.array([line[len(line)-1] for line in spam_data_test])]

    t = train_batch()
    train_input = t[0]
    train_target = t[1]
    test = test_batch()
    test_input = t[0]
    test_target = t[1]

    training_data = tf.placeholder(tf.float32, [None, sample_size, len(train_input[0])], "training_data")
    training_target = tf.placeholder(tf.float32, [None, sample_size], "training_target")
    testing_data = tf.placeholder(tf.float32, [None, len(test_input), len(test_input[0])], "testing_data")
    testing_target = tf.placeholder(tf.float32, [None, len(test_target)], "testing_target")
    dropout = tf.placeholder(tf.float32)

    training_model = SequenceClassification(training_data, training_target, dropout)
    tf.get_variable_scope().reuse_variables()
    testing_model = SequenceClassification(testing_data, testing_target, dropout)

    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    for epoch in range(sample_size):
        for _ in range(100):
            sample = random.sample(range(0, len(train_input)-1), sample_size)
            batch_train = [train_input[i] for i in sample]
            batch_target = [train_target[i] for i in sample]
            sess.run(training_model.optimize, {
                training_data: [batch_train], training_target: [batch_target], dropout: 0.5})
        error = sess.run(testing_model.error, {
            testing_data: [test_input], testing_target: [test_target], dropout: 1.0})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))


if __name__ == '__main__':
    main()
if __name__ == '__main__':
main()