I'm trying to tune my CNN's hyperparameters using grid search with the following code:
def create_model():
    """Build and compile the CNN that the grid search fits.

    Returns:
        A compiled Keras ``Sequential`` model whose first layer is a
        32-filter 3x3 ReLU convolution over ``(178, 268, 1)`` inputs,
        compiled with binary cross-entropy loss, the Adam optimizer and
        the ``acc`` metric.
    """
    conv = layers.Conv2D(
        32,
        (3, 3),
        activation='relu',
        input_shape=(178, 268, 1),
    )
    net = Sequential([conv])
    net.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    return net
# Pass the builder function itself, not the result of calling it: the wrapper
# needs a callable so that every clone made by GridSearchCV can build its own
# fresh, untrained network for each parameter combination and CV fold.
model = KerasClassifier(build_fn=create_model, verbose=1)

# Candidate values; the search fits every (batch_size, epochs) pair.
epochs = [10, 20, 30]
batch_size = [40, 60, 80, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)

# 3-fold cross-validation scored by accuracy, run in 3 parallel jobs.
# error_score="raise" re-raises a failing fit instead of recording a score.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=3,
    error_score="raise",
    cv=3,
    scoring="accuracy",
)
results = grid.fit(x_train, y_train)
I keep getting this error and I don't know what I am doing wrong. I need to give x and y variables but it says that the function takes one argument:
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
Input In [57], in <cell line: 1>()
----> 1 results = grid.fit(x_train, y_train)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/model_selection/_search.py:875, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
869 results = self._format_results(
870 all_candidate_params, n_splits, all_out, all_more_results
871 )
873 return results
--> 875 self._run_search(evaluate_candidates)
-> 1389 evaluate_candidates(ParameterGrid(self.param_grid))
BaseSearchCV.fit.<locals>.evaluate_candidates(candidate_params, cv, more_results)
--> 822 out = parallel(
823 delayed(_fit_and_score)(
824 clone(base_estimator),
825 X,
826 y,
827 train=train,
828 test=test,
829 parameters=parameters,
830 split_progress=(split_idx, n_splits),
831 candidate_progress=(cand_idx, n_candidates),
832 **fit_and_score_kwargs,
833 )
--> 1098 self.retrieve()
--> 975 self._output.extend(job.get(timeout=self.timeout))
976 else:
977 self._output.extend(job.get())
--> 567 return future.result(timeout=timeout)
--> 439 return self.__get_result()
--> 391 raise self._exception
392 finally:
393 # Break a reference cycle with the exception in self._exception
394 self = None
TypeError: TargetReshaper.transform() takes 1 positional argument but 2 were given
Can someone highlight what is wrong?
I'm trying to train my first CNN. I split the training images into train and validation data by randomly choosing indices and using Subset and DataLoader. The validation and training splits don't have any of the same indices, so that's not the problem. They also cover the entire dataset.
train = datasets.ImageFolder('train_images', transform=transform)

# Draw indices from the dataset that is actually being indexed. Sizing the
# split from a different collection can produce indices >= len(train), which
# makes Subset fail with "IndexError: list index out of range".
num_samples = len(train)
val_split = random.sample(range(num_samples), int(0.1 * num_samples))

# Set membership keeps building the complementary split linear in dataset size.
val_lookup = set(val_split)
train_split = [idx for idx in range(num_samples) if idx not in val_lookup]

train_data = Subset(train, train_split)
val_data = Subset(train, val_split)
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
val_loader = DataLoader(val_data, batch_size=10, shuffle=False)
However, when I try to enumerate through the train_loader, I get this index out of range error:
IndexError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_8652\2928585573.py in <module>
14 # Run the training batches
---> 15 for b, (X_train, y_train) in enumerate(train_loader):
17 # Apply the model
D:\dum\envs\pytorchenv\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
--> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
D:\dum\envs\pytorchenv\lib\site-packages\torch\utils\data\dataloader.py in <listcomp>(.0)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
--> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
D:\dum\envs\pytorchenv\lib\site-packages\torch\utils\data\dataset.py in __getitem__(self, idx)
106 def __getitem__(self, idx):
--> 107 return self.dataset[self.indices[idx]]
109 def __len__(self):
D:\dum\envs\pytorchenv\lib\site-packages\torchvision\datasets\folder.py in __getitem__(self, index)
129 tuple: (sample, target) where target is class_index of the target class.
130 """
--> 131 path, target = self.samples[index]
132 sample = self.loader(path)
133 if self.transform is not None:
IndexError: list index out of range
Anyone know what the problem is?
I am trying to create a variable with a dynamic shape and update it using SGD. Without momentum, the following code works:
import tensorflow as tf
# Variable declared with an unspecified first dimension; it starts out 32x3.
initial_value = tf.random.normal((32, 3))
x = tf.Variable(initial_value, shape=[None, 3])

# Record the forward pass so the gradient of the sum w.r.t. x can be taken.
with tf.GradientTape() as tape:
    y = tf.reduce_sum(x)
grads = tape.gradient(y, x)

# Apply one SGD step (learning rate 0.01, no momentum) to x.
opt = tf.keras.optimizers.SGD(0.01)
opt.apply_gradients([(grads, x)])
But replacing the line opt = tf.keras.optimizers.SGD(0.01) with opt = tf.keras.optimizers.SGD(0.01, momentum=0.9) throws an error:
<ipython-input-6-66726ccd04f3> in <module>()
9 grads = tape.gradient(y, x)
10 opt = tf.keras.optimizers.SGD(0.01, momentum=0.9)
---> 11 opt.apply_gradients([[grads, x]])
5 frames
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in apply_gradients(self, grads_and_vars, name, experimental_aggregate_gradients)
637 # Create iteration if necessary.
638 with tf.init_scope():
--> 639 self._create_all_weights(var_list)
641 if not grads_and_vars:
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in _create_all_weights(self, var_list)
823 _ = self.iterations
824 self._create_hypers()
--> 825 self._create_slots(var_list)
827 def __getattribute__(self, name):
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/gradient_descent.py in _create_slots(self, var_list)
117 if self._momentum:
118 for var in var_list:
--> 119 self.add_slot(var, "momentum")
121 def _prepare_local(self, var_device, var_dtype, apply_state):
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in add_slot(self, var, slot_name, initializer, shape)
913 dtype=var.dtype,
914 trainable=False,
--> 915 initial_value=initial_value)
916 backend.track_variable(weight)
917 slot_dict[slot_name] = weight
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
151 except Exception as e:
152 filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153 raise e.with_traceback(filtered_tb) from None
154 finally:
155 del filtered_tb
/usr/local/lib/python3.7/dist-packages/keras/initializers/initializers_v2.py in __call__(self, shape, dtype, **kwargs)
143 if _PARTITION_SHAPE in kwargs:
144 shape = kwargs[_PARTITION_SHAPE]
--> 145 return tf.zeros(shape, dtype)
ValueError: Cannot convert a partially known TensorShape (None, 3) to a Tensor.
How can I resolve this?
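Instead of declaring the variable with a partially unknown shape such as [None, 3], give it a fully defined shape, for example (20, 3), when you initialize it. With momentum enabled, the optimizer creates a zero-initialized "momentum" slot for the variable, and tf.zeros cannot be built from a shape that contains None.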
import tensorflow as tf
# The variable takes its fully defined (20, 3) shape from its initial value.
x = tf.Variable(initial_value=tf.random.normal((20, 3)))

with tf.GradientTape() as tape:
    y = tf.reduce_sum(x)
grads = tape.gradient(y, x)

# Plain SGD step with learning rate 0.01.
opt = tf.keras.optimizers.SGD(0.01)
opt.apply_gradients([(grads, x)])
The output of the above code is: <tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>
import tensorflow as tf
# Same update as before, but with momentum; the variable's shape is fully
# defined, so the optimizer can allocate its momentum slot.
x = tf.Variable(initial_value=tf.random.normal((20, 3)))

with tf.GradientTape() as tape:
    y = tf.reduce_sum(x)
grads = tape.gradient(y, x)

opt = tf.keras.optimizers.SGD(0.01, momentum=0.9)
opt.apply_gradients([(grads, x)])
The output of the above code is: <tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>
I was previously able to run Keras Tuner's search method on my model with the GPU runtime of Google Colab, but after switching to the TPU runtime I get the following error. I have not been able to work out how to give the TPU runtime access to Google Cloud Storage so that Keras Tuner can save its model checkpoint folder there. Please help me resolve this issue.
My code:
def post_se(hp):
    """Build and compile the two-branch LSTM / Conv1D model for one tuner trial.

    Args:
        hp: Keras Tuner hyperparameter container; every tunable value is
            drawn from it with ``hp.Choice``.

    Returns:
        A compiled Keras ``Model`` mapping a ``(6, 128)`` input to a
        ``num_classes``-way softmax output.
    """
    # Search spaces shared by the layers of each kind.
    lstm_units = [8, 16, 32, 64, 128, 256, 512]
    dropout_rates = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    conv_filters = [32, 64, 128, 256, 512]
    conv_kernel_sizes = [3, 5, 7, 8, 9]

    ip = Input(shape=(6, 128))

    # Recurrent branch: two LSTMs, each followed by dropout.
    x = Masking()(ip)
    x = LSTM(units=hp.Choice('lstm_1', values=lstm_units), return_sequences=True)(x)
    x = Dropout(hp.Choice(name='Dropout', values=dropout_rates))(x)
    x = LSTM(units=hp.Choice('lstm_2', values=lstm_units))(x)
    x = Dropout(hp.Choice(name='Dropout_2', values=dropout_rates))(x)

    # Convolutional branch on the permuted input: three Conv1D/BatchNorm/ReLU
    # stages; the first two are followed by a squeeze-and-excite block.
    y = Permute((2, 1))(ip)
    for stage in (1, 2, 3):
        # Filters are drawn before the kernel size so the hyperparameters
        # are registered in the same order as before.
        filters = hp.Choice(f'conv_{stage}_filter', values=conv_filters)
        kernel_size = hp.Choice(name=f'conv_{stage}_filter_size', values=conv_kernel_sizes)
        y = Conv1D(filters, kernel_size, padding='same', kernel_initializer='he_uniform')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        if stage < 3:
            y = squeeze_excite_block(y)
    y = GlobalAveragePooling1D()(y)

    x = concatenate([x, y])
    # batch_size = hp.Choice('batch_size', values=[32, 64, 128, 256, 512, 1024, 2048, 4096])
    out = Dense(num_classes, activation='softmax')(x)
    model = Model(ip, out)

    # Always bind an optimizer: previously `opt` was undefined when neither
    # `gpu` nor `tpu` was set. The TPU run uses 8x the base learning rate
    # (presumably scaled for 8 TPU replicas — TODO confirm).
    learning_rate = 0.001
    if tpu:
        learning_rate = 8 * 0.001
    opt = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    # model.summary()
    return model
if gpu:
tuner = kt.tuners.BayesianOptimization(post_se,
# Will stop training if the "val_loss" hasn't improved in 30 epochs.
tuner.search(X_train, train_label, epochs=200, validation_split=0.1, shuffle=True, callbacks=[tensorflow.keras.callbacks.EarlyStopping('val_loss', patience=30)])
if tpu:
with strategy.scope():
tuner = kt.tuners.BayesianOptimization(post_se,
# Will stop training if the "val_loss" hasn't improved in 30 epochs.
tuner.search(X_train, train_label, epochs=200, validation_split=0.1, shuffle=True, callbacks=[tensorflow.keras.callbacks.EarlyStopping('val_loss', patience=30)])
The error log
UnimplementedError Traceback (most recent call last)
/usr/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
10 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in resource_creator_scope(resource_type, resource_creator)
2957 resource_creator):
-> 2958 yield
<ipython-input-15-24c1e1bb603d> in <module>()
17 # Will stop training if the "val_loss" hasn't improved in 30 epochs.
---> 18 tuner.search(X_train, train_label, epochs=200, validation_split=0.1, shuffle=True, callbacks=[tensorflow.keras.callbacks.EarlyStopping('val_loss', patience=30)])
/usr/local/lib/python3.7/dist-packages/keras_tuner/engine/base_tuner.py in search(self, *fit_args, **fit_kwargs)
178 self.on_trial_begin(trial)
--> 179 results = self.run_trial(trial, *fit_args, **fit_kwargs)
180 # `results` is None indicates user updated oracle in `run_trial()`.
/usr/local/lib/python3.7/dist-packages/keras_tuner/engine/tuner.py in run_trial(self, trial, *args, **kwargs)
303 copied_kwargs["callbacks"] = callbacks
--> 304 obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
/usr/local/lib/python3.7/dist-packages/keras_tuner/engine/tuner.py in _build_and_fit_model(self, trial, *args, **kwargs)
233 model = self._try_build(hp)
--> 234 return self.hypermodel.fit(hp, model, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras_tuner/engine/hypermodel.py in fit(self, hp, model, *args, **kwargs)
136 """
--> 137 return model.fit(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in _numpy(self)
1116 except core._NotOkStatusException as e: # pylint: disable=protected-access
-> 1117 raise core._status_to_exception(e) from None # pylint: disable=protected-access
UnimplementedError: File system scheme '[local]' not implemented (file: './untitled_project/trial_78ed6883514d67dc6222064095c134cb/checkpoints/epoch_0/checkpoint_temp/part-00000-of-00001')
Encountered when executing an operation using EagerExecutor. This error cancels all future operations and poisons their output tensors.
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
<ipython-input-15-24c1e1bb603d> in <module>()
16 seed=42)
17 # Will stop training if the "val_loss" hasn't improved in 30 epochs.
---> 18 tuner.search(X_train, train_label, epochs=200, validation_split=0.1, shuffle=True, callbacks=[tensorflow.keras.callbacks.EarlyStopping('val_loss', patience=30)])
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in __exit__(self, exception_type, exception_value, traceback)
454 "tf.distribute.set_strategy() out of `with` scope."),
455 e)
--> 456 _pop_per_thread_mode()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribution_strategy_context.py in _pop_per_thread_mode()
65 def _pop_per_thread_mode():
---> 66 ops.get_default_graph()._distribution_strategy_stack.pop(-1) # pylint: disable=protected-access
IndexError: pop from empty list
For some extra info, I am attaching my code in this post.
This is your error:
UnimplementedError: File system scheme '[local]' not implemented (file: './untitled_project/trial_78ed6883514d67dc6222064095c134cb/checkpoints/epoch_0/checkpoint_temp/part-00000-of-00001')
See https://stackoverflow.com/a/62881833/14043558 for a solution.
I am trying to learn CNN with my own data. The shape of the data is (1224, 15, 23). 1224 is the number of data, and each data is (15, 23). CNN is built with PyTorch.
I think there is no logical error, because Conv2d needs a 4-D tensor and I feed it (batch, channel, x, y).
When I build an instance of the Net class, I get this error:
TypeError: argument 0 is not a Variable
I have been using PyTorch for half a year, but this is the first time I have seen this error and I am still confused.
Here is my code.
class Net(nn.Module):
    """Small CNN: three 3x3 convolutions followed by a two-layer classifier.

    Args:
        n: Number of output classes.
        input_shape: ``(channels, height, width)`` of a single sample.
            Defaults to ``(1, 15, 23)``, the sample shape used by the
            training script.
    """

    def __init__(self, n, input_shape=(1, 15, 23)):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        # Unpadded 3x3 convolutions shrink each spatial dimension by 2;
        # the ReLUs in between do not change the shape.
        self.conv = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),  # 64 x 9 x 17 for a 15 x 23 input
            nn.ReLU(),
        )
        # Size the classifier from the real conv output instead of a
        # hard-coded 64 * 9 * 17, so other input shapes also work.
        conv_out_size = self._get_conv_out(input_shape)
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 128),
            nn.ReLU(),
            nn.Linear(128, n),
        )

    def _get_conv_out(self, shape):
        """Return the number of features the conv stack yields for one sample of ``shape``."""
        o = self.conv(torch.zeros(1, *shape))
        return int(o.numel())

    def forward(self, x):
        """Map a ``(batch, channels, height, width)`` tensor to ``(batch, n)`` class scores."""
        # Flatten everything except the batch dimension for the linear layers.
        conv_out = self.conv(x).view(x.size(0), -1)
        return self.fc(conv_out)
# Script entry point: build the network and run the training loop.
if __name__=='__main__':
    num_epochs = 1
    num_classes = 2
    input_shape = train_img[0].shape # 1, 15, 23
    net = Net(num_classes)
    iteration = 51  # batches per epoch (presumably 1224 samples / 24 per batch — confirm)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)
    loss_list= []
    batch_index = 0
    # train
    for epoch in range(num_epochs):
        for i in range(iteration):
            # Slice BATCH_SIZE samples starting at batch_index.
            # NOTE(review): batch_index is not advanced anywhere in this excerpt — confirm.
            input_img = torch.FloatTensor(train_img[batch_index: batch_index + BATCH_SIZE])
            print(input_img.size()) # 24, 1, 15, 23
            outputs = net(input_img)
            # NOTE(review): `labels` is not assigned in this excerpt — confirm it holds
            # the targets for the current batch.
            loss = criterion(outputs, labels)
            # Backprop
And the error message:
TypeError Traceback (most recent call last)
<ipython-input-179-0f6bc7588c29> in <module>
4 input_shape = train_img[0].shape # 1, 15, 23
----> 6 net = Net(num_classes)
7 iteration = 51
<ipython-input-178-8a68d4a0dc4a> in __init__(self, n)
11 )
---> 13 conv_out_size = self._get_conv_out(input_shape)
14 self.fc = nn.Sequential(nn.Linear(64 * 9 * 17, 128),
15 nn.ReLU(),
<ipython-input-178-8a68d4a0dc4a> in _get_conv_out(self, shape)
19 def _get_conv_out(self, shape):
---> 20 o = self.conv(torch.zeros(1, *shape))
21 return int(np.prod(o.size()))
C:\DTools\Anaconda3\envs\tensorflow\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
355 result = self._slow_forward(*input, **kwargs)
356 else:
--> 357 result = self.forward(*input, **kwargs)
358 for hook in self._forward_hooks.values():
359 hook_result = hook(self, input, result)
C:\DTools\Anaconda3\envs\tensorflow\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
65 def forward(self, input):
66 for module in self._modules.values():
---> 67 input = module(input)
68 return input
C:\DTools\Anaconda3\envs\tensorflow\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
355 result = self._slow_forward(*input, **kwargs)
356 else:
--> 357 result = self.forward(*input, **kwargs)
358 for hook in self._forward_hooks.values():
359 hook_result = hook(self, input, result)
C:\DTools\Anaconda3\envs\tensorflow\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
280 def forward(self, input):
281 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 282 self.padding, self.dilation, self.groups)
C:\DTools\Anaconda3\envs\tensorflow\lib\site-packages\torch\nn\functional.py in conv2d(input, weight, bias, stride, padding, dilation, groups)
88 _pair(0), groups, torch.backends.cudnn.benchmark,
89 torch.backends.cudnn.deterministic, torch.backends.cudnn.enabled)
---> 90 return f(input, weight, bias)
TypeError: argument 0 is not a Variable
Your code actually works for PyTorch >= 0.4.1. I guess your PyTorch version is < 0.4 and so you need to pass a Variable in the following line.
# Inside Net._get_conv_out(self, shape): wrap the dummy input in a Variable
# (required before PyTorch 0.4) and call the module through `self.conv`.
o = self.conv(torch.autograd.Variable(torch.zeros(1, *shape)))
Since PyTorch 0.4, Variable has been merged into Tensor, so wrapping inputs in Variable is no longer necessary and a torch.FloatTensor can be passed directly to NN layers.
def acc(output, label):
    """Return the fraction of samples whose arg-max prediction equals the label.

    Args:
        output: Per-class scores; the predicted class is the arg-max
            along axis 1.
        label: Ground-truth class indices; cast to float32 before the
            comparison.

    Returns:
        The mean of the element-wise matches, as a scalar.
    """
    predicted = output.argmax(axis=1)
    expected = label.astype('float32')
    hits = predicted == expected
    return hits.mean().asscalar()
# Train for 10 epochs, accumulating the mean batch loss and accuracy.
for epoch in range(10):
    train_loss, train_acc, valid_acc = 0., 0., 0.
    tic = time()
    for data, label in train_data:
        # Copy the batch to the first CPU context.
        data = data.copyto(mx.cpu(0))
        label = label.copyto(mx.cpu(0))
        # Run the forward pass and loss computation under autograd recording.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        # NOTE(review): no backward pass or trainer step is visible in this excerpt.
        train_loss += loss.mean().asscalar()
        train_acc += acc(output, label)
When running this part I get the following error. My dataset is in Pascal VOC format.
Traceback (most recent call last)
<ipython-input-7-9926ba7deb21> in <module>()
12 label = label.copyto(mx.cpu(0))
13 with autograd.record():
---> 14 output = net(data)
15 loss = softmax_cross_entropy(output, label)
/home/manasi/.local/lib/python2.7/site-packages/mxnet/gluon/block.pyc in __call__(self, *args)
539 hook(self, args)
--> 541 out = self.forward(*args)
543 for hook in self._forward_hooks.values():
/home/manasi/.local/lib/python2.7/site-packages/mxnet/gluon/nn/basic_layers.pyc in forward(self, x)
51 def forward(self, x):
52 for block in self._children.values():
---> 53 x = block(x)
54 return x
/home/manasi/.local/lib/python2.7/site-packages/mxnet/gluon/block.pyc in __call__(self, *args)
539 hook(self, args)
--> 541 out = self.forward(*args)
543 for hook in self._forward_hooks.values():
/home/manasi/.local/lib/python2.7/site-packages/mxnet/gluon/block.pyc in forward(self, x, *args)
911 params = {i: j.data(ctx) for i, j in self._reg_params.items()}
912 except DeferredInitializationError:
--> 913 self._deferred_infer_shape(x, *args)
914 for _, i in self.params.items():
915 i._finish_deferred_init()
/home/manasi/.local/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _deferred_infer_shape(self, *args)
792 error_msg = "Deferred initialization failed because shape"\
793 " cannot be inferred. {}".format(e)
--> 794 raise ValueError(error_msg)
796 def _call_cached_op(self, *args):
ValueError: Deferred initialization failed because shape cannot be inferred. Error in operator conv2_fwd: [10:56:15] src/operator/nn/convolution.cc:196: Check failed: dilated_ksize_x <= AddPad(dshape[3], param_.pad[1]) (5 vs. 3) kernel size exceed input
The "kernel size exceed input" error is usually seen when your input image is too small for the network. You either need to resize your input image or change the network architecture to remove layers that reduce the spatial dimensions of the feature maps (e.g. pooling layers, or convolutions with stride).