Error resulting from ImageDataGenerator during data augmentation - tensorflow

Can someone please help me fix this error? The code works fine before the for loop: printing the image array succeeds. Is there something wrong with the for loop itself? The expected output is a folder of augmented copies of the input image, which is a jpg.
The code I wrote:
import keras
import tensorflow as tf
from skimage import io  # io.imread below comes from scikit-image
from keras.preprocessing.image import ImageDataGenerator

data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='contrast',
    cval=125
)

x = io.imread('mona.jpg')
x = x.reshape((1, ) + x.shape)  # array with shape (1, 256, 256, 3)

i = 0
for batch in data_gen.flow(x, batch_size=16, save_to_dir='/Users/ghad/Desktop',
                           save_prefix='aug',
                           save_format='jpg'):
    i += 1
    if i > 20:
        break
The generated error:
RuntimeError Traceback (most recent call last)
Input In [14], in <cell line: 31>()
28 x = x.reshape((1, ) + x.shape) #Array with shape (1, 256, 256, 3)
30 i = 0
---> 31 for batch in data_gen.flow(x, batch_size=16,
32 save_to_dir='/Users/ghadahalhabib/Desktop',
33 save_prefix='aug',
34 save_format='jpg'):
35 i += 1
36 if i > 20:
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/keras/preprocessing/image.py:148, in Iterator.__next__(self, *args, **kwargs)
147 def __next__(self, *args, **kwargs):
--> 148 return self.next(*args, **kwargs)
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/keras/preprocessing/image.py:160, in Iterator.next(self)
157 index_array = next(self.index_generator)
158 # The transformation of images is not under thread lock
159 # so it can be done in parallel
--> 160 return self._get_batches_of_transformed_samples(index_array)
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/keras/preprocessing/image.py:709, in NumpyArrayIterator._get_batches_of_transformed_samples(self, index_array)
707 x = self.x[j]
708 params = self.image_data_generator.get_random_transform(x.shape)
--> 709 x = self.image_data_generator.apply_transform(
710 x.astype(self.dtype), params)
711 x = self.image_data_generator.standardize(x)
712 batch_x[i] = x
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/keras/preprocessing/image.py:1800, in ImageDataGenerator.apply_transform(self, x, transform_parameters)
1797 img_col_axis = self.col_axis - 1
1798 img_channel_axis = self.channel_axis - 1
-> 1800 x = apply_affine_transform(
1801 x,
1802 transform_parameters.get('theta', 0),
1803 transform_parameters.get('tx', 0),
1804 transform_parameters.get('ty', 0),
1805 transform_parameters.get('shear', 0),
1806 transform_parameters.get('zx', 1),
1807 transform_parameters.get('zy', 1),
1808 row_axis=img_row_axis,
1809 col_axis=img_col_axis,
1810 channel_axis=img_channel_axis,
1811 fill_mode=self.fill_mode,
1812 cval=self.cval,
1813 order=self.interpolation_order)
1815 if transform_parameters.get('channel_shift_intensity') is not None:
1816 x = apply_channel_shift(x,
1817 transform_parameters['channel_shift_intensity'],
1818 img_channel_axis)
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/keras/preprocessing/image.py:2324, in apply_affine_transform(x, theta, tx, ty, shear, zx, zy, row_axis, col_axis, channel_axis, fill_mode, cval, order)
2321 final_affine_matrix = transform_matrix[:2, :2]
2322 final_offset = transform_matrix[:2, 2]
-> 2324 channel_images = [ndimage.interpolation.affine_transform( # pylint: disable=g-complex-comprehension
2325 x_channel,
2326 final_affine_matrix,
2327 final_offset,
2328 order=order,
2329 mode=fill_mode,
2330 cval=cval) for x_channel in x]
2331 x = np.stack(channel_images, axis=0)
2332 x = np.rollaxis(x, 0, channel_axis + 1)
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/keras/preprocessing/image.py:2324, in <listcomp>(.0)
2321 final_affine_matrix = transform_matrix[:2, :2]
2322 final_offset = transform_matrix[:2, 2]
-> 2324 channel_images = [ndimage.interpolation.affine_transform( # pylint: disable=g-complex-comprehension
2325 x_channel,
2326 final_affine_matrix,
2327 final_offset,
2328 order=order,
2329 mode=fill_mode,
2330 cval=cval) for x_channel in x]
2331 x = np.stack(channel_images, axis=0)
2332 x = np.rollaxis(x, 0, channel_axis + 1)
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/scipy/ndimage/interpolation.py:574, in affine_transform(input, matrix, offset, output_shape, output, order, mode, cval, prefilter)
572 npad = 0
573 filtered = input
--> 574 mode = _ni_support._extend_mode_to_code(mode)
575 matrix = numpy.asarray(matrix, dtype=numpy.float64)
576 if matrix.ndim not in [1, 2] or matrix.shape[0] < 1:
File ~/opt/anaconda3/envs/tensorflow/lib/python3.9/site-packages/scipy/ndimage/_ni_support.py:54, in _extend_mode_to_code(mode)
52 return 6
53 else:
---> 54 raise RuntimeError('boundary mode not supported')
RuntimeError: boundary mode not supported

For the code
for batch in data_gen.flow(x, batch_size=16, save_to_dir='/Users/ghad/Desktop', save_prefix='aug', save_format='jpg'):
you are feeding in only a single image but asking for 16 augmented images per batch. That won't work: normally the length of x is LARGER than the batch size. Set the batch size to 1; that way you will produce 1 augmented image each time you feed a new image into the generator.
Separately, the RuntimeError in the traceback ("boundary mode not supported") is raised because fill_mode='contrast' is not a valid option. ImageDataGenerator accepts only 'constant', 'nearest', 'reflect' and 'wrap'; use fill_mode='constant' if you want cval=125 to be applied.
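A minimal corrected sketch (same image array x and output path as in the question):
data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='constant',  # 'contrast' is not a supported boundary mode
    cval=125)

i = 0
for batch in data_gen.flow(x, batch_size=1, save_to_dir='/Users/ghad/Desktop',
                           save_prefix='aug', save_format='jpg'):
    i += 1
    if i > 20:
        break  # flow() cycles indefinitely, so the loop must stop itself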

Related

AttributeError: 'Adam' object has no attribute 'get_weights'

I am pretty new to TensorFlow/Keras and there is a problem running cross-validation that I could not fix. It all worked before I installed featurewiz (conda install -c conda-forge featurewiz).
from sklearn.model_selection import KFold, cross_validate, cross_val_score
from scikeras.wrappers import KerasClassifier
estimator = KerasClassifier(model, epochs=500, batch_size=10) #, verbose = 0
kfold = KFold(n_splits=5, shuffle=True)
results = cross_validate(estimator, X, y, cv=kfold, scoring=['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted'], return_train_score=True)
print(results)
Error:
WARNING:absl:Found untraced functions such as _update_step_xla while saving (showing 1 of 1). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ram:///var/folders/c4/ywdtx99d1vl0ptsg1fy494_40000gn/T/tmpsuvxkjb9/assets
INFO:tensorflow:Assets written to: ram:///var/folders/c4/ywdtx99d1vl0ptsg1fy494_40000gn/T/tmpsuvxkjb9/assets
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
File ~/tensorflow-test/env/lib/python3.8/site-packages/joblib/parallel.py:862, in Parallel.dispatch_one_batch(self, iterator)
861 try:
--> 862 tasks = self._ready_batches.get(block=False)
863 except queue.Empty:
864 # slice the iterator n_jobs * batchsize items at a time. If the
865 # slice returns less than that, then the current batchsize puts
(...)
868 # accordingly to distribute evenly the last items between all
869 # workers.
File ~/tensorflow-test/env/lib/python3.8/queue.py:167, in Queue.get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
Cell In[5], line 6
4 estimator = KerasClassifier(model, epochs=500, batch_size=10) #, verbose = 0
5 kfold = KFold(n_splits=5, shuffle=True) #seed, damit shuffle gleich bleibt , random_state=1337
----> 6 results = cross_validate(estimator, X, y, cv=kfold, scoring=['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted'], return_train_score=True)
8 print(results)
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:266, in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
263 # We clone the estimator to make sure that all the folds are
264 # independent, and that it is pickle-able.
265 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 266 results = parallel(
267 delayed(_fit_and_score)(
268 clone(estimator),
269 X,
270 y,
271 scorers,
272 train,
273 test,
274 verbose,
275 None,
276 fit_params,
277 return_train_score=return_train_score,
278 return_times=True,
279 return_estimator=return_estimator,
280 error_score=error_score,
281 )
282 for train, test in cv.split(X, y, groups)
283 )
285 _warn_or_raise_about_fit_failures(results, error_score)
287 # For callabe scoring, the return type is only know after calling. If the
288 # return type is a dictionary, the error scores can now be inserted with
289 # the correct key.
File ~/tensorflow-test/env/lib/python3.8/site-packages/joblib/parallel.py:1085, in Parallel.__call__(self, iterable)
1076 try:
1077 # Only set self._iterating to True if at least a batch
1078 # was dispatched. In particular this covers the edge
(...)
1082 # was very quick and its callback already dispatched all the
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1088 while self.dispatch_one_batch(iterator):
File ~/tensorflow-test/env/lib/python3.8/site-packages/joblib/parallel.py:873, in Parallel.dispatch_one_batch(self, iterator)
870 n_jobs = self._cached_effective_n_jobs
871 big_batch_size = batch_size * n_jobs
--> 873 islice = list(itertools.islice(iterator, big_batch_size))
874 if len(islice) == 0:
875 return False
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:268, in <genexpr>(.0)
263 # We clone the estimator to make sure that all the folds are
264 # independent, and that it is pickle-able.
265 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
266 results = parallel(
267 delayed(_fit_and_score)(
--> 268 clone(estimator),
269 X,
270 y,
271 scorers,
272 train,
273 test,
274 verbose,
275 None,
276 fit_params,
277 return_train_score=return_train_score,
278 return_times=True,
279 return_estimator=return_estimator,
280 error_score=error_score,
281 )
282 for train, test in cv.split(X, y, groups)
283 )
285 _warn_or_raise_about_fit_failures(results, error_score)
287 # For callabe scoring, the return type is only know after calling. If the
288 # return type is a dictionary, the error scores can now be inserted with
289 # the correct key.
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/base.py:89, in clone(estimator, safe)
87 new_object_params = estimator.get_params(deep=False)
88 for name, param in new_object_params.items():
---> 89 new_object_params[name] = clone(param, safe=False)
90 new_object = klass(**new_object_params)
91 params_set = new_object.get_params(deep=False)
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/base.py:70, in clone(estimator, safe)
68 elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
69 if not safe:
---> 70 return copy.deepcopy(estimator)
71 else:
72 if isinstance(estimator, type):
File ~/tensorflow-test/env/lib/python3.8/copy.py:153, in deepcopy(x, memo, _nil)
151 copier = getattr(x, "__deepcopy__", None)
152 if copier is not None:
--> 153 y = copier(memo)
154 else:
155 reductor = dispatch_table.get(cls)
File ~/tensorflow-test/env/lib/python3.8/site-packages/scikeras/_saving_utils.py:117, in deepcopy_model(model, memo)
116 def deepcopy_model(model: keras.Model, memo: Dict[Hashable, Any]) -> keras.Model:
--> 117 _, (model_bytes, optimizer_weights) = pack_keras_model(model)
118 new_model = unpack_keras_model(model_bytes, optimizer_weights)
119 memo[model] = new_model
File ~/tensorflow-test/env/lib/python3.8/site-packages/scikeras/_saving_utils.py:108, in pack_keras_model(model)
106 optimizer_weights = None
107 if model.optimizer is not None:
--> 108 optimizer_weights = model.optimizer.get_weights()
109 model_bytes = np.asarray(memoryview(b.read()))
110 return (
111 unpack_keras_model,
112 (model_bytes, optimizer_weights),
113 )
AttributeError: 'Adam' object has no attribute 'get_weights'
I created a TensorFlow environment on my M1 MacBook following https://github.com/mrdbourke/m1-machine-learning-test.
It all worked, and I got the following results:
TensorFlow has access to the following devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow version: 2.11.0
I also installed featurewiz; I am not sure whether there were problems installing it (I did conda install -c conda-forge featurewiz).
SciKeras doesn't work with TensorFlow 2.11. The TensorFlow team released a breaking change in a minor version bump: they removed the optimizer's get_weights() method. It will be fixed in SciKeras soon: https://github.com/adriangb/scikeras/pull/287
Edit: that PR was merged, so the new version of SciKeras (v0.10.0) should solve this issue.
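In practice that means either upgrading SciKeras or pinning TensorFlow below the breaking release; a sketch of both options (the exact pins are assumptions based on the versions mentioned above):
pip install --upgrade "scikeras>=0.10.0"   # option 1: SciKeras release containing the fix
pip install "tensorflow<2.11"              # option 2: stay on a TF that still has get_weights()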

ValueError: Invalid parameter n_estimators for estimator LogisticRegression(random_state=42)

I already looked at the other similar questions, but they did not help me. I'm attempting to use GridSearchCV with three pipelines to predict NFL play data. Everything works pretty well until the grid-search part.
Here is my code.
pipe_nfl1_1 = Pipeline([
    ('ssc', StandardScaler()),
    ('lr', LogisticRegression(random_state=42))
])
pipe_nfl1_2 = Pipeline([
    ('mms', MinMaxScaler()),
    ('rfc', RandomForestClassifier(random_state=42))
])
pipe_nfl1_3 = Pipeline([
    ('mms', MinMaxScaler()),
    ('svc', svm.SVC(random_state=42))
])

pipelines1 = [pipe_nfl1_1, pipe_nfl1_2, pipe_nfl1_3]
pipe_dict1 = {0: 'Logistic Regression', 1: 'Random Forest', 2: 'SVC'}

for pipe in pipelines1:
    pipe.fit(X_train1, y_train1)

print('Pipeline test accuracy for predicting 1st downs:')
for idx, val in enumerate(pipelines1):
    print(' %s: %.4f' % (pipe_dict1[idx], val.score(X_test1, y_test1)))

best_acc1 = 0.0
best_clf1 = 0
best_pipe1 = ''
for idx, val in enumerate(pipelines1):
    if val.score(X_test1, y_test1) > best_acc1:
        best_acc1 = val.score(X_test1, y_test1)
        best_pipe1 = val
        best_clf1 = idx
best_acc1 *= 100
print('Classifier with best accuracy for predicting 1st downs is %s with %.2f' % (pipe_dict1[best_clf1], best_acc1) + '%')

param_grid1 = {
    'lr__n_estimators': [2, 4, 6]
}
grid_search1 = GridSearchCV(pipe_nfl1_1, param_grid1, cv=2)
# fine-tune the hyperparameters
grid_search1.fit(X_train1, y_train1)
# get the best model
final_model1 = grid_search1.best_estimator_
grid_search1.best_score_
But I'm getting an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-33-6b0007d9b8f1> in <module>
2
3 # fine-tune the hyperparameters
----> 4 grid_search1.fit(X_train1, y_train1)
5
6 # get the best model
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
734 return results
735
--> 736 self._run_search(evaluate_candidates)
737
738 # For multi-metric evaluation, store the best_index_, best_params_ and
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1186 def _run_search(self, evaluate_candidates):
1187 """Search all candidates in param_grid"""
-> 1188 evaluate_candidates(ParameterGrid(self.param_grid))
1189
1190
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
706 n_splits, n_candidates, n_candidates * n_splits))
707
--> 708 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
709 X, y,
710 train=train, test=test,
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1027 # remaining jobs.
1028 self._iterating = False
-> 1029 if self.dispatch_one_batch(iterator):
1030 self._iterating = self._original_iterator is not None
1031
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
845 return False
846 else:
--> 847 self._dispatch(tasks)
848 return True
849
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
763 with self._lock:
764 job_idx = len(self._jobs)
--> 765 job = self._backend.apply_async(batch, callback=cb)
766 # A job can complete so quickly than its callback is
767 # called before we get here, causing self._jobs to
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\parallel.py in __call__(self)
250 # change the default number of processes to -1
251 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 252 return [func(*args, **kwargs)
253 for func, args, kwargs in self.items]
254
~\AppData\Local\Programs\Python\Python38\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
250 # change the default number of processes to -1
251 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 252 return [func(*args, **kwargs)
253 for func, args, kwargs in self.items]
254
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
518 cloned_parameters[k] = clone(v, safe=False)
519
--> 520 estimator = estimator.set_params(**cloned_parameters)
521
522 start_time = time.time()
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\pipeline.py in set_params(self, **kwargs)
139 self
140 """
--> 141 self._set_params('steps', **kwargs)
142 return self
143
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\utils\metaestimators.py in _set_params(self, attr, **params)
51 self._replace_estimator(attr, name, params.pop(name))
52 # 3. Step parameters and other initialisation arguments
---> 53 super().set_params(**params)
54 return self
55
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\base.py in set_params(self, **params)
259
260 for key, sub_params in nested_params.items():
--> 261 valid_params[key].set_params(**sub_params)
262
263 return self
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\base.py in set_params(self, **params)
247 key, delim, sub_key = key.partition('__')
248 if key not in valid_params:
--> 249 raise ValueError('Invalid parameter %s for estimator %s. '
250 'Check the list of available parameters '
251 'with `estimator.get_params().keys()`.' %
ValueError: Invalid parameter n_estimators for estimator LogisticRegression(random_state=42). Check the list of available parameters with `estimator.get_params().keys()`.
I've done LogisticRegression.get_params().keys() to get the keys, but it returns get_params() missing 1 required positional argument: 'self'.
The lr__ prefix is correct for routing a parameter to the lr step of the pipeline; the problem is the parameter name after it. You want your param_grid1 dict to consist of keys that are actually parameters accepted by the model you're tuning. n_estimators is a parameter of RandomForestClassifier, but it is not a parameter of LogisticRegression; for LogisticRegression you would tune C instead. (The get_params() error you hit comes from calling the method on the class rather than an instance; use LogisticRegression().get_params().keys().)
I think what you want to do is a grid search over the parameter space of the model that performs best, right? In that case param_grid1 should be written for that model, since the parameters accepted by the models you're testing vary from model to model. A sketch of both variants follows.
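A minimal sketch of both variants (step names 'lr' and 'rfc' are taken from your pipelines; the value grids are illustrative):
# Tuning the logistic-regression pipeline: prefix the parameter with the step name 'lr'
param_grid_lr = {'lr__C': [0.1, 1.0, 10.0]}
grid_search_lr = GridSearchCV(pipe_nfl1_1, param_grid_lr, cv=2)

# Tuning the random-forest pipeline instead: 'rfc' is the step that accepts n_estimators
param_grid_rfc = {'rfc__n_estimators': [2, 4, 6]}
grid_search_rfc = GridSearchCV(pipe_nfl1_2, param_grid_rfc, cv=2)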

How to fix "Exception: Data must be 1-dimensional" error when running Kmeans

I have resolved all errors up till now, but I don't quite understand the problem beyond the error message itself: "Exception: Data must be 1-dimensional".
Here is my code. Here is a link to the Excel file I'm using.
import pandas as pd
import numpy as np
import warnings
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer
from sklearn.cluster import KMeans
df1 = pd.read_excel('PERM_Disclosure_Data_FY2018_EOYV2.xlsx', 'PERM_FY2018')
warnings.filterwarnings("ignore")
df1 = df1.dropna(subset=['PW_AMOUNT_9089'])
df1 = df1.dropna(subset=['CASE_STATUS'])
df1 = df1.dropna(subset=['PW_SOC_TITLE'])
df1.loc[df1['CASE_STATUS'] == 'Certified-Expired', 'CASE_STATUS'] = 'Certified'  # .loc avoids chained-assignment pitfalls
df1 = df1[df1.CASE_STATUS != 'Withdrawn']
df1 = df1.dropna()
df1 = df1[df1.PW_AMOUNT_9089 != '#############']
df1 = df1.dropna(subset=['PW_AMOUNT_9089'])
df1 = df1.dropna(subset=['CASE_STATUS'])
df1 = df1.dropna(subset=['PW_SOC_TITLE'])
df1.PW_AMOUNT_9089 = df1.PW_AMOUNT_9089.astype(float)
df1 = df1.iloc[:, [2, 4, 5]]
enc = LabelBinarizer()
y = enc.fit_transform(df1.CASE_STATUS)[:, [0]]
at this point the output for y is an array:
array([[0],
[0],
[0],
...,
[1],
[1],
[0]])
then I define XZ
le = preprocessing.LabelEncoder()
X = df1.iloc[:, [1]]
Z = df1.iloc[:, [2]]
X2 = X.apply(le.fit_transform)
XZ = pd.concat([X2,Z], axis=1)
the output for XZ is:
PW_SOC_TITLE PW_AMOUNT_9089
12 176 60778.0
13 456 100901.0
14 134 134389.0
15 134 104936.0
16 134 95160.0
17 294 66976.0
18 73 38610.0
19 598 122533.0
20 220 109574.0
21 99 67850.0
22 399 132018.0
23 68 56118.0
24 139 136781.0
25 134 111405.0
26 598 58573.0
27 362 75067.0
28 598 85862.0
29 572 33301.0
30 598 112840.0
31 134 134971.0
32 176 100568.0
33 176 100568.0
34 626 19614.0
35 153 26354.0
36 405 79248.0
37 220 93350.0
38 139 153213.0
39 598 131997.0
40 598 131997.0
41 1 90438.0
... ... ...
119741 495 23005.0
119742 63 46030.0
119743 153 20301.0
119744 95 21965.0
119745 153 29890.0
119746 295 79680.0
119747 349 79498.0
119748 223 38930.0
119749 223 38930.0
119750 570 39160.0
119751 302 119392.0
119752 598 106001.0
119753 416 64230.0
119754 598 115482.0
119755 99 80205.0
119756 134 78329.0
119757 598 109325.0
119758 598 109325.0
119759 570 49770.0
119760 194 18117.0
119761 404 46987.0
119762 189 35131.0
119763 73 49900.0
119764 323 32240.0
119765 372 28122.0
119766 468 67974.0
119767 399 78520.0
119768 329 25875.0
119769 329 25875.0
119770 601 82098.0
I then continue:
from sklearn.model_selection import train_test_split
XZ_train, XZ_test, y_train, y_test = train_test_split(XZ, y,
                                                      test_size=.25,
                                                      random_state=20,
                                                      stratify=y)
# loading library
from pandas_ml import ConfusionMatrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# instantiate learning model loop (k = i)
for weights in ['uniform', 'distance']:
    for i in range(1, 11, 2):
        knn = KNeighborsClassifier(n_neighbors=i, weights=weights)
        # fitting the model
        knn.fit(XZ_train, y_train)
        # predict the response
        pred = knn.predict(XZ_test)
        confusion = ConfusionMatrix(y_test, pred)
        if i < 11:
            # evaluate accuracy
            print('Weight Measure:', knn.weights)
            print('n_neighbors=', knn.n_neighbors)
            print('Accuracy=', accuracy_score(y_test, pred))
            #print('')
            #print('Confusion Matrix')
            #print(confusion)
            print('-----------------------------')
The error I get is as follows:
G:\Anaconda\lib\site-packages\ipykernel_launcher.py:11: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
# This is added back by InteractiveShellApp.init_path()
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-20-bf6054d911ba> in <module>
12 # predict the response
13 pred = knn.predict(XZ_test)
---> 14 confusion = ConfusionMatrix(y_test, pred)
15 if i<11:
16 # evaluate accuracy
G:\Anaconda\lib\site-packages\pandas_ml\confusion_matrix\cm.py in __new__(cls, y_true, y_pred, *args, **kwargs)
21 if len(set(uniq_true) - set(uniq_pred)) == 0:
22 from pandas_ml.confusion_matrix.bcm import BinaryConfusionMatrix
---> 23 return BinaryConfusionMatrix(y_true, y_pred, *args, **kwargs)
24 return LabeledConfusionMatrix(y_true, y_pred, *args, **kwargs)
25
G:\Anaconda\lib\site-packages\pandas_ml\confusion_matrix\bcm.py in __init__(self, *args, **kwargs)
19 def __init__(self, *args, **kwargs):
20 # super(BinaryConfusionMatrix, self).__init__(y_true, y_pred)
---> 21 super(BinaryConfusionMatrix, self).__init__(*args, **kwargs)
22 assert self.len() == 2, \
23 "Binary confusion matrix must have len=2 but \
G:\Anaconda\lib\site-packages\pandas_ml\confusion_matrix\abstract.py in __init__(self, y_true, y_pred, labels, display_sum, backend, true_name, pred_name)
31 self._y_true.name = self.true_name
32 else:
---> 33 self._y_true = pd.Series(y_true, name=self.true_name)
34
35 if isinstance(y_pred, pd.Series):
G:\Anaconda\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
273 else:
274 data = _sanitize_array(data, index, dtype, copy,
--> 275 raise_cast_failure=True)
276
277 data = SingleBlockManager(data, index, fastpath=True)
G:\Anaconda\lib\site-packages\pandas\core\series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
4163 elif subarr.ndim > 1:
4164 if isinstance(data, np.ndarray):
-> 4165 raise Exception('Data must be 1-dimensional')
4166 else:
4167 subarr = com._asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
Is the data I am passing in not the correct type? The datatypes match those I've used in a past project, so I thought I could replicate the approach here. For those wondering: X is company names that I encoded, y is the binarized case status, and Z is a wage amount in the float dtype.
"...the output for y is an array..." The array that you show is two-dimensional, with shape (n, 1). (One of the dimensions is trivial, but it is still 2-d.) Do something like y[:, 0] or y.ravel() to get a 1-d version.

Pytorch on google-colaboratory GPU - Illegal memory access

I am using PyTorch (0.4.0) on Google Colaboratory (NVIDIA-SMI 396.44, Driver Version: 396.44).
When running my code outside any function, I am able to send PyTorch tensors and the model to the GPU:
...
model.cuda()
data_tensor = data_tensor.cuda()
...
And my CNN model trains successfully with 98% accuracy.
But when I put the same code in a function,
def main(...):
    ...
    model.cuda()
    data_tensor = data_tensor.cuda()
    ...

if __name__ == "__main__":
    main(...)
I have the following error:
cuda runtime error (77) : an illegal memory access was encountered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:20
UPDATE (18/11/21):
It turned out that whether or not the code is inside a function is irrelevant. Usually I first get a CUDNN_STATUS_EXECUTION_FAILED error, then on the second run the cuda runtime error (77), as shown below. But it sometimes works a few times before failing.
CUDNN_STATUS_EXECUTION_FAILED (first try):
RuntimeError Traceback (most recent call last)
<ipython-input-27-53476e08e017> in <module>()
1 main('mnist', 'to', 'ndd', Xd=16, epo=5, bs=100, tXn=-1, vXn=300,
----> 2 lr=0.05, suf="s1", n_class=10, cuda=True)
<ipython-input-23-918584456207> in main(ds, framework, format, Xd, epo, bs, tXn, vXn, lr, suf, n_class, cuda)
12 opt = torch.optim.SGD(net.parameters(), lr)
13
---> 14 train(net, opt, Xd, epo, bs, cuda, tXn, tX, tT, vX, vT,lr)
15
<ipython-input-26-6b574a9e8af6> in train(model, optimizer, Xd, epo, bs, cuda, Xn, tX, tT, vX, vT, lr)
26 #t = t.cuda()
27 optimizer.zero_grad()
---> 28 z = model(x)
29 bat_loss = criterion(z, t)
30 bat_loss.backward()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
<ipython-input-22-b4bc2e0b39b8> in forward(self, X)
10 H0 = torch.zeros(self.n_H, X.size(0), self.Wh)
11 C0 = torch.zeros(self.n_H, X.size(0), self.Wh)
---> 12 O, (Hn, Cn), = self.lstm1(X, (H0, C0))
13 O = self.linear1(O[:, -1, :])
14 return O
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
190 flat_weight=flat_weight
191 )
--> 192 output, hidden = func(input, self.all_weights, hx, batch_sizes)
193 if is_packed:
194 output = PackedSequence(output, batch_sizes)
/usr/local/lib/python3.6/dist-packages/torch/nn/_functions/rnn.py in forward(input, *fargs, **fkwargs)
321 func = decorator(func)
322
--> 323 return func(input, *fargs, **fkwargs)
324
325 return forward
/usr/local/lib/python3.6/dist-packages/torch/nn/_functions/rnn.py in forward(input, weight, hx, batch_sizes)
285 batch_first, dropout, train, bool(bidirectional),
286 list(batch_sizes.data) if variable_length else (),
--> 287 dropout_ts)
288
289 if cx is not None:
RuntimeError: CUDNN_STATUS_EXECUTION_FAILED
cuda runtime error (77) (other tries):
RuntimeError Traceback (most recent call last)
<ipython-input-28-53476e08e017> in <module>()
1 main('mnist', 'to', 'ndd', Xd=16, epo=5, bs=100, tXn=-1, vXn=300,
----> 2 lr=0.05, suf="s1", n_class=10, cuda=True)
<ipython-input-23-918584456207> in main(ds, framework, format, Xd, epo, bs, tXn, vXn, lr, suf, n_class, cuda)
12 opt = torch.optim.SGD(net.parameters(), lr)
13
---> 14 train(net, opt, Xd, epo, bs, cuda, tXn, tX, tT, vX, vT,lr)
15
<ipython-input-26-6b574a9e8af6> in train(model, optimizer, Xd, epo, bs, cuda, Xn, tX, tT, vX, vT, lr)
4 if cuda and torch.cuda.is_available():
5 print("tX type (before):", tX.type())
----> 6 model.cuda()
7 tX = tX.cuda()
8 tT = tT.cuda()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in cuda(self, device)
247 Module: self
248 """
--> 249 return self._apply(lambda t: t.cuda(device))
250
251 def cpu(self):
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
174 def _apply(self, fn):
175 for module in self.children():
--> 176 module._apply(fn)
177
178 for param in self._parameters.values():
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in _apply(self, fn)
109
110 def _apply(self, fn):
--> 111 ret = super(RNNBase, self)._apply(fn)
112 self.flatten_parameters()
113 return ret
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
180 # Tensors stored in modules are graph leaves, and we don't
181 # want to create copy nodes, so we have to unpack the data.
--> 182 param.data = fn(param.data)
183 if param._grad is not None:
184 param._grad.data = fn(param._grad.data)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in <lambda>(t)
247 Module: self
248 """
--> 249 return self._apply(lambda t: t.cuda(device))
250
251 def cpu(self):
RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:20
It now works with PyTorch 1.0 using:
!pip3 install https://download.pytorch.org/whl/cu80/torch-1.0.0-cp36-cp36m-linux_x86_64.whl
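As a side note, a minimal device-agnostic sketch of the tensor placement the question's code does by hand (this is the idiomatic pattern from PyTorch 0.4 onward; the names model and data_tensor are taken from the question):
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)              # moves all parameters, including the LSTM weights
data_tensor = data_tensor.to(device)  # inputs must live on the same device as the model

# Tensors created inside forward() also need a device; in the traceback above,
# H0 and C0 are built with torch.zeros(...) and therefore stay on the CPU.
# Creating them on the input's device avoids CPU/GPU mismatches, a common
# source of cuDNN errors:
# H0 = torch.zeros(self.n_H, X.size(0), self.Wh, device=X.device)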

DBSCAN plot - The color values passed in plt.plot() is throwing ValueError

I am using DBSCAN to perform clustering on a dataset. I think the error is caused by the color argument passed to markerfacecolor in plt.plot(), which is not a single value. Please let me know if I am wrong here. My features are latitude, longitude, speed_mph, speedlimit_mph, vehicle_id and driver_id.
Here is my clustering code
dbsc = DBSCAN(eps=.5, min_samples=5).fit(df_cont)
labels = dbsc.labels_
print(labels)
num_clusters = len(set(labels))
clusters = pd.Series([df_cont[labels == n] for n in range(num_clusters)])
print('Number of clusters: {}'.format(num_clusters))
# No of clusters : 5687

core_samples = np.zeros_like(labels, dtype=bool)
core_samples[dbsc.core_sample_indices_] = True
unique_labels = np.unique(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

for (label, color) in zip(unique_labels, colors):
    class_member_mask = (labels == label)
    xy = df_cont[class_member_mask & core_samples]
    print("color:", color)
    # color: [ 0.61960784  0.00392157  0.25882353  1. ]
    plt.plot(xy.values[:, 0], xy.values[:, 1], marker='o', markerfacecolor=color, markersize=10)
    xy2 = df_cont[class_member_mask & ~core_samples]
    plt.plot(xy2.values[:, 0], xy2.values[:, 1], 'o', markerfacecolor=color, markersize=5)

plt.title("DBSCAN Driver - Speed MPH")
plt.xlabel("driver")
plt.ylabel("Speed")
plt.show()
Here is the error message thrown
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-105-0192647e6baf> in <module>()
3 xy = df_cont[class_member_mask & core_samples]
4 print("color:",color)
----> 5 plt.plot(xy.values[:,0],xy.values[:,1], marker='o', markerfacecolor = color, markersize = 10)
6
7 xy2 = df_cont[class_member_mask & ~core_samples]
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/pyplot.py in plot(*args, **kwargs)
3315 mplDeprecation)
3316 try:
-> 3317 ret = ax.plot(*args, **kwargs)
3318 finally:
3319 ax._hold = washold
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1896 warnings.warn(msg % (label_namer, func.__name__),
1897 RuntimeWarning, stacklevel=2)
-> 1898 return func(ax, *args, **kwargs)
1899 pre_doc = inner.__doc__
1900 if pre_doc is None:
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in plot(self, *args, **kwargs)
1404 kwargs = cbook.normalize_kwargs(kwargs, _alias_map)
1405
-> 1406 for line in self._get_lines(*args, **kwargs):
1407 self.add_line(line)
1408 lines.append(line)
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_base.py in _grab_next_args(self, *args, **kwargs)
405 return
406 if len(remaining) <= 3:
--> 407 for seg in self._plot_args(remaining, kwargs):
408 yield seg
409 return
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs)
393 ncx, ncy = x.shape[1], y.shape[1]
394 for j in xrange(max(ncx, ncy)):
--> 395 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
396 ret.append(seg)
397 return ret
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_base.py in _makeline(self, x, y, kw, kwargs)
300 default_dict = self._getdefaults(None, kw)
301 self._setdefaults(default_dict, kw)
--> 302 seg = mlines.Line2D(x, y, **kw)
303 return seg
304
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/lines.py in __init__(self, xdata, ydata, linewidth, linestyle, color, marker, markersize, markeredgewidth, markeredgecolor, markerfacecolor, markerfacecoloralt, fillstyle, antialiased, dash_capstyle, solid_capstyle, dash_joinstyle, solid_joinstyle, pickradius, drawstyle, markevery, **kwargs)
418 self._markerfacecoloralt = None
419
--> 420 self.set_markerfacecolor(markerfacecolor)
421 self.set_markerfacecoloralt(markerfacecoloralt)
422 self.set_markeredgecolor(markeredgecolor)
/home/radiance/anaconda3/lib/python3.6/site-packages/matplotlib/lines.py in set_markerfacecolor(self, fc)
1204 if fc is None:
1205 fc = 'auto'
-> 1206 if self._markerfacecolor != fc:
1207 self.stale = True
1208 self._markerfacecolor = fc
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Also, I tried to do clustering using my latitude and longitude together with the other features, but DBSCAN threw an error that only two features are allowed. Should I ask this as a separate question?
dbsc = DBSCAN(eps = .5, min_samples = 5, algorithm='ball_tree', metric='haversine').fit(np.radians(df_cont))
The contents of df_cont are-
{'Day': [1, 1, 1, 1, 1],
'Month': [6, 6, 6, 6, 6],
'Year': [2015, 2015, 2015, 2015, 2015],
'driver_id': [5693, 5693, 916461, 1145487, 1145487],
'latitude': [34.640141, 34.64373, 34.551254, 35.613663, 35.614525],
'longitude': [-77.938721,
-77.9394,
-78.78463,
-78.470596,
-78.47466999999999],
'speed_mph': [64, 64, 1, 62, 61],
'speedlimit_mph': [70, 70, 55, 70, 70],
'vehicle_id': [1208979, 1208979, 1262441, 1280223, 1280223]}
I got the error fixed by using a scatter plot: plt.scatter(xy.values[:,0], xy.values[:,1], s=10, c=color, marker='o')
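A sketch of the plotting loop rewritten around plt.scatter (same variables as above; wrapping the RGBA row as c=[color] keeps Matplotlib from interpreting the four numbers as four separate per-point values):
for (label, color) in zip(unique_labels, colors):
    class_member_mask = (labels == label)
    xy = df_cont[class_member_mask & core_samples]
    xy2 = df_cont[class_member_mask & ~core_samples]
    plt.scatter(xy.values[:, 0], xy.values[:, 1], s=10, c=[color], marker='o')
    plt.scatter(xy2.values[:, 0], xy2.values[:, 1], s=5, c=[color], marker='o')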