Related
I am attempting to quantize a model that is being used as a chess engine.
The input is a NumPy array of ints:
array([[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 1, 1, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 1, 0],
[0, 0, 0, 1, 0, 1, 0, 0],
[1, 1, 1, 0, 1, 1, 1, 0],
[1, 0, 1, 0, 1, 1, 0, 1]],
[[1, 0, 0, 0, 1, 0, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1],
[1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]]], dtype=int16)
The output is an evaluation of the board between 0 and 1. I would like to quantize the model, but no matter what I add to the converter, I cannot change the dtype of
interpreter.get_output_details()[0]['dtype']
from tf.float32
def representative_dataset():
    """Yield one calibration sample per entry of the global ``y_train``.

    Every sample is a single-key dict that maps ``"eval"`` to the entry.

    NOTE(review): the calibrator frames in the traceback index each yielded
    dict by the model's input names (``inputs[input_name]``) and fail with
    ``KeyError: 'input_1'``. The key here presumably needs to be the model's
    input name, and the values model inputs rather than ``y_train`` entries.
    Confirm against the model before changing.
    """
    yield from ({"eval": sample} for sample in y_train)
# Load the saved Keras model and build a TFLite converter from it.
keras_model = tf.keras.models.load_model('model.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
# Turn on the converter's default optimization set.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Left disabled here. As reported with this snippet: with this line enabled,
# convert() fails with KeyError: 'input_1'; with it disabled and only
# inference_input_type set, convert() raises "The inference_input_type and
# inference_output_type must be tf.float32."
#converter.representative_dataset = representative_dataset
converter.inference_input_type = tf.uint8
# Author's intent: should only quantize the fixed params like weights.
tflite_quant_model = converter.convert()
If I only have the line: converter.inference_input_type = tf.uint8
I get the error ValueError: The inference_input_type and
inference_output_type must be tf.float32.
If I have the line
converter.representative_dataset = representative_dataset
and I use tf.uint8 I get the error
KeyError: 'input_1'
with the traceback
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_18712/1100160250.py in <module>
6 #should only quantize the fixed params like weights
7
----> 8 tflite_quant_model = converter.convert()
packages\tensorflow\lite\python\lite.py in wrapper(self, *args, **kwargs)
931 def wrapper(self, *args, **kwargs):
932 # pylint: disable=protected-access
--> 933 return self._convert_and_export_metrics(convert_func, *args, **kwargs)
934 # pylint: enable=protected-access
935
packages\tensorflow\lite\python\lite.py in _convert_and_export_metrics(self, convert_func, *args, **kwargs)
909 self._save_conversion_params_metric()
910 start_time = time.process_time()
--> 911 result = convert_func(self, *args, **kwargs)
912 elapsed_time_ms = (time.process_time() - start_time) * 1000
913 if result:
-packages\tensorflow\lite\python\lite.py in convert(self)
1340 Invalid quantization parameters.
1341 """
-> 1342 saved_model_convert_result = self._convert_as_saved_model()
1343 if saved_model_convert_result:
1344 return saved_model_convert_result
packages\tensorflow\lite\python\lite.py in _convert_as_saved_model(self)
1322 self._convert_keras_to_saved_model(temp_dir))
1323 if self.saved_model_dir:
-> 1324 return super(TFLiteKerasModelConverterV2,
1325 self).convert(graph_def, input_tensors, output_tensors)
1326 finally:
packages\tensorflow\lite\python\lite.py in convert(self, graph_def, input_tensors, output_tensors)
1139 **converter_kwargs)
1140
-> 1141 return self._optimize_tflite_model(
1142 result, self._quant_mode, quant_io=self.experimental_new_quantizer)
1143
packages\tensorflow\lite\python\convert_phase.py in wrapper(*args, **kwargs)
213 except Exception as error:
214 report_error_message(str(error))
--> 215 raise error from None # Re-throws the exception.
216
217 return wrapper
packages\tensorflow\lite\python\convert_phase.py in wrapper(*args, **kwargs)
203 def wrapper(*args, **kwargs):
204 try:
--> 205 return func(*args, **kwargs)
206 except ConverterError as converter_error:
207 if converter_error.errors:
packages\tensorflow\lite\python\lite.py in _optimize_tflite_model(self, model, quant_mode, quant_io)
869 q_bias_type = quant_mode.bias_type()
870 q_allow_float = quant_mode.is_allow_float()
--> 871 model = self._quantize(model, q_in_type, q_out_type, q_activations_type,
872 q_bias_type, q_allow_float)
873
packages\tensorflow\lite\python\lite.py in _quantize(self, result, input_type, output_type, activations_type, bias_type, allow_float)
611 custom_op_registerers_by_func)
612 if self._experimental_calibrate_only or self.experimental_new_quantizer:
--> 613 calibrated = calibrate_quantize.calibrate(
614 self.representative_dataset.input_gen)
615
packages\tensorflow\lite\python\convert_phase.py in wrapper(*args, **kwargs)
213 except Exception as error:
214 report_error_message(str(error))
--> 215 raise error from None # Re-throws the exception.
216
217 return wrapper
packages\tensorflow\lite\python\convert_phase.py in wrapper(*args, **kwargs)
203 def wrapper(*args, **kwargs):
204 try:
--> 205 return func(*args, **kwargs)
206 except ConverterError as converter_error:
207 if converter_error.errors:
packages\tensorflow\lite\python\optimize\calibrator.py in calibrate(self, dataset_gen)
224 dataset_gen: A generator that generates calibration samples.
225 """
--> 226 self._feed_tensors(dataset_gen, resize_input=True)
227 return self._calibrator.Calibrate()
\tensorflow\lite\python\optimize\calibrator.py in _feed_tensors(self, dataset_gen, resize_input)
108 self._interpreter = Interpreter(model_content=self._model_content)
109 signature_key = None
--> 110 input_array = self._create_input_array_from_dict(None, sample)
111 elif isinstance(sample, list):
112 signature_key = None
\tensorflow\lite\python\optimize\calibrator.py in _create_input_array_from_dict(self, signature_key, inputs)
84 key=lambda item: item[1]["index"])
85 for input_name, _ in input_details:
---> 86 input_array.append(inputs[input_name])
87 return input_array
88
KeyError: 'input_1'
I suspect something is wrong with my representative_dataset function, but I am unsure what to do here. I am really hoping not to have to convert my board representations to float32.
I have the following 2 x 2 matrix
1 0
1 1
I want to expand this matrix with dimensions in powers of 2. For example the matrix with dimension 4 would look like:
1 0 0 0
1 1 0 0
1 0 1 0
1 1 1 1
Essentially, I want to retain the original matrix wherever a 1 occurs in the base matrix and fill in zeros wherever a 0 occurs in the base matrix. Is there a fast way to do this in NumPy or SciPy? I want to be able to expand this to any power of 2, say 512 or 1024.
For relatively small values of the powers of 2 (say up to 10), you can recursively replace every 1 with the initial matrix a using numpy block:
import numpy as np
a = np.array([[1, 0], [1, 1]])
def generate(a, k):
    """Expand the 0/1 matrix ``a`` into its ``k``-level self-similar form.

    At each level every 1 of the current result is replaced by a copy of
    ``a`` and every 0 by a zero block of the same shape. For a 0/1 matrix
    this block substitution is the Kronecker product ``np.kron(result, a)``,
    so the result is computed numerically instead of building Python source
    from the matrix's text form and passing it to ``eval``.

    Parameters
    ----------
    a : array_like
        Base matrix of 0s and 1s (e.g. ``[[1, 0], [1, 1]]``).
    k : int
        Number of levels. ``k <= 1`` returns a copy of ``a``; for an
        ``m x m`` base the result has side length ``m ** k``.

    Returns
    -------
    numpy.ndarray
        The expanded matrix; ``a`` itself is never modified.
    """
    base = np.asarray(a)
    result = base.copy()
    for _ in range(1, k):
        # Block (i, j) of the product is result[i, j] * base: a copy of the
        # base where the current result has a 1, zeros where it has a 0.
        result = np.kron(result, base)
    return result
Example for k=3 (8x8 result matrix) generate(a, 3):
array([[1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 1, 0, 0, 0],
[1, 1, 0, 0, 1, 1, 0, 0],
[1, 0, 1, 0, 1, 0, 1, 0],
[1, 1, 1, 1, 1, 1, 1, 1]])
You can combine tile and repeat.
>>> np.tile(arr, (2, 2))
array([[1, 0, 1, 0],
[1, 1, 1, 1],
[1, 0, 1, 0],
[1, 1, 1, 1]])
>>> np.repeat(np.repeat(arr, 2, axis=1), 2, axis=0)
array([[1, 1, 0, 0],
[1, 1, 0, 0],
[1, 1, 1, 1],
[1, 1, 1, 1]])
Then just multiply:
def tile_mask(a):
    """Double the side length of ``a`` by tiling it and masking with itself.

    The result is the element-wise product of a 2x2 tiling of ``a`` and a
    copy of ``a`` in which every element is blown up to a 2x2 patch.
    """
    # Stretch each element into a 2x2 patch: columns first, then rows.
    widened = np.repeat(a, 2, axis=1)
    mask = np.repeat(widened, 2, axis=0)
    # Four copies of `a` laid out in a 2x2 grid.
    copies = np.tile(a, (2, 2))
    return copies * mask
>>> tile_mask(arr)
array([[1, 0, 0, 0],
[1, 1, 0, 0],
[1, 0, 1, 0],
[1, 1, 1, 1]])
I don't know of a good way to do this for higher powers besides recursion though:
def tile_mask(a, n=2):
    """Apply the tile-and-mask doubling step repeatedly.

    One step multiplies a 2x2 tiling of the current matrix by a copy of it
    in which every element is stretched to a 2x2 patch. The step always runs
    at least once, and once more for every unit by which ``n`` exceeds 2, so
    ``n=2`` doubles the side length once and ``n=3`` doubles it twice.
    """
    current = a
    while True:
        mask = np.repeat(np.repeat(current, 2, axis=1), 2, axis=0)
        current = np.tile(current, (2, 2)) * mask
        # Same stopping rule as the recursive formulation: keep going only
        # while n is still strictly greater than 2.
        if not n > 2:
            return current
        n -= 1
>>> tile_mask(arr, 3)
array([[1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 1, 0, 0, 0],
[1, 1, 0, 0, 1, 1, 0, 0],
[1, 0, 1, 0, 1, 0, 1, 0],
[1, 1, 1, 1, 1, 1, 1, 1]])
I have a array S:
S = array([[980, 100],
[ 3, 5]])
I need to resize it, or pad it with zeros, to shape (6, 6). My desired output is:
out = array([[980, 100, 0, 0, 0, 0],
[3, 5, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]], dtype=int32)
Can anyone help?
I figured it out.
Create a zeros matrix of the desired size:
# Target canvas. np.zeros defaults to float64, while the desired output in the
# question is int32; pass dtype= here if the integer type matters.
zeros = np.zeros((6,6))
Your array:
# Example 3x4 array to embed in the top-left corner of the zeros matrix.
array = np.array([[1,2,5,6],[3,4,4,3],[5,6,2,8]])
# Row and column counts of the array, used as the slice bounds for the copy.
lenx, leny = array.shape
Fill the top-left corner of the zeros matrix with your array:
zeros[:lenx,:leny] = array
I have a numpy array with 1s & 0s (or bools if that's easier)
I would like to find the distance from each 1 to its closest 'edge' (an edge is where a 1 meets a 0).
Toy example:
Original array:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
Result:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 2, 1],
[0, 1, 1, 1]])
If possible, I'd like to use the 'cityblock' distance, but that's lower priority
Thanks!
Here's a vectorized approach using binary_erosion & cdist(..'cityblock') -
from scipy.ndimage.morphology import binary_erosion
from scipy.spatial.distance import cdist
def dist_from_edge(img):
    """Return the cityblock distance of every foreground pixel from the edge.

    Contour pixels (foreground pixels removed by one binary erosion) get
    distance 1. Each interior pixel gets 1 plus its cityblock distance to the
    nearest contour pixel. Background pixels stay 0.

    Parameters
    ----------
    img : array_like
        Binary image, given either as booleans or as 0/1 integers.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``img``.
    """
    # Work on a boolean mask: subtracting boolean arrays with `-` is rejected
    # by NumPy, so the contour is derived with logical operators instead.
    mask = np.asarray(img).astype(bool)
    interior = binary_erosion(mask)   # Interior mask
    contour = mask & ~interior        # Contour mask
    out = contour.astype(int)         # Contour pixels start (and stay) at 1
    if not interior.any():
        # Nothing survives the erosion (e.g. empty or one-pixel-thin shapes),
        # so there are no interior distances to compute.
        return out
    # Pairwise cityblock distances, contour (rows) x interior (columns); the
    # column-wise minimum is each interior pixel's distance to the contour.
    out[interior] = cdist(
        np.argwhere(contour), np.argwhere(interior), 'cityblock'
    ).min(0) + 1
    return out
Sample run -
In [188]: img.astype(int)
Out[188]:
array([[0, 0, 0, 0, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1],
[0, 0, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 0, 0, 0]])
In [189]: dist_from_edge(img)
Out[189]:
array([[0, 0, 0, 0, 1, 0, 0],
[0, 1, 1, 1, 2, 1, 0],
[0, 1, 2, 2, 3, 2, 1],
[0, 1, 2, 3, 2, 2, 1],
[0, 0, 1, 2, 1, 1, 1],
[0, 0, 0, 1, 0, 0, 0]])
Here's an input, output on a human blob -
Here's one way you can do this with scipy.ndimage.distance_transform_cdt (or scipy.ndimage.distance_transform_bf):
import numpy as np
from scipy.ndimage import distance_transform_cdt
def distance_from_edge(x):
    """Return each nonzero cell's taxicab distance to the nearest edge.

    The input is surrounded by a one-cell border of zeros before the
    distance transform, so the array boundary itself counts as an edge.
    Without the padding the transform would measure the distance to the
    nearest 0 that is actually present in ``x``.

    Parameters
    ----------
    x : array_like
        Array of 0s and 1s (or bools) of any dimensionality >= 1.

    Returns
    -------
    numpy.ndarray
        Distances with the same shape as ``x``; zero cells stay 0.
    """
    padded = np.pad(x, 1, mode='constant')
    dist = distance_transform_cdt(padded, metric='taxicab')
    # Strip the padding from every axis, not only the first two, so inputs
    # that are not 2-D come back with their original shape as well.
    core = (slice(1, -1),) * dist.ndim
    return dist[core]
For example:
In [327]: a
Out[327]:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
In [328]: distance_from_edge(a)
Out[328]:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 2, 1],
[0, 1, 1, 1]], dtype=int32)
In [329]: x
Out[329]:
array([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]])
In [330]: distance_from_edge(x)
Out[330]:
array([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 2, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0],
[1, 2, 3, 3, 2, 1, 0, 0, 1, 2, 1, 0],
[1, 2, 3, 3, 2, 1, 0, 0, 0, 1, 0, 0],
[1, 2, 3, 3, 2, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 2, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]], dtype=int32)
If you don't pad the array with zeros, you get the distance to the nearest 0 in the array:
In [335]: distance_transform_cdt(a, metric='taxicab')
Out[335]:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 2, 2],
[0, 1, 2, 3]], dtype=int32)
In [336]: distance_transform_cdt(x, metric='taxicab')
Out[336]:
array([[6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0],
[5, 5, 4, 3, 2, 1, 0, 0, 0, 1, 0, 0],
[4, 4, 4, 3, 2, 1, 0, 0, 1, 2, 1, 0],
[3, 3, 4, 3, 2, 1, 0, 0, 0, 1, 0, 0],
[2, 2, 3, 3, 2, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 2, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2]], dtype=int32)
Here is a different method that uses scipy.ndimage.binary_erosion. I wrote this before I discovered the distance transform function. I'm sure there are much more efficient methods, but this should work reasonably well for images that are not too big.
import numpy as np
from scipy.ndimage import binary_erosion
def distance_from_edge(x):
    """Count how many successive erosions each cell of ``x`` survives.

    ``x`` is expected to hold 0s and 1s (or bools). The current mask is
    added to an integer accumulator and then eroded, until nothing is left,
    so a cell's final value is the number of rounds in which it was still set.
    """
    depth = np.zeros_like(x, dtype=int)
    layer = x
    while np.any(layer):
        depth += layer                 # every still-set cell gains one
        layer = binary_erosion(layer)  # peel off the outermost ring
    return depth
For example,
In [291]: a
Out[291]:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
In [292]: distance_from_edge(a)
Out[292]:
array([[0, 0, 0, 0],
[0, 1, 1, 1],
[0, 1, 2, 1],
[0, 1, 1, 1]])
In [293]: x
Out[293]:
array([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]])
In [294]: distance_from_edge(x)
Out[294]:
array([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 2, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0],
[1, 2, 3, 3, 2, 1, 0, 0, 1, 2, 1, 0],
[1, 2, 3, 3, 2, 1, 0, 0, 0, 1, 0, 0],
[1, 2, 3, 3, 2, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 2, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]])
Is there TensorFlow native function that does unpooling for Deconvolutional Networks ?
I have written this in plain Python, but it gets complicated when I try to translate it to TensorFlow, as its objects do not even support item assignment at the moment, which I think is a great inconvenience with TF.
I don't think there is an official unpooling layer yet, which is frustrating because you have to use image resizing (bilinear interpolation or nearest neighbor), which is like an average unpooling operation, and it's really slow. Look at the 'image' section of the TF API and you will find it.
TensorFlow has a max_pool_with_argmax op that gives you the max-pooled output as well as the activation map, which is nice because you could use it in an unpooling layer to preserve the 'lost' spatial information, but it seems there isn't an unpooling operation that does this yet. I guess they are planning to add it ... soon.
Edit: I found someone on Google discuss a week ago who seems to have implemented something like this, but I personally haven't tried it yet.
https://github.com/ppwwyyxx/tensorpack/blob/master/tensorpack/models/pool.py#L66
There are a couple of TensorFlow implementations here: pooling.py
Namely:
1) an unpool operation (source) that uses the output of tf.nn.max_pool_with_argmax. Please note, though, that as of TensorFlow 1.0 tf.nn.max_pool_with_argmax is GPU-only.
2) upsample operation that mimics inverse of max-pooling by filling positions of unpooled region with either zeros or copies of max element.
Compared to tensorpack, it allows copies of elements instead of zeros and supports strides other than [2, 2].
No recompile, back-prop friendly.
Illustration:
I was searching for a maxunpooling operation and tried implementing it. I came up with some kind of hacky implementation for the gradient, as I was struggling with CUDA.
The code is here, you will need to build it from source with GPU support.
Below is a demo application. No warranties, though!
There also exists an open issue for this operation.
import tensorflow as tf
import numpy as np
def max_pool(inp, k=2):
    """Max-pool ``inp`` with a ``k`` x ``k`` window and matching stride.

    Delegates to ``tf.nn.max_pool_with_argmax_and_mask`` with "SAME" padding.
    The callers in this script unpack the result into three values: pooled
    output, argmax and argmax mask.

    NOTE(review): this op is presumably provided by the custom TensorFlow
    build that the accompanying text says must be compiled from source.
    Confirm it exists in the TensorFlow in use.
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool_with_argmax_and_mask(
        inp, ksize=window, strides=list(window), padding="SAME"
    )
def max_unpool(inp, argmax, argmax_mask, k=2):
    """Undo a ``k`` x ``k`` max pooling using the recorded argmax and mask.

    Delegates to ``tf.nn.max_unpool`` with "SAME" padding and the same window
    and stride layout that ``max_pool`` uses.

    NOTE(review): ``tf.nn.max_unpool`` is presumably provided by the custom
    TensorFlow build mentioned in the accompanying text. Confirm it exists in
    the TensorFlow in use.
    """
    window = [1, k, k, 1]
    return tf.nn.max_unpool(
        inp, argmax, argmax_mask, ksize=window, strides=list(window), padding="SAME"
    )
def conv2d(inp, name):
    """Apply convolution, bias and ReLU using the layer called ``name``.

    The kernel and bias are read from the module-level ``weights`` and
    ``biases`` dicts. The convolution uses unit strides and "SAME" padding.
    """
    kernel = weights[name]
    bias = biases[name]
    convolved = tf.nn.conv2d(inp, kernel, [1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, bias))
def conv2d_transpose(inp, name, dropout_prob):
    """Apply a transposed convolution and bias using the layer ``name``.

    The kernel and bias are read from the module-level ``weights`` and
    ``biases`` dicts. When ``dropout_prob`` is not None, the result is also
    passed through ReLU and then dropout with that probability argument.
    When it is None, the biased output is returned as is.

    NOTE(review): the source's indentation was lost. This reads both the
    ReLU and the dropout as belonging to the ``dropout_prob`` branch, which
    fits the final layer feeding a logits-based loss. Confirm.
    """
    kernel = weights[name]
    bias = biases[name]
    # The output keeps the input's first three dimensions. The channel count
    # is adopted from the kernel's second-to-last axis, because the weight
    # definition for deconv has input and output channels switched.
    out_dims = inp.get_shape().dims[:3] + [kernel.get_shape()[-2]]
    out_shape = tf.TensorShape(out_dims)
    deconvolved = tf.nn.conv2d_transpose(
        inp, kernel, out_shape, strides=[1, 1, 1, 1], padding="SAME"
    )
    biased = tf.nn.bias_add(deconvolved, bias)
    if dropout_prob is None:
        return biased
    return tf.nn.dropout(tf.nn.relu(biased), dropout_prob)
# Kernel variables keyed by layer name, each drawn from tf.random_normal.
# conv2d() and conv2d_transpose() look their kernels up here by name.
# The last two axes of the deconv kernels are ordered the other way round
# from the conv kernels (see the channel comment in conv2d_transpose).
weights = {
    "conv1": tf.Variable(tf.random_normal([3, 3, 3, 16])),
    "conv2": tf.Variable(tf.random_normal([3, 3, 16, 32])),
    "conv3": tf.Variable(tf.random_normal([3, 3, 32, 32])),
    "deconv2": tf.Variable(tf.random_normal([3, 3, 16, 32])),
    "deconv1": tf.Variable(tf.random_normal([3, 3, 1, 16])) }
# Bias variables keyed by the same layer names, one value per output channel
# of the corresponding layer.
biases = {
    "conv1": tf.Variable(tf.random_normal([16])),
    "conv2": tf.Variable(tf.random_normal([32])),
    "conv3": tf.Variable(tf.random_normal([32])),
    "deconv2": tf.Variable(tf.random_normal([16])),
    "deconv1": tf.Variable(tf.random_normal([ 1])) }
## Build Miniature CEDN
# Placeholders: x is the input batch, y the per-pixel target, and p the value
# handed to tf.nn.dropout inside conv2d_transpose.
x = tf.placeholder(tf.float32, [12, 20, 20, 3])
y = tf.placeholder(tf.float32, [12, 20, 20, 1])
p = tf.placeholder(tf.float32)
# Encoder: conv -> pool -> conv -> pool -> conv. Each pooling step keeps its
# argmax and mask so the decoder can undo it.
conv1 = conv2d(x, "conv1")
maxp1, maxp1_argmax, maxp1_argmax_mask = max_pool(conv1)
conv2 = conv2d(maxp1, "conv2")
maxp2, maxp2_argmax, maxp2_argmax_mask = max_pool(conv2)
conv3 = conv2d(maxp2, "conv3")
# Decoder: unpool with the argmax of the matching pooling step, then deconv.
maxup2 = max_unpool(conv3, maxp2_argmax, maxp2_argmax_mask)
deconv2 = conv2d_transpose(maxup2, "deconv2", p)
maxup1 = max_unpool(deconv2, maxp1_argmax, maxp1_argmax_mask)
# The last layer gets dropout_prob=None.
deconv1 = conv2d_transpose(maxup1, "deconv1", None)
## Optimizing Stuff
# NOTE(review): the loss arguments are passed positionally. Later TensorFlow
# releases presumably require labels=/logits= keywords here. Confirm against
# the TensorFlow version in use.
loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(deconv1, y))
# NOTE(review): learning_rate=1 looks unusually large for Adam. Confirm it is
# intended for this demo.
optimizer = tf.train.AdamOptimizer(learning_rate=1).minimize(loss)
## Test Data
# Fixed seed so the random demo batch is reproducible.
np.random.seed(123)
# Inputs are random +/-1 values and targets are random 0/1 values.
batch_x = np.where(np.random.rand(12, 20, 20, 3) > 0.5, 1.0, -1.0)
batch_y = np.where(np.random.rand(12, 20, 20, 1) > 0.5, 1.0, 0.0)
# Value fed for p during the optimisation steps.
prob = 0.5
# Run ten optimisation steps on the fixed demo batch, reporting the loss after
# each step. The messages are built with str.format and printed through a
# single-argument print(...) call, which parses and prints the same text on
# both Python 2 and Python 3 (the Python 2 print statement is a SyntaxError on
# Python 3).
# NOTE(review): the source's indentation was lost. The loss report is read as
# part of the loop body. Confirm.
with tf.Session() as session:
    tf.set_random_seed(123)
    session.run(tf.initialize_all_variables())

    print("\n\n")
    for i in range(10):
        # Training step, with dropout driven by `prob`.
        session.run(optimizer, feed_dict={x: batch_x, y: batch_y, p: prob})
        print("step {}".format(i + 1))
        # The loss is evaluated with p fed as 1.0 instead of `prob`.
        current_loss = session.run(loss, feed_dict={x: batch_x, y: batch_y, p: 1.0})
        print("loss {} \n\n".format(current_loss))
Edit 29.11.17
Some time back, I reimplemented it in a clean fashion against TensorFlow 1.0; the forward operations are also available in a CPU version. You can find it in this branch. I recommend looking at the last few commits if you want to use it.
Nowadays there's a Tensorflow Addon MaxUnpooling2D:
Unpool the outputs of a maximum pooling operation.
tfa.layers.MaxUnpooling2D(
pool_size: Union[int, Iterable[int]] = (2, 2),
strides: Union[int, Iterable[int]] = (2, 2),
padding: str = 'SAME',
**kwargs
)
This class can e.g. be used as
import tensorflow as tf
import tensorflow_addons as tfa
# Max-pool while also capturing the argmax indices; the two positional 2s are
# presumably ksize and strides (confirm against the TensorFlow signature).
# NOTE(review): `input` is not defined in this snippet, so it would resolve to
# the builtin. Substitute the tensor to be pooled.
pooling, max_index = tf.nn.max_pool_with_argmax(input, 2, 2, padding='SAME')
# Feed the pooled values and their indices to the addon layer to unpool.
unpooling = tfa.layers.MaxUnpooling2D()(pooling, max_index)
I checked this which shagas mentioned here and it is working.
x = [[[[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3]],
[[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3]],
[[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3],
[1, 1, 2,2, 3, 3]]]]
x = np.array(x)
inp = tf.convert_to_tensor(x)
out = UnPooling2x2ZeroFilled(inp)
out
Out[19]:
<tf.Tensor: id=36, shape=(1, 6, 12, 6), dtype=int64, numpy=
array([[[[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]],
[[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]],
[[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0],
[1, 1, 2, 2, 3, 3],
[0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]]]])>
out1 = tf.keras.layers.MaxPool2D()(out)
out1
Out[37]:
<tf.Tensor: id=118, shape=(1, 3, 6, 6), dtype=int64, numpy=
array([[[[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3]],
[[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3]],
[[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3],
[1, 1, 2, 2, 3, 3]]]])>
If you need max unpooling then you can use (though I didn't check it) this one
Here is my implementation. You should apply the max pooling using tf.nn.max_pool_with_argmax and then pass the argmax result of tf.nn.max_pool_with_argmax as the argmax argument.
def unpooling(inputs, output_shape, argmax):
    """
    Scatters the pooled values back to the flat positions given by argmax, as explained in:
    https://www.oreilly.com/library/view/hands-on-convolutional-neural/9781789130331/6476c4d5-19f2-455f-8590-c6f99504b7a5.xhtml
    The flattened inputs are written into a flat buffer with prod(output_shape)
    entries, which is then reshaped to output_shape.
    NOTE(review): per the accompanying text, repeated argmax values are added
    together rather than written once. Watch for this when the stride is 1.
    :param inputs: Input Tensor.
    :param output_shape: Desired output shape. For example, on 2D unpooling, this should be 4D (because of number of samples and channels).
    :param argmax: Result argmax from tf.nn.max_pool_with_argmax
    https://www.tensorflow.org/api_docs/python/tf/nn/max_pool_with_argmax
    :return: Tensor of shape output_shape.
    """
    flat_size = tf.cast(tf.reduce_prod(output_shape), tf.int64)
    flat_values = tf.reshape(inputs, [-1])
    # scatter_nd is given one index vector per update, hence the trailing axis.
    flat_positions = tf.expand_dims(tf.reshape(argmax, [-1]), axis=-1)
    scattered = tf.scatter_nd(flat_positions, flat_values, shape=[flat_size])
    return tf.reshape(scattered, output_shape)
This has a small bug/feature: if argmax contains a repeated value, the corresponding values are added together instead of being written just once. Beware of this if the stride is 1. I don't know, however, whether this is desired or not.