I have produced a naive implementation of "erosion". The performance is not relevant since I just trying to understand the algorithm. However, the output of my implementation does not match the one I get from scipy.ndimage. What is wrong with my implementation ?
Here is my implementation with a small test case:
import numpy as np
from PIL import Image
# a small image to play with a cross structuring element
imgmat = np.array([
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0],
])
imgmat2 = np.where(imgmat == 0, 0, 255).astype(np.uint8)
imarr = Image.fromarray(imgmat2).resize((100, 200))
imarr = np.array(imgrrr)
imarr = np.where(imarr == 0, 0, 1)
se_mat3 = np.array([
[0,1,0],
[1,1,1],
[0,1,0]
])
se_mat31 = np.where(se_mat3 == 1, 0, 1)
The imarr is .
My implementation of erosion:
%%cython -a
import numpy as np
cimport numpy as cnp
cdef erosionC(cnp.ndarray[cnp.int_t, ndim=2] img,
cnp.ndarray[cnp.int_t, ndim=2] B, cnp.ndarray[cnp.int_t, ndim=2] X):
"""
X: image coordinates
struct_element_mat: black and white image, black region is considered as the shape
of structuring element
This operation checks whether (B *includes* X) = $B \subset X$
as per defined in
Serra (Jean), « Introduction to mathematical morphology »,
Computer Vision, Graphics, and Image Processing,
vol. 35, nᵒ 3 (septembre 1986).
URL : https://linkinghub.elsevier.com/retrieve/pii/0734189X86900022..
doi: 10.1016/0734-189X(86)90002-2
Consulted le 6 août 2020, p. 283‑305.
"""
cdef cnp.ndarray[cnp.int_t, ndim=1] a, x, bx
cdef cnp.ndarray[cnp.int_t, ndim=2] Bx, B_frame, Xcp, b
cdef bint check
a = B[0] # get an anchor point from the structuring element coordinates
B_frame = B - a # express the se element coordinates in with respect to anchor point
Xcp = X.copy()
b = img.copy()
for x in X: # X contains the foreground coordinates in the image
Bx = B_frame + x # translate relative coordinates with respect to foreground coordinates considering it as the anchor point
check = True # this is erosion so if any of the se coordinates is not in foreground coordinates we consider it a miss
for bx in Bx: # Bx contains all the translated coordinates of se
if bx not in Xcp:
check = False
if check:
b[x[0], x[1]] = 1 # if there is a hit
else:
b[x[0], x[1]] = 0 # if there is no hit
return b
def erosion(img: np.ndarray, struct_el_mat: np.ndarray, foregroundValue = 0):
B = np.argwhere(struct_el_mat == 0)
X = np.argwhere(img == foregroundValue)
nimg = erosionC(img, B, X)
return np.where(nimg == 1, 255, 0)
The calling code for both is:
from scipy import ndimage as nd
err = nd.binary_erosion(imarr, se_mat3)
imerrCustom = erosion(imarr, se_mat31, foregroundValue=1)
err produces
imerrCustom produces
In the end, I am still not sure about it, but after having read several papers more, I assume that my interpretation of X as foreground coordinates was an error. It should have probably been the entire image that is being iterated.
As I have stated I am not sure if this interpretation is correct as well. But I made a new implementation which iterates over the image, and it gives a more plausible result. I am sharing it in here, hoping that it might help someone:
%%cython -a
import numpy as np
cimport numpy as cnp
cdef dilation_c(cnp.ndarray[cnp.uint8_t, ndim=2] X,
cnp.ndarray[cnp.uint8_t, ndim=2] SE):
"""
X: boolean image
SE: structuring element matrix
origin: coordinate of the origin of the structuring element
This operation checks whether (B *hits* X) = $B \cap X \not = \emptyset$
as per defined in
Serra (Jean), « Introduction to mathematical morphology »,
Computer Vision, Graphics, and Image Processing,
vol. 35, nᵒ 3 (septembre 1986).
URL : https://linkinghub.elsevier.com/retrieve/pii/0734189X86900022..
doi: 10.1016/0734-189X(86)90002-2
Consulted le 6 août 2020, p. 283‑305.
The algorithm adapts DILDIRECT of
Najman (Laurent) et Talbot (Hugues),
Mathematical morphology: from theory to applications,
2013. ISBN : 9781118600788, p. 329
to the formula given in
Jähne (Bernd),
Digital image processing,
6th rev. and ext. ed, Berlin ; New York,
2005. TA1637 .J34 2005.
ISBN : 978-3-540-24035-8.
"""
cdef cnp.ndarray[cnp.uint8_t, ndim=2] O
cdef list elst
cdef int r, c, X_rows, X_cols, SE_rows, SE_cols, se_r, se_c
cdef cnp.ndarray[cnp.int_t, ndim=1] bp
cdef list conds
cdef bint check, b, p, cond
O = np.zeros_like(X)
X_rows, X_cols = X.shape[:2]
SE_rows, SE_cols = SE.shape[:2]
# a boolean convolution
for r in range(0, X_rows-SE_rows):
for c in range(0, X_cols - SE_cols):
conds = []
for se_r in range(SE_rows):
for se_c in range(SE_cols):
b = <bint>SE[se_r, se_c]
p = <bint>X[se_r+r, se_c+c]
conds.append(b and p)
O[r,c] = <cnp.uint8_t>any(conds)
return O
def dilation_erosion(
img: np.ndarray,
struct_el_mat: np.ndarray,
foregroundValue: int = 1,
isErosion: bool = False):
"""
img: image matrix
struct_el: NxN mesh grid of the structuring element whose center is SE's origin
structuring element is encoded as 1
foregroundValue: value to be considered as foreground in the image
"""
B = struct_el_mat.astype(np.uint8)
if isErosion:
X = np.where(img == foregroundValue, 0, 1).astype(np.uint8)
else:
X = np.where(img == foregroundValue, 1, 0).astype(np.uint8)
nimg = dilation_c(X, B)
foreground, background = (255, 0) if foregroundValue == 1 else (0, 1)
if isErosion:
return np.where(nimg == 1, background, foreground).astype(np.uint8)
else:
return np.where(nimg == 1, foreground, background).astype(np.uint8)
# return nimg
I am doing some work using image processing and sparse coding. Problem is, the following code works only on some images.
Here is the image that it works perfectly on:
And here is the image that it loops forever on:
Here is the code:
import cv2
import numpy as np
import networkx as nx
from preproc import Preproc
# From https://github.com/vicariousinc/science_rcn/blob/master/science_rcn/learning.py
def sparsify(bu_msg, suppress_radius=3):
"""Make a sparse representation of the edges by greedily selecting features from the
output of preprocessing layer and suppressing overlapping activations.
Parameters
----------
bu_msg : 3D numpy.ndarray of float
The bottom-up messages from the preprocessing layer.
Shape is (num_feats, rows, cols)
suppress_radius : int
How many pixels in each direction we assume this filter
explains when included in the sparsification.
Returns
-------
frcs : see train_image.
"""
frcs = []
img = bu_msg.max(0) > 0
while True:
r, c = np.unravel_index(img.argmax(), img.shape)
print(r, c)
if not img[r, c]:
break
frcs.append((bu_msg[:, r, c].argmax(), r, c))
img[r - suppress_radius:r + suppress_radius + 1,
c - suppress_radius:c + suppress_radius + 1] = False
return np.array(frcs)
if __name__ == '__main__':
img = cv2.imread('https://i.stack.imgur.com/Nb08A.png', 0)
img2 = cv2.imread('https://i.stack.imgur.com/2MW93.png', 0)
prp = Preproc()
bu_msg = prp.fwd_infer(img)
frcs = sparsify(bu_msg)
and the accompanying preprocessing code:
"""A pre-processing layer of the RCN model. See Sec S8.1 for details.
"""
import numpy as np
from scipy.ndimage import maximum_filter
from scipy.ndimage.filters import gaussian_filter
from scipy.signal import fftconvolve
class Preproc(object):
"""
A simplified preprocessing layer implementing Gabor filters and suppression.
Parameters
----------
num_orients : int
Number of edge filter orientations (over 2pi).
filter_scale : float
A scale parameter for the filters.
cross_channel_pooling : bool
Whether to pool across neighboring orientation channels (cf. Sec S8.1.4).
Attributes
----------
filters : [numpy.ndarray]
Kernels for oriented Gabor filters.
pos_filters : [numpy.ndarray]
Kernels for oriented Gabor filters with all-positive values.
suppression_masks : numpy.ndarray
Masks for oriented non-max suppression.
"""
def __init__(self,
num_orients=16,
filter_scale=2.,
cross_channel_pooling=False):
self.num_orients = num_orients
self.filter_scale = filter_scale
self.cross_channel_pooling = cross_channel_pooling
self.suppression_masks = generate_suppression_masks(filter_scale=filter_scale,
num_orients=num_orients)
def fwd_infer(self, img, brightness_diff_threshold=18.):
"""Compute bottom-up (forward) inference.
Parameters
----------
img : numpy.ndarray
The input image.
brightness_diff_threshold : float
Brightness difference threshold for oriented edges.
Returns
-------
bu_msg : 3D numpy.ndarray of float
The bottom-up messages from the preprocessing layer.
Shape is (num_feats, rows, cols)
"""
filtered = np.zeros((len(self.filters),) + img.shape, dtype=np.float32)
for i, kern in enumerate(self.filters):
filtered[i] = fftconvolve(img, kern, mode='same')
localized = local_nonmax_suppression(filtered, self.suppression_masks)
# Threshold and binarize
localized *= (filtered / brightness_diff_threshold).clip(0, 1)
localized[localized < 1] = 0
if self.cross_channel_pooling:
pooled_channel_weights = [(0, 1), (-1, 1), (1, 1)]
pooled_channels = [-np.ones_like(sf) for sf in localized]
for i, pc in enumerate(pooled_channels):
for channel_offset, factor in pooled_channel_weights:
ch = (i + channel_offset) % self.num_orients
pos_chan = localized[ch]
if factor != 1:
pos_chan[pos_chan > 0] *= factor
np.maximum(pc, pos_chan, pc)
bu_msg = np.array(pooled_channels)
else:
bu_msg = localized
# Setting background to -1
bu_msg[bu_msg == 0] = -1.
return bu_msg
#property
def filters(self):
return get_gabor_filters(
filter_scale=self.filter_scale, num_orients=self.num_orients, weights=False)
#property
def pos_filters(self):
return get_gabor_filters(
filter_scale=self.filter_scale, num_orients=self.num_orients, weights=True)
def get_gabor_filters(size=21, filter_scale=4., num_orients=16, weights=False):
"""Get Gabor filter bank. See Preproc for parameters and returns."""
def _get_sparse_gaussian():
"""Sparse Gaussian."""
size = 2 * np.ceil(np.sqrt(2.) * filter_scale) + 1
alt = np.zeros((int(size), int(size)), np.float32)
alt[int(size // 2), int(size // 2)] = 1
gaussian = gaussian_filter(alt, filter_scale / np.sqrt(2.), mode='constant')
gaussian[gaussian < 0.05 * gaussian.max()] = 0
return gaussian
gaussian = _get_sparse_gaussian()
filts = []
for angle in np.linspace(0., 2 * np.pi, num_orients, endpoint=False):
acts = np.zeros((size, size), np.float32)
x, y = np.cos(angle) * filter_scale, np.sin(angle) * filter_scale
acts[int(size / 2 + y), int(size / 2 + x)] = 1.
acts[int(size / 2 - y), int(size / 2 - x)] = -1.
filt = fftconvolve(acts, gaussian, mode='same')
filt /= np.abs(filt).sum() # Normalize to ensure the maximum output is 1
if weights:
filt = np.abs(filt)
filts.append(filt)
return filts
def generate_suppression_masks(filter_scale=4., num_orients=16):
"""
Generate the masks for oriented non-max suppression at the given filter_scale.
See Preproc for parameters and returns.
"""
size = 2 * int(np.ceil(filter_scale * np.sqrt(2))) + 1
cx, cy = size // 2, size // 2
filter_masks = np.zeros((num_orients, size, size), np.float32)
# Compute for orientations [0, pi), then flip for [pi, 2*pi)
for i, angle in enumerate(np.linspace(0., np.pi, num_orients // 2, endpoint=False)):
x, y = np.cos(angle), np.sin(angle)
for r in range(1, int(np.sqrt(2) * size / 2)):
dx, dy = round(r * x), round(r * y)
if abs(dx) > cx or abs(dy) > cy:
continue
filter_masks[i, int(cy + dy), int(cx + dx)] = 1
filter_masks[i, int(cy - dy), int(cx - dx)] = 1
filter_masks[num_orients // 2:] = filter_masks[:num_orients // 2]
return filter_masks
def local_nonmax_suppression(filtered, suppression_masks, num_orients=16):
"""
Apply oriented non-max suppression to the filters, so that only a single
orientated edge is active at a pixel. See Preproc for additional parameters.
Parameters
----------
filtered : numpy.ndarray
Output of filtering the input image with the filter bank.
Shape is (num feats, rows, columns).
Returns
-------
localized : numpy.ndarray
Result of oriented non-max suppression.
"""
localized = np.zeros_like(filtered)
cross_orient_max = filtered.max(0)
filtered[filtered < 0] = 0
for i, (layer, suppress_mask) in enumerate(zip(filtered, suppression_masks)):
competitor_maxs = maximum_filter(layer, footprint=suppress_mask, mode='nearest')
localized[i] = competitor_maxs <= layer
localized[cross_orient_max > filtered] = 0
return localized
The problem I found was that np.unravel_index returns all the positions of features for the first image, whereas it only returns (0, 0) continuously for the second. My hypothesis is that it is either a problem with the preprocessing code, or it is a bug in the np.unravel_index function itself, but I am not too sure.
Okay, so turns out there is an underlying problem when calling argmax on the image. I rewrote the sparsification script to not use argmax and it works exactly the same. It should now work with any image.
def sparsify(bu_msg, suppress_radius=3):
"""Make a sparse representation of the edges by greedily selecting features from the
output of preprocessing layer and suppressing overlapping activations.
Parameters
----------
bu_msg : 3D numpy.ndarray of float
The bottom-up messages from the preprocessing layer.
Shape is (num_feats, rows, cols)
suppress_radius : int
How many pixels in each direction we assume this filter
explains when included in the sparsification.
Returns
-------
frcs : see train_image.
"""
frcs = []
img = bu_msg.max(0) > 0
for (r, c), _ in np.ndenumerate(img):
if img[r, c]:
frcs.append((bu_msg[:, r, c].argmax(), r, c))
img[r - suppress_radius:r + suppress_radius + 1,
c - suppress_radius:c + suppress_radius + 1] = False
return np.array(frcs)