paintGL does not draw anything on the screen - pyqt5

I am trying to create PyQt5/PyOpenGL code based on this example from the OpenGL Wiki.
The paintGL() call does not render anything to the screen. The StackOverflow questions with similar titles that I read describe different issues from what I am facing:
Note: I cannot post more than 2 links at this time due to rep limitations. I have added all related StackOverflow links as comments to this question.
I also went through the Common OpenGL Mistakes page and it does not seem to help (quite possibly I did not understand the page well).
glGetError reports GL.GL_NO_ERROR at all of the stages below:
When creating the VAO binding
When creating the VBO bindings
After calling glBindVertexArray and glEnableVertexAttribArray
glGetShaderiv and glGetProgramiv return GL.GL_TRUE, so there do not appear to be any compilation or linking errors.
The GL context seems to have been correctly created. The self.context().isValid() check returns True.
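For completeness, the context can also be queried directly inside initializeGL (once the context is current) to confirm the version and profile actually obtained; a small check along these lines, using only the objects already present in the code below:
fmt = self.format()
print( 'Context version:', fmt.majorVersion(), fmt.minorVersion(), 'profile:', fmt.profile() )
print( 'Driver reports:', GL.glGetString( GL.GL_VERSION ) )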
I am using PyQt 5.5 and Python 3.4.0 with PyOpenGL 3.1. GPU Caps Viewer reports that OpenGL 4.5 is available on my machine.
Any help or pointers to get moving are much appreciated.
The code is below.
from PyQt5 import QtGui, QtCore
import sys, array
from OpenGL import GL
# Create the Vertex Shader script and the Fragment Shader script here.
DeltaVertexShader = '''
#version 420 core
layout (location = 0) in vec4 vertexPositions;
layout (location = 1) in vec4 vertexColours;
smooth out vec4 fragmentColours;
void main()
{
gl_Position = vertexPositions;
fragmentColours = vertexColours;
}
'''
DeltaFragmentShader = '''
#version 420 core
smooth in vec4 fragmentColours;
out vec4 finalColours;
void main()
{
finalColours = fragmentColours;
}
'''
class PulleysWithWeights3D( QtGui.QOpenGLWindow ):
    def __init__(self):
        super().__init__()
        # These are the 4D coordinates of a triangle that needs to be drawn.
        _vertexPos = [ 0.075, 0.075, 0.075, 1.0,
                       0.275, 0.275, 0.275, 1.0,
                       0.550, 0.550, 0.550, 1.0 ]
        # These are the RGBA colours for each vertex.
        _vertexCols = [ 0.875, 0.525, 0.075, 0.500,
                        0.875, 0.525, 0.075, 0.500,
                        0.875, 0.525, 0.075, 0.500 ]
        self._deltaPositions = array.array( 'f', _vertexPos )
        self._deltaColours = array.array( 'f', _vertexCols )
        self._appSurfaceFormat = QtGui.QSurfaceFormat()
        self._appSurfaceFormat.setProfile( QtGui.QSurfaceFormat.CoreProfile )
        self._appSurfaceFormat.setMajorVersion( 4 )
        self._appSurfaceFormat.setMinorVersion( 2 )
        self._appSurfaceFormat.setRenderableType( QtGui.QSurfaceFormat.OpenGL )
        self._appSurfaceFormat.setSamples( 16 )
        self._appSurfaceFormat.setSwapBehavior( QtGui.QSurfaceFormat.DoubleBuffer )
        self.setSurfaceType( QtGui.QSurface.OpenGLSurface )
        self.setFormat( self._appSurfaceFormat )
        self.setIcon( QtGui.QIcon('OpenGL.png') )
        self.setTitle( 'Pulleys3D' )
        self.setMinimumSize( QtCore.QSize( 1280, 640 ) )
        self.show()

    def initializeGL(self):
        GL.glClearColor( 0.0, 0.0, 0.00, 1.0 ) # RGBA
        GL.glClearDepthf(1.0)
        GL.glClearStencil(0)
        GL.glClear( GL.GL_COLOR_BUFFER_BIT | GL.GL_DEPTH_BUFFER_BIT | GL.GL_STENCIL_BUFFER_BIT )
        # Initialize a VAO.
        self._vaoColouredDelta = GL.glGenVertexArrays(1)
        GL.glBindVertexArray( self._vaoColouredDelta )
        # Initialize the VBOs.
        self._vboPositions = GL.glGenBuffers(1) # Create the position buffer.
        self._vboShades = GL.glGenBuffers(1) # Create the colours buffer.
        # Bind the positions VBO to the GL_ARRAY_BUFFER target.
        GL.glBindBuffer( GL.GL_ARRAY_BUFFER, self._vboPositions )
        # Initialize the positions buffer with the positions array data.
        GL.glBufferData( GL.GL_ARRAY_BUFFER, self._deltaPositions.buffer_info()[1] * self._deltaPositions.itemsize,
                         self._deltaPositions.tobytes(), GL.GL_STATIC_DRAW )
        GL.glEnableVertexAttribArray(0) # Enable attribute array 0, i.e., the positions array.
        GL.glVertexAttribPointer( 0, 4, GL.GL_FLOAT, GL.GL_FALSE, 0, None )
        GL.glBindBuffer( GL.GL_ARRAY_BUFFER, 0 )
        GL.glBindBuffer( GL.GL_ARRAY_BUFFER, self._vboShades )
        GL.glBufferData( GL.GL_ARRAY_BUFFER, self._deltaColours.buffer_info()[1] * self._deltaColours.itemsize,
                         self._deltaColours.tobytes(), GL.GL_STATIC_DRAW )
        GL.glEnableVertexAttribArray(1) # Enable attribute array 1, i.e., the colours array.
        GL.glVertexAttribPointer( 1, 4, GL.GL_FLOAT, GL.GL_FALSE, 0, None )
        GL.glBindBuffer( GL.GL_ARRAY_BUFFER, 0 )
        GL.glBindVertexArray(0) # Unbind the VAO.
        # Initialize the vertex shader and fragment shader program objects.
        _vertexShaderObj = GL.glCreateShader( GL.GL_VERTEX_SHADER )
        _fragmentShaderObj = GL.glCreateShader( GL.GL_FRAGMENT_SHADER )
        GL.glShaderSource( _vertexShaderObj, DeltaVertexShader )
        GL.glShaderSource( _fragmentShaderObj, DeltaFragmentShader )
        GL.glCompileShader( _vertexShaderObj )
        GL.glCompileShader( _fragmentShaderObj )
        if GL.glGetShaderiv( _vertexShaderObj, GL.GL_COMPILE_STATUS ) != GL.GL_TRUE:
            # There was a compilation error. Print it.
            print( 'Vertex Shader Compilation failed! Details follow.' )
            print( 'Compilation errors are: %s ' %( str( GL.glGetShaderInfoLog( _vertexShaderObj ) ) ) )
            # Delete this shader.
            GL.glDeleteShader( _vertexShaderObj )
            return # Don't continue execution.
        elif GL.glGetShaderiv( _fragmentShaderObj, GL.GL_COMPILE_STATUS ) != GL.GL_TRUE:
            print( 'Fragment Shader Compilation failed! Details follow.' )
            print( 'Compilation errors are: %s ' %( str( GL.glGetShaderInfoLog( _fragmentShaderObj ) ) ) )
            # Delete this shader.
            GL.glDeleteShader( _fragmentShaderObj )
            return # Don't continue execution.
        self._renderProgram = GL.glCreateProgram()
        GL.glAttachShader( self._renderProgram, _vertexShaderObj )
        GL.glAttachShader( self._renderProgram, _fragmentShaderObj )
        GL.glLinkProgram( self._renderProgram )
        if GL.glGetProgramiv( self._renderProgram, GL.GL_LINK_STATUS ) != GL.GL_TRUE:
            print( 'Shader linking failed! Details follow.' )
            print( 'Linking errors are: %s ' %( str( GL.glGetProgramInfoLog( self._renderProgram ) ) ) )
            GL.glDetachShader( self._renderProgram, _vertexShaderObj )
            GL.glDetachShader( self._renderProgram, _fragmentShaderObj )
            GL.glDeleteShader( _vertexShaderObj )
            GL.glDeleteShader( _fragmentShaderObj )
            # Delete the program.
            GL.glDeleteProgram( self._renderProgram )
            return # Don't continue execution.
        # Always detach shaders after a successful link.
        GL.glDetachShader( self._renderProgram, _vertexShaderObj )
        GL.glDetachShader( self._renderProgram, _fragmentShaderObj )

    def resizeGL(self, wd, ht ):
        # Resize the viewport by calling glViewport with the new height and width.
        GL.glViewport( 0, 0, wd, ht )

    def paintGL(self):
        GL.glClearColor( 0.0, 0.0, 0.00, 1.0 ) # RGBA
        GL.glClearDepthf(1.0)
        GL.glClearStencil(0)
        GL.glClear( GL.GL_COLOR_BUFFER_BIT | GL.GL_DEPTH_BUFFER_BIT | GL.GL_STENCIL_BUFFER_BIT )
        # Initialize the shader program to paint.
        GL.glUseProgram( self._renderProgram )
        GL.glBindVertexArray( self._vaoColouredDelta )
        # Draw the array.
        GL.glDrawArrays( GL.GL_TRIANGLES, 0, 3 )
        # Swap the front and back buffers so the newly drawn content is visible.
        self.context().swapBuffers( self )
        # Unbind the vertex array and the program.
        GL.glBindVertexArray(0)
        GL.glUseProgram(0)

if __name__ == '__main__':
    _pww3dApp = QtGui.QGuiApplication( sys.argv )
    _pww3d = PulleysWithWeights3D()
    sys.exit( _pww3dApp.exec_() )

The XYZ values used produce a triangle too small to be noticeable when rendered.
Scaling up the X, Y, Z values will result in the triangle being displayed, e.g.,
_vertexPos = [ 0.775, 0.775, 0.775, 1.0,
               0.275, 0.275, 0.275, 1.0,
               0.550, 0.550, 0.550, 1.0 ]
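If the triangle still does not show up after such a change, reading the framebuffer back right after the draw call is a quick way to tell whether anything was rasterized at all (a debugging sketch, not part of the original code; it assumes numpy is imported as np):
# At the end of paintGL(), after glDrawArrays:
w, h = self.width(), self.height()
pixels = GL.glReadPixels( 0, 0, w, h, GL.GL_RGBA, GL.GL_UNSIGNED_BYTE )
frame = np.frombuffer( pixels, dtype=np.uint8 ).reshape( h, w, 4 )
print( 'non-black pixels:', int( (frame[..., :3].sum(axis=-1) > 0).sum() ) )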

Related

Non Linear MPC optimization of a 2 dimensional drone

I am trying to simulate a drone on a 2-dimensional lunar surface. The drone can apply thrust along the z-axis of its body, and it can change its body angle from -90 degrees to +90 degrees.
The first planned acceleration in the y direction that the MPC function gives is a negative value that exceeds the lunar accel_g, which I set to 1.635 m/s^2; as a result, the drone cancels out its initial velocity very quickly. This should not happen, since I set the constraints on the body angle such that the thrust can never reduce the vertical velocity: the vertical velocity of the drone should be reduced only by lunar gravity. I cannot find what is wrong with the code.
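To spell out that reasoning: with the dynamics below, the vertical acceleration is a_y = accel_Thr*cosd(angle) - accel_g. Since accel_Thr >= 0 and cosd(angle) >= 0 for |angle| <= 90 degrees, a_y >= -accel_g = -1.635 m/s^2, so the optimizer should never be able to plan a vertical acceleration below that bound.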
**Is there a way I can apply rotation to the marker of the plot? I want to rotate the cross marker so that it represents the changes in attitude.**
using JuMP, Ipopt

function run_mpc(initial_position, initial_velocity, initial_angle)
    model = Model(Ipopt.Optimizer)
    Δt = 0.1
    num_time_steps = 20          # Change this -> Affects Optimization
    max_acceleration_Thr = 3     # Max Thrust / Mass
    max_pitch_angle = 90
    accel_g = 1.635              # 1/6 of Earth G
    des_pos = [-1, 0]

    @variables model begin
        position[1:2, 1:num_time_steps]
        velocity[1:2, 1:num_time_steps]
        acceleration[1:2, 1:num_time_steps]
        -max_pitch_angle <= angle[1:num_time_steps] <= max_pitch_angle
        0 <= accel_Thr[1:num_time_steps] <= max_acceleration_Thr
    end

    # Dynamics constraints
    @NLconstraint(model, [i=2:num_time_steps, j=[1]], acceleration[j, i] == accel_Thr[i-1]*sind(angle[i-1]))
    @NLconstraint(model, [i=2:num_time_steps, j=[2]], acceleration[j, i] == (accel_Thr[i-1]*cosd(angle[i-1]))-accel_g)
    @NLconstraint(model, [i=2:num_time_steps, j=1:2],
                  velocity[j, i] == velocity[j, i - 1] + (acceleration[j, i - 1]) * Δt)
    @NLconstraint(model, [i=2:num_time_steps, j=1:2],
                  position[j, i] == position[j, i - 1] + velocity[j, i - 1] * Δt)

    # Cost function: minimize final position and final velocity
    # For moving to [-2,0] with min. vertical velocity:
    # sum(([-2,0]-position[:, end]).^2) + sum(velocity[[2], end].^2)
    @NLobjective(model, Min,
                 100 * sum((des_pos[i]-position[i, num_time_steps])^2 for i in 1:2) + sum(velocity[i, num_time_steps]^2 for i in 1:2))

    # Initial conditions:
    @NLconstraint(model, [i=1:2], position[i, 1] == initial_position[i])
    @NLconstraint(model, [i=1:2], velocity[i, 1] == initial_velocity[i])
    @NLconstraint(model, angle[1] == initial_angle)

    optimize!(model)
    return value.(position), value.(velocity), value.(acceleration), value.(angle[2:end])
end;
using Plots

begin
    # The robot's starting position and velocity
    q = [1.0, 0.0]
    v = [-2.0, 2.0]
    ang = 45
    Δt = 0.1

    # Recording Position, Acceleration, Attitude, Planned Positions
    qs_x = []
    qs_y = []
    as_x = []
    as_y = []
    angs = []
    q_plans = []
    u_plans = []

    anim = @animate for i in 1:90   # This determines the number of MPC iterations to be run
        # Plot the current position & attitude
        plot(label = "Drone", [q[1]], [q[2]], marker=(:rect, 10), xlim=(-2, 2), ylim=(-2, 2))
        plot!(label = "Body Axis", [q[1]], [q[2]], marker=(:cross, 18, :grey))
        push!(qs_x, q[1])
        push!(qs_y, q[2])

        # Run the MPC control optimization
        q_plan, v_plan, u_plan, ang_plan = run_mpc(q, v, ang)

        # Draw the planned future states from the MPC optimization
        plot!(label = "Opt. Path", q_plan[1, :], q_plan[2, :], linewidth=5, arrow=true, c=:orange)

        # Draw the planned acceleration
        plot!(label = "Opt. Accel", u_plan[1, 1:2], u_plan[2, 1:2], linewidth=3, arrow=true, c=:red)

        # Save Acceleration & Angle Data to csv
        u = u_plan[:, 1]
        push!(as_x, u[1])
        push!(as_y, u[2])
        push!(angs, ang)
        push!(u_plans, u_plan)

        # Apply the planned acceleration & attitude and simulate one step in time
        global ang = ang_plan[1]
        global v += u * Δt
        global q += v * Δt
    end
    gif(anim, "~/Downloads/NLmpc_angle.gif", fps=60)
end

PYQGIS: How to use QgsRasterFileWriter.writeRaster to create raster from numpy array

I am trying to use the method writeRaster of qgis.core.QgsRasterFileWriter to create a single-band raster of floats and NaNs, but according to the documentation I need to provide these inputs:
writeRaster(
self, # OK
pipe: QgsRasterPipe, # Q1
nCols: int, # OK
nRows: int, # OK
outputExtent: QgsRectangle, # Q2
crs: QgsCoordinateReferenceSystem, # OK
feedback: QgsRasterBlockFeedback = None # OK
) → QgsRasterFileWriter.WriterError
I have 2 questions here:
Q1: What is a QgsRasterPipe, how do I use it, and what is its purpose?
The documentation says: "Constructor for QgsRasterPipe. Base class for processing modules."
The few examples of writeRaster I found online just initialize this object. So what do I need to provide in the pipe argument?
Q2: The argument outputExtent of type QgsRectangle seems to be the bounding box of my raster: QgsRectangle(x_min, y_min, x_max, y_max). But here is my question: where do I declare the values of the pixels?
Here is the script (not working) I have for the moment:
import os
import numpy
from qgis.core import (
    QgsMapLayer,
    QgsRasterFileWriter,
    QgsCoordinateReferenceSystem,
    QgsRasterPipe,
    QgsRectangle,
)

def write_to_geotiff(data: list, filename: str, epsg: str, layer: str=None) -> None:
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    nx, ny = len(x_data), len(y_data)
    QgsRasterFileWriter.writeRaster(
        QgsRasterPipe(),
        nCols=nx,
        nRows=ny,
        outputExtent=QgsRectangle(
            min(x_data),
            min(y_data),
            max(x_data),
            max(y_data)
        ),
        crs=QgsCoordinateReferenceSystem(f"epsg:{epsg}"),
    )

if __name__ == "__main__":
    filename = r"C:\Users\vince\Downloads\test.gpkg"
    x_data = numpy.asarray([0, 1, 2])
    y_data = numpy.asarray([0, 1])
    z_data = numpy.asarray([
        [0.1, numpy.nan],
        [0.5, 139.5],
        [150.98, numpy.nan],
    ])
    epsg = "4326"
    write_to_geotiff(
        [x_data, y_data, z_data],
        filename,
        epsg
    )
I saw this answer for Q1; the data is in the pipe variable. But I don't know how to create a QgsRasterBlock from my numpy array...
I got it working using the method QgsRasterFileWriter.createOneBandRaster, which creates a provider.
You can get the block of the provider (of type QgsRasterBlock) and use the method setValue to associate values.
from qgis.core import Qgis  # needed for Qgis.Float32

writer = QgsRasterFileWriter(filename)
provider = QgsRasterFileWriter.createOneBandRaster(
    writer,
    dataType=Qgis.Float32,
    width=nx,
    height=ny,
    extent=QgsRectangle(
        min(x_data),
        min(y_data),
        max(x_data),
        max(y_data)
    ),
    crs=QgsCoordinateReferenceSystem(f"epsg:{epsg}"),
)
provider.setNoDataValue(1, -1)
provider.setEditable(True)
block = provider.block(
    bandNo=1,
    boundingBox=provider.extent(),
    width=provider.xSize(),
    height=provider.ySize()
)
for ix in range(nx):
    for iy in range(ny):
        value = z_data[ix][iy]
        if numpy.isnan(value):  # note: "value == numpy.nan" is always False
            continue
        block.setValue(iy, ix, value)
provider.writeBlock(
    block=block,
    band=1,
    xOffset=0,
    yOffset=0
)
provider.setEditable(False)
This will create a tif file:
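As a quick sanity check (a sketch, assuming this runs inside a QGIS Python environment), the written file can be loaded back and inspected:
from qgis.core import QgsRasterLayer

layer = QgsRasterLayer(filename, "written_raster")
print(layer.isValid(), layer.width(), layer.height())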

Multiple grouped charts with altair

My data has 4 attributes: dataset (D1/D2), model (M1/M2), layer (L1/L2), scene (S1/S2). I can make a chart grouped by scenes and then merge plots horizontally and vertically (pic above).
However, I would like to have 'double grouping' by scene and dataset, i.e. merging the D1 and D2 plots by placing the blue/orange bars next to each other but with different opacity or pattern/hatch.
Basically something like this (pretend that the black traits are a hatch pattern).
Here is the code to reproduce the first plot
import numpy as np
import itertools
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import os
import altair as alt

alt.renderers.enable('altair_viewer')
np.random.seed(0)

################################################################################
model_keys = ['M1', 'M2']
data_keys = ['D1', 'D2']
scene_keys = ['S1', 'S2']
layer_keys = ['L1', 'L2']

ys = []
models = []
dataset = []
layers = []
scenes = []

for sc in scene_keys:
    for m in model_keys:
        for d in data_keys:
            for l in layer_keys:
                for s in range(10):
                    data_y = list(np.random.rand(10) / 10)
                    ys += data_y
                    scenes += [sc] * len(data_y)
                    models += [m] * len(data_y)
                    dataset += [d] * len(data_y)
                    layers += [l] * len(data_y)

# ------------------------------------------------------------------------------
df = pd.DataFrame({'Y': ys,
                   'Model': models,
                   'Dataset': dataset,
                   'Layer': layers,
                   'Scenes': scenes})

bars = alt.Chart(df, width=100, height=90).mark_bar().encode(
    # field to group columns on
    x=alt.X('Scenes:N',
            title=None,
            axis=alt.Axis(
                grid=False,
                title=None,
                labels=False,
            ),
            ),
    # field to use as Y values and how to calculate
    y=alt.Y('Y:Q',
            aggregate='mean',
            axis=alt.Axis(
                grid=True,
                title='Y',
                titleFontWeight='normal',
            ),
            ),
    # field to use for sorting
    order=alt.Order('Scenes',
                    sort='ascending',
                    ),
    # field to use for color segmentation
    color=alt.Color('Scenes',
                    legend=alt.Legend(orient='bottom',
                                      padding=-10,
                                      ),
                    title=None,
                    ),
)

error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
    x=alt.X('Scenes:N'),
    y=alt.Y('Y:Q'),
)

text = alt.Chart(df).mark_text(align='center',
                               baseline='line-bottom',
                               color='black',
                               dy=-5  # y-shift
                               ).encode(
    x=alt.X('Scenes:N'),
    y=alt.Y('mean(Y):Q'),
    text=alt.Text('mean(Y):Q', format='.1f'),
)

chart_base = bars + error_bars + text

chart_base = chart_base.facet(
    # field to use as the set of columns to be represented in each group
    column=alt.Column('Layer:N',
                      # header=alt.Header(
                      #     labelFontStyle='bold',
                      # ),
                      title=None,
                      sort=list(set(models)),  # get unique indices
                      ),
    spacing={"row": 0, "column": 15},
)

def unique(sequence):
    seen = set()
    return [x for x in sequence if not (x in seen or seen.add(x))]

for i, m in enumerate(unique(models)):
    chart_imnet = chart_base.transform_filter(
        alt.FieldEqualPredicate(field='Dataset', equal='D1'),
    ).transform_filter(
        alt.FieldEqualPredicate(field='Model', equal=m)
    )

    chart_places = chart_base.transform_filter(
        alt.FieldEqualPredicate(field='Dataset', equal='D2')
    ).transform_filter(
        alt.FieldEqualPredicate(field='Model', equal=m)
    )

    if i == 0:
        title_params = dict({'align': 'center', 'anchor': 'middle', 'dy': -10})
        chart_imnet = chart_imnet.properties(title=alt.TitleParams('D1', **title_params))
        chart_places = chart_places.properties(title=alt.TitleParams('D2', **title_params))

    chart_places = alt.concat(chart_places,
                              title=alt.TitleParams(
                                  m,
                                  baseline='middle',
                                  orient='right',
                                  anchor='middle',
                                  angle=90,
                                  # dy=10,
                                  dx=30 if i == 0 else 0,
                              ),
                              )

    if i == 0:
        chart = (chart_imnet | chart_places).resolve_scale(x='shared')
    else:
        chart = (chart & (chart_imnet | chart_places).resolve_scale(x='shared'))

chart.save('test.html')
For now, I don't know a good answer, but once https://github.com/altair-viz/altair/pull/2528 is accepted you can use the xOffset encoding channel as such:
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
    x=alt.X("Scenes:N"),
    y=alt.Y("mean(Y):Q"),
    color=alt.Color("Scenes:N"),
    opacity=alt.Opacity("Dataset:N"),
    xOffset=alt.XOffset("Dataset:N"),
    column=alt.Column('Layer:N'),
    row=alt.Row("Model:N")
).resolve_scale(x='independent')
Which will result in:
See Colab Notebook or Vega Editor
EDIT
To control the opacity and the legend names, one can do the following:
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
    x=alt.X("Scenes:N"),
    y=alt.Y("mean(Y):Q"),
    color=alt.Color("Scenes:N"),
    opacity=alt.Opacity("Dataset:N",
                        scale=alt.Scale(domain=['D1', 'D2'],
                                        range=[0.2, 1.0]),
                        legend=alt.Legend(labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'")),
    xOffset=alt.XOffset("Dataset:N"),
    column=alt.Column('Layer:N'),
    row=alt.Row("Model:N")
).resolve_scale(x='independent')

NV12 to YUV444 speed up

I have code that converts an image from NV12 to YUV444:
for h in range(self.img_shape[0]):
    # centralize yuv 444 data for inference framework
    for w in range(self.img_shape[1]):
        yuv444_res[h][w][0] = (nv12_y_data[h * self.img_shape[1] + w]).astype(np.int8)
        yuv444_res[h][w][1] = (nv12_u_data[int(h / 2) * int(self.img_shape[1] / 2) + int(w / 2)]).astype(np.int8)
        yuv444_res[h][w][2] = (nv12_v_data[int(h / 2) * int(self.img_shape[1] / 2) + int(w / 2)]).astype(np.int8)
Since a Python for loop is very slow, much slower than NumPy, I was wondering whether this conversion can be done as a NumPy calculation.
Update on 06/15/2021:
I was able to get this piece of code, using fancy indexing, from this page (external link):
yuv444 = np.empty([self.height, self.width, 3], dtype=np.uint8)
yuv444[:, :, 0] = nv12_data[:self.width * self.height].reshape(
    self.height, self.width)
u = nv12_data[self.width * self.height::2].reshape(
    self.height // 2, self.width // 2)
yuv444[:, :, 1] = Image.fromarray(u).resize((self.width, self.height))
v = nv12_data[self.width * self.height + 1::2].reshape(
    self.height // 2, self.width // 2)
yuv444[:, :, 2] = Image.fromarray(v).resize((self.width, self.height))
data[0] = yuv444.astype(np.int8)
If PIL is used to replace the deprecated imresize, the code matches the old code 100%.
Update on 06/19/2021:
After a closer look at the answer Rotem gave, I realized that his way is quicker.
# nv12_data is reshaped to one dimension
y = nv12_data[:self.width * self.height].reshape(
    self.height, self.width)
shrunk_u = nv12_data[self.width * self.height::2].reshape(
    self.height // 2, self.width // 2)
shrunk_v = nv12_data[self.width * self.height + 1::2].reshape(
    self.height // 2, self.width // 2)
u = cv2.resize(shrunk_u, (self.width, self.height),
               interpolation=cv2.INTER_NEAREST)
v = cv2.resize(shrunk_v, (self.width, self.height),
               interpolation=cv2.INTER_NEAREST)
yuv444 = np.dstack((y, u, v))
Also, I did a timing comparison for processing 1000 pictures. It turns out the cv2-based version is quicker and guarantees the same result.
cv time: 4.417593002319336, pil time: 5.395732164382935
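For reference, a minimal sketch of how such a comparison could be set up (nv12_to_yuv444_cv2 and nv12_to_yuv444_pil are placeholder names for wrappers around the two variants above):
import time

def benchmark(fn, frames):
    # Time the conversion of every frame in the list once.
    start = time.time()
    for frame in frames:
        fn(frame)
    return time.time() - start

# print('cv time:', benchmark(nv12_to_yuv444_cv2, frames))
# print('pil time:', benchmark(nv12_to_yuv444_pil, frames))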
Update on 06/25/2021:
Pillow resize has different default resample param values in different versions.
5.1.0:
def resize(self, size, resample=NEAREST, box=None):
8.1.0:
def resize(self, size, resample=BICUBIC, box=None, reducing_gap=None):
It would be a good idea to specify the resample strategy used.
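For example, with the PIL path above, the old nearest-neighbour behaviour can be pinned explicitly (a small sketch; u, self.width and self.height are the variables from the snippet above):
from PIL import Image

# Pin the resample filter so behaviour does not change between Pillow versions.
yuv444[:, :, 1] = Image.fromarray(u).resize((self.width, self.height), resample=Image.NEAREST)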
You may use the process described in my following post, in reverse order (without the RGB part).
Illustration:
Start by creating a synthetic sample image in NV12 format, using FFmpeg (command line tool).
The sample image is used for testing.
Executing from Python using subprocess module:
import subprocess as sp
import shlex
sp.run(shlex.split('ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=1 -vcodec rawvideo -pix_fmt nv12 nv12.yuv'))
sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x162 -pixel_format gray -i nv12.yuv -pix_fmt gray nv12_gray.png'))
Read the sample image, and execute the code from your post (used as a reference):
import numpy as np
import cv2

nv12 = cv2.imread('nv12_gray.png', cv2.IMREAD_GRAYSCALE)
cols, rows = nv12.shape[1], nv12.shape[0]*2//3

# Reference implementation - using for-loops (the solution is in the part below):
################################################################################
nv12_y_data = nv12[0:rows, :].flatten()
nv12_u_data = nv12[rows:, 0::2].flatten()
nv12_v_data = nv12[rows:, 1::2].flatten()

yuv444_res = np.zeros((rows, cols, 3), np.uint8)

for h in range(rows):
    # centralize yuv 444 data for inference framework
    for w in range(cols):
        yuv444_res[h][w][0] = (nv12_y_data[h * cols + w]).astype(np.int8)
        yuv444_res[h][w][1] = (nv12_u_data[int(h / 2) * int(cols / 2) + int(w / 2)]).astype(np.int8)
        yuv444_res[h][w][2] = (nv12_v_data[int(h / 2) * int(cols / 2) + int(w / 2)]).astype(np.int8)
################################################################################
My suggested solution applies the following stages:
Separate U and V into two "half size" matrices shrunk_u and shrunk_v.
Resize shrunk_u and shrunk_v to full image size matrices using cv2.resize.
In my code sample I used nearest neighbor interpolation for getting the same result as your result.
It is recommended to replace it with linear interpolation for better quality.
Use np.dstack for merging Y, U and V into YUV (3 color channels) image.
Here is the complete code sample:
import numpy as np
import subprocess as sp
import shlex
import cv2

sp.run(shlex.split('ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=1 -vcodec rawvideo -pix_fmt nv12 nv12.yuv'))
sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x162 -pixel_format gray -i nv12.yuv -pix_fmt gray nv12_gray.png'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x108 -pixel_format nv12 -i nv12.yuv -vcodec rawvideo -pix_fmt yuv444p yuv444.yuv'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x324 -pixel_format gray -i yuv444.yuv -pix_fmt gray yuv444_gray.png'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x108 -pixel_format yuv444p -i yuv444.yuv -pix_fmt rgb24 rgb.png'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x108 -pixel_format gbrp -i yuv444.yuv -filter_complex "extractplanes=g+b+r[g][b][r],[r][g][b]mergeplanes=0x001020:gbrp[v]" -map "[v]" -vcodec rawvideo -pix_fmt rgb24 yuvyuv.yuv'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 576x108 -pixel_format gray -i yuvyuv.yuv -pix_fmt gray yuvyuv_gray.png'))

nv12 = cv2.imread('nv12_gray.png', cv2.IMREAD_GRAYSCALE)
cols, rows = nv12.shape[1], nv12.shape[0]*2//3

nv12_y_data = nv12[0:rows, :].flatten()
nv12_u_data = nv12[rows:, 0::2].flatten()
nv12_v_data = nv12[rows:, 1::2].flatten()

yuv444_res = np.zeros((rows, cols, 3), np.uint8)

for h in range(rows):
    # centralize yuv 444 data for inference framework
    for w in range(cols):
        yuv444_res[h][w][0] = (nv12_y_data[h * cols + w]).astype(np.int8)
        yuv444_res[h][w][1] = (nv12_u_data[int(h / 2) * int(cols / 2) + int(w / 2)]).astype(np.int8)
        yuv444_res[h][w][2] = (nv12_v_data[int(h / 2) * int(cols / 2) + int(w / 2)]).astype(np.int8)

y = nv12[0:rows, :]
shrunk_u = nv12[rows:, 0::2].copy()
shrunk_v = nv12[rows:, 1::2].copy()
u = cv2.resize(shrunk_u, (cols, rows), interpolation=cv2.INTER_NEAREST)  # Resize U channel (use NEAREST interpolation - fastest, but lowest quality).
v = cv2.resize(shrunk_v, (cols, rows), interpolation=cv2.INTER_NEAREST)  # Resize V channel
yuv444 = np.dstack((y, u, v))

is_equal = np.all(yuv444 == yuv444_res)
print('is_equal = ' + str(is_equal))  # is_equal = True

# Convert to RGB for display
yvu = np.dstack((y, v, u))  # Use COLOR_YCrCb2BGR, because it uses the corrected conversion coefficients.
rgb = cv2.cvtColor(yvu, cv2.COLOR_YCrCb2BGR)

# Show results:
cv2.imshow('nv12', nv12)
cv2.imshow('yuv444_res', yuv444_res)
cv2.imshow('yuv444', yuv444)
cv2.imshow('rgb', rgb)
cv2.waitKey()
cv2.destroyAllWindows()
Input (NV12 displayed as Grayscale):
Output (after converting to RGB):
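As mentioned above, linear interpolation gives better quality; the swap is a one-liner, although the output will then no longer be bit-exact with the nearest-neighbour reference loop:
u = cv2.resize(shrunk_u, (cols, rows), interpolation=cv2.INTER_LINEAR)  # higher quality than INTER_NEAREST
v = cv2.resize(shrunk_v, (cols, rows), interpolation=cv2.INTER_LINEAR)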
Seems to be a prime case for fancy indexing (advanced indexing).
Something like this should do the trick, though I didn't verify it on an actual image. I've added a section to reconstruct the image in the beginning, because it is easier to work with the array as a whole than broken into parts. Likely, you can refactor this and avoid splitting it to begin with.
# reconstruct image array
y = nv12_y_data.reshape(self.image_shape[0], self.image_shape[1])
u = nv12_u_data.reshape(self.image_shape[0], self.image_shape[1])
v = nv12_v_data.reshape(self.image_shape[0], self.image_shape[1])
img = np.stack((y,u,v), axis=-1)
# take every index twice until half the range
idx_h = np.repeat(np.arange(img.shape[0] // 2), 2)[:, None]
idx_w = np.repeat(np.arange(img.shape[1] // 2), 2)[None, :]
# convert
yuv444 = np.empty_like(img, dtype=np.uint8)
yuv444[..., 0] = img[..., 0]
yuv444[..., 1] = img[idx_h, idx_w, 1]
yuv444[..., 2] = img[idx_h, idx_w, 2]
If this is along your critical path, and you want to tease out a little more performance, you could consider processing the image channel first, which will be faster on modern CPUs (but not GPUs).
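For what it is worth, a minimal sketch of the channel-first idea (an assumption about memory layout, reusing img, idx_h and idx_w from the snippet above):
# Rearrange to (channels, height, width) so each channel is contiguous in memory.
img_cf = np.ascontiguousarray(img.transpose(2, 0, 1))

yuv444_cf = np.empty_like(img_cf)
yuv444_cf[0] = img_cf[0]                  # Y plane copied as-is
yuv444_cf[1] = img_cf[1][idx_h, idx_w]    # U upsampled by repeated indexing
yuv444_cf[2] = img_cf[2][idx_h, idx_w]    # V upsampled by repeated indexing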
This answer is just another way to do it, and is not the quickest way to get the job done, but definitely should be easy to understand. I have checked the generated files with yuvplayer application as well to confirm it works.
import numpy as np
import cv2

# The height mentioned is the height of the nv12 file, and the same is the case with the width.
def convert_nv12toyuv444(filename='input.nv12', height=2358, width=2040):
    nv12_data = np.fromfile(filename, dtype=np.uint8)
    imageSize = (height, width)
    npimg = nv12_data.reshape(imageSize)
    y_height = npimg.shape[0] * (2/3)
    y_wid = npimg.shape[1]
    y_height = int(y_height)
    y_wid = int(y_wid)
    y_data = npimg[:y_height, :y_wid]
    uv_data = npimg[y_height:, :y_wid]
    shrunkU = uv_data[:, 0::2]
    shrunkV = uv_data[:, 1::2]
    u = cv2.resize(shrunkU, (y_wid, y_height),
                   interpolation=cv2.INTER_NEAREST)
    v = cv2.resize(shrunkV, (y_wid, y_height),
                   interpolation=cv2.INTER_NEAREST)
    yuv444 = np.dstack((y_data, u, v))
    return yuv444
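A possible usage sketch (file names and dimensions are placeholders), dumping the result so it can be inspected in a raw YUV viewer:
yuv444 = convert_nv12toyuv444('input.nv12', height=2358, width=2040)
# An (H, W, 3) uint8 array in C order writes out as packed/interleaved YUV444 bytes;
# depending on the viewer, a planar layout may be expected instead.
yuv444.tofile('output_yuv444_packed.yuv')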

What is wrong with my cython implementation of erosion operation of mathematical morphology

I have produced a naive implementation of "erosion". The performance is not relevant, since I am just trying to understand the algorithm. However, the output of my implementation does not match the one I get from scipy.ndimage. What is wrong with my implementation?
Here is my implementation with a small test case:
import numpy as np
from PIL import Image
# a small image to play with a cross structuring element
imgmat = np.array([
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0],
])
imgmat2 = np.where(imgmat == 0, 0, 255).astype(np.uint8)
imarr = Image.fromarray(imgmat2).resize((100, 200))
imarr = np.array(imgrrr)
imarr = np.where(imarr == 0, 0, 1)
se_mat3 = np.array([
[0,1,0],
[1,1,1],
[0,1,0]
])
se_mat31 = np.where(se_mat3 == 1, 0, 1)
The resulting imarr is:
My implementation of erosion:
%%cython -a
import numpy as np
cimport numpy as cnp

cdef erosionC(cnp.ndarray[cnp.int_t, ndim=2] img,
              cnp.ndarray[cnp.int_t, ndim=2] B, cnp.ndarray[cnp.int_t, ndim=2] X):
    """
    X: image coordinates
    struct_element_mat: black and white image, the black region is considered as the shape
        of the structuring element

    This operation checks whether (B *includes* X) = $B \subset X$
    as defined in
    Serra (Jean), "Introduction to mathematical morphology",
    Computer Vision, Graphics, and Image Processing,
    vol. 35, no. 3 (September 1986).
    URL: https://linkinghub.elsevier.com/retrieve/pii/0734189X86900022
    doi: 10.1016/0734-189X(86)90002-2
    Consulted 6 August 2020, p. 283-305.
    """
    cdef cnp.ndarray[cnp.int_t, ndim=1] a, x, bx
    cdef cnp.ndarray[cnp.int_t, ndim=2] Bx, B_frame, Xcp, b
    cdef bint check
    a = B[0]  # get an anchor point from the structuring element coordinates
    B_frame = B - a  # express the se coordinates with respect to the anchor point
    Xcp = X.copy()
    b = img.copy()
    for x in X:  # X contains the foreground coordinates in the image
        Bx = B_frame + x  # translate the relative coordinates, using the foreground coordinate as the anchor point
        check = True  # this is erosion, so if any of the se coordinates is not in the foreground coordinates we consider it a miss
        for bx in Bx:  # Bx contains all the translated coordinates of the se
            if bx not in Xcp:
                check = False
        if check:
            b[x[0], x[1]] = 1  # if there is a hit
        else:
            b[x[0], x[1]] = 0  # if there is no hit
    return b

def erosion(img: np.ndarray, struct_el_mat: np.ndarray, foregroundValue = 0):
    B = np.argwhere(struct_el_mat == 0)
    X = np.argwhere(img == foregroundValue)
    nimg = erosionC(img, B, X)
    return np.where(nimg == 1, 255, 0)
The calling code for both is:
from scipy import ndimage as nd
err = nd.binary_erosion(imarr, se_mat3)
imerrCustom = erosion(imarr, se_mat31, foregroundValue=1)
err produces
imerrCustom produces
In the end, I am still not sure about it, but after having read several more papers, I assume that my interpretation of X as foreground coordinates was an error. It should probably have been the entire image that is iterated over.
As I have stated, I am not sure whether this interpretation is correct either. But I made a new implementation which iterates over the image, and it gives a more plausible result. I am sharing it here, hoping that it might help someone:
%%cython -a
import numpy as np
cimport numpy as cnp

cdef dilation_c(cnp.ndarray[cnp.uint8_t, ndim=2] X,
                cnp.ndarray[cnp.uint8_t, ndim=2] SE):
    """
    X: boolean image
    SE: structuring element matrix
    origin: coordinate of the origin of the structuring element

    This operation checks whether (B *hits* X) = $B \cap X \not = \emptyset$
    as defined in
    Serra (Jean), "Introduction to mathematical morphology",
    Computer Vision, Graphics, and Image Processing,
    vol. 35, no. 3 (September 1986).
    URL: https://linkinghub.elsevier.com/retrieve/pii/0734189X86900022
    doi: 10.1016/0734-189X(86)90002-2
    Consulted 6 August 2020, p. 283-305.

    The algorithm adapts DILDIRECT of
    Najman (Laurent) and Talbot (Hugues),
    Mathematical morphology: from theory to applications,
    2013. ISBN: 9781118600788, p. 329
    to the formula given in
    Jähne (Bernd),
    Digital image processing,
    6th rev. and ext. ed., Berlin; New York,
    2005. TA1637 .J34 2005.
    ISBN: 978-3-540-24035-8.
    """
    cdef cnp.ndarray[cnp.uint8_t, ndim=2] O
    cdef list elst
    cdef int r, c, X_rows, X_cols, SE_rows, SE_cols, se_r, se_c
    cdef cnp.ndarray[cnp.int_t, ndim=1] bp
    cdef list conds
    cdef bint check, b, p, cond
    O = np.zeros_like(X)
    X_rows, X_cols = X.shape[:2]
    SE_rows, SE_cols = SE.shape[:2]
    # a boolean convolution
    for r in range(0, X_rows - SE_rows):
        for c in range(0, X_cols - SE_cols):
            conds = []
            for se_r in range(SE_rows):
                for se_c in range(SE_cols):
                    b = <bint>SE[se_r, se_c]
                    p = <bint>X[se_r + r, se_c + c]
                    conds.append(b and p)
            O[r, c] = <cnp.uint8_t>any(conds)
    return O

def dilation_erosion(
        img: np.ndarray,
        struct_el_mat: np.ndarray,
        foregroundValue: int = 1,
        isErosion: bool = False):
    """
    img: image matrix
    struct_el: NxN mesh grid of the structuring element whose center is the SE's origin;
        the structuring element is encoded as 1
    foregroundValue: value to be considered as foreground in the image
    """
    B = struct_el_mat.astype(np.uint8)
    if isErosion:
        X = np.where(img == foregroundValue, 0, 1).astype(np.uint8)
    else:
        X = np.where(img == foregroundValue, 1, 0).astype(np.uint8)
    nimg = dilation_c(X, B)
    foreground, background = (255, 0) if foregroundValue == 1 else (0, 1)
    if isErosion:
        return np.where(nimg == 1, background, foreground).astype(np.uint8)
    else:
        return np.where(nimg == 1, foreground, background).astype(np.uint8)
    # return nimg
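A possible usage sketch (assuming the imarr and se_mat3 arrays defined in the question, with numpy as np), comparing the result against scipy.ndimage; the two are not expected to match exactly, but this makes the difference easy to inspect:
from scipy import ndimage as nd

err_scipy = nd.binary_erosion(imarr, se_mat3)                    # boolean array
err_custom = dilation_erosion(imarr, se_mat3, foregroundValue=1,
                              isErosion=True)                    # 0/255 array

# Fraction of pixels on which the two erosions agree.
agreement = np.mean((err_custom > 0) == err_scipy)
print(f"agreement: {agreement:.3f}")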