RGB-D video capturing using Kinect v1

I am trying to capture video using a Kinect v1 and am looking for the easiest way to do it. What would be a simple solution? I looked through Stack Overflow and found the following code:
import freenect
import cv2
import numpy as np

def nothing(x):
    pass

kernel = np.ones((5, 5), np.uint8)

def pretty_depth(depth):
    np.clip(depth, 0, 2**10 - 1, depth)
    depth >>= 2
    depth = depth.astype(np.uint8)
    return depth

while 1:
    dst = pretty_depth(freenect.sync_get_depth()[0])  # input from Kinect
    cv2.imshow('Video', dst)
    if cv2.waitKey(1) & 0xFF == ord('b'):
        break
How can I modify the above code so that I get an RGB-D video, or RGB and depth frames with timestamps?

Solved using the following function:
freenect.runloop(depth=display_depth,
                 video=display_rgb,
                 body=body)
I saved the images with cv2.imwrite using the frame argument passed to the display_depth and display_rgb callbacks. In body I just killed the freenect loop.
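For anyone else trying this, here is a minimal sketch of that setup, assuming the libfreenect Python wrapper: each callback receives the device handle, the frame, and a Kinect timestamp. The file-naming scheme and the stop logic below are placeholders of mine, not part of the original solution.

import freenect
import cv2
import numpy as np

def pretty_depth(depth):
    # scale the 11-bit depth map down to 8 bits for saving
    np.clip(depth, 0, 2**10 - 1, depth)
    depth >>= 2
    return depth.astype(np.uint8)

def display_depth(dev, data, timestamp):
    # the Kinect timestamp goes into the file name so depth and RGB frames can be paired later
    cv2.imwrite('depth_%d.png' % timestamp, pretty_depth(data))

def display_rgb(dev, data, timestamp):
    # libfreenect delivers RGB, while OpenCV writes BGR
    cv2.imwrite('rgb_%d.png' % timestamp, cv2.cvtColor(data, cv2.COLOR_RGB2BGR))

def body(dev, ctx):
    # raise freenect.Kill here when you want the capture loop to stop
    pass

freenect.runloop(depth=display_depth, video=display_rgb, body=body)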

Related

Using a webcam with a trained Roboflow model

I'm trying to run a trained Roboflow model using my webcam in Visual Studio Code. The webcam does load up alongside the popup, but it's just a tiny rectangle in the corner and you can't see anything else. If I change "image", image to "image", 1 or something else in the cv2.imshow line, the webcam lights up for a second and then returns this error:
cv2.error: OpenCV(4.5.4) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
Here is my code, as obtained from a GitHub repo Roboflow has:
# load config
import json
with open('roboflow_config.json') as f:
    config = json.load(f)

ROBOFLOW_API_KEY = "********"
ROBOFLOW_MODEL = "penguins-ojf2k"
ROBOFLOW_SIZE = "416"

FRAMERATE = config["FRAMERATE"]
BUFFER = config["BUFFER"]

import asyncio
import cv2
import base64
import numpy as np
import httpx
import time

# Construct the Roboflow Infer URL
# (if running locally replace https://detect.roboflow.com/ with eg http://127.0.0.1:9001/)
upload_url = "".join([
    "https://detect.roboflow.com/",
    ROBOFLOW_MODEL,
    "?api_key=",
    ROBOFLOW_API_KEY,
    "&format=image",  # Change to json if you want the prediction boxes, not the visualization
    "&stroke=5"
])

# Get webcam interface via opencv-python
video = cv2.VideoCapture(0, cv2.CAP_DSHOW)

# Infer via the Roboflow Infer API and return the result
# Takes an httpx.AsyncClient as a parameter
async def infer(requests):
    # Get the current image from the webcam
    ret, img = video.read()

    # Resize (while maintaining the aspect ratio) to improve speed and save bandwidth
    height, width, channels = img.shape
    scale = min(height, width)
    img = cv2.resize(img, (2000, 1500))

    # Encode image to base64 string
    retval, buffer = cv2.imencode('.jpg', img)
    img_str = base64.b64encode(buffer)

    # Get prediction from Roboflow Infer API
    resp = await requests.post(upload_url, data=img_str, headers={
        "Content-Type": "application/x-www-form-urlencoded"
    })

    # Parse result image
    image = np.asarray(bytearray(resp.content), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)

    return image

# Main loop; infers at FRAMERATE frames per second until you press "q"
async def main():
    # Initialize
    last_frame = time.time()

    # Initialize a buffer of images
    futures = []

    async with httpx.AsyncClient() as requests:
        while True:
            # On "q" keypress, exit
            if (cv2.waitKey(1) == ord('q')):
                break

            # Throttle to FRAMERATE fps and print actual frames per second achieved
            elapsed = time.time() - last_frame
            await asyncio.sleep(max(0, 1 / FRAMERATE - elapsed))
            print((1 / (time.time() - last_frame)), " fps")
            last_frame = time.time()

            # Enqueue the inference request and save it to our buffer
            task = asyncio.create_task(infer(requests))
            futures.append(task)

            # Wait until our buffer is big enough before we start displaying results
            if len(futures) < BUFFER * FRAMERATE:
                continue

            # Remove the first image from our buffer
            # wait for it to finish loading (if necessary)
            image = await futures.pop(0)

            # And display the inference results
            img = cv2.imread('img.jpg')
            cv2.imshow('image', image)

# Run our main loop
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(main())

# Release resources when finished
video.release()
cv2.destroyAllWindows()
It looks like you're missing your model's version number so the API is probably returning a 404 error which OpenCV is trying to read as an image.
I found your project on Roboflow Universe based on the ROBOFLOW_MODEL in your code; it looks like you're looking for version 3.
So try changing the line
ROBOFLOW_MODEL = "penguins-ojf2k"
to
ROBOFLOW_MODEL = "penguins-ojf2k/3"
It also looks like your model was trained at 640x640 (not 416x416) so you should change ROBOFLOW_SIZE to 640 as well for best results.
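Since the root cause here was an error response being fed straight into cv2.imdecode, a small defensive helper can make this kind of failure obvious instead of crashing later in cv2.imshow or cv2.cvtColor. This is my own addition, not part of the answer or the Roboflow example, and decode_response is a hypothetical name:

import cv2
import numpy as np

def decode_response(resp):
    # refuse to decode anything that is not a successful image response,
    # so a 404 from a wrong model id surfaces as a readable error
    if resp.status_code != 200:
        raise RuntimeError('Inference API returned %d: %s' % (resp.status_code, resp.text[:200]))
    image = np.asarray(bytearray(resp.content), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    if image is None:
        raise RuntimeError('Response body was not a decodable image')
    return image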

OpenCv_Python - Convert Frame Sequence To a Video

I am a newbie in OpenCV using Python. I am currently working on a project related to OpenCV using the Python language. I have a video data set named "VideoDataSet/dynamicBackground/canoe/input" that stores a sequence of image frames, and I would like to convert the frame sequence at that file path into a video. However, I am getting an error when I execute the program. I have tried various codecs but it still gives me the same error; can any of you please shed some light on what might be wrong? Thank you.
This is my sample code:
import cv2
import numpy as np
import os
import glob as gb

filename = "VideoDataSet/dynamicBackground/canoe/input"
img_path = gb.glob(filename)
videoWriter = cv2.VideoWriter('test.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                              25, (640, 480))

for path in img_path:
    img = cv2.imread(path)
    img = cv2.resize(img, (640, 480))
    videoWriter.write(img)

print("you are success create.")
This is the error:
Error prompt out:cv2.error: OpenCV(3.4.1) D:\Build\OpenCV\opencv-3.4.1\modules\imgproc\src\resize.cpp:4044: error: (-215) ssize.width > 0 && ssize.height > 0 in function cv::resize
(Note: the problem occurs at the line img = cv2.resize(img, (640, 480)).)
It is returning this error because you are trying to re-size the directory entry! You need to put:
filename = "VideoDataSet/dynamicBackground/canoe/input/*"
so that it will match all the files in the folder when you glob it. The error actually says that the source image had either zero width or zero height. Putting
print(img_path)
in after your glob call shows that it was only returning the directory entry itself.
You subsequently discovered that although it was now generating a file, it was corrupted. This is because you are incorrectly specifying the codec. Replace your fourcc parameter with this:
cv2.VideoWriter_fourcc('M','J','P','G')
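Putting the two fixes together, a sketch of the corrected script could look like this; the sorted() call, the None check, and the release() call are additions of mine for robustness, not part of the original answer:

import cv2
import glob as gb

# glob every file inside the folder, not the folder entry itself
filename = "VideoDataSet/dynamicBackground/canoe/input/*"
img_path = sorted(gb.glob(filename))  # keep the frames in filename order

videoWriter = cv2.VideoWriter('test.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              25, (640, 480))

for path in img_path:
    img = cv2.imread(path)
    if img is None:  # skip anything that is not a readable image
        continue
    img = cv2.resize(img, (640, 480))
    videoWriter.write(img)

videoWriter.release()  # finalise the AVI so it plays back correctly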
you can try this:
img_path = gb.glob(filename)
videoWriter = cv2.VideoWriter('frame2video.avi', cv2.VideoWriter_fourcc(*'MJPG'), 25, (640, 480))

for path in img_path:
    img = cv2.imread(path)
    img = cv2.resize(img, (640, 480))
    videoWriter.write(img)

Why does band ReadAsArray just return 0, and how can I fix it?

I am new to GDAL. I am using a Landsat image for classification, but when I run these lines of code the band data just gives me 0, which I don't understand:
raster_dataset = gdal.Open('LC81850552017065LGN00_B3.tif', gdal.GA_ReadOnly)
geo_transform = raster_dataset.GetGeoTransform()
proj = raster_dataset.GetProjectionRef()
bands_data = []
for b in range(1, raster_dataset.RasterCount + 1):
    band = raster_dataset.GetRasterBand(b)
    bands_data.append(band.ReadAsArray())
    bands_data = np.dstack(bands_data)
You are reassigning bands_data from a list to a numpy array, which is causing your trouble. Perform the np.dstack operation after all bands have been read in (after the loop has been completed).
See the numpy docs to better understand how dstack works.
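In code, the fix amounts to moving np.dstack out of the loop, roughly as below; the imports are added here for completeness and assume the osgeo-packaged GDAL bindings:

from osgeo import gdal
import numpy as np

raster_dataset = gdal.Open('LC81850552017065LGN00_B3.tif', gdal.GA_ReadOnly)

bands_data = []
for b in range(1, raster_dataset.RasterCount + 1):
    band = raster_dataset.GetRasterBand(b)
    bands_data.append(band.ReadAsArray())

# stack into a (rows, cols, bands) array only after the loop has finished
bands_data = np.dstack(bands_data)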

Can we visualize the embedding with multiple sprite images in tensorflow?

What I mean is, can I, for example, construct 2 different sprite images and be able to choose one of them while viewing embeddings in 2D/3D space using TSNE/PCA?
In other words, when using the following code:
embedding.sprite.image_path = "Path/to/the/sprite_image.jpg"
Is there a way to add another sprite image?
So, when training a ConvNet to distinguish between MNIST digits, I not only want to view the 1, 2, ..., 9, and 0 in the 2D/3D space; I would also like to see where the 1s gather in that space, and the same for the 2s, 3s, and so on. So I need a unique color for the 1s, another one for the 2s, and so on. I want a view like the one in the image from the original post (source).
Any help is much appreciated!
There is an easier way to do this with filtering: in the projector you can just select the labels using a regex filter.
If this is not what you are looking for, you could create a sprite image that assigns the same plain color image to each of your labels!
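As a rough illustration of that idea, here is a sketch that builds one flat-colour tile per example, keyed by its label, and packs the tiles into a single sprite. The 28x28 tile size, the palette, and the metadata.tsv file name are assumptions of mine, not from the answer.

import numpy as np
from PIL import Image

# one flat RGB colour per class (arbitrary 10-class palette)
palette = [(230, 25, 75), (60, 180, 75), (255, 225, 25), (0, 130, 200),
           (245, 130, 48), (145, 30, 180), (70, 240, 240), (240, 50, 230),
           (128, 128, 128), (0, 0, 0)]

labels = np.loadtxt('metadata.tsv', dtype=int)  # hypothetical labels file, one label per row
tile = 28                                       # thumbnail size, matching MNIST
cols = int(np.ceil(np.sqrt(len(labels))))       # the projector expects a near-square grid
sprite = np.zeros((cols * tile, cols * tile, 3), dtype=np.uint8)

for i, lab in enumerate(labels):
    r, c = divmod(i, cols)
    sprite[r * tile:(r + 1) * tile, c * tile:(c + 1) * tile] = palette[lab % len(palette)]

Image.fromarray(sprite).save('sprite_image.jpg')  # point embedding.sprite.image_path here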
This functionality should come out of the box (without additional sprite images). See 'colour by' in the left sidepanel. You can toggle the A to switch sprite images on and off.
This run was produced with the example on the front page of the tensorboardX projector GitHub repo. https://github.com/lanpa/tensorboardX
You can also see a live demo with MNIST dataset (images and colours) at http://projector.tensorflow.org/
import torch
import torchvision.utils as vutils
import numpy as np
import torchvision.models as models
from torchvision import datasets
from tensorboardX import SummaryWriter

resnet18 = models.resnet18(False)
writer = SummaryWriter()
sample_rate = 44100
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

for n_iter in range(100):
    dummy_s1 = torch.rand(1)
    dummy_s2 = torch.rand(1)
    # data grouping by `slash`
    writer.add_scalar('data/scalar1', dummy_s1[0], n_iter)
    writer.add_scalar('data/scalar2', dummy_s2[0], n_iter)

    writer.add_scalars('data/scalar_group', {'xsinx': n_iter * np.sin(n_iter),
                                             'xcosx': n_iter * np.cos(n_iter),
                                             'arctanx': np.arctan(n_iter)}, n_iter)

    dummy_img = torch.rand(32, 3, 64, 64)  # output from network
    if n_iter % 10 == 0:
        x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)

        dummy_audio = torch.zeros(sample_rate * 2)
        for i in range(x.size(0)):
            # amplitude of sound should be in [-1, 1]
            dummy_audio[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) / float(sample_rate))
        writer.add_audio('myAudio', dummy_audio, n_iter, sample_rate=sample_rate)

        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)

        for name, param in resnet18.named_parameters():
            writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter)

        # needs tensorboard 0.4RC or later
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(100), n_iter)

dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]
features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")
writer.close()
There are some threads mentioning that this currently fails beyond a threshold number of datapoints. https://github.com/lanpa/tensorboardX

Exporting a 3D numpy to a VTK file for viewing in Paraview/Mayavi

For those who want to export a simple 3D numpy array (along with axes) to a .vtk (or .vtr) file for post-processing and display in Paraview or Mayavi, there's a little module called PyEVTK that does exactly that. The module supports structured and unstructured data, etc.
Unfortunately, even though the code works fine on Unix-based systems, I couldn't make it work (it keeps crashing) on any Windows installation, which simply complicates things. I've contacted the developer, but his suggestions did not work.
Therefore my question is:
How can one use vtk.util.numpy_support to export a 3D array (the function itself doesn't support 3D arrays) to a .vtk file? Is there a simple way to do it without creating vtkDatasets, etc.?
Thanks a lot!
It's been forever and I had entirely forgotten asking this question but I ended up figuring it out. I've written a post about it in my blog (PyScience) providing a tutorial on how to convert between NumPy and VTK. Do take a look if interested:
pyscience.wordpress.com/2014/09/06/numpy-to-vtk-converting-your-numpy-arrays-to-vtk-arrays-and-files/
It's not a direct answer to your question, but if you have tvtk (if you have mayavi, you should have it), you can use it to write your data to vtk format. (See: http://code.enthought.com/projects/files/ETS3_API/enthought.tvtk.misc.html )
It doesn't use PyEVTK, and it supports a broad range of data sources (more than just structured and unstructured grids), so it will probably work where other things aren't.
As a quick example (Mayavi's mlab interface can make this much less verbose, especially if you're already using it.):
import numpy as np
from enthought.tvtk.api import tvtk, write_data
data = np.random.random((10,10,10))
grid = tvtk.ImageData(spacing=(10, 5, -10), origin=(100, 350, 200),
                      dimensions=data.shape)
grid.point_data.scalars = np.ravel(data, order='F')
grid.point_data.scalars.name = 'Test Data'
# Writes legacy ".vtk" format if filename ends with "vtk", otherwise
# this will write data using the newer xml-based format.
write_data(grid, 'test.vtk')
And a portion of the output file:
# vtk DataFile Version 3.0
vtk output
ASCII
DATASET STRUCTURED_POINTS
DIMENSIONS 10 10 10
SPACING 10 5 -10
ORIGIN 100 350 200
POINT_DATA 1000
SCALARS Test%20Data double
LOOKUP_TABLE default
0.598189 0.228948 0.346975 0.948916 0.0109774 0.30281 0.643976 0.17398 0.374673
0.295613 0.664072 0.307974 0.802966 0.836823 0.827732 0.895217 0.104437 0.292796
0.604939 0.96141 0.0837524 0.498616 0.608173 0.446545 0.364019 0.222914 0.514992
...
...
TVTK from Mayavi has a beautiful way of writing VTK files. Here is a test example I have written for myself, following @Joe's answer and the tvtk documentation. The advantage it has over evtk is the support for both ASCII and binary output. Hope it will help other people.
from tvtk.api import tvtk, write_data
import numpy as np
#data = np.random.random((3, 3, 3))
#
#i = tvtk.ImageData(spacing=(1, 1, 1), origin=(0, 0, 0))
#i.point_data.scalars = data.ravel()
#i.point_data.scalars.name = 'scalars'
#i.dimensions = data.shape
#
#w = tvtk.XMLImageDataWriter(input=i, file_name='spoints3d.vti')
#w.write()
points = np.array([[0,0,0], [1,0,0], [1,1,0], [0,1,0]], 'f')
(n1, n2) = points.shape
poly_edge = np.array([[0,1,2,3]])
print(n1, n2)
## Scalar Data
#temperature = np.array([10., 20., 30., 40.])
#pressure = np.random.rand(n1)
#
## Vector Data
#velocity = np.random.rand(n1,n2)
#force = np.random.rand(n1,n2)
#
##Tensor Data with
comp = 5
stress = np.random.rand(n1,comp)
#
#print stress.shape
## The TVTK dataset.
mesh = tvtk.PolyData(points=points, polys=poly_edge)
#
## Data 0 # scalar data
#mesh.point_data.scalars = temperature
#mesh.point_data.scalars.name = 'Temperature'
#
## Data 1 # additional scalar data
#mesh.point_data.add_array(pressure)
#mesh.point_data.get_array(1).name = 'Pressure'
#mesh.update()
#
## Data 2 # Vector data
#mesh.point_data.vectors = velocity
#mesh.point_data.vectors.name = 'Velocity'
#mesh.update()
#
## Data 3 additional vector data
#mesh.point_data.add_array( force)
#mesh.point_data.get_array(3).name = 'Force'
#mesh.update()
mesh.point_data.tensors = stress
mesh.point_data.tensors.name = 'Stress'
# Data 4 additional tensor Data
#mesh.point_data.add_array(stress)
#mesh.point_data.get_array(4).name = 'Stress'
#mesh.update()
write_data(mesh, 'polydata.vtk')
# XML format
# Method 1
#write_data(mesh, 'polydata')
# Method 2
#w = tvtk.XMLPolyDataWriter(input=mesh, file_name='polydata.vtk')
#w.write()
I know it is a bit late, and I do love your tutorials @somada141. This should work too.
import vtk

def numpy2VTK(img, spacing=[1.0, 1.0, 1.0]):
    # evolved from code from Stou S.,
    # on http://www.siafoo.net/snippet/314
    # This function, as the name suggests, converts a numpy array to VTK
    importer = vtk.vtkImageImport()

    img_data = img.astype('uint8')
    img_string = img_data.tostring()  # type short
    dim = img.shape

    importer.CopyImportVoidPointer(img_string, len(img_string))
    importer.SetDataScalarType(vtk.VTK_UNSIGNED_CHAR)
    importer.SetNumberOfScalarComponents(1)

    extent = importer.GetDataExtent()
    importer.SetDataExtent(extent[0], extent[0] + dim[2] - 1,
                           extent[2], extent[2] + dim[1] - 1,
                           extent[4], extent[4] + dim[0] - 1)
    importer.SetWholeExtent(extent[0], extent[0] + dim[2] - 1,
                            extent[2], extent[2] + dim[1] - 1,
                            extent[4], extent[4] + dim[0] - 1)

    importer.SetDataSpacing(spacing[0], spacing[1], spacing[2])
    importer.SetDataOrigin(0, 0, 0)

    return importer
Hope it helps!
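To actually end up with a file on disk from that importer, a hypothetical usage sketch (the array shape and output file name are made up, and numpy2VTK is the function defined above) could pipe it into one of VTK's XML writers:

import vtk
import numpy as np

img = (np.random.random((30, 40, 50)) * 255).astype('uint8')  # dummy volume

importer = numpy2VTK(img, spacing=[1.0, 1.0, 1.0])
importer.Update()

writer = vtk.vtkXMLImageDataWriter()
writer.SetFileName('numpy_volume.vti')
writer.SetInputConnection(importer.GetOutputPort())
writer.Write()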
Here's a SimpleITK version with the function load_itk taken from here:
import sys
import SimpleITK as sitk
import numpy as np

if len(sys.argv) < 3:
    print('Wrong number of arguments.', file=sys.stderr)
    print('Usage: ' + __file__ + ' input_sitk_file' + ' output_sitk_file', file=sys.stderr)
    sys.exit(1)

def quick_read(filename):
    # Read image information without reading the bulk data.
    file_reader = sitk.ImageFileReader()
    file_reader.SetFileName(filename)
    file_reader.ReadImageInformation()
    print('image size: {0}\nimage spacing: {1}'.format(file_reader.GetSize(), file_reader.GetSpacing()))
    # Some files have a rich meta-data dictionary (e.g. DICOM)
    for key in file_reader.GetMetaDataKeys():
        print(key + ': ' + file_reader.GetMetaData(key))

def load_itk(filename):
    # Reads the image using SimpleITK
    itkimage = sitk.ReadImage(filename)

    # Convert the image to a numpy array first and then shuffle the dimensions to get axis in the order z, y, x
    data = sitk.GetArrayFromImage(itkimage)

    # Read the origin of the ct_scan, will be used to convert the coordinates from world to voxel and vice versa.
    origin = np.array(list(reversed(itkimage.GetOrigin())))

    # Read the spacing along each dimension
    spacing = np.array(list(reversed(itkimage.GetSpacing())))

    return data, origin, spacing

def convert(data, output_filename):
    image = sitk.GetImageFromArray(data)

    writer = sitk.ImageFileWriter()
    writer.SetFileName(output_filename)
    writer.Execute(image)

def wait():
    print('Press Enter to load & convert or exit using Ctrl+C')
    input()

quick_read(sys.argv[1])
print('-' * 20)
wait()
data, origin, spacing = load_itk(sys.argv[1])
convert(data, sys.argv[2])