How to play a tensorflow `decode_wav()` waveform "without" IPython notebook? - tensorflow

In the TensorFlow simple_audio example, how can I play the waveform without an IPython notebook?
Loading and playing the file separately works:
import pydub,simpleaudio
sndfile = '/home/roland/.keras/datasets/mini_speech_commands/left/b19f7f5f_nohash_0.wav'
#sndfile = filenames[0].numpy().decode()
sound = pydub.AudioSegment.from_wav(sndfile)
#c,w,r = 1,2,16000
c,w,r = sound.channels, sound.sample_width, sound.frame_rate
playback = simpleaudio.play_buffer(sound.raw_data,c,w,r)
But how do I play the waveform that comes from TensorFlow?

By trial and error I found this to work:
import tensorflow as tf
from IPython import display
audio_binary = tf.io.read_file(sndfile)
audio, _ = tf.audio.decode_wav(audio_binary)
waveform = tf.squeeze(audio, axis=-1)
da = display.Audio(waveform,rate=r) #16000)
simpleaudio.play_buffer(da.data,c,w,r) # YES
#simpleaudio.play_buffer(audio,c,w,r) # NO
#simpleaudio.play_buffer(waveform,c,w,r) # NO
#type(da.data) # bytes
#audio.numpy().dtype # float32
#waveform.numpy().dtype # float32
Then I found this conversion from the simpleaudio tutorial to work, too:
import numpy as np
#audionp = audio.numpy()
audionp = waveform.numpy()
audionp *= 32767 / np.max(np.abs(audionp))  # scale float32 [-1, 1] into the int16 range
audionp = audionp.astype(np.int16)
play_obj = simpleaudio.play_buffer(audionp, c, w, r)  # 1, 2, 16000
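Putting those pieces together, here is a minimal sketch of a helper (my own summary, not from the thread; the name play_waveform is made up) that decodes a WAV with TensorFlow and plays it with simpleaudio, assuming mono audio such as the speech-commands files:
import numpy as np
import simpleaudio
import tensorflow as tf
def play_waveform(path):
    # Decode to a float32 tensor in [-1.0, 1.0] and drop the channel axis (mono assumed)
    audio_binary = tf.io.read_file(path)
    audio, sample_rate = tf.audio.decode_wav(audio_binary)
    waveform = tf.squeeze(audio, axis=-1).numpy()
    # Rescale to int16, which is what simpleaudio expects for a sample width of 2
    pcm = (waveform * 32767 / np.max(np.abs(waveform))).astype(np.int16)
    return simpleaudio.play_buffer(pcm, 1, 2, int(sample_rate))
play_obj = play_waveform(sndfile)
play_obj.wait_done()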

Related

Using a webcam with a trained Roboflow model

I'm trying to run a trained Roboflow model using my webcam in Visual Studio Code. The webcam does load up alongside the popup, but it's just a tiny rectangle in the corner and you can't see anything else. If I change "image", image to "image", 1 or something else in the cv2.imshow line, the webcam lights up for a second and returns the error code:
cv2.error: OpenCV(4.5.4) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
Here is my code, as obtained from a GitHub repo that Roboflow provides:
# load config
import json
with open('roboflow_config.json') as f:
    config = json.load(f)

ROBOFLOW_API_KEY = "********"
ROBOFLOW_MODEL = "penguins-ojf2k"
ROBOFLOW_SIZE = "416"
FRAMERATE = config["FRAMERATE"]
BUFFER = config["BUFFER"]

import asyncio
import cv2
import base64
import numpy as np
import httpx
import time

# Construct the Roboflow Infer URL
# (if running locally replace https://detect.roboflow.com/ with eg http://127.0.0.1:9001/)
upload_url = "".join([
    "https://detect.roboflow.com/",
    ROBOFLOW_MODEL,
    "?api_key=",
    ROBOFLOW_API_KEY,
    "&format=image",  # Change to json if you want the prediction boxes, not the visualization
    "&stroke=5"
])

# Get webcam interface via opencv-python
video = cv2.VideoCapture(0, cv2.CAP_DSHOW)

# Infer via the Roboflow Infer API and return the result
# Takes an httpx.AsyncClient as a parameter
async def infer(requests):
    # Get the current image from the webcam
    ret, img = video.read()
    # Resize (while maintaining the aspect ratio) to improve speed and save bandwidth
    height, width, channels = img.shape
    scale = min(height, width)
    img = cv2.resize(img, (2000, 1500))
    # Encode image to base64 string
    retval, buffer = cv2.imencode('.jpg', img)
    img_str = base64.b64encode(buffer)
    # Get prediction from Roboflow Infer API
    resp = await requests.post(upload_url, data=img_str, headers={
        "Content-Type": "application/x-www-form-urlencoded"
    })
    # Parse result image
    image = np.asarray(bytearray(resp.content), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    return image

# Main loop; infers at FRAMERATE frames per second until you press "q"
async def main():
    # Initialize
    last_frame = time.time()
    # Initialize a buffer of images
    futures = []
    async with httpx.AsyncClient() as requests:
        while True:
            # On "q" keypress, exit
            if cv2.waitKey(1) == ord('q'):
                break
            # Throttle to FRAMERATE fps and print actual frames per second achieved
            elapsed = time.time() - last_frame
            await asyncio.sleep(max(0, 1/FRAMERATE - elapsed))
            print((1/(time.time()-last_frame)), " fps")
            last_frame = time.time()
            # Enqueue the inference request and save it to our buffer
            task = asyncio.create_task(infer(requests))
            futures.append(task)
            # Wait until our buffer is big enough before we start displaying results
            if len(futures) < BUFFER * FRAMERATE:
                continue
            # Remove the first image from our buffer
            # wait for it to finish loading (if necessary)
            image = await futures.pop(0)
            # And display the inference results
            img = cv2.imread('img.jpg')
            cv2.imshow('image', image)

# Run our main loop
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(main())

# Release resources when finished
video.release()
cv2.destroyAllWindows()
It looks like you're missing your model's version number so the API is probably returning a 404 error which OpenCV is trying to read as an image.
I found your project on Roboflow Universe based on the ROBOFLOW_MODEL in your code; it looks like you're looking for version 3.
So try changing the line
ROBOFLOW_MODEL = "penguins-ojf2k"
to
ROBOFLOW_MODEL = "penguins-ojf2k/3"
It also looks like your model was trained at 640x640 (not 416x416) so you should change ROBOFLOW_SIZE to 640 as well for best results.
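With those two edits, the hard-coded configuration at the top of the script would look like this (the API key is still redacted, as in the question):
ROBOFLOW_API_KEY = "********"
ROBOFLOW_MODEL = "penguins-ojf2k/3"  # model id plus the version number
ROBOFLOW_SIZE = "640"                # match the model's 640x640 training resolution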

How to extract skeleton only without video in mediapipe?

import cv2
import mediapipe as mp
import numpy as np
import sys

mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
mp_drawing_styles = mp.solutions.drawing_styles

#min_Tracking_confidence = 1 for higher accuracy
pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=1)

#Import Video and Set codec
cap = cv2.VideoCapture(sys.argv[1])
# print("cap :", cap.shape)
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
if cap.isOpened() == False:
    print("Error opening video stream or file")
    raise TypeError

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
outdir, inputflnm = sys.argv[1][:sys.argv[1].rfind(
    '/')+1], sys.argv[1][sys.argv[1].rfind('/')+1:]
inflnm, inflext = inputflnm.split('.')
out_filename = f'{outdir}{inflnm}_annotated.{inflext}'
# out = cv2.VideoWriter(out_filename, cv2.VideoWriter_fourcc(
#     'M', 'J', 'P', 'G'), 10, (frame_width, frame_height))
out = cv2.VideoWriter(out_filename, fourcc, 30, (frame_width, frame_height))

while cap.isOpened():
    ret, image = cap.read()
    if not ret:
        break
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = pose.process(image) #core
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # Render detections
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
    out.write(image)
    mp_drawing.plot_landmarks(
        results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)

pose.close()
cap.release()
out.release()
Hello,
I would like to extract the skeleton without the skeleton+video. I changed the Mediapipe example code to take an input video instead of the original image, and that worked: the result was the video with the skeleton drawn on top. But I want to see only the skeleton, without the video. I tried to remove the video but I could not. I would appreciate any help!
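One common way to get only the skeleton (a minimal sketch, not an answer from the original thread) is to draw the landmarks onto a blank black frame of the same size as the video and write that to the output instead of the camera image:
# Inside the while-loop above, after results = pose.process(image):
blank = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)  # black canvas, same size as the frame
mp_drawing.draw_landmarks(
    blank, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
out.write(blank)  # the written video now shows only the skeleton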

OOM on GPU with tensorflow while making Mandelbrot

Trying to get a beautiful Mandelbrot picture; the code works great at 16K resolution, but I can't get it to render a 32K image.
I tried lowering cycles to 50, but it made no difference.
Specs: i9 10900K & RTX 3090 (24 GB).
I get an OOM message saying:
W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory
trying to allocate 7.91GiB (rounded to 8493465600) requested by op Mul
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation.
Here is my code:
import tensorflow as tf
import numpy as np
import PIL.Image
from io import BytesIO
from IPython.display import Image, display

def render(a):
    a_cyclic = (a*0.3).reshape(list(a.shape)+[1])
    img = np.concatenate([10+20*np.cos(a_cyclic),
                          30+50*np.sin(a_cyclic),
                          155-80*np.cos(a_cyclic)], 2)
    img[a==a.max()] = 0
    a = img
    a = np.uint8(np.clip(a, 0, 255))
    f = BytesIO()
    return PIL.Image.fromarray(a)

##tf.function
def mandelbrot_helper(grid_c, current_values, counts, cycles):
    for i in range(cycles):
        temp = current_values*current_values + grid_c
        not_diverged = tf.abs(temp) < 4
        current_values.assign(temp),
        counts.assign_add(tf.cast(not_diverged, tf.float64))

def mandelbrot(render_size, center, zoom, cycles):
    f = zoom/render_size[0]
    real_start = center[0]-(render_size[0]/2)*f
    real_end = real_start + render_size[0]*f
    imag_start = center[1]-(render_size[1]/2)*f
    imag_end = imag_start + render_size[1]*f
    real_range = tf.range(real_start, real_end, f, dtype=tf.float64)
    imag_range = tf.range(imag_start, imag_end, f, dtype=tf.float64)
    real, imag = tf.meshgrid(real_range, imag_range)
    grid_c = tf.constant(tf.complex(real, imag))
    current_values = tf.Variable(grid_c)
    counts = tf.Variable(tf.zeros_like(grid_c, tf.float64))
    mandelbrot_helper(grid_c, current_values, counts, cycles)
    return counts.numpy()

counts = mandelbrot(
    render_size=(30720,17280), # 32K
    #render_size=(15360,8640), # 16K
    #render_size=(7680,4320), # 8K
    #render_size=(3840,2160), # 4K
    #render_size=(1920,1080), # HD
    center=(-0.5,0),
    zoom=4,
    cycles=200
)
img = render(counts)
print(img.size)
img
#img.save("E:/Python/Python3/TestingSO/Images/test.png")
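For scale: the 30720x17280 grid has 530,841,600 points, and a single complex128 tensor of that shape takes 530,841,600 * 16 bytes = 8,493,465,600 bytes, roughly 7.91 GiB, which is exactly the allocation in the error message; the loop keeps several tensors of that size alive at once (grid_c, current_values, the temp product, plus the float64 counts), so even 24 GB is not enough. One possible workaround, sketched below as my own suggestion rather than an answer from the thread, is to render the image in horizontal strips with the existing mandelbrot() function and stitch the counts together on the CPU (strip heights can be off by one row because of tf.range rounding):
def mandelbrot_tiled(render_size, center, zoom, cycles, n_strips=8):
    # Render n_strips horizontal bands so only one band's worth of GPU tensors exists at a time
    width, height = render_size
    strip_h = height // n_strips
    f = zoom / width  # same pixel scale that mandelbrot() uses
    strips = []
    for s in range(n_strips):
        # Imaginary-axis center of band s, counted from the bottom of the image
        offset = (s - (n_strips - 1) / 2) * strip_h * f
        strips.append(mandelbrot((width, strip_h),
                                 (center[0], center[1] + offset),
                                 zoom, cycles))
    return np.concatenate(strips, axis=0)
counts = mandelbrot_tiled(render_size=(30720, 17280), center=(-0.5, 0), zoom=4, cycles=200)
img = render(counts)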

When reconnecting to Colaboratory, my computer crashes

I tried to run my deep learning code on Colaboratory, but every time it reaches 26% progress it automatically disconnects, and my computer freezes when I reconnect to the server. My training loop is shown below:
from tqdm import tqdm_notebook
import matplotlib
from PIL import Image

dev_gen_costs = []
config = tf.ConfigProto(device_count={'gpu':0})
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config = config)
sess.run(tf.global_variables_initializer())
for iteration in tqdm_notebook(range(ITERS), desc='generate and discriminate'):
    time.sleep(0.01)
    start_time = time.time()
    num = iteration%(train_vis.shape[0]//BATCH_SIZE)
    data_IR = train_ir[num*BATCH_SIZE:(num+1)*BATCH_SIZE,:,:]
    data_VI = train_vis[num*BATCH_SIZE:(num+1)*BATCH_SIZE,:,:]
    data_IR = np.expand_dims(data_IR, axis=-1)
    data_VI = np.expand_dims(data_VI, axis=-1)
    feed_dict = {real_data1_int: data_IR, real_data2_int: data_VI}
    # Train generator
    _ = sess.run([gen_train_op], feed_dict)
    # Train critic
    for i in range(CRITIC_ITERS):
        _costs, _ = sess.run([[disc_cost, gen_cost], disc_train_op], feed_dict)
        dev_gen_costs.append(_costs)  # loss
The crash is due to some errors in the code you are running.

Is there a way to get tensorflow tf.Print output to appear in Jupyter Notebook output

I'm using the tf.Print op in a Jupyter notebook. It works as required, but will only print the output to the console, without printing in the notebook. Is there any way to get around this?
An example would be the following (in a notebook):
import tensorflow as tf
a = tf.constant(1.0)
a = tf.Print(a, [a], 'hi')
sess = tf.Session()
a.eval(session=sess)
That code will print 'hi[1]' in the console, but nothing in the notebook.
Update Feb 3, 2017
I've wrapped this into the memory_util package. Example usage:
# install memory util
import urllib.request
response = urllib.request.urlopen("https://raw.githubusercontent.com/yaroslavvb/memory_util/master/memory_util.py")
open("memory_util.py", "wb").write(response.read())
import memory_util
sess = tf.Session()
a = tf.random_uniform((1000,))
b = tf.random_uniform((1000,))
c = a + b
with memory_util.capture_stderr() as stderr:
    sess.run(c.op)
print(stderr.getvalue())
Old stuff:
You could reuse FD redirector from IPython core. (idea from Mark Sandler)
import os
import sys

STDOUT = 1
STDERR = 2

class FDRedirector(object):
    """ Class to redirect output (stdout or stderr) at the OS level using
        file descriptors.
    """

    def __init__(self, fd=STDOUT):
        """ fd is the file descriptor of the output you want to capture.
            It can be STDOUT or STDERR.
        """
        self.fd = fd
        self.started = False
        self.piper = None
        self.pipew = None

    def start(self):
        """ Setup the redirection.
        """
        if not self.started:
            self.oldhandle = os.dup(self.fd)
            self.piper, self.pipew = os.pipe()
            os.dup2(self.pipew, self.fd)
            os.close(self.pipew)
            self.started = True

    def flush(self):
        """ Flush the captured output, similar to the flush method of any
            stream.
        """
        if self.fd == STDOUT:
            sys.stdout.flush()
        elif self.fd == STDERR:
            sys.stderr.flush()

    def stop(self):
        """ Unset the redirection and return the captured output.
        """
        if self.started:
            self.flush()
            os.dup2(self.oldhandle, self.fd)
            os.close(self.oldhandle)
            f = os.fdopen(self.piper, 'r')
            output = f.read()
            f.close()
            self.started = False
            return output
        else:
            return ''

    def getvalue(self):
        """ Return the output captured since the last getvalue, or the
            start of the redirection.
        """
        output = self.stop()
        self.start()
        return output

import tensorflow as tf
x = tf.constant([1, 2, 3])
a = tf.Print(x, [x])
redirect = FDRedirector(STDERR)
sess = tf.InteractiveSession()
redirect.start()
a.eval()
print("Result")
print(redirect.stop())
I ran into the same problem and got around it by using a function like this in my notebooks:
import numpy as np
import tensorflow as tf

def tf_print(tensor, transform=None):

    # Insert a custom python operation into the graph that does nothing but print a tensor's value
    def print_tensor(x):
        # x is typically a numpy array here so you could do anything you want with it,
        # but adding a transformation of some kind usually makes the output more digestible
        print(x if transform is None else transform(x))
        return x

    log_op = tf.py_func(print_tensor, [tensor], [tensor.dtype])[0]
    with tf.control_dependencies([log_op]):
        res = tf.identity(tensor)

    # Return the given tensor
    return res

# Now define a tensor and use the tf_print function much like the tf.identity function
tensor = tf_print(tf.random_normal([100, 100]), transform=lambda x: [np.min(x), np.max(x)])

# This will print the transformed version of the tensor's actual value
# (which was summarized to just the min and max for brevity)
sess = tf.InteractiveSession()
sess.run([tensor])
sess.close()
FYI, using a logger instead of calling "print" in my custom function worked wonders for me, as stdout is often buffered by Jupyter and not shown before "Loss is NaN" kinds of errors -- which was the whole point of using that function in the first place in my case.
You can check the terminal where you launched the Jupyter notebook to see the message:
import tensorflow as tf
tf.InteractiveSession()
a = tf.constant(1)
b = tf.constant(2)
opt = a + b
opt = tf.Print(opt, [opt], message="1 + 2 = ")
opt.eval()
In the terminal, I can see:
2018-01-02 23:38:07.691808: I tensorflow/core/kernels/logging_ops.cc:79] 1 + 2 = [3]
A simple way; I tried it in regular Python, but not in Jupyter yet:
import os, sys
os.dup2(sys.stdout.fileno(), 1)
os.dup2(sys.stdout.fileno(), 2)
Explanation is here: In python, how to capture the stdout from a c++ shared library to a variable
The issue that I faced was that one can't run a session inside a TensorFlow graph, as in training or evaluation.
That's why the options of using sess.run(opt) or opt.eval() were not a solution for me.
The best thing was to use tf.Print() and redirect the logging to an external file.
I did this using a temporary file, which I transferred to a regular file like this:
STDERR = 2
import os
import sys
import tempfile

class captured:
    def __init__(self, fd=STDERR):
        self.fd = fd
        self.prevfd = None

    def __enter__(self):
        t = tempfile.NamedTemporaryFile()
        self.prevfd = os.dup(self.fd)
        os.dup2(t.fileno(), self.fd)
        return t

    def __exit__(self, exc_type, exc_value, traceback):
        os.dup2(self.prevfd, self.fd)

with captured(fd=STDERR) as tmp:
    ...
    classifier.evaluate(input_fn=input_fn, steps=100)

with open('log.txt', 'w') as f:
    print(open(tmp.name).read(), file=f)
And then in my evaluation I do:
a = tf.constant(1)
a = tf.Print(a, [a], message="a: ")