How to write NALs produced by x264_encoder_encode() using ffmpeg av_interleaved_write_frame() - api

I have been trying to produce a "flv" video file in the following sequence:
av_register_all();
// Open video file
if (avformat_open_input(&pFormatCtx, "6.mp4", NULL, NULL) != 0)
return -1; // Couldn't open file
// Retrieve stream information
if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
return -1; // Couldn't find stream information
// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, "input_file.mp4", 0);
// Find the first video stream
videoStream = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
videoStream = i;
break;
}
if (videoStream == -1)
return -1; // Didn't find a video stream
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Find the decoder for the video stream
pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL) {
fprintf(stderr, "Unsupported codec!\n");
return -1; // Codec not found
}
// Open codec
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
return -1; // Could not open codec
// Allocate video frame
pFrame = avcodec_alloc_frame();
// Allocate video frame
pFrame = avcodec_alloc_frame();
// Allocate an AVFrame structure
pFrameYUV420 = avcodec_alloc_frame();
if (pFrameYUV420 == NULL)
return -1;
// Determine required buffer size and allocate buffer
numBytes = avpicture_get_size(pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
buffer = (uint8_t *) av_malloc(numBytes * sizeof(uint8_t));
// Assign appropriate parts of buffer to image planes in pFrameYUV420
// Note that pFrameYUV420 is an AVFrame, but AVFrame is a superset of AVPicture
avpicture_fill((AVPicture *) pFrameRGB, buffer, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
// Setup scaler
img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt, SWS_BILINEAR, 0, 0, 0);
if (img_convert_ctx == NULL) {
fprintf(stderr, "Cannot initialize the conversion context!\n");
exit(1);
}
// Setup encoder/muxing now
filename = "output_file.flv";
fmt = av_guess_format("flv", filename, NULL);
if (fmt == NULL) {
printf("Could not guess format.\n");
return -1;
}
/* allocate the output media context */
oc = avformat_alloc_context();
if (oc == NULL) {
printf("could not allocate context.\n");
return -1;
}
oc->oformat = fmt;
snprintf(oc->filename, sizeof(oc->filename), "%s", filename);
video_st = NULL;
if (fmt->video_codec != AV_CODEC_ID_NONE) {
video_st = add_stream(oc, &video_codec, fmt->video_codec);
}
// Let's see some information about our format
av_dump_format(oc, 0, filename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename, av_err2str(ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n", av_err2str(ret));
return 1;
}
// Setup x264 params
x264_param_t param;
x264_param_default_preset(&param, "veryfast", "zerolatency");
param.i_threads = 1;
param.i_width = video_st->codec->width;
param.i_height = video_st->codec->height;
param.i_fps_num = STREAM_FRAME_RATE; // 30 fps, same as video
param.i_fps_den = 1;
// Intra refres:
param.i_keyint_max = STREAM_FRAME_RATE;
param.b_intra_refresh = 1;
// Rate control:
param.rc.i_rc_method = X264_RC_CRF;
param.rc.f_rf_constant = 25;
param.rc.f_rf_constant_max = 35;
// For streaming:
param.b_repeat_headers = 1;
param.b_annexb = 1;
x264_param_apply_profile(&param, "baseline");
x264_t* encoder = x264_encoder_open(&param);
x264_picture_t pic_in, pic_out;
x264_picture_alloc(&pic_in, X264_CSP_I420, video_st->codec->width, video_st->codec->height);
x264_nal_t* nals;
int i_nals;
// The loop:
// 1. Read frames
// 2. Decode the frame
// 3. Attempt to re-encode using x264
// 4. Write the x264 encoded frame using av_interleaved_write_frame
while (av_read_frame(pFormatCtx, &packet) >= 0) {
// Is this a packet from the video stream?
if (packet.stream_index == videoStream) {
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
// Did we get a video frame?
if (frameFinished) {
sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0, pCodecCtx->height, pic_in.img.plane, pic_in.img.i_stride);
int frame_size = x264_encoder_encode(encoder, &nals, &i_nals, &pic_in, &pic_out);
if (frame_size >= 0) {
if (i_nals < 0)
printf("invalid frame size: %d\n", i_nals);
// write out NALs
for (i = 0; i < i_nals; i++) {
// initalize a packet
AVPacket p;
av_init_packet(&p);
p.data = nals[i].p_payload;
p.size = nals[i].i_payload;
p.stream_index = video_st->index;
p.flags = AV_PKT_FLAG_KEY;
p.pts = AV_NOPTS_VALUE;
p.dts = AV_NOPTS_VALUE;
ret = av_interleaved_write_frame(oc, &p);
}
}
printf("encoded frame #%d\n", frame_count);
frame_count++;
}
}
// Free the packet that was allocated by av_read_frame
av_free_packet(&packet);
}
// Now we free up resources used/close codecs, and finally close our program.
Here is the implementation for the add_stream() function:
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id) {
AVCodecContext *c;
AVStream *st;
int r;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
st = avformat_new_stream(oc, *codec);
if (!st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
st->id = oc->nb_streams - 1;
c = st->codec;
switch ((*codec)->type) {
case AVMEDIA_TYPE_AUDIO:
st->id = 1;
c->sample_fmt = AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
c->channels = 2;
break;
case AVMEDIA_TYPE_VIDEO:
avcodec_get_context_defaults3(c, *codec);
c->codec_id = codec_id;
c->bit_rate = 500*1000;
//c->rc_min_rate = 500*1000;
//c->rc_max_rate = 500*1000;
//c->rc_buffer_size = 500*1000;
/* Resolution must be a multiple of two. */
c->width = 1280;
c->height = 720;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= CODEC_FLAG_GLOBAL_HEADER;
return st;
}
After the encoding is complete, I check the output file output_file.flv. I notice it's size is very large: 101MB and it does not play. If I use ffmpeg to decode/encode the input file, then I get an output file about 83MB in size (which is about the same size as the original .mp4 file used as input). Also, the 83MB output from just using ffmpeg C api, as opposed to using x264 for the encoding step, plays just fine. Does anyone know where I am going wrong? I have tried researching this for a few days now but with no luck :(. I feel that I am close to making it work, however, I just cannot figure out what I am doing wrong. Thank you!

To produce the correct AVPacket, you should write all nals into the same packet, as it is done in http://ffmpeg.org/doxygen/trunk/libx264_8c_source.html (see encode_nals and X264_frame functions)

Related

Merge multi channel audio buffers into one CMSampleBuffer

I am using FFmpeg to access an RTSP stream in my macOS app.
REACHED GOALS: I have created a tone generator which creates single channel audio and returns a CMSampleBuffer. The tone generator is used to test my audio pipeline when the video's fps and audio sample rates are changed.
GOAL: The goal is to merge multi-channel audio buffers into a single CMSampleBuffer.
Audio data lifecyle:
AVCodecContext* audioContext = self.rtspStreamProvider.audioCodecContext;
if (!audioContext) { return; }
// Getting audio settings from FFmpegs audio context (AVCodecContext).
int samplesPerChannel = audioContext->frame_size;
int frameNumber = audioContext->frame_number;
int sampleRate = audioContext->sample_rate;
int fps = [self.rtspStreamProvider fps];
int calculatedSampleRate = sampleRate / fps;
// NSLog(#"\nSamples per channel = %i, frames = %i.\nSample rate = %i, fps = %i.\ncalculatedSampleRate = %i.", samplesPerChannel, frameNumber, sampleRate, fps, calculatedSampleRate);
// Decoding the audio data from a encoded AVPacket into a AVFrame.
AVFrame* audioFrame = [self.rtspStreamProvider readDecodedAudioFrame];
if (!audioFrame) { return; }
// Extracting my audio buffers from FFmpegs AVFrame.
uint8_t* leftChannelAudioBufRef = audioFrame->data[0];
uint8_t* rightChannelAudioBufRef = audioFrame->data[1];
// Creating the CMSampleBuffer with audio data.
CMSampleBufferRef leftSampleBuffer = [CMSampleBufferFactory createAudioSampleBufferUsingData:leftChannelAudioBufRef channelCount:1 framesCount:samplesPerChannel sampleRate:sampleRate];
// CMSampleBufferRef rightSampleBuffer = [CMSampleBufferFactory createAudioSampleBufferUsingData:packet->data[1] channelCount:1 framesCount:samplesPerChannel sampleRate:sampleRate];
if (!leftSampleBuffer) { return; }
if (!self.audioQueue) { return; }
if (!self.audioDelegates) { return; }
// All audio consumers will receive audio samples via delegation.
dispatch_sync(self.audioQueue, ^{
NSHashTable *audioDelegates = self.audioDelegates;
for (id<AudioDataProviderDelegate> audioDelegate in audioDelegates)
{
[audioDelegate provider:self didOutputAudioSampleBuffer:leftSampleBuffer];
// [audioDelegate provider:self didOutputAudioSampleBuffer:rightSampleBuffer];
}
});
CMSampleBuffer containing audio data creation:
import Foundation
import CoreMedia
#objc class CMSampleBufferFactory: NSObject
{
#objc static func createAudioSampleBufferUsing(data: UnsafeMutablePointer<UInt8> ,
channelCount: UInt32,
framesCount: CMItemCount,
sampleRate: Double) -> CMSampleBuffer? {
/* Prepare for sample Buffer creation */
var sampleBuffer: CMSampleBuffer! = nil
var osStatus: OSStatus = -1
var audioFormatDescription: CMFormatDescription! = nil
var absd: AudioStreamBasicDescription! = nil
let sampleDuration = CMTimeMake(value: 1, timescale: Int32(sampleRate))
let presentationTimeStamp = CMTimeMake(value: 0, timescale: Int32(sampleRate))
// NOTE: Change bytesPerFrame if you change the block buffer value types. Currently we are using double.
let bytesPerFrame: UInt32 = UInt32(MemoryLayout<Float32>.size) * channelCount
let memoryBlockByteLength = framesCount * Int(bytesPerFrame)
// var acl = AudioChannelLayout()
// acl.mChannelLayoutTag = kAudioChannelLayoutTag_Stereo
/* Sample Buffer Block buffer creation */
var blockBuffer: CMBlockBuffer?
osStatus = CMBlockBufferCreateWithMemoryBlock(
allocator: kCFAllocatorDefault,
memoryBlock: nil,
blockLength: memoryBlockByteLength,
blockAllocator: nil,
customBlockSource: nil,
offsetToData: 0,
dataLength: memoryBlockByteLength,
flags: 0,
blockBufferOut: &blockBuffer
)
assert(osStatus == kCMBlockBufferNoErr)
guard let eBlock = blockBuffer else { return nil }
osStatus = CMBlockBufferFillDataBytes(with: 0, blockBuffer: eBlock, offsetIntoDestination: 0, dataLength: memoryBlockByteLength)
assert(osStatus == kCMBlockBufferNoErr)
TVBlockBufferHelper.fillAudioBlockBuffer(blockBuffer,
audioData: data,
frames: Int32(framesCount))
/* Audio description creations */
absd = AudioStreamBasicDescription(
mSampleRate: sampleRate,
mFormatID: kAudioFormatLinearPCM,
mFormatFlags: kLinearPCMFormatFlagIsPacked | kLinearPCMFormatFlagIsFloat,
mBytesPerPacket: bytesPerFrame,
mFramesPerPacket: 1,
mBytesPerFrame: bytesPerFrame,
mChannelsPerFrame: channelCount,
mBitsPerChannel: 32,
mReserved: 0
)
guard absd != nil else {
print("\nCreating AudioStreamBasicDescription Failed.")
return nil
}
osStatus = CMAudioFormatDescriptionCreate(allocator: kCFAllocatorDefault,
asbd: &absd,
layoutSize: 0,
layout: nil,
// layoutSize: MemoryLayout<AudioChannelLayout>.size,
// layout: &acl,
magicCookieSize: 0,
magicCookie: nil,
extensions: nil,
formatDescriptionOut: &audioFormatDescription)
guard osStatus == noErr else {
print("\nCreating CMFormatDescription Failed.")
return nil
}
/* Create sample Buffer */
var timmingInfo = CMSampleTimingInfo(duration: sampleDuration, presentationTimeStamp: presentationTimeStamp, decodeTimeStamp: .invalid)
osStatus = CMSampleBufferCreate(allocator: kCFAllocatorDefault,
dataBuffer: eBlock,
dataReady: true,
makeDataReadyCallback: nil,
refcon: nil,
formatDescription: audioFormatDescription,
sampleCount: framesCount,
sampleTimingEntryCount: 1,
sampleTimingArray: &timmingInfo,
sampleSizeEntryCount: 0, // Must be 0, 1, or numSamples.
sampleSizeArray: nil, // Pointer ot Int. Don't know the size. Don't know if its bytes or bits?
sampleBufferOut: &sampleBuffer)
return sampleBuffer
}
}
CMSampleBuffer gets filled with raw audio data from FFmpeg's data:
#import Foundation;
#import CoreMedia;
#interface BlockBufferHelper : NSObject
+(void)fillAudioBlockBuffer:(CMBlockBufferRef)blockBuffer
audioData:(uint8_t *)data
frames:(int)framesCount;
#end
#import "TVBlockBufferHelper.h"
#implementation BlockBufferHelper
+(void)fillAudioBlockBuffer:(CMBlockBufferRef)blockBuffer
audioData:(uint8_t *)data
frames:(int)framesCount
{
// Possibly dev error.
if (framesCount == 0) {
NSAssert(false, #"\nfillAudioBlockBuffer/audioData/frames will not be able to fill an blockBuffer which has no frames.");
return;
}
char *rawBuffer = NULL;
size_t size = 0;
OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, &size, NULL, &rawBuffer);
if(status != noErr)
{
return;
}
memcpy(rawBuffer, data, framesCount);
}
#end
The LEARNING Core Audio book from Chris Adamson/Kevin Avila points me toward a multi channel mixer.
The multi channel mixer should have 2-n inputs and 1 output. I assume the output could be a buffer or something that could be put into a CMSampleBuffer for further consumption.
This direction should lead me to AudioUnits, AUGraph and the AudioToolbox. I don't understand all of these classes and how they work together. I have found some code snippets on SO which could help me but most of them use AudioToolBox classes and don't use CMSampleBuffers as much as I need.
Is there another way to merge audio buffers into a new one?
Is creating a multi channel mixer using AudioToolBox the right direction?

encode .wav file using ffmpeg in objective c or c

I have to encode .wav file and write it into same file,or other file using
ffmpeg library,here is my code for encoding
-(void)audioencode:(const char *)fileName
{
AVFrame *frame;
AVPacket pkt;
int i, j, k, ret, got_output;
int buffer_size;
FILE *f;
uint16_t *samples;
const char *format_name = "wav",
const char *file_url = "/Users/xxxx/Downloads/simple-drum-beat.wav";
avcodec_register_all();
av_register_all();
AVOutputFormat *format = NULL;
for (AVOutputFormat *formatIter = av_oformat_next(NULL); formatIter != NULL; formatIter = av_oformat_next(formatIter)
{
int hasEncoder = NULL != avcodec_find_encoder(formatIter->audio_codec);
if (0 == strcmp(format_name, formatIter->name)) {
format = formatIter;
break;
}
}
AVCodec *codec = avcodec_find_encoder(format->audio_codec);
NSLog(#"tet test tststs");
AVCodecContext *c;
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate audio codec context\n");
exit(1);
}
c->sample_fmt = AV_SAMPLE_FMT_S16;
if (!check_sample_fmt(codec, c->sample_fmt)) {
fprintf(stderr, "Encoder does not support sample format %s",
av_get_sample_fmt_name(c->sample_fmt));
exit(1);
}
c->bit_rate = 64000;//705600;
c->sample_rate = select_sample_rate(codec);
c->channel_layout = select_channel_layout(codec);
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->frame_size = av_get_audio_frame_duration(c, 16);
int bits_per_sample = av_get_bits_per_sample(c->codec_id);
int frameSize = av_get_audio_frame_duration(c,16);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(fileName, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", fileName);
exit(1);
}
/* frame containing input raw audio */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
frame->nb_samples = frameSize/*c->frame_size*/;
frame->format = c->sample_fmt;
frame->channel_layout = c->channel_layout;
buffer_size = av_samples_get_buffer_size(NULL, c->channels,frameSize /*c->frame_size*/,
c->sample_fmt, 0);
samples = av_malloc(buffer_size);
if (!samples) {
fprintf(stderr, "Could not allocate %d bytes for samples buffer\n",
buffer_size);
exit(1);
}
/* setup the data pointers in the AVFrame */
ret = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
(const uint8_t*)samples, buffer_size, 0);
if (ret < 0) {
fprintf(stderr, "Could not setup audio frame\n");
exit(1);
}
float t, tincr;
/* encode a single tone sound */
t = 0;
tincr = 2 * M_PI * 440.0 / c->sample_rate;
for(i=0;i<800;i++) {
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
for (j = 0; j < frameSize/*c->frame_size*/; j++) {
samples[2*j] = (int)(sin(t) * 10000);
for (k = 1; k < c->channels; k++)
samples[2*j + k] = samples[2*j];
t += tincr;
}
/* encode the samples */
ret = avcodec_encode_audio2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
}
but after encoded file size is zero,
Please suggest what m doing wrong,any help will be appreciate, thanks in advance

Decode .wav file and write it into another file using ffmpeg

How to decode a .wav file and write it into another file using ffmpeg?
I got decoded data by this piece of code:
-(void)audioDecode:(const char *)outfilename inFileName:(const char *)filename
{
const char* input_filename=filename;
//avcodec_register_all();
av_register_all();
avcodec_register_all();
// av_register_all();
//av_ini
// AVFormatContext* container=avformat_alloc_context();
// if(avformat_open_input(&container,filename,NULL,NULL)<0){
// NSLog(#"Could not open file");
// }
AVCodec *codec;
AVCodecContext *c= NULL;
int len;
FILE *f, *outfile;
uint8_t inbuf[AUDIO_INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
AVPacket avpkt;
AVFrame *decoded_frame = NULL;
av_init_packet(&avpkt);
printf("Decode audio file %s to %s\n", filename, outfilename);
avcodec_register_all();
AVFrame* frame = av_frame_alloc();
if (!frame)
{
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
AVFormatContext* formatContext = NULL;
/* Opening the file, and check if it has opened */
if (avformat_open_input(&formatContext, filename, NULL, NULL) != 0)
{
av_frame_free(&frame);
NSLog(#"Could not open file");
}
if (avformat_find_stream_info(formatContext, NULL) < 0)
{
av_frame_free(&frame);
avformat_close_input(&formatContext);
NSLog(#"Error finding the stream info");
}
outfile = fopen(outfilename, "wb");
if (!outfile) {
av_free(c);
exit(1);
}
/* Find the audio Stream, if no audio stream are found, clean and exit */
AVCodec* cdc = NULL;
int streamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &cdc, 0);
if (streamIndex < 0)
{
av_frame_free(&frame);
avformat_close_input(&formatContext);
NSLog(#"Could not find any audio stream in the file");
exit(1);
}
/* Open the audio stream to read data in audioStream */
AVStream* audioStream = formatContext->streams[streamIndex];
/* Initialize the codec context */
AVCodecContext* codecContext = audioStream->codec;
codecContext->codec = cdc;
/* Open the codec, and verify if it has opened */
if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
{
av_frame_free(&frame);
avformat_close_input(&formatContext);
NSLog(#"Couldn't open the context with the decoder");
exit(1);
}
/* Initialize buffer to store compressed packets */
AVPacket readingPacket;
av_init_packet(&readingPacket);
int lenght = 1;
long long int chunk = ((formatContext->bit_rate)*lenght/8);
int j=0;
int count = 0;
//audioChunk output;
while(av_read_frame(formatContext, &readingPacket)==0){
//if((count+readingPacket.size)>start){
if(readingPacket.stream_index == audioStream->index){
AVPacket decodingPacket = readingPacket;
// Audio packets can have multiple audio frames in a single packet
while (decodingPacket.size > 0){
// Try to decode the packet into a frame
// Some frames rely on multiple packets, so we have to make sure the frame is finished before
// we can use it
int gotFrame = 0;
int result = avcodec_decode_audio4(codecContext, frame, &gotFrame, &decodingPacket);
count += result;
if (result >= 0 && gotFrame)
{
decodingPacket.size -= result;
decodingPacket.data += result;
int a;
for(int i=0;i<result-1;i++){
fwrite(frame->data[0], 1, decodingPacket.size, outfile);
// *(output.data+j)=frame->data[0][i];
//
j++;
if(j>=chunk) break;
}
// We now have a fully decoded audio frame
}
else
{
decodingPacket.size = 0;
decodingPacket.data = NULL;
}
// if(j>=chunk) break;
}
}
// }else count+=readingPacket.size;
//
// // To prevent memory leak, must free packet.
// av_free_packet(&readingPacket);
// if(j>=chunk) break;
}
fclose(outfile);
But the file is created with zero bytes. I don't know what's wrong with this code. And I got one more error: "Format adp detected only with low score of 25, misdetection possible!"

Record rtsp stream with ffmpeg in iOS

I've followed iFrameExtractor to successfully stream rtsp in my swift project. In this project, it also has recording function. It basically use avformat_write_header
, av_interleaved_write_frame and av_write_trailer to save the rtsp source into mp4 file.
When I used this project in my device, the rtsp streaming works fine, but recording function will always generate a blank mp4 file with no image and sound.
Could anyone tell me what step that I miss?
I'm using iPhone5 with iOS 9.1 and XCode 7.1.1.
The ffmpeg is 2.8.3 version and followed the compile instruction by CompilationGuide – FFmpeg
Following is the sample code in this project
The function that generate every frame:
-(BOOL)stepFrame {
// AVPacket packet;
int frameFinished=0;
static bool bFirstIFrame=false;
static int64_t vPTS=0, vDTS=0, vAudioPTS=0, vAudioDTS=0;
while(!frameFinished && av_read_frame(pFormatCtx, &packet)>=0) {
// Is this a packet from the video stream?
if(packet.stream_index==videoStream) {
// 20130525 albert.liao modified start
// Initialize a new format context for writing file
if(veVideoRecordState!=eH264RecIdle)
{
switch(veVideoRecordState)
{
case eH264RecInit:
{
if ( !pFormatCtx_Record )
{
int bFlag = 0;
//NSString *videoPath = [NSHomeDirectory() stringByAppendingPathComponent:#"Documents/test.mp4"];
NSString *videoPath = #"/Users/liaokuohsun/iFrameTest.mp4";
const char *file = [videoPath UTF8String];
pFormatCtx_Record = avformat_alloc_context();
bFlag = h264_file_create(file, pFormatCtx_Record, pCodecCtx, pAudioCodecCtx,/*fps*/0.0, packet.data, packet.size );
if(bFlag==true)
{
veVideoRecordState = eH264RecActive;
fprintf(stderr, "h264_file_create success\n");
}
else
{
veVideoRecordState = eH264RecIdle;
fprintf(stderr, "h264_file_create error\n");
}
}
}
//break;
case eH264RecActive:
{
if((bFirstIFrame==false) &&(packet.flags&AV_PKT_FLAG_KEY)==AV_PKT_FLAG_KEY)
{
bFirstIFrame=true;
vPTS = packet.pts ;
vDTS = packet.dts ;
#if 0
NSRunLoop *pRunLoop = [NSRunLoop currentRunLoop];
[pRunLoop addTimer:RecordingTimer forMode:NSDefaultRunLoopMode];
#else
[NSTimer scheduledTimerWithTimeInterval:5.0//2.0
target:self
selector:#selector(StopRecording:)
userInfo:nil
repeats:NO];
#endif
}
// Record audio when 1st i-Frame is obtained
if(bFirstIFrame==true)
{
if ( pFormatCtx_Record )
{
#if PTS_DTS_IS_CORRECT==1
packet.pts = packet.pts - vPTS;
packet.dts = packet.dts - vDTS;
#endif
h264_file_write_frame( pFormatCtx_Record, packet.stream_index, packet.data, packet.size, packet.dts, packet.pts);
}
else
{
NSLog(#"pFormatCtx_Record no exist");
}
}
}
break;
case eH264RecClose:
{
if ( pFormatCtx_Record )
{
h264_file_close(pFormatCtx_Record);
#if 0
// 20130607 Test
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^(void)
{
ALAssetsLibrary *library = [[ALAssetsLibrary alloc]init];
NSString *filePathString = [NSHomeDirectory() stringByAppendingPathComponent:#"Documents/test.mp4"];
NSURL *filePathURL = [NSURL fileURLWithPath:filePathString isDirectory:NO];
if(1)// ([library videoAtPathIsCompatibleWithSavedPhotosAlbum:filePathURL])
{
[library writeVideoAtPathToSavedPhotosAlbum:filePathURL completionBlock:^(NSURL *assetURL, NSError *error){
if (error) {
// TODO: error handling
NSLog(#"writeVideoAtPathToSavedPhotosAlbum error");
} else {
// TODO: success handling
NSLog(#"writeVideoAtPathToSavedPhotosAlbum success");
}
}];
}
[library release];
});
#endif
vPTS = 0;
vDTS = 0;
vAudioPTS = 0;
vAudioDTS = 0;
pFormatCtx_Record = NULL;
NSLog(#"h264_file_close() is finished");
}
else
{
NSLog(#"fc no exist");
}
bFirstIFrame = false;
veVideoRecordState = eH264RecIdle;
}
break;
default:
if ( pFormatCtx_Record )
{
h264_file_close(pFormatCtx_Record);
pFormatCtx_Record = NULL;
}
NSLog(#"[ERROR] unexpected veVideoRecordState!!");
veVideoRecordState = eH264RecIdle;
break;
}
}
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
}
else if(packet.stream_index==audioStream)
{
// 20131024 albert.liao modfied start
static int vPktCount=0;
BOOL bIsAACADTS = FALSE;
int ret = 0;
if(aPlayer.vAACType == eAAC_UNDEFINED)
{
tAACADTSHeaderInfo vxAACADTSHeaderInfo = {0};
bIsAACADTS = [AudioUtilities parseAACADTSHeader:(uint8_t *)packet.data ToHeader:&vxAACADTSHeaderInfo];
}
#synchronized(aPlayer)
{
if(aPlayer==nil)
{
aPlayer = [[AudioPlayer alloc]initAudio:nil withCodecCtx:(AVCodecContext *) pAudioCodecCtx];
NSLog(#"aPlayer initAudio");
if(bIsAACADTS)
{
aPlayer.vAACType = eAAC_ADTS;
//NSLog(#"is ADTS AAC");
}
}
else
{
if(vPktCount<5) // The voice is listened once image is rendered
{
vPktCount++;
}
else
{
if([aPlayer getStatus]!=eAudioRunning)
{
dispatch_async(dispatch_get_main_queue(), ^(void) {
#synchronized(aPlayer)
{
NSLog(#"aPlayer start play");
[aPlayer Play];
}
});
}
}
}
};
#synchronized(aPlayer)
{
int ret = 0;
ret = [aPlayer putAVPacket:&packet];
if(ret <= 0)
NSLog(#"Put Audio Packet Error!!");
}
// 20131024 albert.liao modfied end
if(bFirstIFrame==true)
{
switch(veVideoRecordState)
{
case eH264RecActive:
{
if ( pFormatCtx_Record )
{
h264_file_write_audio_frame(pFormatCtx_Record, pAudioCodecCtx, packet.stream_index, packet.data, packet.size, packet.dts, packet.pts);
}
else
{
NSLog(#"pFormatCtx_Record no exist");
}
}
}
}
}
else
{
//fprintf(stderr, "packet len=%d, Byte=%02X%02X%02X%02X%02X\n",\
packet.size, packet.data[0],packet.data[1],packet.data[2],packet.data[3], packet.data[4]);
}
// 20130525 albert.liao modified end
}
return frameFinished!=0;
}
avformat_write_header:
int h264_file_create(const char *pFilePath, AVFormatContext *fc, AVCodecContext *pCodecCtx,AVCodecContext *pAudioCodecCtx, double fps, void *p, int len )
{
int vRet=0;
AVOutputFormat *of=NULL;
AVStream *pst=NULL;
AVCodecContext *pcc=NULL, *pAudioOutputCodecContext=NULL;
avcodec_register_all();
av_register_all();
av_log_set_level(AV_LOG_VERBOSE);
if(!pFilePath)
{
fprintf(stderr, "FilePath no exist");
return -1;
}
if(!fc)
{
fprintf(stderr, "AVFormatContext no exist");
return -1;
}
fprintf(stderr, "file=%s\n",pFilePath);
// Create container
of = av_guess_format( 0, pFilePath, 0 );
fc->oformat = of;
strcpy( fc->filename, pFilePath );
// Add video stream
pst = avformat_new_stream( fc, 0 );
vVideoStreamIdx = pst->index;
fprintf(stderr,"Video Stream:%d",vVideoStreamIdx);
pcc = pst->codec;
avcodec_get_context_defaults3( pcc, AVMEDIA_TYPE_VIDEO );
// Save the stream as origin setting without convert
pcc->codec_type = pCodecCtx->codec_type;
pcc->codec_id = pCodecCtx->codec_id;
pcc->bit_rate = pCodecCtx->bit_rate;
pcc->width = pCodecCtx->width;
pcc->height = pCodecCtx->height;
if(fps==0)
{
double fps=0.0;
AVRational pTimeBase;
pTimeBase.num = pCodecCtx->time_base.num;
pTimeBase.den = pCodecCtx->time_base.den;
fps = 1.0/ av_q2d(pCodecCtx->time_base)/ FFMAX(pCodecCtx->ticks_per_frame, 1);
fprintf(stderr,"fps_method(tbc): 1/av_q2d()=%g",fps);
pcc->time_base.num = 1;
pcc->time_base.den = fps;
}
else
{
pcc->time_base.num = 1;
pcc->time_base.den = fps;
}
// reference ffmpeg\libavformat\utils.c
// For SPS and PPS in avcC container
pcc->extradata = malloc(sizeof(uint8_t)*pCodecCtx->extradata_size);
memcpy(pcc->extradata, pCodecCtx->extradata, pCodecCtx->extradata_size);
pcc->extradata_size = pCodecCtx->extradata_size;
// For Audio stream
if(pAudioCodecCtx)
{
AVCodec *pAudioCodec=NULL;
AVStream *pst2=NULL;
pAudioCodec = avcodec_find_encoder(AV_CODEC_ID_AAC);
// Add audio stream
pst2 = avformat_new_stream( fc, pAudioCodec );
vAudioStreamIdx = pst2->index;
pAudioOutputCodecContext = pst2->codec;
avcodec_get_context_defaults3( pAudioOutputCodecContext, pAudioCodec );
fprintf(stderr,"Audio Stream:%d",vAudioStreamIdx);
fprintf(stderr,"pAudioCodecCtx->bits_per_coded_sample=%d",pAudioCodecCtx->bits_per_coded_sample);
pAudioOutputCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
pAudioOutputCodecContext->codec_id = AV_CODEC_ID_AAC;
// Copy the codec attributes
pAudioOutputCodecContext->channels = pAudioCodecCtx->channels;
pAudioOutputCodecContext->channel_layout = pAudioCodecCtx->channel_layout;
pAudioOutputCodecContext->sample_rate = pAudioCodecCtx->sample_rate;
pAudioOutputCodecContext->bit_rate = 12000;//pAudioCodecCtx->sample_rate * pAudioCodecCtx->bits_per_coded_sample;
pAudioOutputCodecContext->bits_per_coded_sample = pAudioCodecCtx->bits_per_coded_sample;
pAudioOutputCodecContext->profile = pAudioCodecCtx->profile;
//FF_PROFILE_AAC_LOW;
// pAudioCodecCtx->bit_rate;
// AV_SAMPLE_FMT_U8P, AV_SAMPLE_FMT_S16P
//pAudioOutputCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;//pAudioCodecCtx->sample_fmt;
pAudioOutputCodecContext->sample_fmt = pAudioCodecCtx->sample_fmt;
//pAudioOutputCodecContext->sample_fmt = AV_SAMPLE_FMT_U8;
pAudioOutputCodecContext->sample_aspect_ratio = pAudioCodecCtx->sample_aspect_ratio;
pAudioOutputCodecContext->time_base.num = pAudioCodecCtx->time_base.num;
pAudioOutputCodecContext->time_base.den = pAudioCodecCtx->time_base.den;
pAudioOutputCodecContext->ticks_per_frame = pAudioCodecCtx->ticks_per_frame;
pAudioOutputCodecContext->frame_size = 1024;
fprintf(stderr,"profile:%d, sample_rate:%d, channles:%d", pAudioOutputCodecContext->profile, pAudioOutputCodecContext->sample_rate, pAudioOutputCodecContext->channels);
AVDictionary *opts = NULL;
av_dict_set(&opts, "strict", "experimental", 0);
if (avcodec_open2(pAudioOutputCodecContext, pAudioCodec, &opts) < 0) {
fprintf(stderr, "\ncould not open codec\n");
}
av_dict_free(&opts);
#if 0
// For Audio, this part is no need
if(pAudioCodecCtx->extradata_size!=0)
{
NSLog(#"extradata_size !=0");
pAudioOutputCodecContext->extradata = malloc(sizeof(uint8_t)*pAudioCodecCtx->extradata_size);
memcpy(pAudioOutputCodecContext->extradata, pAudioCodecCtx->extradata, pAudioCodecCtx->extradata_size);
pAudioOutputCodecContext->extradata_size = pAudioCodecCtx->extradata_size;
}
else
{
// For WMA test only
pAudioOutputCodecContext->extradata_size = 0;
NSLog(#"extradata_size ==0");
}
#endif
}
if(fc->oformat->flags & AVFMT_GLOBALHEADER)
{
pcc->flags |= CODEC_FLAG_GLOBAL_HEADER;
pAudioOutputCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if ( !( fc->oformat->flags & AVFMT_NOFILE ) )
{
vRet = avio_open( &fc->pb, fc->filename, AVIO_FLAG_WRITE );
if(vRet!=0)
{
fprintf(stderr,"avio_open(%s) error", fc->filename);
}
}
// dump format in console
av_dump_format(fc, 0, pFilePath, 1);
vRet = avformat_write_header( fc, NULL );
if(vRet==0)
return 1;
else
return 0;
}
av_interleaved_write_frame:
void h264_file_write_frame(AVFormatContext *fc, int vStreamIdx, const void* p, int len, int64_t dts, int64_t pts )
{
AVStream *pst = NULL;
AVPacket pkt;
if ( 0 > vVideoStreamIdx )
return;
// may be audio or video
pst = fc->streams[ vStreamIdx ];
// Init packet
av_init_packet( &pkt );
if(vStreamIdx ==vVideoStreamIdx)
{
pkt.flags |= ( 0 >= getVopType( p, len ) ) ? AV_PKT_FLAG_KEY : 0;
//pkt.flags |= AV_PKT_FLAG_KEY;
pkt.stream_index = pst->index;
pkt.data = (uint8_t*)p;
pkt.size = len;
pkt.dts = AV_NOPTS_VALUE;
pkt.pts = AV_NOPTS_VALUE;
// TODO: mark or unmark the log
//fprintf(stderr, "dts=%lld, pts=%lld\n",dts,pts);
// av_write_frame( fc, &pkt );
}
av_interleaved_write_frame( fc, &pkt );
}
av_write_trailer:
void h264_file_close(AVFormatContext *fc)
{
if ( !fc )
return;
av_write_trailer( fc );
if ( fc->oformat && !( fc->oformat->flags & AVFMT_NOFILE ) && fc->pb )
avio_close( fc->pb );
av_free( fc );
}
Thanks.
It looks like you're using the same AVFormatContext for both the input and output?
In the line
pst = fc->streams[ vStreamIdx ];
You're assigning the AVStream* from your AVFormatContext connected with your input (RTSP stream). But then later on you're trying to write the packet back to the same context av_interleaved_write_frame( fc, &pkt );. I kind of think of a context as a file which has helped me navagate this type of thing better. I do something identicial to what you're doing (not iOS though) where I use a separate AVFormatContext for each of the input (RTSP stream) and output (mp4 file). If I'm correct, I think what you just need to do is initialize an AVFormatContext and properly.
The following code (without error checking everything) is what I do to take an AVFormatContext * output_format_context = NULL and the AVFormatContext * input_format_context that I had associated with the RTSP stream and write from one to the other. This is after I have fetched a packet, etc., which in your case it looks like you're populating (I just take the packet from av_read_frame and re-package it.
This is code that could be in your write frame function (but it also does include the writing of the header).
AVFormatContext * output_format_context;
AVStream * in_stream_2;
AVStream * out_stream_2;
// Allocate the context with the output file
avformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename.c_str());
// Point to AVOutputFormat * output_format for manipulation
output_format = output_format_context->oformat;
// Loop through all streams
for (i = 0; i < input_format_context->nb_streams; i++) {
// Create a pointer to the input stream that was allocated earlier in the code
AVStream *in_stream = input_format_context->streams[i];
// Create a pointer to a new stream that will be part of the output
AVStream *out_stream = avformat_new_stream(output_format_context, in_stream->codec->codec);
// Set time_base of the new output stream to equal the input stream one since I'm not changing anything (can avoid but get a deprecation warning)
out_stream->time_base = in_stream->time_base;
// This is the non-deprecated way of copying all the parameters from the input stream into the output stream since everything stays the same
avcodec_parameters_from_context(out_stream->codecpar, in_stream->codec);
// I don't remember what this is for :)
out_stream->codec->codec_tag = 0;
// This just sets a flag from the format context to the stream relating to the header
if (output_format_context->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
// Check NOFILE flag and open the output file context (previously the output file was associated with the format context, now it is actually opened.
if (!(output_format->flags & AVFMT_NOFILE))
avio_open(&output_format_context->pb, out_filename.c_str(), AVIO_FLAG_WRITE);
// Write the header (not sure if this is always needed but h264 I believe it is.
avformat_write_header(output_format_context,NULL);
// Re-getting the appropriate stream that was populated above (this should allow for both audio/video)
in_stream_2 = input_format_context->streams[packet.stream_index];
out_stream_2 = output_format_context->streams[packet.stream_index];
// Rescaling pts and dts, duration and pos - you would do as you need in your code.
packet.pts = av_rescale_q_rnd(packet.pts, in_stream_2->time_base, out_stream_2->time_base, (AVRounding) (AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
packet.dts = av_rescale_q_rnd(packet.dts, in_stream_2->time_base, out_stream_2->time_base, (AVRounding) (AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
packet.duration = av_rescale_q(packet.duration, in_stream_2->time_base, out_stream_2->time_base);
packet.pos = -1;
// The first packet of my stream always gives me negative dts/pts so this just protects that first one for my purposes. You probably don't need.
if (packet.dts < 0) packet.dts = 0;
if (packet.pts < 0) packet.pts = 0;
// Finally write the frame
av_interleaved_write_frame(output_format_context, &packet);
// ....
// Write header, close/cleanup... etc
// ....
This code is fairly bare bones and doesn't include the setup (which it sounds like you're doing correctly anyway). I would also imagine this code could be cleaned up and tweaked for your purposes, but this works for me to re-write the RTSP stream into a file (in my case many files but code not shown).
The code is C code, so you might need to do minor tweaks for making it Swift compatible (for some of the library function calls maybe). I think overall it should be compatible though.
Hopefully this helps point you to the right direction. This was cobbled together thanks to several sample code sources (I don't remember where), along with warning prompts from the libraries themselves.

WMV Converter Fails FFMpeg

I just cann't seem to get the FFMpeg working with using the library. Everytime I try to convert asf file to wmv. I get the following issue on run time:
[wmv1 # 0x81ee000]error, slice code was 2
[wmv1 # 0x81ee000]header damaged
This my code:
static void audio_decode_example(const char *outfilename, const char *filename)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int out_size, len, in_size;
FILE *f, *outfile;
uint8_t *outbuf;
uint8_t inbuf[AUDIO_INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
AVPacket avpkt;
av_init_packet(&avpkt);
printf("Audio decoding\n");
/* find the mpeg audio decoder */
codec = avcodec_find_decoder(CODEC_ID_WMV1);
if (!codec) {
fprintf(stderr, "codec not found\n");
return;
}
c= avcodec_alloc_context2(CODEC_TYPE_AUDIO);
/* open it */
if (avcodec_open(c, codec) < 0) {
fprintf(stderr, "could not open codec\n");
return;
}
outbuf = malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "could not open %s\n", filename);
return;
}
outfile = fopen(outfilename, "wb");
if (!outfile) {
av_free(c);
return;
}
/* decode until eof */
avpkt.data = inbuf;
len = avpkt.size = fread(inbuf, 1, INBUF_SIZE, f);
NSLog(#"%d", avpkt.size);
while (avpkt.size > 0) {
out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
len = avcodec_decode_audio2(c, (short *)outbuf, &out_size, inbuf,len);// avpkt.size);
NSLog(#"%d", len);
if (len < 0) {
fprintf(stderr, "Error while decoding\n");
fclose(outfile);
return;
}
if (out_size > 0) {
/* if a frame has been decoded, output it */
fwrite(outbuf, 1, out_size, outfile);
}
avpkt.size -= len;
avpkt.data += len;
if (avpkt.size < AUDIO_REFILL_THRESH) {
/* Refill the input buffer, to avoid trying to decode
* incomplete frames. Instead of this, one could also use
* a parser, or use a proper container format through
* libavformat. */
memmove(inbuf, avpkt.data, avpkt.size);
avpkt.data = inbuf;
len = fread(avpkt.data + avpkt.size, 1,
INBUF_SIZE - avpkt.size, f);
if (len > 0)
avpkt.size += len;
}
}
fclose(outfile);
fclose(f);
free(outbuf);
avcodec_close(c);
av_free(c);
}
I have try the command line utilities and it successfully convert the file. Any help would be helpfully. thanks
Make the mistake of opening the wrong file