The FFmpeg (libavformat/libavcodec, in this case) API maps the ffmpeg.exe command-line arguments pretty closely. To open a file, use avformat_open_input(). The last two arguments can be NULL. This fills in the AVFormatContext for you. Now you start reading frames using av_read_frame() in a loop. The pkt.stream_index will tell you which stream each packet belongs to, and avformatcontext->streams[pkt.stream_index] is the accompanying stream information, which tells you what codec it uses, whether it's video/audio, etc. Use avformat_close_input() to shut down.
For muxing, you use the inverse; see the muxing documentation for details. Basically: allocate the output context, call avio_open2(), add a stream for each existing stream in the input file (basically context->streams[]), call avformat_write_header(), call av_interleaved_write_frame() in a loop, and call av_write_trailer() to shut down (and free the allocated context in the end).
Encoding/decoding of the video stream(s) is done using libavcodec. For each AVPacket you get from the demuxer, use avcodec_decode_video2(). Use avcodec_encode_video2() for encoding of the output AVFrame. Note that both will introduce delay, so the first few calls to each function will not return any data, and you need to flush cached data by calling each function with NULL input data to get the tail packets/frames out of it. av_interleaved_write_frame() will interleave packets correctly so the video/audio streams will not desync (as in: video packets of the same timestamp occur MBs after audio packets in the ts file).
If you need more detailed examples for avcodec_decode_video2, avcodec_encode_video2, av_read_frame or av_interleaved_write_frame, just Google "$function example" and you'll see full-fledged examples showing how to use them correctly. For x264 encoding, set some default parameters in the AVCodecContext when calling avcodec_open2 for encoding quality settings. In the C API, you do that using AVDictionary, e.g.:
// Encoder options are collected in an AVDictionary. The variable is a
// pointer that starts out NULL; its address is handed to av_dict_set().
AVDictionary *opts = NULL;
av_dict_set(&opts, "preset", "veryslow", 0);
// use either crf or b, not both! See the link above on H264 encoding options
av_dict_set_int(&opts, "b", 1000, 0);
av_dict_set_int(&opts, "crf", 10, 0);
[edit] Oh, I forgot one part: the timestamping. Each AVPacket and AVFrame has a pts variable in its struct, and you can use that to decide whether to include the packet/frame in the output stream. So for audio, you'd use AVPacket.pts from the demuxing step as a delimiter, and for video, you'd use AVFrame.pts from the decoding step as a delimiter. Their respective documentation tells you in what unit they are.
[edit2] I see you're still having some issues without actual code, so here's a real (working) transcoder which re-codes video and re-muxes audio. It probably has tons of bugs, leaks and lacks proper error reporting, it also doesn't deal with timestamps (I'm leaving that to you as an exercise), but it does the basic things that you asked for:
#include <stdio.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
/* Number of slots in the per-stream codec context tables below. */
#define MAX_STREAMS 16

/* Input (demuxing) and output (muxing) format contexts used by every function in this file. */
static AVFormatContext *inctx;
static AVFormatContext *outctx;

/*
 * Decoder and encoder contexts, indexed by stream index.
 * A NULL entry means the stream is not re-encoded.
 */
static AVCodecContext *inavctx[MAX_STREAMS];
static AVCodecContext *outavctx[MAX_STREAMS];
/*
 * Open `file` for demuxing and probe its streams.
 *
 * On success the global `inctx` holds the opened input and 0 is returned.
 * On failure the error code of the failing libavformat call is returned and
 * `inctx` is left NULL.
 */
static int openInputFile(const char *file) {
    int res;

    inctx = NULL;
    res = avformat_open_input(&inctx, file, NULL, NULL);
    if (res != 0) {
        return res;
    }

    res = avformat_find_stream_info(inctx, NULL);
    if (res < 0) {
        // Do not leak the already-opened input when probing fails.
        avformat_close_input(&inctx);
        return res;
    }
    return 0;
}
static void closeInputFile(void) {
int n;
for (n = 0; n < inctx->nb_streams; n++)
if (inavctx[n]) {
avcodec_close(inavctx[n]);
avcodec_free_context(&inavctx[n]);
}
avformat_close_input(&inctx);
}
static int openOutputFile(const char *file) {
int res, n;
outctx = avformat_alloc_context();
outctx->oformat = av_guess_format(NULL, file, NULL);
if ((res = avio_open2(&outctx->pb, file, AVIO_FLAG_WRITE, NULL, NULL)) < 0)
return res;
for (n = 0; n < inctx->nb_streams; n++) {
AVStream *inst = inctx->streams[n];
AVCodecContext *inc = inst->codec;
if (inc->codec_type == AVMEDIA_TYPE_VIDEO) {
// video decoder
inavctx[n] = avcodec_alloc_context3(inc->codec);
avcodec_copy_context(inavctx[n], inc);
if ((res = avcodec_open2(inavctx[n], avcodec_find_decoder(inc->codec_id), NULL)) < 0)
return res;
// video encoder
AVCodec *encoder = avcodec_find_encoder_by_name("libx264");
AVStream *outst = avformat_new_stream(outctx, encoder);
outst->codec->width = inavctx[n]->width;
outst->codec->height = inavctx[n]->height;
outst->codec->pix_fmt = inavctx[n]->pix_fmt;
AVDictionary *dict = NULL;
av_dict_set(&dict, "preset", "veryslow", 0);
av_dict_set_int(&dict, "crf", 10, 0);
outavctx[n] = avcodec_alloc_context3(encoder);
avcodec_copy_context(outavctx[n], outst->codec);
if ((res = avcodec_open2(outavctx[n], encoder, &dict)) < 0)
return res;
} else if (inc->codec_type == AVMEDIA_TYPE_AUDIO) {
avformat_new_stream(outctx, inc->codec);
inavctx[n] = outavctx[n] = NULL;
} else {
fprintf(stderr, "Don’t know what to do with stream %d
", n);
return -1;
}
}
if ((res = avformat_write_header(outctx, NULL)) < 0)
return res;
return 0;
}
static void closeOutputFile(void) {
int n;
av_write_trailer(outctx);
for (n = 0; n < outctx->nb_streams; n++)
if (outctx->streams[n]->codec)
avcodec_close(outctx->streams[n]->codec);
avformat_free_context(outctx);
}
/*
 * Encode `frame` with the encoder of stream `stream_index` and, if the
 * encoder produced a packet, write it to the output file.
 *
 * frame      frame to encode; main() passes NULL to drain the encoder.
 * gotOutput  set by the encoder to non-zero when a packet was produced.
 *
 * Returns 0 on success, or the negative error code of the failing call.
 */
static int encodeFrame(int stream_index, AVFrame *frame, int *gotOutput) {
    AVPacket outPacket;
    int res;

    av_init_packet(&outPacket);
    // av_init_packet() leaves data/size untouched; NULL/0 asks the encoder
    // to allocate the output buffer itself.
    outPacket.data = NULL;
    outPacket.size = 0;

    if ((res = avcodec_encode_video2(outavctx[stream_index], &outPacket, frame, gotOutput)) < 0) {
        fprintf(stderr, "Failed to encode frame\n");
        return res;
    }

    if (*gotOutput) {
        outPacket.stream_index = stream_index;
        if ((res = av_interleaved_write_frame(outctx, &outPacket)) < 0) {
            fprintf(stderr, "Failed to write packet\n");
            av_free_packet(&outPacket);
            return res;
        }
    }

    av_free_packet(&outPacket);
    return 0;
}
/*
 * Decode `pkt` with the decoder of stream `stream_index`; when a complete
 * frame comes out, pass it straight on to encodeFrame().
 *
 * pkt            packet to decode; main() passes an empty packet
 *                (data == NULL, size == 0) to drain the decoder.
 * frame          caller-owned frame that receives the decoded picture.
 * frameFinished  set by the decoder to non-zero when `frame` was filled.
 *
 * Returns 0 on success (including "no frame yet"), or the negative error
 * code of the failing decode/encode step.
 */
static int decodePacket(int stream_index, AVPacket *pkt, AVFrame *frame, int *frameFinished) {
    int res;

    if ((res = avcodec_decode_video2(inavctx[stream_index], frame,
                                     frameFinished, pkt)) < 0) {
        fprintf(stderr, "Failed to decode frame\n");
        return res;
    }

    if (*frameFinished) {
        int hasOutput;

        // Carry the packet timestamp over to the frame handed to the encoder.
        frame->pts = frame->pkt_pts;
        return encodeFrame(stream_index, frame, &hasOutput);
    } else {
        return 0;
    }
}
int main(int argc, char *argv[]) {
char *input = argv[1];
char *output = argv[2];
int res, n;
printf("Converting %s to %s
", input, output);
av_register_all();
if ((res = openInputFile(input)) < 0) {
fprintf(stderr, "Failed to open input file %s
", input);
return res;
}
if ((res = openOutputFile(output)) < 0) {
fprintf(stderr, "Failed to open output file %s
", input);
return res;
}
AVFrame *frame = av_frame_alloc();
AVPacket inPacket;
av_init_packet(&inPacket);
while (av_read_frame(inctx, &inPacket) >= 0) {
if (inavctx[inPacket.stream_index] != NULL) {
int frameFinished;
if ((res = decodePacket(inPacket.stream_index, &inPacket, frame, &frameFinished)) < 0) {
return res;
}
} else {
if ((res = av_interleaved_write_frame(outctx, &inPacket)) < 0) {
fprintf(stderr, "Failed to write packet
");
return res;
}
}
}
for (n = 0; n < inctx->nb_streams; n++) {
if (inavctx[n]) {
// flush decoder
int frameFinished;
do {
inPacket.data = NULL;
inPacket.size = 0;
if ((res = decodePacket(n, &inPacket, frame, &frameFinished)) < 0)
return res;
} while (frameFinished);
// flush encoder
int gotOutput;
do {
if ((res = encodeFrame(n, NULL, &gotOutput)) < 0)
return res;
} while (gotOutput);
}
}
av_free_packet(&inPacket);
closeInputFile();
closeOutputFile();
return 0;
}