Encoding a screenshot into a video using FFmpeg

I managed to get it working after quite a bit of trial and error. The first problem was that I was allocating the image after filling it. The second was that the screenshot buffer was not being created properly: it needed to be cast as a COLORREF* rather than an RGBQUAD*.

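For reference, here is a minimal sketch of the screenCap() helper and the globals that the encoding loop below relies on (pPixels, screenWidth, screenHeight, STREAM_FRAME_RATE, STREAM_DURATION). The GDI capture and these particular names are assumptions based on the code in the question, since the corrected helper is not shown here; the relevant change is that the pixel buffer is now a COLORREF*.

#include <windows.h>                                    // assumed: Win32 GDI screen capture

static const int STREAM_FRAME_RATE = 25;                // assumed frame rate, matches time_base below
static const int STREAM_DURATION   = 5;                 // assumed capture length in seconds
static int screenWidth  = GetSystemMetrics(SM_CXSCREEN);
static int screenHeight = GetSystemMetrics(SM_CYSCREEN);
static COLORREF* pPixels = NULL;                        // filled by screenCap(), freed after each encoded frame

// Grab the desktop into a 32-bit DIB and expose it through pPixels.
void screenCap()
{
    HWND hDesktopWnd = GetDesktopWindow();
    HDC  hDesktopDC  = GetDC(hDesktopWnd);
    HDC  hCaptureDC  = CreateCompatibleDC(hDesktopDC);
    HBITMAP hBmp     = CreateCompatibleBitmap(hDesktopDC, screenWidth, screenHeight);
    SelectObject(hCaptureDC, hBmp);
    BitBlt(hCaptureDC, 0, 0, screenWidth, screenHeight, hDesktopDC, 0, 0, SRCCOPY | CAPTUREBLT);

    BITMAPINFO bmi = {0};
    bmi.bmiHeader.biSize        = sizeof(bmi.bmiHeader);
    bmi.bmiHeader.biWidth       = screenWidth;
    bmi.bmiHeader.biHeight      = screenHeight;         // bottom-up DIB; the encoding loop flips it
    bmi.bmiHeader.biPlanes      = 1;
    bmi.bmiHeader.biBitCount    = 32;
    bmi.bmiHeader.biCompression = BI_RGB;

    pPixels = new COLORREF[screenWidth * screenHeight];  // COLORREF*, not RGBQUAD*
    GetDIBits(hCaptureDC, hBmp, 0, screenHeight, pPixels, &bmi, DIB_RGB_COLORS);

    ReleaseDC(hDesktopWnd, hDesktopDC);
    DeleteDC(hCaptureDC);
    DeleteObject(hBmp);
}

The encoding code itself:
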
AVCodec* codec;
AVCodecContext* c = NULL;
uint8_t* outbuf;
int i, out_size, outbuf_size;

avcodec_register_all();                                 // register all codecs (required with this older libavcodec API)

printf("Video encoding\n");

codec = avcodec_find_encoder(CODEC_ID_H264);            // finding the H264 encoder
if (!codec) {
    fprintf(stderr, "Codec not found\n");
    exit(1);
}
else printf("H264 codec found\n");

c = avcodec_alloc_context3(codec);
c->bit_rate = 400000;
c->width = 1280;                                        // resolution must be a multiple of two, e.g. 1280x720, 1920x1080, 720x480
c->height = 720;
c->time_base.num = 1;                                   // time base numerator
c->time_base.den = 25;                                  // time base denominator (1/25 s per frame = 25 fps)
c->gop_size = 10;                                       // emit one intra frame every ten frames
c->max_b_frames = 1;                                    // maximum number of b-frames between non b-frames
c->keyint_min = 1;                                      // minimum GOP size
c->i_quant_factor = (float)0.71;                        // qscale factor between P and I frames
c->b_frame_strategy = 20;                               // adaptive B-frame decision (libx264 b-adapt; meaningful values are 0-2)
c->qcompress = (float)0.6;                              // rate-control qscale curve compression, 0.0-1.0 (0.6 is the default)
c->qmin = 20;                                           // minimum quantizer
c->qmax = 51;                                           // maximum quantizer
c->max_qdiff = 4;                                       // maximum quantizer difference between frames
c->refs = 4;                                            // number of reference frames
c->trellis = 1;                                         // trellis RD Quantization
c->pix_fmt = PIX_FMT_YUV420P;                           // planar YUV 4:2:0, the input format the H.264 encoder expects
c->codec_id = CODEC_ID_H264;
c->codec_type = AVMEDIA_TYPE_VIDEO;

if (avcodec_open2(c, codec,NULL) < 0) {
    fprintf(stderr, "Could not open codec\n");          // opening the codec
    exit(1);
}
else printf("H264 codec opened\n");

outbuf_size = 100000 + c->width*c->height*(32>>3);      // allocate output buffer
outbuf = static_cast<uint8_t *>(malloc(outbuf_size));
printf("Setting buffer size to: %d\n",outbuf_size);

FILE* f = fopen("example.mpg","wb");                    // opening video file for writing
if(!f) printf("x  -  Cannot open video file for writing\n");
else printf("Opened video file for writing\n");

// encode video
for(i=0;i<STREAM_FRAME_RATE*STREAM_DURATION;i++) {
    fflush(stdout);

    screenCap();                                                                                                // taking screenshot

    int nbytes = avpicture_get_size(PIX_FMT_YUV420P, c->width, c->height);                                      // allocating outbuffer
    uint8_t* outbuffer = (uint8_t*)av_malloc(nbytes*sizeof(uint8_t));

    AVFrame* inpic = avcodec_alloc_frame();                                                                     // mandatory frame allocation
    AVFrame* outpic = avcodec_alloc_frame();

    outpic->pts = (int64_t)((float)i * (1000.0/((float)(c->time_base.den))) * 90);                              // setting frame pts
    avpicture_fill((AVPicture*)inpic, (uint8_t*)pPixels, PIX_FMT_RGB32, c->width, c->height);                   // fill image with input screenshot
    avpicture_fill((AVPicture*)outpic, outbuffer, PIX_FMT_YUV420P, c->width, c->height);                        // attach outbuffer to the output picture
    av_image_alloc(outpic->data, outpic->linesize, c->width, c->height, c->pix_fmt, 1);                         // allocate aligned YUV planes (replaces the pointers set above)

    inpic->data[0] += inpic->linesize[0]*(screenHeight-1);                                                      // flipping frame
    inpic->linesize[0] = -inpic->linesize[0];                                                                   // flipping frame

    struct SwsContext* fooContext = sws_getContext(screenWidth, screenHeight, PIX_FMT_RGB32, c->width, c->height, PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);
    sws_scale(fooContext, inpic->data, inpic->linesize, 0, c->height, outpic->data, outpic->linesize);          // converting frame size and format

    out_size = avcodec_encode_video(c, outbuf, outbuf_size, outpic);                                            // encoding video
    printf("Encoding frame %3d (size=%5d)\n", i, out_size);
    fwrite(outbuf, 1, out_size, f);

    delete [] pPixels;                                                                                          // freeing memory
    sws_freeContext(fooContext);
    av_free(outbuffer);
    av_freep(&outpic->data[0]);                                                                                 // planes allocated by av_image_alloc
    av_free(inpic);
    av_free(outpic);
}

for(; out_size; i++) {
    fflush(stdout);

    out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);                                              // encode the delayed frames
    printf("Writing frame %3d (size=%5d)\n", i, out_size);
    fwrite(outbuf, 1, out_size, f);
}

// MPEG sequence end code, carried over from FFmpeg's MPEG-1 encoding example (not required for H.264)
outbuf[0] = 0x00;
outbuf[1] = 0x00;
outbuf[2] = 0x01;
outbuf[3] = 0xb7;
fwrite(outbuf, 1, 4, f);
fclose(f);

avcodec_close(c);                                                                                               // freeing memory
free(outbuf);
av_free(c);
printf("Closed codec and Freed\n");

Comments

  • mohM, about 2 years ago

    I'm trying to get the pixels from the screen and encode the screenshot into a video using FFmpeg. I've seen a couple of examples, but they either assume you already have the pixel data or use image-file input. Whether or not I use sws_scale() (which is included in the examples I've seen), and whether I typecast the data as an HBITMAP or an RGBQUAD*, it tells me that the image src data is bad and encodes a blank image rather than the screenshot. Is there something I'm missing here?

    AVCodec* codec;
    AVCodecContext* c = NULL;
    AVFrame* inpic;
    uint8_t* outbuf, *picture_buf;
    int i, out_size, size, outbuf_size;
    HBITMAP hBmp;
    //int x,y;
    
    avcodec_register_all();
    
    printf("Video encoding\n");
    
    // Find the H.264 video encoder
    codec = avcodec_find_encoder(CODEC_ID_H264);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }
    else printf("H264 codec found\n");
    
    c = avcodec_alloc_context3(codec);
    inpic = avcodec_alloc_frame();
    
    c->bit_rate = 400000;
    c->width = screenWidth;                                     // resolution must be a multiple of two
    c->height = screenHeight;
    c->time_base.num = 1;
    c->time_base.den = 25;
    c->gop_size = 10;                                           // emit one intra frame every ten frames
    c->max_b_frames=1;
    c->pix_fmt = PIX_FMT_YUV420P;
    c->codec_id = CODEC_ID_H264;
    //c->codec_type = AVMEDIA_TYPE_VIDEO;
    
    //av_opt_set(c->priv_data, "preset", "slow", 0);
    //printf("Setting presets to slow for performance\n");
    
    // Open the encoder
    if (avcodec_open2(c, codec,NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    else printf("H264 codec opened\n");
    
    outbuf_size = 100000 + 12*c->width*c->height;           // alloc image and output buffer
    //outbuf_size = 100000;
    outbuf = static_cast<uint8_t *>(malloc(outbuf_size));
    size = c->width * c->height;
    picture_buf = static_cast<uint8_t*>(malloc((size*3)/2));
    printf("Setting buffer size to: %d\n",outbuf_size);
    
    FILE* f = fopen("example.mpg","wb");
    if(!f) printf("x  -  Cannot open video file for writing\n");
    else printf("Opened video file for writing\n");
    
    /*inpic->data[0] = picture_buf;
    inpic->data[1] = inpic->data[0] + size;
    inpic->data[2] = inpic->data[1] + size / 4;
    inpic->linesize[0] = c->width;
    inpic->linesize[1] = c->width / 2;
    inpic->linesize[2] = c->width / 2;*/
    
    
    //int x,y;
    // encode 1 second of video
    for(i=0;i<c->time_base.den;i++) {
        fflush(stdout);
    
    
        HWND hDesktopWnd = GetDesktopWindow();
        HDC hDesktopDC = GetDC(hDesktopWnd);
        HDC hCaptureDC = CreateCompatibleDC(hDesktopDC);
        hBmp = CreateCompatibleBitmap(GetDC(0), screenWidth, screenHeight);
        SelectObject(hCaptureDC, hBmp);
        BitBlt(hCaptureDC, 0, 0, screenWidth, screenHeight, hDesktopDC, 0, 0, SRCCOPY|CAPTUREBLT);
        BITMAPINFO bmi = {0}; 
        bmi.bmiHeader.biSize = sizeof(bmi.bmiHeader); 
        bmi.bmiHeader.biWidth = screenWidth; 
        bmi.bmiHeader.biHeight = screenHeight; 
        bmi.bmiHeader.biPlanes = 1; 
        bmi.bmiHeader.biBitCount = 32; 
        bmi.bmiHeader.biCompression = BI_RGB;
        RGBQUAD *pPixels = new RGBQUAD[screenWidth*screenHeight];
        GetDIBits(hCaptureDC,hBmp,0,screenHeight,pPixels,&bmi,DIB_RGB_COLORS);
    
        inpic->pts = (float) i * (1000.0/(float)(c->time_base.den))*90;
        avpicture_fill((AVPicture*)inpic, (uint8_t*)pPixels, PIX_FMT_BGR32, c->width, c->height);                   // Fill picture with image
        av_image_alloc(inpic->data, inpic->linesize, c->width, c->height, c->pix_fmt, 1);
        //printf("Allocated frame\n");
        //SaveBMPFile(L"screenshot.bmp",hBmp,hDc,screenWidth,screenHeight);
        ReleaseDC(hDesktopWnd,hDesktopDC);
        DeleteDC(hCaptureDC);
        DeleteObject(hBmp);
    
        // encode the image
        out_size = avcodec_encode_video(c, outbuf, outbuf_size, inpic);
        printf("Encoding frame %3d (size=%5d)\n", i, out_size);
        fwrite(outbuf, 1, out_size, f);
    }
    
    // get the delayed frames
    for(; out_size; i++) {
        fflush(stdout);
    
        out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
        printf("Writing frame %3d (size=%5d)\n", i, out_size);
        fwrite(outbuf, 1, out_size, f);
    }
    
    // add sequence end code to have a real mpeg file
    outbuf[0] = 0x00;
    outbuf[1] = 0x00;
    outbuf[2] = 0x01;
    outbuf[3] = 0xb7;
    fwrite(outbuf, 1, 4, f);
    fclose(f);
    free(picture_buf);
    free(outbuf);
    
    avcodec_close(c);
    av_free(c);
    av_free(inpic);
    printf("Closed codec and Freed\n");