WebM VP8 Codec SDK
vp8_scalable_patterns
00001 /*
00002  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
00003  *
00004  *  Use of this source code is governed by a BSD-style license
00005  *  that can be found in the LICENSE file in the root of the source
00006  *  tree. An additional intellectual property rights grant can be found
00007  *  in the file PATENTS.  All contributing project authors may
00008  *  be found in the AUTHORS file in the root of the source tree.
00009  */
00010 
00011 
00012 /*
00013  * This is an example demonstrating how to implement a multi-layer VP8
00014  * encoding scheme based on temporal scalability for video applications
00015  * that benefit from a scalable bitstream.
00016  */
00017 #include <stdio.h>
00018 #include <stdlib.h>
00019 #include <stdarg.h>
00020 #include <string.h>
00021 #define VPX_CODEC_DISABLE_COMPAT 1
00022 #include "vpx/vpx_encoder.h"
00023 #include "vpx/vp8cx.h"
00024 #define interface (vpx_codec_vp8_cx())
00025 #define fourcc    0x30385056
00026 
00027 #define IVF_FILE_HDR_SZ  (32)
00028 #define IVF_FRAME_HDR_SZ (12)
00029 
/*
 * Store the low 16 bits of val into mem[0..1] in little-endian order
 * (least-significant byte first). mem must have room for 2 bytes.
 */
static void mem_put_le16(char *mem, unsigned int val) {
    char *out = mem;

    *out++ = val;        /* bits 0..7  */
    *out   = val >> 8;   /* bits 8..15 */
}
00034 
/*
 * Store the low 32 bits of val into mem[0..3] in little-endian order
 * (least-significant byte first). mem must have room for 4 bytes.
 */
static void mem_put_le32(char *mem, unsigned int val) {
    int byte;

    for (byte = 0; byte < 4; byte++)
        mem[byte] = val >> (8 * byte);
}
00041 
/*
 * Print a printf-style message to stdout, make sure it ends with a
 * newline, and terminate the process with EXIT_FAILURE.
 *
 * fmt  printf-style format string (must not be NULL); remaining
 *      arguments are the values it consumes.
 *
 * Never returns.
 */
static void die(const char *fmt, ...) {
    va_list ap;
    size_t  len = strlen(fmt);

    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    /* Guard the empty string: fmt[len-1] would index before the buffer. */
    if(len == 0 || fmt[len-1] != '\n')
        printf("\n");
    exit(EXIT_FAILURE);
}
00051 
00052 static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
00053     const char *detail = vpx_codec_error_detail(ctx);
00054 
00055     printf("%s: %s\n", s, vpx_codec_error(ctx));
00056     if(detail)
00057         printf("    %s\n",detail);
00058     exit(EXIT_FAILURE);
00059 }
00060 
00061 static int read_frame(FILE *f, vpx_image_t *img) {
00062     size_t nbytes, to_read;
00063     int    res = 1;
00064 
00065     to_read = img->w*img->h*3/2;
00066     nbytes = fread(img->planes[0], 1, to_read, f);
00067     if(nbytes != to_read) {
00068         res = 0;
00069         if(nbytes > 0)
00070             printf("Warning: Read partial frame. Check your width & height!\n");
00071     }
00072     return res;
00073 }
00074 
00075 static void write_ivf_file_header(FILE *outfile,
00076                                   const vpx_codec_enc_cfg_t *cfg,
00077                                   int frame_cnt) {
00078     char header[32];
00079 
00080     if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
00081         return;
00082     header[0] = 'D';
00083     header[1] = 'K';
00084     header[2] = 'I';
00085     header[3] = 'F';
00086     mem_put_le16(header+4,  0);                   /* version */
00087     mem_put_le16(header+6,  32);                  /* headersize */
00088     mem_put_le32(header+8,  fourcc);              /* headersize */
00089     mem_put_le16(header+12, cfg->g_w);            /* width */
00090     mem_put_le16(header+14, cfg->g_h);            /* height */
00091     mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
00092     mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
00093     mem_put_le32(header+24, frame_cnt);           /* length */
00094     mem_put_le32(header+28, 0);                   /* unused */
00095 
00096     if(fwrite(header, 1, 32, outfile));
00097 }
00098 
00099 
00100 static void write_ivf_frame_header(FILE *outfile,
00101                                    const vpx_codec_cx_pkt_t *pkt)
00102 {
00103     char             header[12];
00104     vpx_codec_pts_t  pts;
00105 
00106     if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
00107         return;
00108 
00109     pts = pkt->data.frame.pts;
00110     mem_put_le32(header, pkt->data.frame.sz);
00111     mem_put_le32(header+4, pts&0xFFFFFFFF);
00112     mem_put_le32(header+8, pts >> 32);
00113 
00114     if(fwrite(header, 1, 12, outfile));
00115 }
00116 
/*
 * Number of temporal layers used by each layering mode, indexed by the
 * mode number (0..6). Read-only lookup table.
 */
static const int mode_to_num_layers[7] = {2, 2, 3, 3, 3, 3, 5};
00118 
00119 int main(int argc, char **argv) {
00120     FILE                *infile, *outfile[MAX_LAYERS];
00121     vpx_codec_ctx_t      codec;
00122     vpx_codec_enc_cfg_t  cfg;
00123     int                  frame_cnt = 0;
00124     vpx_image_t          raw;
00125     vpx_codec_err_t      res;
00126     unsigned int         width;
00127     unsigned int         height;
00128     int                  frame_avail;
00129     int                  got_data;
00130     int                  flags = 0;
00131     int                  i;
00132     int                  pts = 0;              // PTS starts at 0
00133     int                  frame_duration = 1;   // 1 timebase tick per frame
00134 
00135     int                  layering_mode = 0;
00136     int                  frames_in_layer[MAX_LAYERS] = {0};
00137     int                  layer_flags[MAX_PERIODICITY] = {0};
00138 
00139     // Check usage and arguments
00140     if (argc < 9)
00141         die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
00142             " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
00143 
00144     width  = strtol (argv[3], NULL, 0);
00145     height = strtol (argv[4], NULL, 0);
00146     if (width < 16 || width%2 || height <16 || height%2)
00147         die ("Invalid resolution: %d x %d", width, height);
00148 
00149     if (!sscanf(argv[7], "%d", &layering_mode))
00150         die ("Invalid mode %s", argv[7]);
00151     if (layering_mode<0 || layering_mode>6)
00152         die ("Invalid mode (0..6) %s", argv[7]);
00153 
00154     if (argc != 8+mode_to_num_layers[layering_mode])
00155         die ("Invalid number of arguments");
00156 
00157     if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1))
00158         die ("Failed to allocate image", width, height);
00159 
00160     printf("Using %s\n",vpx_codec_iface_name(interface));
00161 
00162     // Populate encoder configuration
00163     res = vpx_codec_enc_config_default(interface, &cfg, 0);
00164     if(res) {
00165         printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
00166         return EXIT_FAILURE;
00167     }
00168 
00169     // Update the default configuration with our settings
00170     cfg.g_w = width;
00171     cfg.g_h = height;
00172 
00173     // Timebase format e.g. 30fps: numerator=1, demoninator=30
00174     if (!sscanf (argv[5], "%d", &cfg.g_timebase.num ))
00175         die ("Invalid timebase numerator %s", argv[5]);
00176     if (!sscanf (argv[6], "%d", &cfg.g_timebase.den ))
00177         die ("Invalid timebase denominator %s", argv[6]);
00178 
00179     for (i=8; i<8+mode_to_num_layers[layering_mode]; i++)
00180         if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8]))
00181             die ("Invalid data rate %s", argv[i]);
00182 
00183     // Real time parameters
00184     cfg.rc_dropframe_thresh = 0;
00185     cfg.rc_end_usage        = VPX_CBR;
00186     cfg.rc_resize_allowed   = 0;
00187     cfg.rc_min_quantizer    = 4;
00188     cfg.rc_max_quantizer    = 63;
00189     cfg.rc_undershoot_pct   = 98;
00190     cfg.rc_overshoot_pct    = 100;
00191     cfg.rc_buf_initial_sz   = 500;
00192     cfg.rc_buf_optimal_sz   = 600;
00193     cfg.rc_buf_sz           = 1000;
00194 
00195     // Enable error resilient mode
00196     cfg.g_error_resilient = 1;
00197     cfg.g_lag_in_frames   = 0;
00198     cfg.kf_mode           = VPX_KF_DISABLED;
00199 
00200     // Disable automatic keyframe placement
00201     cfg.kf_min_dist = cfg.kf_max_dist = 1000;
00202 
00203     // Temporal scaling parameters:
00204     // NOTE: The 3 prediction frames cannot be used interchangeably due to
00205     // differences in the way they are handled throughout the code. The
00206     // frames should be allocated to layers in the order LAST, GF, ARF.
00207     // Other combinations work, but may produce slightly inferior results.
00208     switch (layering_mode)
00209     {
00210 
00211     case 0:
00212     {
00213         // 2-layers, 2-frame period
00214         int ids[2] = {0,1};
00215         cfg.ts_number_layers     = 2;
00216         cfg.ts_periodicity       = 2;
00217         cfg.ts_rate_decimator[0] = 2;
00218         cfg.ts_rate_decimator[1] = 1;
00219         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00220 
00221 #if 1
00222         // 0=L, 1=GF, Intra-layer prediction enabled
00223         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00224                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
00225                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
00226         layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
00227                          VP8_EFLAG_NO_REF_ARF;
00228 #else
00229         // 0=L, 1=GF, Intra-layer prediction disabled
00230         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00231                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
00232                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
00233         layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
00234                          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
00235 #endif
00236         break;
00237     }
00238 
00239     case 1:
00240     {
00241         // 2-layers, 3-frame period
00242         int ids[3] = {0,1,1};
00243         cfg.ts_number_layers     = 2;
00244         cfg.ts_periodicity       = 3;
00245         cfg.ts_rate_decimator[0] = 3;
00246         cfg.ts_rate_decimator[1] = 1;
00247         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00248 
00249         // 0=L, 1=GF, Intra-layer prediction enabled
00250         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00251                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
00252                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
00253         layer_flags[1] =
00254         layer_flags[2] = VP8_EFLAG_NO_REF_GF  |
00255                          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
00256                                                 VP8_EFLAG_NO_UPD_LAST;
00257         break;
00258     }
00259 
00260     case 2:
00261     {
00262         // 3-layers, 6-frame period
00263         int ids[6] = {0,2,2,1,2,2};
00264         cfg.ts_number_layers     = 3;
00265         cfg.ts_periodicity       = 6;
00266         cfg.ts_rate_decimator[0] = 6;
00267         cfg.ts_rate_decimator[1] = 3;
00268         cfg.ts_rate_decimator[2] = 1;
00269         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00270 
00271         // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
00272         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00273                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
00274                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
00275         layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
00276                                                 VP8_EFLAG_NO_UPD_LAST;
00277         layer_flags[1] =
00278         layer_flags[2] =
00279         layer_flags[4] =
00280         layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
00281         break;
00282     }
00283 
00284     case 3:
00285     {
00286         // 3-layers, 4-frame period
00287         int ids[4] = {0,2,1,2};
00288         cfg.ts_number_layers     = 3;
00289         cfg.ts_periodicity       = 4;
00290         cfg.ts_rate_decimator[0] = 4;
00291         cfg.ts_rate_decimator[1] = 2;
00292         cfg.ts_rate_decimator[2] = 1;
00293         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00294 
00295         // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled
00296         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00297                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
00298                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
00299         layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
00300                          VP8_EFLAG_NO_UPD_ARF |
00301                          VP8_EFLAG_NO_UPD_LAST;
00302         layer_flags[1] =
00303         layer_flags[3] = VP8_EFLAG_NO_REF_ARF |
00304                          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
00305                          VP8_EFLAG_NO_UPD_ARF;
00306         break;
00307     }
00308 
00309     case 4:
00310     {
00311         // 3-layers, 4-frame period
00312         int ids[4] = {0,2,1,2};
00313         cfg.ts_number_layers     = 3;
00314         cfg.ts_periodicity       = 4;
00315         cfg.ts_rate_decimator[0] = 4;
00316         cfg.ts_rate_decimator[1] = 2;
00317         cfg.ts_rate_decimator[2] = 1;
00318         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00319 
00320         // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1,
00321         // disabled in layer 2
00322         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00323                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
00324                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
00325         layer_flags[2] = VP8_EFLAG_NO_REF_ARF |
00326                          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
00327         layer_flags[1] =
00328         layer_flags[3] = VP8_EFLAG_NO_REF_ARF |
00329                          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
00330                          VP8_EFLAG_NO_UPD_ARF;
00331         break;
00332     }
00333 
00334     case 5:
00335     {
00336         // 3-layers, 4-frame period
00337         int ids[4] = {0,2,1,2};
00338         cfg.ts_number_layers     = 3;
00339         cfg.ts_periodicity       = 4;
00340         cfg.ts_rate_decimator[0] = 4;
00341         cfg.ts_rate_decimator[1] = 2;
00342         cfg.ts_rate_decimator[2] = 1;
00343         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00344 
00345         // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
00346         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
00347                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
00348                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
00349         layer_flags[2] = VP8_EFLAG_NO_REF_ARF |
00350                          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
00351         layer_flags[1] =
00352         layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
00353         break;
00354     }
00355 
00356     case 6:
00357     {
00358         // NOTE: Probably of academic interest only
00359 
00360         // 5-layers, 16-frame period
00361         int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4};
00362         cfg.ts_number_layers     = 5;
00363         cfg.ts_periodicity       = 16;
00364         cfg.ts_rate_decimator[0] = 16;
00365         cfg.ts_rate_decimator[1] = 8;
00366         cfg.ts_rate_decimator[2] = 4;
00367         cfg.ts_rate_decimator[3] = 2;
00368         cfg.ts_rate_decimator[4] = 1;
00369         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
00370 
00371         layer_flags[0]  = VPX_EFLAG_FORCE_KF;
00372         layer_flags[1]  =
00373         layer_flags[3]  =
00374         layer_flags[5]  =
00375         layer_flags[7]  =
00376         layer_flags[9]  =
00377         layer_flags[11] =
00378         layer_flags[13] =
00379         layer_flags[15] = VP8_EFLAG_NO_UPD_LAST |
00380                           VP8_EFLAG_NO_UPD_GF   |
00381                           VP8_EFLAG_NO_UPD_ARF  |
00382                           VP8_EFLAG_NO_UPD_ENTROPY;
00383         layer_flags[2]  =
00384         layer_flags[6]  =
00385         layer_flags[10] =
00386         layer_flags[14] = 0;
00387         layer_flags[4]  =
00388         layer_flags[12] = VP8_EFLAG_NO_REF_LAST;
00389         layer_flags[8]  = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF |
00390                           VP8_EFLAG_NO_UPD_ENTROPY;
00391         break;
00392     }
00393 
00394     default:
00395         break;
00396     }
00397 
00398     // Open input file
00399     if(!(infile = fopen(argv[1], "rb")))
00400         die("Failed to open %s for reading", argv[1]);
00401 
00402     // Open an output file for each stream
00403     for (i=0; i<cfg.ts_number_layers; i++)
00404     {
00405         char file_name[512];
00406         sprintf (file_name, "%s_%d.ivf", argv[2], i);
00407         if (!(outfile[i] = fopen(file_name, "wb")))
00408             die("Failed to open %s for writing", file_name);
00409         write_ivf_file_header(outfile[i], &cfg, 0);
00410     }
00411 
00412     // Initialize codec
00413     if (vpx_codec_enc_init (&codec, interface, &cfg, 0))
00414         die_codec (&codec, "Failed to initialize encoder");
00415 
00416     // Cap CPU & first I-frame size
00417     vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6);
00418     vpx_codec_control (&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 600);
00419 
00420     frame_avail = 1;
00421     while (frame_avail || got_data) {
00422         vpx_codec_iter_t iter = NULL;
00423         const vpx_codec_cx_pkt_t *pkt;
00424 
00425         flags = layer_flags[frame_cnt % cfg.ts_periodicity];
00426 
00427         frame_avail = read_frame(infile, &raw);
00428         if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts,
00429                             1, flags, VPX_DL_REALTIME))
00430             die_codec(&codec, "Failed to encode frame");
00431 
00432         // Reset KF flag
00433         layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
00434 
00435         got_data = 0;
00436         while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
00437             got_data = 1;
00438             switch (pkt->kind) {
00439             case VPX_CODEC_CX_FRAME_PKT:
00440                 for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
00441                                               i<cfg.ts_number_layers; i++)
00442                 {
00443                     write_ivf_frame_header(outfile[i], pkt);
00444                     if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
00445                               outfile[i]));
00446                     frames_in_layer[i]++;
00447                 }
00448                 break;
00449             default:
00450                 break;
00451             }
00452             printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT
00453                     && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
00454             fflush (stdout);
00455         }
00456         frame_cnt++;
00457         pts += frame_duration;
00458     }
00459     printf ("\n");
00460     fclose (infile);
00461 
00462     printf ("Processed %d frames.\n",frame_cnt-1);
00463     if (vpx_codec_destroy(&codec))
00464             die_codec (&codec, "Failed to destroy codec");
00465 
00466     // Try to rewrite the output file headers with the actual frame count
00467     for (i=0; i<cfg.ts_number_layers; i++)
00468     {
00469         if (!fseek(outfile[i], 0, SEEK_SET))
00470             write_ivf_file_header (outfile[i], &cfg, frames_in_layer[i]);
00471         fclose (outfile[i]);
00472     }
00473 
00474     return EXIT_SUCCESS;
00475 }
00476