AlucardSama04 · December 28, 2019 14:56
diff --git a/triaq x264 patch.diff b/triaq x264 patch.diff
 diff --git a/Makefile b/Makefile
 index fb1fdc08..677bfce7 100644
 --- a/Makefile
 +++ b/Makefile
 @@ -400,7 +400,7 @@ OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-we
 OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0  --slice-max-mbs 50
 OPT2 = --crf 26 -b4 -m5 -r2 --me hex --cqm jvt --nr 100 --psnr --no-mixed-refs --b-adapt 2 --slice-max-size 1500
 OPT3 = --crf 18 -b3 -m9 -r5 --me umh -t1 -A all --b-pyramid normal --direct auto --no-fast-pskip --no-mbtree
 -OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0 --slices 4 --fgo 8 --fade-compensate 0.5
 +OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0 --slices 4 --fgo 8 --fade-compensate 0.5 --aq2-strength 0.5 --aq3-mode 2
 OPT5 = --frames 50 --crf 24 -b3 -m10 -r3 --me tesa -t2
 OPT6 = --frames 50 -q0 -m9 -r2 --me hex -Aall
 OPT7 = --frames 50 -q0 -m2 -r1 --me hex --no-cabac
 diff --git a/common/base.c b/common/base.c
 index e3265290..c1cca61a 100644
 --- a/common/base.c
 +++ b/common/base.c
 @@ -383,6 +383,37 @@ REALIGN_STACK void x264_param_default( x264_param_t *param )
     param->rc.f_pb_factor = 1.3;
     param->rc.i_aq_mode = X264_AQ_VARIANCE;
     param->rc.f_aq_strength = 1.0;
 +    param->rc.f_aq_sensitivity = 10;
 +    param->rc.f_aq_ifactor = 1.0;
 +    param->rc.f_aq_pfactor = 1.0;
 +    param->rc.f_aq_bfactor = 1.0;
 +    param->rc.b_aq2 = 0;
 +    param->rc.f_aq2_strength = 0.0;
 +    param->rc.f_aq2_sensitivity = 15.0;
 +    param->rc.f_aq2_ifactor = 1.0;
 +    param->rc.f_aq2_pfactor = 1.0;
 +    param->rc.f_aq2_bfactor = 1.0;
 +    param->rc.i_aq3_mode = X264_AQ_NONE;
 +    param->rc.f_aq3_strength = 0.5;
 +    param->rc.f_aq3_strengths[0][0] = 0;
 +    param->rc.f_aq3_strengths[0][1] = 0;
 +    param->rc.f_aq3_strengths[0][2] = 0;
 +    param->rc.f_aq3_strengths[0][3] = 0;
 +    param->rc.f_aq3_strengths[1][0] = 0;
 +    param->rc.f_aq3_strengths[1][1] = 0;
 +    param->rc.f_aq3_strengths[1][2] = 0;
 +    param->rc.f_aq3_strengths[1][3] = 0;
 +    param->rc.f_aq3_sensitivity = 10;
 +    param->rc.f_aq3_ifactor[0] = 1.0;
 +    param->rc.f_aq3_ifactor[1] = 1.0;
 +    param->rc.f_aq3_pfactor[0] = 1.0;
 +    param->rc.f_aq3_pfactor[1] = 1.0;
 +    param->rc.f_aq3_bfactor[0] = 1.0;
 +    param->rc.f_aq3_bfactor[1] = 1.0;
 +    param->rc.b_aq3_boundary = 0;
 +    param->rc.i_aq3_boundary[0] = 192;
 +    param->rc.i_aq3_boundary[1] = 64;
 +    param->rc.i_aq3_boundary[2] = 24;
     param->rc.i_lookahead = 40;
 
     param->rc.b_stat_write = 0;
 @@ -1512,6 +1543,87 @@ REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const cha
         p->rc.i_aq_mode = atoi(value);
     OPT("aq-strength")
         p->rc.f_aq_strength = atof(value);
 +    OPT("aq-sensitivity")
 +        p->rc.f_aq_sensitivity = atof(value);
 +    OPT("aq-ifactor")
 +        p->rc.f_aq_ifactor = atof(value);
 +    OPT("aq-pfactor")
 +        p->rc.f_aq_pfactor = atof(value);
 +    OPT("aq-bfactor")
 +        p->rc.f_aq_bfactor = atof(value);
 +    OPT("aq2-strength")
 +    {
 +        p->rc.f_aq2_strength = atof(value);
 +        p->rc.b_aq2 = 1;
 +    }
 +    OPT("aq2-sensitivity")
 +        p->rc.f_aq2_sensitivity = atof(value);
 +    OPT("aq2-ifactor")
 +        p->rc.f_aq2_ifactor = atof(value);
 +    OPT("aq2-pfactor")
 +        p->rc.f_aq2_pfactor = atof(value);
 +    OPT("aq2-bfactor")
 +        p->rc.f_aq2_bfactor = atof(value);
 +    OPT("aq3-mode")
 +        p->rc.i_aq3_mode = atoi(value);
 +    OPT("aq3-strength")
 +    {
 +        /* 8 values: one [0][n],[1][n] pair per luma range n (bright, middle, dark, m.dark);
 +         * 2 values: one pair copied to every range; 1 value: copied to all 8 entries. */
 +        if( 8 == sscanf( value, "%f:%f:%f:%f:%f:%f:%f:%f",
 +                         &p->rc.f_aq3_strengths[0][0], &p->rc.f_aq3_strengths[1][0], &p->rc.f_aq3_strengths[0][1], &p->rc.f_aq3_strengths[1][1],
 +                         &p->rc.f_aq3_strengths[0][2], &p->rc.f_aq3_strengths[1][2], &p->rc.f_aq3_strengths[0][3], &p->rc.f_aq3_strengths[1][3] ) ||
 +            8 == sscanf( value, "%f,%f,%f,%f,%f,%f,%f,%f",
 +                         &p->rc.f_aq3_strengths[0][0], &p->rc.f_aq3_strengths[1][0], &p->rc.f_aq3_strengths[0][1], &p->rc.f_aq3_strengths[1][1],
 +                         &p->rc.f_aq3_strengths[0][2], &p->rc.f_aq3_strengths[1][2], &p->rc.f_aq3_strengths[0][3], &p->rc.f_aq3_strengths[1][3] ) )
 +            p->rc.f_aq3_strength = 0.0;
 +        else if( 2 == sscanf( value, "%f:%f", &p->rc.f_aq3_strengths[0][0], &p->rc.f_aq3_strengths[1][0] ) ||
 +                 2 == sscanf( value, "%f,%f", &p->rc.f_aq3_strengths[0][0], &p->rc.f_aq3_strengths[1][0] ) )
 +        {
 +            p->rc.f_aq3_strength = 0.0;
 +            for( int i = 0; i < 2; i++ )
 +                for( int j = 1; j < 4; j++ )
 +                    p->rc.f_aq3_strengths[i][j] = p->rc.f_aq3_strengths[i][0];
 +        }
 +        else if( 1 == sscanf( value, "%f", &p->rc.f_aq3_strength ) ) /* "1 ==": sscanf yields EOF (-1), not 0, on empty input */
 +            for( int i = 0; i < 2; i++ )
 +                for( int j = 0; j < 4; j++ )
 +                    p->rc.f_aq3_strengths[i][j] = p->rc.f_aq3_strength;
 +    }
 +    OPT("aq3-sensitivity")
 +        p->rc.f_aq3_sensitivity = atof(value);
 +    OPT("aq3-ifactor")
 +        if( 2 == sscanf( value, "%f:%f", &p->rc.f_aq3_ifactor[0], &p->rc.f_aq3_ifactor[1] ) ||
 +            2 == sscanf( value, "%f,%f", &p->rc.f_aq3_ifactor[0], &p->rc.f_aq3_ifactor[1] ) )
 +        { /* both entries were given explicitly */ }
 +        else if( 1 == sscanf( value, "%f", &p->rc.f_aq3_ifactor[0] ) ) /* EOF (-1) on empty input must not count as a match */
 +            p->rc.f_aq3_ifactor[1] = p->rc.f_aq3_ifactor[0];
 +        else /* nothing parsable: fall back to the neutral factor */
 +            p->rc.f_aq3_ifactor[1] = p->rc.f_aq3_ifactor[0] = 1.0;
 +    OPT("aq3-pfactor")
 +        if( 2 == sscanf( value, "%f:%f", &p->rc.f_aq3_pfactor[0], &p->rc.f_aq3_pfactor[1] ) ||
 +            2 == sscanf( value, "%f,%f", &p->rc.f_aq3_pfactor[0], &p->rc.f_aq3_pfactor[1] ) )
 +        { /* both entries were given explicitly */ }
 +        else if( 1 == sscanf( value, "%f", &p->rc.f_aq3_pfactor[0] ) ) /* EOF (-1) on empty input must not count as a match */
 +            p->rc.f_aq3_pfactor[1] = p->rc.f_aq3_pfactor[0];
 +        else /* nothing parsable: fall back to the neutral factor */
 +            p->rc.f_aq3_pfactor[1] = p->rc.f_aq3_pfactor[0] = 1.0;
 +    OPT("aq3-bfactor")
 +        if( 2 == sscanf( value, "%f:%f", &p->rc.f_aq3_bfactor[0], &p->rc.f_aq3_bfactor[1] ) ||
 +            2 == sscanf( value, "%f,%f", &p->rc.f_aq3_bfactor[0], &p->rc.f_aq3_bfactor[1] ) )
 +        { /* both entries were given explicitly */ }
 +        else if( 1 == sscanf( value, "%f", &p->rc.f_aq3_bfactor[0] ) ) /* EOF (-1) on empty input must not count as a match */
 +            p->rc.f_aq3_bfactor[1] = p->rc.f_aq3_bfactor[0];
 +        else /* nothing parsable: fall back to the neutral factor */
 +            p->rc.f_aq3_bfactor[1] = p->rc.f_aq3_bfactor[0] = 1.0;
 +    OPT("aq3-boundary")
 +    {
 +        if( 3 == sscanf( value, "%d:%d:%d", &p->rc.i_aq3_boundary[0], &p->rc.i_aq3_boundary[1], &p->rc.i_aq3_boundary[2] ) ||
 +            3 == sscanf( value, "%d,%d,%d", &p->rc.i_aq3_boundary[0], &p->rc.i_aq3_boundary[1], &p->rc.i_aq3_boundary[2] ) )
 +            p->rc.b_aq3_boundary = 1;
 +        else
 +            p->rc.i_aq3_boundary[0] = p->rc.i_aq3_boundary[1] = p->rc.i_aq3_boundary[2] = 0;
 +    }
     OPT("fgo")
         p->analyse.i_fgo = atoi(value);
     OPT("fade-compensate")
 @@ -1750,7 +1862,34 @@ char *x264_param2string( x264_param_t *p, int b_res )
             s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
         s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
         if( p->rc.i_aq_mode )
 +		{
             s += sprintf( s, ":%.2f", p->rc.f_aq_strength );
 +            s += sprintf( s, " aq-sensitivity=%.2f", p->rc.f_aq_sensitivity );
 +            s += sprintf( s, " aq-factor=%.2f:%.2f:%.2f", p->rc.f_aq_ifactor,
 +                                                          p->rc.f_aq_pfactor,
 +                                                          p->rc.f_aq_bfactor );
 +		}
 +        s += sprintf( s, " aq2=%d", p->rc.b_aq2 );
 +        if( p->rc.b_aq2 )
 +        {
 +            s += sprintf( s, ":%.2f", p->rc.f_aq2_strength );
 +            s += sprintf( s, " aq2-sensitivity=%.2f", p->rc.f_aq2_sensitivity );
 +            s += sprintf( s, " aq2-factor=%.2f:%.2f:%.2f", p->rc.f_aq2_ifactor,
 +                                                           p->rc.f_aq2_pfactor,
 +                                                           p->rc.f_aq2_bfactor );
 +        }
 +        s += sprintf( s, " aq3=%d", p->rc.i_aq3_mode );
 +        if( p->rc.i_aq3_mode )
 +        {
 +            s += sprintf( s, ":[%.2f:%.2f]:[%.2f:%.2f]:[%.2f:%.2f]:[%.2f:%.2f]",
 +                          p->rc.f_aq3_strengths[0][0], p->rc.f_aq3_strengths[1][0], p->rc.f_aq3_strengths[0][1], p->rc.f_aq3_strengths[1][1],
 +                          p->rc.f_aq3_strengths[0][2], p->rc.f_aq3_strengths[1][2], p->rc.f_aq3_strengths[0][3], p->rc.f_aq3_strengths[1][3] );
 +            s += sprintf( s, " aq3-sensitivity=%.2f", p->rc.f_aq3_sensitivity );
 +            s += sprintf( s, " aq3-factor=[%.2f:%.2f]:[%.2f:%.2f]:[%.2f:%.2f]", p->rc.f_aq3_ifactor[0], p->rc.f_aq3_ifactor[1],
 +                                                                                p->rc.f_aq3_pfactor[0], p->rc.f_aq3_pfactor[1],
 +                                                                                p->rc.f_aq3_bfactor[0], p->rc.f_aq3_bfactor[1] );
 +            s += sprintf( s, " aq3-boundary=%d:%d:%d", p->rc.i_aq3_boundary[0], p->rc.i_aq3_boundary[1], p->rc.i_aq3_boundary[2] );
 +        }
         if( p->rc.psz_zones )
             s += sprintf( s, " zones=%s", p->rc.psz_zones );
         else if( p->rc.i_zones )
 diff --git a/common/frame.c b/common/frame.c
 index 38dea643..d304d5c9 100644
 --- a/common/frame.c
 +++ b/common/frame.c
 @@ -230,6 +230,11 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
             if( h->frames.b_have_lowres )
                 PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
         }
 +        if( h->param.rc.i_aq3_mode )
 +        {
 +            PREALLOC( frame->f_qp_offset3, h->mb.i_mb_count * sizeof(float) );
 +            PREALLOC( frame->f_qp_offset_aq3, h->mb.i_mb_count * sizeof(float) );
 +        }
     }
 
     PREALLOC_END( frame->base );
 diff --git a/common/frame.h b/common/frame.h
 index 3b65e484..e9ca58f6 100644
 --- a/common/frame.h
 +++ b/common/frame.h
 @@ -129,6 +129,8 @@ typedef struct x264_frame
     float   *f_row_qscale;
     float   *f_qp_offset;
     float   *f_qp_offset_aq;
 +    float   *f_qp_offset3;
 +    float   *f_qp_offset_aq3;
     int     b_intra_calculated;
     uint16_t *i_intra_cost;
     uint16_t *i_propagate_cost;
 diff --git a/common/pixel.c b/common/pixel.c
 index 20894e23..96890bf6 100644
 --- a/common/pixel.c
 +++ b/common/pixel.c
 @@ -77,6 +77,20 @@ PIXEL_SAD_C( x264_pixel_sad_4x16,   4, 16 )
 PIXEL_SAD_C( x264_pixel_sad_4x8,    4,  8 )
 PIXEL_SAD_C( x264_pixel_sad_4x4,    4,  4 )
 
 +#define PIXEL_COUNT_C( name, lx, ly ) \
 +static int name( pixel *pix, intptr_t i_pix, pixel threshold ) \
 +{   /* count the samples of an lx-by-ly block that are strictly above threshold */ \
 +    int i_above = 0;                                    \
 +    for( int row = 0; row < ly; row++ )                 \
 +        for( int col = 0; col < lx; col++ )             \
 +            i_above += pix[row * i_pix + col] > threshold; \
 +    return i_above;                                     \
 +}
 +
 +PIXEL_COUNT_C( x264_pixel_count_16x16, 16, 16 )
 +PIXEL_COUNT_C( x264_pixel_count_8x16,   8, 16 )
 +PIXEL_COUNT_C( x264_pixel_count_8x8,    8,  8 )
 +
 /****************************************************************************
  * pixel_ssd_WxH
  ****************************************************************************/
 @@ -939,6 +953,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
     pixf->ssim_end4 = ssim_end4;
     pixf->vsad = pixel_vsad;
     pixf->asd8 = pixel_asd8;
 +    pixf->pixel_count[PIXEL_16x16] = x264_pixel_count_16x16;
 +    pixf->pixel_count[PIXEL_8x16] = x264_pixel_count_8x16;
 +    pixf->pixel_count[PIXEL_8x8] = x264_pixel_count_8x8;
 
     pixf->intra_sad_x3_4x4    = intra_sad_x3_4x4;
     pixf->intra_satd_x3_4x4   = intra_satd_x3_4x4;
 diff --git a/common/pixel.h b/common/pixel.h
 index 627d42b3..238e52fe 100644
 --- a/common/pixel.h
 +++ b/common/pixel.h
 @@ -117,6 +117,8 @@ typedef struct
     int (*ads[7])( int enc_dc[4], uint16_t *sums, int delta,
                    uint16_t *cost_mvx, int16_t *mvs, int width, int thresh );
 
 +    int (*pixel_count[4])( pixel *pix, intptr_t i_pix, pixel threshold );
 +
     /* calculate satd or sad of V, H, and DC modes. */
     void (*intra_mbcmp_x3_16x16)( pixel *fenc, pixel *fdec, int res[3] );
     void (*intra_satd_x3_16x16) ( pixel *fenc, pixel *fdec, int res[3] );
 diff --git a/encoder/encoder.c b/encoder/encoder.c
 index 1567d50d..c330804a 100644
 --- a/encoder/encoder.c
 +++ b/encoder/encoder.c
 @@ -958,6 +958,8 @@ static int validate_parameters( x264_t *h, int b_open )
         h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
         h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
         h->param.rc.i_aq_mode = 0;
 +        h->param.rc.b_aq2 = 0;
 +        h->param.rc.i_aq3_mode = 0;
         h->param.rc.b_mb_tree = 0;
         h->param.rc.i_bitrate = 0;
     }
 @@ -1175,10 +1177,54 @@ static int validate_parameters( x264_t *h, int b_open )
         h->param.rc.f_fade_compensate = 0;
 
     h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 3 );
 -    h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
 -    if( h->param.rc.f_aq_strength == 0 )
 +    h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, -3, 3 );
 +    h->param.rc.b_aq2 = h->param.rc.b_aq2 && h->param.rc.f_aq2_strength > 0;
 +    if( h->param.rc.f_aq_strength == 0 && (h->param.rc.i_aq_mode > 0 ? !h->param.rc.b_aq2 : 1) )
         h->param.rc.i_aq_mode = 0;
 -
 +    if( h->param.rc.f_aq_sensitivity < 0 )
 +        h->param.rc.f_aq_sensitivity = 0;
 +    h->param.rc.f_aq_ifactor = x264_clip3f( h->param.rc.f_aq_ifactor, -10, 10 );
 +    h->param.rc.f_aq_pfactor = x264_clip3f( h->param.rc.f_aq_pfactor, -10, 10 );
 +    h->param.rc.f_aq_bfactor = x264_clip3f( h->param.rc.f_aq_bfactor, -10, 10 );
 +    h->param.rc.f_aq2_ifactor = x264_clip3f( h->param.rc.f_aq2_ifactor, -10, 10 );
 +    h->param.rc.f_aq2_pfactor = x264_clip3f( h->param.rc.f_aq2_pfactor, -10, 10 );
 +    h->param.rc.f_aq2_bfactor = x264_clip3f( h->param.rc.f_aq2_bfactor, -10, 10 );
 +    h->param.rc.i_aq3_mode = x264_clip3( h->param.rc.i_aq3_mode, 0, 4 );
 +    h->param.rc.f_aq3_strength = x264_clip3f( h->param.rc.f_aq3_strength, -3, 3 );
 +    for( int i = 0; i < 2; i++ )
 +        for( int j = 0; j < 4; j++ )
 +            h->param.rc.f_aq3_strengths[i][j] = x264_clip3f( h->param.rc.f_aq3_strengths[i][j], -3, 3 );
 +    if( h->param.rc.f_aq3_strengths[0][0] == 0 && h->param.rc.f_aq3_strengths[1][0] == 0 &&
 +        h->param.rc.f_aq3_strengths[0][1] == 0 && h->param.rc.f_aq3_strengths[1][1] == 0 &&
 +        h->param.rc.f_aq3_strengths[0][2] == 0 && h->param.rc.f_aq3_strengths[1][2] == 0 &&
 +        h->param.rc.f_aq3_strengths[0][3] == 0 && h->param.rc.f_aq3_strengths[1][3] == 0 )
 +    {
 +        if( h->param.rc.f_aq3_strength == 0 )
 +            h->param.rc.i_aq3_mode = 0;
 +        else
 +            for( int i = 0; i < 2; i++ )
 +                for( int j = 0; j < 4; j++ )
 +                    h->param.rc.f_aq3_strengths[i][j] = h->param.rc.f_aq3_strength;
 +    }
 +    if( h->param.rc.f_aq3_sensitivity < 0 )
 +        h->param.rc.f_aq3_sensitivity = 0;
 +    for( int i = 0; i < 2; i++ )
 +    {
 +        h->param.rc.f_aq3_ifactor[i] = x264_clip3f( h->param.rc.f_aq3_ifactor[i], -10, 10 );
 +        h->param.rc.f_aq3_pfactor[i] = x264_clip3f( h->param.rc.f_aq3_pfactor[i], -10, 10 );
 +        h->param.rc.f_aq3_bfactor[i] = x264_clip3f( h->param.rc.f_aq3_bfactor[i], -10, 10 );
 +    }
 +    h->param.rc.i_aq3_boundary[0] = x264_clip3( h->param.rc.i_aq3_boundary[0], 0, (256 << (BIT_DEPTH - 8)) - 1 );
 +    h->param.rc.i_aq3_boundary[1] = x264_clip3( h->param.rc.i_aq3_boundary[1], 0, (256 << (BIT_DEPTH - 8)) - 1 );
 +    h->param.rc.i_aq3_boundary[2] = x264_clip3( h->param.rc.i_aq3_boundary[2], 0, (256 << (BIT_DEPTH - 8)) - 1 );
 +    if( !h->param.rc.b_aq3_boundary ||
 +        h->param.rc.i_aq3_boundary[0] <= h->param.rc.i_aq3_boundary[1] || h->param.rc.i_aq3_boundary[1] <= h->param.rc.i_aq3_boundary[2] )
 +    {
 +        h->param.rc.i_aq3_boundary[0] = (h->param.vui.b_fullrange == 1 ? 205 : 192) << (BIT_DEPTH - 8);
 +        h->param.rc.i_aq3_boundary[1] = (h->param.vui.b_fullrange == 1 ?  56 :  64) << (BIT_DEPTH - 8);
 +        h->param.rc.i_aq3_boundary[2] = (h->param.vui.b_fullrange == 1 ?   9 :  24) << (BIT_DEPTH - 8);
 +    }
 +	
     if( h->param.i_log_level < X264_LOG_INFO && (!h->param.psz_log_file || h->param.i_log_file_level < X264_LOG_INFO) )
     {
         h->param.analyse.b_psnr = 0;
 diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
 index 7ed3d292..f99eb19c 100644
 --- a/encoder/ratecontrol.c
 +++ b/encoder/ratecontrol.c
 @@ -165,6 +165,9 @@ struct x264_ratecontrol_t
     int bframes;                /* # consecutive B-frames before this P-frame */
     int bframe_bits;            /* total cost of those frames */
 
 +    /* OreAQ stuff */
 +	float aq3_threshold;
 +
     int i_zones;
     x264_zone_t *zones;
     x264_zone_t *prev_zone;
 @@ -256,7 +259,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
 }
 
 // Find the total AC energy of the block in all planes.
 -static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
 +static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, uint32_t *energy )
 {
     /* This function contains annoying hacks because GCC has a habit of reordering emms
      * and putting it after floating point ops.  As a result, we put the emms at the end of the
 @@ -268,40 +271,230 @@ static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame
     {
         /* We don't know the super-MB mode we're going to pick yet, so
          * simply try both and pick the lower of the two. */
 -        uint32_t var_interlaced, var_progressive;
 -        var_interlaced   = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 1, 1 );
 -        var_progressive  = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0, 0 );
 +        uint32_t var_interlaced_y  = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 1, 1 );
 +        uint32_t var_progressive_y = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0, 0 );
 +        uint32_t var_interlaced_uv  = 0; /* must start at 0: neither chroma branch below runs when CHROMA_FORMAT is 0 */
 +        uint32_t var_progressive_uv = 0; /* (both are summed and stored into energy[1] unconditionally afterwards) */
         if( CHROMA444 )
         {
 -            var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 1, 1 );
 -            var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0, 0 );
 -            var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 );
 -            var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 );
 +            var_interlaced_uv   = ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 1, 1 );
 +            var_progressive_uv  = ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0, 0 );
 +            var_interlaced_uv  += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 );
 +            var_progressive_uv += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 );
         }
         else if( CHROMA_FORMAT )
         {
 -            var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 );
 -            var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 );
 +            var_interlaced_uv   = ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 );
 +            var_progressive_uv  = ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 );
 +        }
 +        uint32_t var_interlaced  = var_interlaced_y  + var_interlaced_uv;
 +        uint32_t var_progressive = var_progressive_y + var_progressive_uv;
 +        if( var_interlaced < var_progressive )
 +        {
 +            var = var_interlaced;
 +            energy[0] = var_interlaced_y;
 +            energy[1] = var_interlaced_uv;
 +        }
 +        else
 +        {
 +            var = var_progressive;
 +            energy[0] = var_progressive_y;
 +            energy[1] = var_progressive_uv;
         }
 -        var = X264_MIN( var_interlaced, var_progressive );
     }
     else
     {
 -        var  = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, PARAM_INTERLACED, 1 );
 +		energy[0] = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, PARAM_INTERLACED, 1 );
         if( CHROMA444 )
         {
 -            var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 );
 -            var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 );
 +            energy[1]  = ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 );
 +            energy[1] += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 );
         }
         else if( CHROMA_FORMAT )
 -            var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 );
 +            energy[1]  = ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 );
 +        var = energy[0] + energy[1];
     }
     x264_emms();
     return var;
 }
 
 +static int x264_sum_dctq( dctcoef dct[64] )
 +{
 +    /* Sum of |dct[k]| * x264_dct8_weight_tab[k] for k = 1..63; index 0 is left out. */
 +    int weighted = 0;
 +    for( int k = 1; k < 64; k++ )
 +        weighted += abs(dct[k]) * x264_dct8_weight_tab[k];
 +    return weighted;
 +}
 +
 +static NOINLINE void get_image_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int b_field, int *luma, int *bluePoint )
 +{ /* For macroblock (mb_x,mb_y) of frame: writes a luma level to *luma and a 0/1 chroma-count flag to *bluePoint. */
 +#define BLUE_THRESHOLD (0x81<<(BIT_DEPTH-8)) /* samples of the first chroma component above this are counted */
 +#define RED_THRESHOLD  (0x87<<(BIT_DEPTH-8)) /* samples of the second chroma component above this are counted */
 +    ALIGNED_32( static pixel zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; /* SAD reference, read with stride 0; NOTE(review): purpose of the 17th entry (1) is unclear */
 +    if( CHROMA444 )
 +    {
 +        int stride[3];
 +        int offset[3];
 +        for( int i = 0; i < 3; i++ ) /* all three planes use 16x16 blocks here */
 +        {
 +            stride[i] = frame->i_stride[i];
 +            offset[i] = b_field /* field: start from the even MB row, odd MB rows begin one line lower */
 +                ? 16 * (mb_x + (mb_y&~1) * stride[i]) + (mb_y&1) * stride[i]
 +                : 16 * (mb_x +  mb_y * stride[i]);
 +            stride[i] <<= b_field; /* field: step two lines at a time */
 +        }
 +        *luma = h->pixf.sad[PIXEL_16x16]( frame->plane[0] + offset[0], stride[0], zero, 0 ) >> 8; /* presumably sum of the 256 luma samples, so >> 8 gives their mean -- TODO confirm */
 +        *bluePoint = /* set when >= 160 of 256 plane[1] samples and <= 96 of 256 plane[2] samples exceed their thresholds */
 +            (h->pixf.pixel_count[PIXEL_16x16]( frame->plane[1] + offset[1], stride[1], BLUE_THRESHOLD ) >= 160) &&
 +            (h->pixf.pixel_count[PIXEL_16x16]( frame->plane[2] + offset[2], stride[2], RED_THRESHOLD  ) <= 96);
 +    }
 +    else
 +    {
 +        int height[2];
 +        int stride[2];
 +        int offset[2];
 +        for( int i = 0; i < 2; i++ ) /* i == 0: luma plane, i == 1: plane[1], which is deinterleaved below */
 +        {
 +            height[i] = i ? 16>>CHROMA_V_SHIFT : 16; /* rows per macroblock in this plane */
 +            stride[i] = frame->i_stride[i];
 +            offset[i] = b_field /* same field addressing as above, with a per-plane MB height */
 +                ? 16 * mb_x + height[i] *(mb_y&~1) * stride[i] + (mb_y&1) * stride[i]
 +                : 16 * mb_x + height[i] * mb_y * stride[i];
 +            stride[i] <<= b_field;
 +        }
 +        *luma = h->pixf.sad[PIXEL_16x16]( frame->plane[0] + offset[0], stride[0], zero, 0 ) >> 8; /* same luma level as in the 4:4:4 path */
 +        ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] ); /* scratch buffer receiving the deinterleaved chroma block */
 +        int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; /* chroma block size matching a 16x16 luma block */
 +        int weight = 1 + (chromapix == PIXEL_8x16); /* 2 for 8x16 chroma blocks, 1 otherwise: scales the count limits */
 +        h->mc.load_deinterleave_chroma_fenc( pix, frame->plane[1] + offset[1], stride[1], height[1] ); /* presumably one component at pix, the other at pix+FENC_STRIDE/2 -- TODO confirm */
 +        *bluePoint = /* limits are 40 and 24 per 64 samples, the same ratios as 160 and 96 per 256 above */
 +            (h->pixf.pixel_count[chromapix]( pix,               FENC_STRIDE, BLUE_THRESHOLD ) >= 40 * weight) &&
 +            (h->pixf.pixel_count[chromapix]( pix+FENC_STRIDE/2, FENC_STRIDE, RED_THRESHOLD  ) <= 24 * weight);
 +    }
 +#undef BLUE_THRESHOLD
 +#undef RED_THRESHOLD
 +}
 +
 +static NOINLINE float x264_adjust_OreAQ( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, uint32_t *energy)
 +{ /* Returns the QP offset for one macroblock from its luma level and energy[0] (luma) / energy[1] (chroma) AC energies. */
 +    uint8_t mode; /* luma range index: 0 bright, 1 middle, 2 dark, 3 m.dark */
 +    int bluePoint = 0;
 +    int luma = 0;
 +    float energy_y,energy_uv,f_qp_adj;
 +    uint32_t _energy_y, _energy_uv;
 +
 +    get_image_mb( h, mb_x, mb_y, frame, PARAM_INTERLACED, &luma, &bluePoint );
 +    x264_emms();
 +
 +    _energy_y  = X264_MAX( energy[0], 1 ); /* keep the log2 argument >= 1 */
 +    _energy_uv = X264_MAX( energy[1], 1 );
 +	
 +	h->rc->aq3_threshold = logf( powf( h->param.rc.f_aq3_sensitivity, 4 ) / 2.0 ); /* NOTE(review): depends only on a parameter yet is recomputed per call; sensitivity 0 gives logf(0) */
 +    // logf(energy) = 1.0397 * x264_log2( energy ) / 1.5
 +    // energy_y  = 1.2 * (logf(_energy_y ) - ((h->rc->aq3_threshold + 2*(BIT_DEPTH-8)) * .91) + 0.5);
 +    // energy_uv = 0.8 * (logf(_energy_uv) - ((h->rc->aq3_threshold + 2*(BIT_DEPTH-8)) * .91) + 0.5);
 +	energy_y  = 0.83176f * x264_log2( _energy_y  ) - ((h->rc->aq3_threshold + 2*(BIT_DEPTH-8)) * 1.092f) + 0.5f; /* < 0 is treated as "flat", >= 0 as "bump" below */
 +	energy_uv = 0.55451f * x264_log2( _energy_uv ) - ((h->rc->aq3_threshold + 2*(BIT_DEPTH-8)) * 0.728f) + 0.5f;
 +    f_qp_adj = 0.f; /* stays 0 ("even") when no case below matches */
 +
 +    if( luma > h->param.rc.i_aq3_boundary[0] )
 +    {
 +        // *** Bright ***
 +        // Y & UV Flat      -> qp up
 +        // Y Flat / UV Bump -> qp y up
 +        // Y Bump / UV Flat -> even
 +        // Y & UV Bump      -> qp down
 +        mode = 0x00;
 +
 +        // qp up
 +        if( !bluePoint && energy_y < 0 && energy_uv < 0 )
 +            f_qp_adj = X264_MIN( energy_y, energy_uv );
 +        // qp y up
 +        else if( !bluePoint && energy_y < 0 && energy_uv >= 0 )
 +            f_qp_adj = energy_y;
 +        // qp down
 +        else if( energy_y >= 0 && energy_uv >= 0 )
 +            f_qp_adj = X264_MAX( energy_y, energy_uv ) * 0.5f;
 +    }
 +    else if( luma > h->param.rc.i_aq3_boundary[1] )
 +    {
 +        // *** Middle ***
 +        // Y & UV Flat      -> qp up
 +        // Y Flat / UV Bump -> even
 +        // Y Bump / UV Flat -> qp mix down
 +        // Y & UV Bump      -> qp down
 +        mode = 0x01;
 +
 +        // qp up
 +        if( !bluePoint && energy_y < 0 && energy_uv < 0 )
 +            f_qp_adj = X264_MAX( energy_y, energy_uv );
 +        // qp mix down
 +        else if( energy_y >= 0 && energy_uv < 0 )
 +            f_qp_adj = X264_MAX( energy_y+ (!bluePoint * energy_uv), 0 ) * 0.5f;
 +        // qp down
 +        else if( energy_y >= 0 && energy_uv >= 0 )
 +            f_qp_adj = X264_MAX( energy_y, bluePoint * energy_uv );
 +    }
 +    else if( luma > h->param.rc.i_aq3_boundary[2] )
 +    {
 +        // *** Dark ***
 +        // Y & UV Flat      -> qp up
 +        // Y Flat / UV Bump -> qp uv down
 +        // Y Bump / UV Flat -> qp y down
 +        // Y & UV Bump      -> qp down
 +        mode = 0x02;
 +
 +        // qp up
 +        if( energy_y < 0 && energy_uv < 0 )
 +            f_qp_adj = X264_MAX( energy_y, energy_uv ) * 0.5f;
 +        // qp uv down
 +        else if( energy_y < 0 && energy_uv >= 0 )
 +            f_qp_adj = energy_uv * 1.25f;
 +        // qp y down
 +        else if( energy_y >= 0 && energy_uv < 0 )
 +            f_qp_adj = energy_y * 1.25f;
 +        // qp down
 +        else if( energy_y >= 0 && energy_uv >= 0 )
 +            f_qp_adj = X264_MAX( energy_y, energy_uv ) * 1.25f;
 +    }
 +    else
 +    {
 +        // *** M.Dark ***
 +        // Y & UV Flat      -> qp double up
 +        // Y Flat / UV Bump -> qp y up
 +        // Y Bump / UV Flat -> qp uv up
 +        // Y & UV Bump      -> even
 +        mode = 0x03;
 +
 +        // qp double up
 +        if( energy_y < 0 && energy_uv < 0 )
 +            f_qp_adj = energy_y + energy_uv;
 +        // qp y up
 +        else if( energy_y < 0 && energy_uv >= 0 )
 +            f_qp_adj = energy_y;
 +        // qp uv up
 +        else if( energy_y >= 0 && energy_uv < 0 )
 +            f_qp_adj = energy_uv * 0.5f;
 +    }
 +
 +    /* If f_qp_adj is positive, then lower the qp. */
 +    f_qp_adj *= h->param.rc.f_aq3_strengths[f_qp_adj>0][mode]; /* row 1 scales qp-lowering adjustments, row 0 the rest */
 +
 +    if( h->param.rc.i_aq3_mode == X264_AQ_ORE )
 +    {
 +        /* If current MB is frame edge, lower the qp. */
 +        if( mb_x == 0 || mb_y == 0 || mb_x == h->sps->i_mb_width - 1 || mb_y == h->sps->i_mb_height - 1  )
 +            f_qp_adj += (float)( (energy_y<0) + (energy_uv<0) + 1 ) * ( h->param.rc.f_aq3_strengths[1][3] + 0.5f ); /* 1x to 3x, growing with the number of flat components */
 +    }
 +
 +    return -f_qp_adj; /* sign flip: positive f_qp_adj becomes a negative QP offset */
 +}
 +
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets )
 {
 +	uint32_t energy_yuv[2] = {0, 0};
 +	
     /* Initialize frame stats */
     for( int i = 0; i < 3; i++ )
     {
 @@ -337,7 +530,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
         {
             for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
                 for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
 -                    ac_energy_mb( h, mb_x, mb_y, frame );
 +                    ac_energy_mb( h, mb_x, mb_y, frame, energy_yuv );
         }
         else
             return;
 @@ -350,6 +543,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
         float strength;
         float avg_adj = 0.f;
         float bias_strength = 0.f;
 +		float f_aq_sensitivity = 0.f;
 
         if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE || h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED )
         {
 @@ -358,7 +552,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
             for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
                 for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
                 {
 -                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
 +                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame, energy_yuv );
                     float qp_adj = powf( energy * bit_depth_correction + 1, 0.125f );
                     frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
                     avg_adj += qp_adj;
 @@ -371,7 +565,10 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
             bias_strength = h->param.rc.f_aq_strength;
         }
         else
 +		{
             strength = h->param.rc.f_aq_strength * 1.0397f;
 +			f_aq_sensitivity = h->param.rc.f_aq_sensitivity - 10.f + 14.427f + 2*(BIT_DEPTH-8);
 +		}
 
         for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
             for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
 @@ -390,8 +587,8 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
                 }
                 else
                 {
 -                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
 -                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
 +                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame, energy_yuv );
 +                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - f_aq_sensitivity);
                 }
                 if( quant_offsets )
                     qp_adj += quant_offsets[mb_xy];
 @@ -402,6 +599,64 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
             }
     }
 
 +    // OreAQ: fill f_qp_offset3 / f_qp_offset_aq3 whenever an aq3 mode is enabled
 +    if( h->param.rc.i_aq3_mode != X264_AQ_NONE )
 +    {
 +        float strength = 0.f;
 +        float avg_adj  = 0.f;
 +
 +        if( h->param.rc.i_aq3_mode == X264_AQ_MIXORE )
 +        {
 +            float bit_depth_correction = powf(1 << (BIT_DEPTH-8), 0.5f);
 +            float avg_adj_pow2 = 0.f;
 +            for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
 +                for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
 +                {
 +                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame, energy_yuv );
 +                    float qp_adj = powf( energy + 1, 0.125f );
 +                    avg_adj += qp_adj;
 +                    avg_adj_pow2 += qp_adj * qp_adj;
 +                    frame->f_qp_offset3[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
 +                    frame->f_qp_offset_aq3[mb_x + mb_y*h->mb.i_mb_stride] = x264_adjust_OreAQ( h, mb_x, mb_y, frame, energy_yuv );
 +                }
 +            avg_adj /= h->mb.i_mb_count;
 +            avg_adj_pow2 /= h->mb.i_mb_count;
 +            strength = h->param.rc.f_aq3_strengths[1][3] * avg_adj / bit_depth_correction;
 +            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f * bit_depth_correction)) / avg_adj;
 +        }
 +
 +        for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
 +            for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
 +            {
 +                float qp_adj;
 +                int mb_xy = mb_x + mb_y*h->mb.i_mb_stride;
 +                if( h->param.rc.i_aq3_mode == X264_AQ_MIXORE )
 +                {
 +                    if( frame->f_qp_offset_aq3[mb_xy] < -1 )
 +                    {
 +                        qp_adj = strength * (frame->f_qp_offset3[mb_xy] - avg_adj);
 +                        qp_adj = X264_MIN( qp_adj, frame->f_qp_offset_aq3[mb_xy] );
 +                    }
 +                    else if( frame->f_qp_offset_aq3[mb_xy] >= 1 )
 +                        qp_adj = frame->f_qp_offset_aq3[mb_xy];
 +                    else
 +                        qp_adj = strength * (frame->f_qp_offset3[mb_xy] - avg_adj);
 +                }
 +                else
 +                {
 +                    ac_energy_mb( h, mb_x, mb_y, frame, energy_yuv );
 +                    qp_adj = x264_adjust_OreAQ( h, mb_x, mb_y, frame, energy_yuv );
 +                }
 +                if( quant_offsets )
 +                    qp_adj += quant_offsets[mb_xy];
 +                frame->f_qp_offset3[mb_xy] =
 +                frame->f_qp_offset_aq3[mb_xy] = qp_adj;
 +                // FIXME: does OreAQ affects lowres factor?
 +                // if( h->frames.b_have_lowres )
 +                //    frame->i_inv_qscale_factor[mb_xy] = x264_exp2fix8(qp_adj);
 +            }
 +    }
 +
     /* Remove mean from SSD calculation */
     for( int i = 0; i < 3; i++ )
     {
 @@ -1746,19 +2001,108 @@ int x264_ratecontrol_qp( x264_t *h )
     return x264_clip3( h->rc->qpm + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
 }
 
 +/* Haali's AQ (2nd AQ): QP offset for the macroblock currently in h->mb.
 + * Returns 0 when h->mb.i_qp <= 10, when a bright MB fails the chroma
 + * pixel-count test, or when x264_sum_dctq() finds nothing over the luma
 + * blocks. Otherwise returns the negated adjustment, which is capped at
 + * h->rc->qpm / 2. */
 +static NOINLINE float x264_haali_adaptive_quant( x264_t *h )
 +{
 +#define BLUE_THRESHOLD (0x81<<(BIT_DEPTH-8))
 +#define RED_THRESHOLD  (0x87<<(BIT_DEPTH-8))
 +    ALIGNED_32( static pixel zero[FDEC_STRIDE*8] );
 +    ALIGNED_32( dctcoef dct[64] );
 +    float qp_adj;
 +    int total = 0;
 +
 +    if( h->mb.i_qp <= 10 ) /* AQ is probably not needed at such low QP */
 +        return 0;
 +
 +    if( h->pixf.sad[PIXEL_16x16](h->mb.pic.p_fenc[0], FENC_STRIDE, zero, 16) > 64*16*16 ) /* light places? NOTE(review): unlike the chroma thresholds this is not scaled by BIT_DEPTH */
 +    {
 +        int (*count_func)( pixel *, intptr_t, pixel ) = CHROMA444 ? h->pixf.pixel_count[PIXEL_16x16] : h->pixf.pixel_count[PIXEL_8x8];
 +        int weight = CHROMA444 ? 4 : 1;
 +        if( count_func(h->mb.pic.p_fenc[1], FENC_STRIDE, BLUE_THRESHOLD) < 40 * weight /* not enough "blue" pixels? */ ||
 +            count_func(h->mb.pic.p_fenc[2], FENC_STRIDE, RED_THRESHOLD)  > 24 * weight /* too many "red" pixels? */ )
 +        {
 +            x264_emms();
 +            return 0;
 +        }
 +    }
 +
 +    /* Visit the four 8x8 luma blocks of the MB: (i&1) picks the column, (i>>1) the row. */
 +    for( int i = 0; i < 4; i++ )
 +    {
 +        h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + (i&1)*8 + (i>>1)*8*FENC_STRIDE, zero );
 +        total += x264_sum_dctq( dct );
 +    }
 +
 +    x264_emms();
 +
 +    if( total == 0 ) /* no AC coefficients, nothing to do */
 +        return 0;
 +
 +    /* the function is chosen such that it stays close to 0 in almost all
 +     * range of 0..1, and rapidly goes up to 1 near 1.0 */
 +    qp_adj = h->rc->qpm * h->param.rc.f_aq2_strength / pow( 2 - expf( -5e-13 * total * total ), h->param.rc.f_aq2_sensitivity );
 +
 +    /* don't adjust by more than this amount */
 +    qp_adj = X264_MIN( qp_adj, h->rc->qpm / 2.f );
 +
 +    return -qp_adj;
 +#undef BLUE_THRESHOLD
 +#undef RED_THRESHOLD
 +}
 +
 int x264_ratecontrol_mb_qp( x264_t *h )
 {
     x264_emms();
     float qp = h->rc->qpm;
 +
 +    float qp_offset = 0; /* combined offset of every enabled AQ stage; stays 0 when all are off */
     if( h->param.rc.i_aq_mode )
     {
 -         /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
 -        float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
 -        /* Scale AQ's effect towards zero in emergency mode. */
 -        if( qp > QP_MAX_SPEC )
 -            qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
 -        qp += qp_offset;
 +        /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
 +        qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
 +
 +        qp_offset *= h->sh.i_type == SLICE_TYPE_I ? h->param.rc.f_aq_ifactor
 +                   : h->sh.i_type == SLICE_TYPE_P ? h->param.rc.f_aq_pfactor
 +                   : h->sh.i_type == SLICE_TYPE_B ? h->param.rc.f_aq_bfactor
 +                   : 1.f;
 +    }
 +
 +    if( h->param.rc.b_aq2 )
 +    {
 +        float haq_offset = x264_haali_adaptive_quant( h );
 +        haq_offset *= h->sh.i_type == SLICE_TYPE_I ? h->param.rc.f_aq2_ifactor
 +                    : h->sh.i_type == SLICE_TYPE_P ? h->param.rc.f_aq2_pfactor
 +                    : h->sh.i_type == SLICE_TYPE_B ? h->param.rc.f_aq2_bfactor
 +                    : 1.f;
 +
 +        if( qp_offset >= 0.f ) /* 1st AQ did not lower QP: add the 2nd AQ offset on top */
 +            qp_offset += haq_offset;
 +        else /* 1st AQ already lowered QP: keep the smaller of the two offsets instead of stacking them */
 +            qp_offset = X264_MIN( qp_offset, haq_offset );
 +    }
 +
 +    if( h->param.rc.i_aq3_mode )
 +    {
 +        /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
 +        float oaq_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset3[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq3[h->mb.i_mb_xy];
 +
 +        oaq_offset *= h->sh.i_type == SLICE_TYPE_I ? h->param.rc.f_aq3_ifactor[oaq_offset<0]
 +                    : h->sh.i_type == SLICE_TYPE_P ? h->param.rc.f_aq3_pfactor[oaq_offset<0]
 +                    : h->sh.i_type == SLICE_TYPE_B ? h->param.rc.f_aq3_bfactor[oaq_offset<0]
 +                    : 1.f; /* factor index: 0 when the offset is >= 0, 1 when it is negative */
 +
 +        qp_offset += oaq_offset;
     }
 +
 +    /* Scale AQ's effect towards zero in emergency mode. */
 +    if( qp > QP_MAX_SPEC )
 +        qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
 +    qp += qp_offset;
 +
     return x264_clip3( qp + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
 }
 
 diff --git a/x264.c b/x264.c
 index 916a426d..db441cc4 100644
 --- a/x264.c
 +++ b/x264.c
 @@ -957,6 +957,63 @@ static void help( x264_param_t *defaults, int longhelp )
         "                                  - 3: Auto-variance AQ with bias to dark scenes\n", defaults->rc.i_aq_mode );
     H1( "      --aq-strength <float>   Reduces blocking and blurring in flat and\n"
         "                              textured areas. [%.1f]\n", defaults->rc.f_aq_strength );
 +    H1( "      --aq-sensitivity <float> \"Center\" of AQ curve. [%.1f]\n"
 +        "                                  -  5: most QPs are raised\n"
 +        "                                  - 10: good general-use sensitivity\n"
 +        "                                  - 15: most QPs are lowered\n", defaults->rc.f_aq_sensitivity );
 +    H2( "      --aq-ifactor <float>    AQ strength factor of I-frames [%.2f]\n", defaults->rc.f_aq_ifactor );
 +    H2( "      --aq-pfactor <float>    AQ strength factor of P-frames [%.2f]\n", defaults->rc.f_aq_pfactor );
 +    H2( "      --aq-bfactor <float>    AQ strength factor of B-frames [%.2f]\n", defaults->rc.f_aq_bfactor );
 +    H1( "      --aq2-strength <float>   Use 2nd AQ (Haali's AQ) algorithm for support. [%.1f]\n"
 +        "                                  0.0: no 2nd AQ\n"
 +        "                                  1.1: strong 2nd AQ\n", defaults->rc.f_aq2_strength );
 +    H1( "      --aq2-sensitivity <float> \"Flatness\" threshold to trigger 2nd AQ [%.1f]\n"
 +        "                                    5: applies to almost all blocks\n"
 +        "                                   22: only flat blocks\n", defaults->rc.f_aq2_sensitivity );
 +    H2( "      --aq2-ifactor <float>   2nd AQ strength factor of I-frames [%.2f]\n", defaults->rc.f_aq2_ifactor );
 +    H2( "      --aq2-pfactor <float>   2nd AQ strength factor of P-frames [%.2f]\n", defaults->rc.f_aq2_pfactor );
 +    H2( "      --aq2-bfactor <float>   2nd AQ strength factor of B-frames [%.2f]\n", defaults->rc.f_aq2_bfactor );
 +    H2( "      --aq3-mode <integer>    3rd AQ (OreAQ) method [%d]\n"
 +        "                                  - 0: Disabled\n"
 +        "                                  - 1: OreAQ\n"
 +        "                                  - 2: MixOre (experimental)\n", defaults->rc.i_aq3_mode );
 +    H1( "      --aq3-strength <float>  Reduces blocking and blurring in bump and\n"
 +        "                              clear-cut areas. [%.1f]\n", defaults->rc.f_aq3_strength );
 +    H2( "                    <Up:Down> or <Up1:Down1:Up2:Down2:Up3:Down3:Up4:OtherStuff>\n"
 +        "                              Set QP up/down strength.\n" );
 +    H1( "      --aq3-sensitivity <float> \"Center\" of 3rd AQ curve. [%.1f]\n"
 +        "                                  -  5: most QPs are raised\n"
 +        "                                  - 10: good general-use sensitivity\n"
 +        "                                  - 15: most QPs are lowered\n", defaults->rc.f_aq3_sensitivity );
 +    H2( "      --aq3-ifactor <Up:Down> 3rd AQ strength factor of I-frames [%.1f:%.1f]\n", defaults->rc.f_aq3_ifactor[0], defaults->rc.f_aq3_ifactor[1] );
 +    H2( "      --aq3-pfactor <Up:Down> 3rd AQ strength factor of P-frames [%.1f:%.1f]\n", defaults->rc.f_aq3_pfactor[0], defaults->rc.f_aq3_pfactor[1] );
 +    H2( "      --aq3-bfactor <Up:Down> 3rd AQ strength factor of B-frames [%.1f:%.1f]\n", defaults->rc.f_aq3_bfactor[0], defaults->rc.f_aq3_bfactor[1] );
 +    H2( "      --aq3-boundary <int:int:int>  OreAQ boundary. \n"
 +#if HAVE_BITDEPTH8 && HAVE_BITDEPTH10
 +        "                                       fullrange=off [8][10]: [%d:%d:%d][%d:%d:%d]\n"
 +        "                                       fullrange=on [8][10]: [%d:%d:%d][%d:%d:%d]\n"
 +        "                                       #1: Bright-Middle\n"
 +        "                                       #2: Middle-Dark\n"
 +        "                                       #3: Dark-M.Dark\n",
 +        192, 64, 24,192<<2, 64<<2, 24<<2,
 +        205, 56,  9,205<<2, 56<<2,  9<<2 );
 +#elif HAVE_BITDEPTH8
 +        "                                       fullrange=off: [%d:%d:%d]\n"
 +        "                                       fullrange=on: [%d:%d:%d]\n"
 +        "                                       #1: Bright-Middle\n"
 +        "                                       #2: Middle-Dark\n"
 +        "                                       #3: Dark-M.Dark\n",
 +        192, 64, 24,
 +        205, 56,  9 );
 +#else
 +        "                                       fullrange=off: [%d:%d:%d]\n"
 +        "                                       fullrange=on: [%d:%d:%d]\n"
 +        "                                       #1: Bright-Middle\n"
 +        "                                       #2: Middle-Dark\n"
 +        "                                       #3: Dark-M.Dark\n",
 +        192<<2, 64<<2, 24<<2,
 +        205<<2, 56<<2,  9<<2 );
 +#endif
     H1( "      --fade-compensate <float> Allocate more bits to fades [%.1f]\n", defaults->rc.f_fade_compensate );
     H2( "                                  Approximate sane range: 0.0 - 1.0 (requires mb-tree)\n" );
     H1( "\n" );
 @@ -1399,7 +1456,23 @@ static struct option long_options[] =
     { "no-fast-pskip",     no_argument, NULL, 0 },
     { "no-dct-decimate",   no_argument, NULL, 0 },
     { "aq-strength", required_argument, NULL, 0 },
 +    { "aq-sensitivity", required_argument, NULL, 0 },
 +    { "aq-ifactor",  required_argument, NULL, 0 },
 +    { "aq-pfactor",  required_argument, NULL, 0 },
 +    { "aq-bfactor",  required_argument, NULL, 0 },
     { "aq-mode",     required_argument, NULL, 0 },
 +    { "aq2-strength", required_argument, NULL, 0 },
 +    { "aq2-sensitivity", required_argument, NULL, 0 },
 +    { "aq2-ifactor", required_argument, NULL, 0 },
 +    { "aq2-pfactor", required_argument, NULL, 0 },
 +    { "aq2-bfactor", required_argument, NULL, 0 },
 +    { "aq3-strength", required_argument, NULL, 0 },
 +    { "aq3-sensitivity", required_argument, NULL, 0 },
 +    { "aq3-ifactor", required_argument, NULL, 0 },
 +    { "aq3-pfactor", required_argument, NULL, 0 },
 +    { "aq3-bfactor", required_argument, NULL, 0 },
 +    { "aq3-mode",    required_argument, NULL, 0 },
 +    { "aq3-boundary", required_argument, NULL, 0 },
     { "fgo",         required_argument, NULL, 0 },
     { "fade-compensate", required_argument, NULL, 0 },
     { "deadzone-inter", required_argument, NULL, 0 },
 diff --git a/x264.h b/x264.h
 index 84ce9c8d..558a750b 100644
 --- a/x264.h
 +++ b/x264.h
 @@ -208,6 +208,8 @@ typedef struct x264_nal_t
 #define X264_AQ_VARIANCE             1
 #define X264_AQ_AUTOVARIANCE         2
 #define X264_AQ_AUTOVARIANCE_BIASED  3
 +#define X264_AQ_ORE                  1 /* rc.i_aq3_mode value (OreAQ); numerically equal to X264_AQ_VARIANCE */
 +#define X264_AQ_MIXORE               2 /* rc.i_aq3_mode value (MixOre); numerically equal to X264_AQ_AUTOVARIANCE */
 #define X264_B_ADAPT_NONE            0
 #define X264_B_ADAPT_FAST            1
 #define X264_B_ADAPT_TRELLIS         2
 @@ -525,6 +527,25 @@ typedef struct x264_param_t
 
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
         float       f_aq_strength;
 +        float       f_aq_sensitivity;
 +        float       f_aq_ifactor;
 +        float       f_aq_pfactor;
 +        float       f_aq_bfactor;
 +        int         b_aq2;          /* psy 2nd adaptive QP */
 +        float       f_aq2_strength;
 +        float       f_aq2_sensitivity;
 +        float       f_aq2_ifactor;
 +        float       f_aq2_pfactor;
 +        float       f_aq2_bfactor;
 +        int         i_aq3_mode;      /* psy 3rd adaptive QP */
 +        float       f_aq3_strength;
 +        float       f_aq3_strengths[2][4];   /* Up{ Bright, Middle, Dark, M.Dark }, Down{ Bright, Middle, Dark, Other stuff } */
 +        float       f_aq3_sensitivity;
 +        float       f_aq3_ifactor[2]; /* { Up, Down } */
 +        float       f_aq3_pfactor[2]; /* { Up, Down } */
 +        float       f_aq3_bfactor[2]; /* { Up, Down } */
 +        int         b_aq3_boundary;
 +        int         i_aq3_boundary[3];
         float       f_fade_compensate; /* Give more bits to fades. */
         int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
         int         i_lookahead;