From: <cl...@us...> - 2006-09-26 16:10:23
|
Revision: 262 http://svn.sourceforge.net/ffdshow-tryout/?rev=262&view=rev Author: clsid2 Date: 2006-09-26 09:10:14 -0700 (Tue, 26 Sep 2006) Log Message: ----------- revert to old version of SNOW, update broke snow encoding, updated version probably also needed some changes in VFW interface Modified Paths: -------------- src/ffmpeg/libavcodec/snow.c Modified: src/ffmpeg/libavcodec/snow.c =================================================================== --- src/ffmpeg/libavcodec/snow.c 2006-09-26 15:54:21 UTC (rev 261) +++ src/ffmpeg/libavcodec/snow.c 2006-09-26 16:10:14 UTC (rev 262) @@ -366,12 +366,9 @@ obmc32, obmc16, obmc8, obmc4 }; -static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; - typedef struct BlockNode{ int16_t mx; int16_t my; - uint8_t ref; uint8_t color[3]; uint8_t type; //#define TYPE_SPLIT 1 @@ -385,7 +382,6 @@ .color= {128,128,128}, .mx= 0, .my= 0, - .ref= 0, .type= 0, .level= 0, }; @@ -428,7 +424,7 @@ AVFrame new_picture; AVFrame input_picture; ///< new_picture with the internal linesizes AVFrame current_picture; - AVFrame last_picture[MAX_REF_FRAMES]; + AVFrame last_picture; AVFrame mconly_picture; // uint8_t q_context[16]; uint8_t header_state[32]; @@ -440,10 +436,6 @@ int temporal_decomposition_type; int spatial_decomposition_count; int temporal_decomposition_count; - int max_ref_frames; - int ref_frames; - int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; - uint32_t *ref_scores[MAX_REF_FRAMES]; DWTELEM *spatial_dwt_buffer; int colorspace_type; int chroma_h_shift; @@ -452,7 +444,6 @@ int qlog; int lambda; int lambda2; - int pass1_rc; int mv_scale; int qbias; #define QBIAS_SHIFT 3 @@ -465,7 +456,7 @@ int me_cache[ME_CACHE_SIZE]; int me_cache_generation; slice_buffer sb; - + MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) }SnowContext; @@ -485,19 +476,19 @@ static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) { int i; - + buf->base_buffer = base_buffer; buf->line_count = line_count; buf->line_width = line_width; buf->data_count = max_allocated_lines; buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count); buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines); - + for (i = 0; i < max_allocated_lines; i++) { buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width); } - + buf->data_stack_top = max_allocated_lines - 1; } @@ -505,21 +496,21 @@ { int offset; DWTELEM * buffer; - -// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); - + +// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); + assert(buf->data_stack_top >= 0); // assert(!buf->line[line]); if (buf->line[line]) return buf->line[line]; - + offset = buf->line_width * line; buffer = buf->data_stack[buf->data_stack_top]; buf->data_stack_top--; buf->line[line] = buffer; - + // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); - + return buffer; } @@ -536,7 +527,7 @@ buf->data_stack_top++; buf->data_stack[buf->data_stack_top] = buffer; buf->line[line] = NULL; - + // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); } @@ -557,7 +548,7 @@ { int i; slice_buffer_flush(buf); - + for (i = buf->data_count - 1; i >= 0; i--) { assert(buf->data_stack[i]); @@ -569,9 +560,9 @@ av_freep(&buf->line); } -#ifdef __sgi +#ifdef __sgi // Avoid a name clash on SGI IRIX -#undef qexp +#undef qexp #endif #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 static uint8_t qexp[QROOT]; @@ -591,7 +582,7 @@ const int a= ABS(v); const int e= av_log2(a); #if 1 - const int el= FFMIN(e, 10); + const int el= FFMIN(e, 10); put_rac(c, state+0, 0); for(i=0; i<el; i++){ @@ -612,7 +603,7 @@ if(is_signed) put_rac(c, state+11 + el, v < 0); //11..21 #else - + put_rac(c, state+0, 0); if(e<=9){ for(i=0; i<e; i++){ @@ -654,7 +645,7 @@ while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 e++; } - + a= 1; for(i=e-1; i>=0; i--){ a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 @@ -681,7 +672,7 @@ if(log2>0) r+=r; } put_rac(c, state+4+log2, 0); - + for(i=log2-1; i>=0; i--){ put_rac(c, state+31-i, (v>>i)&1); } @@ -699,7 +690,7 @@ log2++; if(log2>0) r+=r; } - + for(i=log2-1; i>=0; i--){ v+= get_rac(c, state+31-i)<<i; } @@ -719,11 +710,11 @@ dst += dst_step; src += src_step; } - + for(i=0; i<w; i++){ dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse); } - + if(mirror_right){ dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); } @@ -744,14 +735,14 @@ dst += dst_step; src += src_step; } - + for(i=0; i<w; i++){ int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]); r += r>>4; r += r>>8; dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse); } - + if(mirror_right){ int r= 3*2*ref[w*ref_step]; r += r>>4; @@ -775,11 +766,11 @@ dst += dst_step; src += src_step; } - + for(i=0; i<w; i++){ dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse); } - + if(mirror_right){ dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); } @@ -789,7 +780,7 @@ static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){ int x, i; - + for(x=start; x<width; x+=2){ int64_t sum=0; @@ -809,7 +800,7 @@ for(y=start; y<height; y+=2){ for(x=0; x<width; x++){ int64_t sum=0; - + for(i=0; i<n; i++){ int y2= y + 2*i - n + 1; if (y2< 0) y2= -y2; @@ -868,7 +859,7 @@ #define N4 0 #define SHIFT4 0 #define COEFFS4 NULL -#elif 1 // 11/5 +#elif 1 // 11/5 #define N1 0 #define SHIFT1 1 #define COEFFS1 NULL @@ -946,7 +937,7 @@ inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0); inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0); inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0); - + for(x=0; x<width2; x++){ temp[x ]= b[2*x ]; temp[x+w2]= b[2*x + 1]; @@ -978,7 +969,7 @@ static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){ int x, y; - + for(y=0; y<height; y++){ for(x=0; x<width; x++){ buffer[y*stride + x] *= SCALEX; @@ -988,16 +979,16 @@ for(y=0; y<height; y++){ horizontal_decomposeX(buffer + y*stride, width); } - + inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0); inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0); inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0); - inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); + inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); } static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){ int x, y; - + inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1); inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1); inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1); @@ -1057,7 +1048,7 @@ b[width -1] = A3; b[width2-1] = A2; } -#else +#else lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); #endif @@ -1065,7 +1056,7 @@ static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] -= (b0[i] + b2[i])>>1; } @@ -1073,7 +1064,7 @@ static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] += (b0[i] + b2[i] + 2)>>2; } @@ -1083,7 +1074,7 @@ int y; DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; - + for(y=-2; y<height; y+=2){ DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; @@ -1092,12 +1083,12 @@ if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); STOP_TIMER("horizontal_decompose53i")} - + {START_TIMER if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); STOP_TIMER("vertical_decompose53i*")} - + b0=b2; b1=b3; } @@ -1116,7 +1107,7 @@ static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; } @@ -1124,7 +1115,7 @@ static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ #ifdef lift5 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; @@ -1139,7 +1130,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ #ifdef liftS b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; @@ -1151,7 +1142,7 @@ static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; } @@ -1163,7 +1154,7 @@ DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; - + for(y=-4; y<height; y+=2){ DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; @@ -1174,7 +1165,7 @@ if(width>400){ STOP_TIMER("horizontal_decompose97i") }} - + {START_TIMER if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); @@ -1184,7 +1175,7 @@ if(width>400){ STOP_TIMER("vertical_decompose97i") }} - + b0=b2; b1=b3; b2=b4; @@ -1194,12 +1185,12 @@ void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ int level; - + for(level=0; level<decomposition_count; level++){ switch(type){ - case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; - case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; - case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; + case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; + case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; + case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; } } } @@ -1239,7 +1230,7 @@ A2 += (A1 + A3 + 2)>>2; b[width -1] = A3; b[width2-1] = A2; -#else +#else lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); #endif @@ -1253,7 +1244,7 @@ static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] += (b0[i] + b2[i])>>1; } @@ -1261,7 +1252,7 @@ static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] -= (b0[i] + b2[i] + 2)>>2; } @@ -1278,10 +1269,10 @@ cs->b1 = buffer + mirror(-1 , height-1)*stride; cs->y = -1; } - + static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ int y= cs->y; - + DWTELEM *b0= cs->b0; DWTELEM *b1= cs->b1; DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); @@ -1306,8 +1297,8 @@ int y= cs->y; DWTELEM *b0= cs->b0; DWTELEM *b1= cs->b1; - DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; - DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; + DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; + DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; {START_TIMER if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); @@ -1322,16 +1313,16 @@ cs->b0 = b2; cs->b1 = b3; cs->y += 2; -} + } static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ dwt_compose_t cs; spatial_compose53i_init(&cs, buffer, height, stride); while(cs.y <= height) spatial_compose53i_dy(&cs, buffer, width, height, stride); -} +} - + void ff_snow_horizontal_compose97i(DWTELEM *b, int width){ DWTELEM temp[width]; const int w2= (width+1)>>1; @@ -1344,7 +1335,7 @@ static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; } @@ -1352,7 +1343,7 @@ static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ #ifdef lift5 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; @@ -1367,7 +1358,7 @@ static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ #ifdef liftS b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; @@ -1379,7 +1370,7 @@ static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ int i; - + for(i=0; i<width; i++){ b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; } @@ -1387,7 +1378,7 @@ void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ int i; - + for(i=0; i<width; i++){ #ifndef lift5 int r; @@ -1428,14 +1419,14 @@ static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ int y = cs->y; - + DWTELEM *b0= cs->b0; DWTELEM *b1= cs->b1; DWTELEM *b2= cs->b2; DWTELEM *b3= cs->b3; DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); - + {START_TIMER if(y>0 && y+4<height){ dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); @@ -1467,8 +1458,8 @@ DWTELEM *b1= cs->b1; DWTELEM *b2= cs->b2; DWTELEM *b3= cs->b3; - DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; - DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; + DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; + DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; {START_TIMER if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); @@ -1483,7 +1474,7 @@ if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); if(width>400 && b0 <= b2){ STOP_TIMER("horizontal_compose97i")}} - + cs->b0=b2; cs->b1=b3; cs->b2=b4; @@ -1502,10 +1493,10 @@ int level; for(level=decomposition_count-1; level>=0; level--){ switch(type){ - case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; - case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; + case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; + case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; /* not slicified yet */ - case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ + case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; } } @@ -1515,10 +1506,10 @@ int level; for(level=decomposition_count-1; level>=0; level--){ switch(type){ - case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; - case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; + case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; + case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; /* not slicified yet */ - case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; + case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; } } } @@ -1530,16 +1521,16 @@ for(level=decomposition_count-1; level>=0; level--){ while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ - switch(type){ - case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); + switch(type){ + case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); break; - case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); + case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); break; - case DWT_X: break; + case 2: break; } } } -} + } static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ const int support = type==1 ? 3 : 5; @@ -1549,11 +1540,11 @@ for(level=decomposition_count-1; level>=0; level--){ while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ switch(type){ - case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); + case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); break; - case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); + case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); break; - case DWT_X: break; + case 2: break; } } } @@ -1583,7 +1574,7 @@ int runs[w*h]; int run_index=0; int max_index; - + for(y=0; y<h; y++){ for(x=0; x<w; x++){ int v, p=0; @@ -1609,7 +1600,7 @@ if(parent){ int px= x>>1; int py= y>>1; - if(px<b->parent->width && py<b->parent->height) + if(px<b->parent->width && py<b->parent->height) p= parent[px + py*2*stride]; } if(!(/*ll|*/l|lt|t|rt|p)){ @@ -1629,8 +1620,8 @@ put_symbol2(&s->c, b->state[30], max_index, 0); if(run_index <= max_index) - put_symbol2(&s->c, b->state[1], run, 3); - + put_symbol2(&s->c, b->state[1], run, 3); + for(y=0; y<h; y++){ if(s->c.bytestream_end - s->c.bytestream < w*40){ av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); @@ -1660,7 +1651,7 @@ if(parent){ int px= x>>1; int py= y>>1; - if(px<b->parent->width && py<b->parent->height) + if(px<b->parent->width && py<b->parent->height) p= parent[px + py*2*stride]; } if(/*ll|*/l|lt|t|rt|p){ @@ -1672,7 +1663,7 @@ run= runs[run_index++]; if(run_index <= max_index) - put_symbol2(&s->c, b->state[1], run, 3); + put_symbol2(&s->c, b->state[1], run, 3); assert(v); }else{ run--; @@ -1693,7 +1684,7 @@ return 0; } -static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ +static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ // encode_subband_qtree(s, b, src, parent, stride, orientation); // encode_subband_z0run(s, b, src, parent, stride, orientation); return encode_subband_c0run(s, b, src, parent, stride, orientation); @@ -1716,7 +1707,7 @@ runs= get_symbol2(&s->c, b->state[30], 0); if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); else run= INT_MAX; - + for(y=0; y<h; y++){ int v=0; int lt=0, t=0, rt=0; @@ -1727,7 +1718,7 @@ for(x=0; x<w; x++){ int p=0; const int l= v; - + lt= t; t= rt; if(y){ @@ -1737,15 +1728,15 @@ rt= prev_xc->coeff; else rt=0; - } + } if(parent_xc){ if(x>>1 > parent_xc->x){ parent_xc++; } if(x>>1 == parent_xc->x){ p= parent_xc->coeff; - } } + } if(/*ll|*/l|lt|t|rt|p){ int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); @@ -1753,7 +1744,7 @@ if(v){ v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); - + xc->x=x; (xc++)->coeff= v; } @@ -1763,7 +1754,7 @@ else run= INT_MAX; v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); - + xc->x=x; (xc++)->coeff= v; }else{ @@ -1775,15 +1766,15 @@ else max_run= FFMIN(run, w-x-1); if(parent_xc) max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); - x+= max_run; - run-= max_run; + x+= max_run; + run-= max_run; + } } } - } (xc++)->x= w+1; //end marker prev_xc= prev2_xc; prev2_xc= xc; - + if(parent_xc){ if(y&1){ while(parent_xc->x != parent->width+1) @@ -1807,7 +1798,7 @@ int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; int new_index = 0; - + START_TIMER if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ @@ -1819,7 +1810,7 @@ if (start_y != 0) new_index = save_state[0]; - + for(y=start_y; y<h; y++){ int x = 0; int v; @@ -1838,15 +1829,15 @@ } } if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ - STOP_TIMER("decode_subband") - } - + STOP_TIMER("decode_subband") + } + /* Save our variables for the next slice. */ save_state[0] = new_index; + + return; + } - return; -} - static void reset_contexts(SnowContext *s){ int plane_index, level, orientation; @@ -1864,10 +1855,10 @@ static int alloc_blocks(SnowContext *s){ int w= -((-s->avctx->width )>>LOG2_MB_SIZE); int h= -((-s->avctx->height)>>LOG2_MB_SIZE); - + s->b_width = w; s->b_height= h; - + s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); return 0; } @@ -1913,20 +1904,19 @@ return s; } -static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ +static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){ const int w= s->b_width << s->block_max_depth; const int rem_depth= s->block_max_depth - level; const int index= (x + y*w) << rem_depth; const int block_w= 1<<rem_depth; BlockNode block; int i,j; - + block.color[0]= l; block.color[1]= cb; block.color[2]= cr; block.mx= mx; block.my= my; - block.ref= ref; block.type= type; block.level= level; @@ -1951,22 +1941,6 @@ assert(!ref_index); } -static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, - BlockNode *left, BlockNode *top, BlockNode *tr){ - if(s->ref_frames == 1){ - *mx = mid_pred(left->mx, top->mx, tr->mx); - *my = mid_pred(left->my, top->my, tr->my); - }else{ - const int *scale = scale_mv_ref[ref]; - *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8, - top ->mx * scale[top ->ref] + 128 >>8, - tr ->mx * scale[tr ->ref] + 128 >>8); - *my = mid_pred(left->my * scale[left->ref] + 128 >>8, - top ->my * scale[top ->ref] + 128 >>8, - tr ->my * scale[tr ->ref] + 128 >>8); - } -} - //FIXME copy&paste #define P_LEFT P[1] #define P_TOP P[2] @@ -2000,7 +1974,8 @@ int pl = left->color[0]; int pcb= left->color[1]; int pcr= left->color[2]; - int pmx, pmy; + int pmx= mid_pred(left->mx, top->mx, tr->mx); + int pmy= mid_pred(left->my, top->my, tr->my); int mx=0, my=0; int l,cr,cb; const int stride= s->current_picture.linesize[0]; @@ -2013,15 +1988,13 @@ int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused const int shift= 1+qpel; MotionEstContext *c= &s->m.me; - int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); int mx_context= av_log2(2*ABS(left->mx - top->mx)); int my_context= av_log2(2*ABS(left->my - top->my)); int s_context= 2*left->level + 2*top->level + tl->level + tr->level; - int ref, best_ref, ref_score, ref_mx, ref_my; assert(sizeof(s->block_state) >= 256); if(s->keyframe){ - set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); + set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); return 0; } @@ -2033,34 +2006,36 @@ P_TOP [1]= top->my; P_TOPRIGHT[0]= tr->mx; P_TOPRIGHT[1]= tr->my; - + last_mv[0][0]= s->block[index].mx; last_mv[0][1]= s->block[index].my; last_mv[1][0]= right->mx; last_mv[1][1]= right->my; last_mv[2][0]= bottom->mx; last_mv[2][1]= bottom->my; - + s->m.mb_stride=2; - s->m.mb_x= + s->m.mb_x= s->m.mb_y= 0; s->m.me.skip= 0; + init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0); + assert(s->m.me. stride == stride); assert(s->m.me.uvstride == uvstride); - + c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; - + c->xmin = - x*block_w - 16+2; c->ymin = - y*block_w - 16+2; c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); - if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); + if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); @@ -2078,36 +2053,18 @@ c->pred_y = P_MEDIAN[1]; } - score= INT_MAX; - best_ref= 0; - for(ref=0; ref<s->ref_frames; ref++){ - init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); + score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, + (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); - ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, - (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); - - assert(ref_mx >= c->xmin); - assert(ref_mx <= c->xmax); - assert(ref_my >= c->ymin); - assert(ref_my <= c->ymax); - - ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); - ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); - ref_score+= 2*av_log2(2*ref)*c->penalty_factor; - if(s->ref_mvs[ref]){ - s->ref_mvs[ref][index][0]= ref_mx; - s->ref_mvs[ref][index][1]= ref_my; - s->ref_scores[ref][index]= ref_score; - } - if(score > ref_score){ - score= ref_score; - best_ref= ref; - mx= ref_mx; - my= ref_my; - } - } + assert(mx >= c->xmin); + assert(mx <= c->xmax); + assert(my >= c->ymin); + assert(my <= c->ymax); + + score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w); + score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2 - + // subpel search pc= s->c; pc.bytestream_start= @@ -2117,11 +2074,8 @@ if(level!=s->block_max_depth) put_rac(&pc, &p_state[4 + s_context], 1); put_rac(&pc, &p_state[1 + left->type + top->type], 0); - if(s->ref_frames > 1) - put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); - pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); - put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); - put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); + put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1); + put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1); p_len= pc.bytestream - pc.bytestream_start; score += (s->lambda2*(p_len*8 + (pc.outstanding_count - s->c.outstanding_count)*8 @@ -2132,7 +2086,7 @@ sum = pix_sum(current_data[0], stride, block_w); l= (sum + block_s/2)/block_s; iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; - + block_s= block_w*block_w>>2; sum = pix_sum(current_data[1], uvstride, block_w>>1); cb= (sum + block_s/2)/block_s; @@ -2171,7 +2125,7 @@ else c->scene_change_score+= s->m.qscale; } - + if(level!=s->block_max_depth){ put_rac(&s->c, &s->block_state[4 + s_context], 0); score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); @@ -2179,18 +2133,17 @@ score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead - + if(score2 < score && score2 < iscore) return score2; } - + if(iscore < score){ - pred_mv(s, &pmx, &pmy, 0, left, top, tr); memcpy(pbbak, i_buffer, i_len); s->c= ic; s->c.bytestream_start= pbbak_start; s->c.bytestream= pbbak + i_len; - set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); + set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA); memcpy(s->block_state, i_state, sizeof(s->block_state)); return iscore; }else{ @@ -2198,7 +2151,7 @@ s->c= pc; s->c.bytestream_start= pbbak_start; s->c.bytestream= pbbak + p_len; - set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); + set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0); memcpy(s->block_state, p_state, sizeof(s->block_state)); return score; } @@ -2208,7 +2161,7 @@ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); }else{ - return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); + return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA)); } } @@ -2225,14 +2178,14 @@ int pl = left->color[0]; int pcb= left->color[1]; int pcr= left->color[2]; - int pmx, pmy; - int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); - int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref; - int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref; + int pmx= mid_pred(left->mx, top->mx, tr->mx); + int pmy= mid_pred(left->my, top->my, tr->my); + int mx_context= av_log2(2*ABS(left->mx - top->mx)); + int my_context= av_log2(2*ABS(left->my - top->my)); int s_context= 2*left->level + 2*top->level + tl->level + tr->level; if(s->keyframe){ - set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); + set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); return; } @@ -2249,20 +2202,16 @@ } } if(b->type & BLOCK_INTRA){ - pred_mv(s, &pmx, &pmy, 0, left, top, tr); put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); - set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); + set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA); }else{ - pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); - if(s->ref_frames > 1) - put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); - set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); + set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0); } } @@ -2276,9 +2225,9 @@ BlockNode *tl = y && x ? &s->block[index-w-1] : left; BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt int s_context= 2*left->level + 2*top->level + tl->level + tr->level; - + if(s->keyframe){ - set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); + set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA); return; } @@ -2289,26 +2238,20 @@ int cr= left->color[2]; int mx= mid_pred(left->mx, top->mx, tr->mx); int my= mid_pred(left->my, top->my, tr->my); - int ref = 0; - int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx)); int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my)); - + type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; if(type){ - pred_mv(s, &mx, &my, 0, left, top, tr); l += get_symbol(&s->c, &s->block_state[32], 1); cb+= get_symbol(&s->c, &s->block_state[64], 1); cr+= get_symbol(&s->c, &s->block_state[96], 1); }else{ - if(s->ref_frames > 1) - ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); - pred_mv(s, &mx, &my, ref, left, top, tr); - mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); - my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); + mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1); + my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1); } - set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); + set_blocks(s, level, x, y, l, cb, cr, mx, my, type); }else{ decode_q_branch(s, level+1, 2*x+0, 2*y+0); decode_q_branch(s, level+1, 2*x+1, 2*y+0); @@ -2317,12 +2260,12 @@ } } -static void encode_blocks(SnowContext *s, int search){ +static void encode_blocks(SnowContext *s){ int x, y; int w= s->b_width; int h= s->b_height; - if(s->avctx->me_method == ME_ITER && !s->keyframe && search) + if(s->avctx->me_method == ME_ITER && !s->keyframe) iterative_me(s); for(y=0; y<h; y++){ @@ -2331,10 +2274,10 @@ return; } for(x=0; x<w; x++){ - if(s->avctx->me_method == ME_ITER || !search) + if(s->avctx->me_method == ME_ITER) encode_q_branch2(s, 0, x, y); else - encode_q_branch (s, 0, x, y); + encode_q_branch(s, 0, x, y); } } } @@ -2372,10 +2315,10 @@ if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8; else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8; - + /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/ if(am&(~255)) am= ~(am>>31); - + tmp[x] = am; /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6; @@ -2387,7 +2330,7 @@ src += stride; } tmp -= (b_h+5)*stride; - + for(y=0; y < b_h; y++){ for(x=0; x < b_w; x++){ int a0= tmp[x + 0*stride]; @@ -2400,14 +2343,14 @@ // int am= 18*(a2+a3) - 2*(a1+a4); /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ - + // if(b_w==16) am= 8*(a1+a2); if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8; else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8; if(am&(~255)) am= ~(am>>31); - + dst[x] = am; /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6; else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6; @@ -2436,10 +2379,10 @@ mca( 0, 8,8) mca( 8, 8,8) -static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ +static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ if(block->type & BLOCK_INTRA){ int x, y; - const int color = block->color[plane_index]; + const int color= block->color[plane_index]; const int color4= color*0x01010101; if(b_w==32){ for(y=0; y < b_h; y++){ @@ -2469,14 +2412,13 @@ *(uint32_t*)&dst[0 + y*stride]= color4; } }else{ - for(y=0; y < b_h; y++){ - for(x=0; x < b_w; x++){ - dst[x + y*stride]= color; - } + for(y=0; y < b_h; y++){ + for(x=0; x < b_w; x++){ + dst[x + y*stride]= color; } } + } }else{ - uint8_t *src= s->last_picture[block->ref].data[plane_index]; const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; int mx= block->mx*scale; int my= block->my*scale; @@ -2512,9 +2454,9 @@ assert(2*b_w==b_h); s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); - } } } +} void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ @@ -2551,7 +2493,8 @@ } //FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ +static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ + DWTELEM * dst = NULL; const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; const int b_stride= b_width; @@ -2559,7 +2502,7 @@ BlockNode *rt= lt+1; BlockNode *lb= lt+b_stride; BlockNode *rb= lb+1; - uint8_t *block[4]; + uint8_t *block[4]; int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align uint8_t *ptmp; @@ -2579,12 +2522,10 @@ lb= lt; rb= rt; } - + if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 obmc -= src_x; b_w += src_x; - if(!sliced && !offset_dst) - dst -= src_x; src_x=0; }else if(src_x + b_w > w){ b_w = w - src_x; @@ -2592,34 +2533,31 @@ if(src_y<0){ obmc -= src_y*obmc_stride; b_h += src_y; - if(!sliced && !offset_dst) - dst -= src_y*dst_stride; src_y=0; }else if(src_y + b_h> h){ b_h = h - src_y; } - + if(b_w<=0 || b_h<=0) return; assert(src_stride > 2*MB_SIZE + 5); - if(!sliced && offset_dst) - dst += src_x + src_y*dst_stride; +// old_dst += src_x + src_y*dst_stride; dst8+= src_x + src_y*src_stride; // src += src_x + src_y*src_stride; ptmp= tmp + 3*tmp_step; block[0]= ptmp; ptmp+=tmp_step; - pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); + pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); if(same_block(lt, rt)){ block[1]= block[0]; }else{ block[1]= ptmp; ptmp+=tmp_step; - pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); + pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); } - + if(same_block(lt, lb)){ block[2]= block[0]; }else if(same_block(rt, lb)){ @@ -2627,7 +2565,7 @@ }else{ block[2]= ptmp; ptmp+=tmp_step; - pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); + pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); } if(same_block(lt, rb) ){ @@ -2638,7 +2576,7 @@ block[3]= block[2]; }else{ block[3]= ptmp; - pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); + pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); } #if 0 for(y=0; y<b_h; y++){ @@ -2674,13 +2612,141 @@ } } #else - if(sliced){ - START_TIMER +{ - s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); - STOP_TIMER("inner_add_yblock") - }else + START_TIMER + + s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + STOP_TIMER("Inner add y block") +} +#endif +} + +//FIXME name clenup (b_w, block_w, b_width stuff) +static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ + const int b_width = s->b_width << s->block_max_depth; + const int b_height= s->b_height << s->block_max_depth; + const int b_stride= b_width; + BlockNode *lt= &s->block[b_x + b_y*b_stride]; + BlockNode *rt= lt+1; + BlockNode *lb= lt+b_stride; + BlockNode *rb= lb+1; + uint8_t *block[4]; + int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; + uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align + uint8_t *ptmp; + int x,y; + + if(b_x<0){ + lt= rt; + lb= rb; + }else if(b_x + 1 >= b_width){ + rt= lt; + rb= lb; + } + if(b_y<0){ + lt= lb; + rt= rb; + }else if(b_y + 1 >= b_height){ + lb= lt; + rb= rt; + } + + if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 + obmc -= src_x; + b_w += src_x; + if(!offset_dst) + dst -= src_x; + src_x=0; + }else if(src_x + b_w > w){ + b_w = w - src_x; + } + if(src_y<0){ + obmc -= src_y*obmc_stride; + b_h += src_y; + if(!offset_dst) + dst -= src_y*dst_stride; + src_y=0; + }else if(src_y + b_h> h){ + b_h = h - src_y; + } + + if(b_w<=0 || b_h<=0) return; + +assert(src_stride > 2*MB_SIZE + 5); + if(offset_dst) + dst += src_x + src_y*dst_stride; + dst8+= src_x + src_y*src_stride; +// src += src_x + src_y*src_stride; + + ptmp= tmp + 3*tmp_step; + block[0]= ptmp; + ptmp+=tmp_step; + pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); + + if(same_block(lt, rt)){ + block[1]= block[0]; + }else{ + block[1]= ptmp; + ptmp+=tmp_step; + pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); + } + + if(same_block(lt, lb)){ + block[2]= block[0]; + }else if(same_block(rt, lb)){ + block[2]= block[1]; + }else{ + block[2]= ptmp; + ptmp+=tmp_step; + pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); + } + + if(same_block(lt, rb) ){ + block[3]= block[0]; + }else if(same_block(rt, rb)){ + block[3]= block[1]; + }else if(same_block(lb, rb)){ + block[3]= block[2]; + }else{ + block[3]= ptmp; + pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); + } +#if 0 for(y=0; y<b_h; y++){ + for(x=0; x<b_w; x++){ + int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y < b_h; y++){ + uint8_t *obmc2= obmc + (obmc_stride>>1); + for(x=0; x < b_w; x++){ + int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y < b_h; y++){ + uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + for(x=0; x < b_w; x++){ + int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } +#else + for(y=0; y<b_h; y++){ //FIXME ugly missue of obmc_stride uint8_t *obmc1= obmc + y*obmc_stride; uint8_t *obmc2= obmc1+ (obmc_stride>>1); @@ -2691,7 +2757,7 @@ +obmc2[x] * block[2][x + y*src_stride] +obmc3[x] * block[1][x + y*src_stride] +obmc4[x] * block[0][x + y*src_stride]; - + v <<= 8 - LOG2_OBMC_MAX; if(FRAC_BITS != 8){ v += 1<<(7 - FRAC_BITS); @@ -2720,11 +2786,12 @@ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; int obmc_stride= plane_index ? block_size : 2*block_size; int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref = s->last_picture.data[plane_index]; uint8_t *dst8= s->current_picture.data[plane_index]; int w= p->width; int h= p->height; START_TIMER - + if(s->keyframe || (s->avctx->debug&512)){ if(mb_y==mb_h) return; @@ -2758,22 +2825,22 @@ return; } - + for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock(s, 1, sb, old_buffer, dst8, obmc, + add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, w, h, w, ref_stride, obmc_stride, mb_x - 1, mb_y - 1, - add, 0, plane_index); - + add, plane_index); + STOP_TIMER("add_yblock") } - + STOP_TIMER("predict_slice") } @@ -2787,11 +2854,12 @@ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref = s->last_picture.data[plane_index]; uint8_t *dst8= s->current_picture.data[plane_index]; int w= p->width; int h= p->height; START_TIMER - + if(s->keyframe || (s->avctx->debug&512)){ if(mb_y==mb_h) return; @@ -2807,33 +2875,33 @@ } }else{ for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ - for(x=0; x<w; x++){ + for(x=0; x<w; x++){ buf[x + y*w]-= 128<<FRAC_BITS; } - } } - - return; } + + return; +} for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock(s, 0, NULL, buf, dst8, obmc, - block_w*mb_x - block_w/2, + add_yblock(s, buf, dst8, ref, obmc, + block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, w, h, - w, ref_stride, obmc_stride, + w, ref_stride, obmc_stride, mb_x - 1, mb_y - 1, add, 1, plane_index); STOP_TIMER("add_yblock") } - + STOP_TIMER("predict_slice") -} - + } + static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ const int mb_h= s->b_height << s->block_max_depth; int mb_y; @@ -2849,6 +2917,7 @@ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; const int b_stride = s->b_width << s->block_max_depth; @@ -2870,7 +2939,7 @@ int x= block_w*mb_x2 + block_w/2; int y= block_w*mb_y2 + block_w/2; - add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, + add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ @@ -2911,6 +2980,8 @@ if(x<0 || x>=b_stride || y>=b_height) return 0; + dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx); + dmy= b->my - mid_pred(left->my, top->my, tr->my); /* 1 0 0 01X 1-2 1 @@ -2924,14 +2995,9 @@ return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0])) + av_log2(2*ABS(left->color[1] - b->color[1])) + av_log2(2*ABS(left->color[2] - b->color[2]))); - }else{ - pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); - dmx-= b->mx; - dmy-= b->my; - return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda - + av_log2(2*ABS(dmy)) - + av_log2(2*b->ref)); - } + }else + return 2*(1 + av_log2(2*ABS(dmx)) + + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda } static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ @@ -2941,6 +3007,7 @@ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *dst= s->current_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; @@ -2961,7 +3028,7 @@ int y1= FFMIN(block_w*2, h-sy); int i,x,y; - pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); + pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); for(y=y0; y<y1; y++){ const uint8_t *obmc1= obmc_edged + y*obmc_stride; @@ -2992,23 +3059,12 @@ memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); } + //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block if(block_w==16){ - /* FIXME rearrange dsputil to fit 32x32 cmp functions */ - /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ - /* FIXME cmps overlap but don't cover the wavelet's whole support, - * so improving the score of one block is not strictly guaranteed to - * improve the score of the whole frame, so iterative motion est - * doesn't always converge. */ - if(s->avctx->me_cmp == FF_CMP_W97) - distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); - else if(s->avctx->me_cmp == FF_CMP_W53) - distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); - else{ - distortion = 0; - for(i=0; i<4; i++){ - int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; - distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); - } + distortion = 0; + for(i=0; i<4; i++){ + int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; + distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); } }else{ assert(block_w==8); @@ -3037,6 +3093,7 @@ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *dst= s->current_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; static const DWTELEM zero_dst[4096]; //FIXME @@ -3054,7 +3111,7 @@ int x= block_w*mb_x2 + block_w/2; int y= block_w*mb_y2 + block_w/2; - add_yblock(s, 0, NULL, zero_dst, dst, obmc, + add_yblock(s, zero_dst, dst, ref, obmc, x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); //FIXME find a cleaner/simpler way to skip the outside stuff @@ -3110,7 +3167,7 @@ block->type |= BLOCK_INTRA; }else{ index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); - value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); + value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6); if(s->me_cache[index] == value) return 0; s->me_cache[index]= value; @@ -3138,7 +3195,7 @@ return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); } -static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ +static always_inline int check_4block_inter(SnowContext *s, int mb_x, ... [truncated message content] |