Diff of /rgb2rgb_template.c [65acde] .. [fbcd08] Maximize Restore

  Switch to unified view

a/rgb2rgb_template.c b/rgb2rgb_template.c
...
...
2699
        ::: "memory"
2699
        ::: "memory"
2700
        );
2700
        );
2701
#endif
2701
#endif
2702
}
2702
}
2703
2703
2704
/**
 * Copy every second byte of src (offsets 0, 2, 4, ...) into dst.
 *
 * @param src   interleaved input; up to 2*count bytes are read
 * @param dst   output buffer; count bytes are written
 * @param count number of output bytes to produce
 */
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
    /* Move both pointers to the end of their ranges and walk a negative
     * index up towards zero, so the index doubles as the loop condition. */
    src  += 2*count;
    dst  +=   count;
    count = -count;

#if HAVE_MMX
    if (count <= -16) {
        /* Bias the index by 15 so that "js" keeps looping only while a
         * full group of 16 output bytes is still pending. */
        count += 15;
        __asm__ volatile(
            /* mm7 = 0x00FF in every 16-bit word: mask selecting the even bytes */
            "pcmpeqw       %%mm7, %%mm7        \n\t"
            "psrlw            $8, %%mm7        \n\t"
            "1:                                \n\t"
            /* load 32 input bytes */
            "movq -30(%1, %0, 2), %%mm0        \n\t"
            "movq -22(%1, %0, 2), %%mm1        \n\t"
            "movq -14(%1, %0, 2), %%mm2        \n\t"
            "movq  -6(%1, %0, 2), %%mm3        \n\t"
            /* drop the odd bytes, then pack words down to bytes */
            "pand          %%mm7, %%mm0        \n\t"
            "pand          %%mm7, %%mm1        \n\t"
            "pand          %%mm7, %%mm2        \n\t"
            "pand          %%mm7, %%mm3        \n\t"
            "packuswb      %%mm1, %%mm0        \n\t"
            "packuswb      %%mm3, %%mm2        \n\t"
            /* store 16 output bytes (MOVNTQ is defined elsewhere;
             * presumably a non-temporal store where available) */
            MOVNTQ"        %%mm0,-15(%2, %0)   \n\t"
            MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t"
            "add             $16, %0           \n\t"
            " js 1b                            \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst)
        );
        /* undo the bias; count is now the negated number of leftover bytes */
        count -= 15;
    }
#endif
    /* Scalar path: everything when MMX is unavailable, otherwise the tail. */
    for (; count < 0; count++)
        dst[count] = src[2*count];
}
2742
2743
/**
 * De-interleave the even bytes of src into two planes:
 * dst0[i] = src[4*i + 0] and dst1[i] = src[4*i + 2].
 *
 * @param src   interleaved input; up to 4*count bytes are read
 * @param dst0  receives byte 0 of every 4-byte group
 * @param dst1  receives byte 2 of every 4-byte group
 * @param count number of bytes to write to each output
 */
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    /* Index all buffers from their ends with a negative counter. */
    src  += 4*count;
    dst0 +=   count;
    dst1 +=   count;
    count = -count;
#if HAVE_MMX
    if (count <= -8) {
        /* Bias by 7 so the asm loop runs only while 8 outputs per plane remain. */
        count += 7;
        __asm__ volatile(
            /* mm7 = 0x00FF in every 16-bit word */
            "pcmpeqw       %%mm7, %%mm7        \n\t"
            "psrlw            $8, %%mm7        \n\t"
            "1:                                \n\t"
            "movq -28(%1, %0, 4), %%mm0        \n\t"
            "movq -20(%1, %0, 4), %%mm1        \n\t"
            "movq -12(%1, %0, 4), %%mm2        \n\t"
            "movq  -4(%1, %0, 4), %%mm3        \n\t"
            /* first pass: keep the even bytes (offsets 0, 2, 4, ...) */
            "pand          %%mm7, %%mm0        \n\t"
            "pand          %%mm7, %%mm1        \n\t"
            "pand          %%mm7, %%mm2        \n\t"
            "pand          %%mm7, %%mm3        \n\t"
            "packuswb      %%mm1, %%mm0        \n\t"
            "packuswb      %%mm3, %%mm2        \n\t"
            /* second pass: split the packed bytes again; the shifted copy
             * holds offsets 2, 6, ..., the masked copy offsets 0, 4, ... */
            "movq          %%mm0, %%mm1        \n\t"
            "movq          %%mm2, %%mm3        \n\t"
            "psrlw            $8, %%mm0        \n\t"
            "psrlw            $8, %%mm2        \n\t"
            "pand          %%mm7, %%mm1        \n\t"
            "pand          %%mm7, %%mm3        \n\t"
            "packuswb      %%mm2, %%mm0        \n\t"
            "packuswb      %%mm3, %%mm1        \n\t"
            /* %3 is dst1, %2 is dst0 */
            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
            "add              $8, %0           \n\t"
            " js 1b                            \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    /* Scalar path: everything when MMX is unavailable, otherwise the tail. */
    for (; count < 0; count++) {
        const uint8_t *group = src + 4*count;

        dst0[count] = group[0];
        dst1[count] = group[2];
    }
}
2790
2791
/**
 * De-interleave the odd bytes of src into two planes:
 * dst0[i] = src[4*i + 1] and dst1[i] = src[4*i + 3].
 *
 * @param src   interleaved input; up to 4*count bytes are read
 * @param dst0  receives byte 1 of every 4-byte group
 * @param dst1  receives byte 3 of every 4-byte group
 * @param count number of bytes to write to each output
 */
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    /* Index all buffers from their ends with a negative counter. */
    dst0+=   count;
    dst1+=   count;
    src += 4*count;
    count= - count;
#if HAVE_MMX
    if(count <= -8){
        /* Bias by 7 so the asm loop runs only while 8 outputs per plane remain. */
        count += 7;
        __asm__ volatile(
            /* mm7 = 0x00FF in every 16-bit word */
            "pcmpeqw       %%mm7, %%mm7        \n\t"
            "psrlw            $8, %%mm7        \n\t"
            "1:                                \n\t"
            "movq -28(%1, %0, 4), %%mm0        \n\t"
            "movq -20(%1, %0, 4), %%mm1        \n\t"
            "movq -12(%1, %0, 4), %%mm2        \n\t"
            "movq  -4(%1, %0, 4), %%mm3        \n\t"
            /* first pass: keep the odd bytes (offsets 1, 3, 5, ...) */
            "psrlw            $8, %%mm0        \n\t"
            "psrlw            $8, %%mm1        \n\t"
            "psrlw            $8, %%mm2        \n\t"
            "psrlw            $8, %%mm3        \n\t"
            "packuswb      %%mm1, %%mm0        \n\t"
            "packuswb      %%mm3, %%mm2        \n\t"
            /* second pass: the shifted copy holds offsets 3, 7, ...,
             * the masked copy offsets 1, 5, ... */
            "movq          %%mm0, %%mm1        \n\t"
            "movq          %%mm2, %%mm3        \n\t"
            "psrlw            $8, %%mm0        \n\t"
            "psrlw            $8, %%mm2        \n\t"
            "pand          %%mm7, %%mm1        \n\t"
            "pand          %%mm7, %%mm3        \n\t"
            "packuswb      %%mm2, %%mm0        \n\t"
            "packuswb      %%mm3, %%mm1        \n\t"
            /* %3 is dst1, %2 is dst0 */
            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
            "add              $8, %0           \n\t"
            " js 1b                            \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    /* Scalar path: everything when MMX is unavailable, otherwise the tail.
     * It must pick the odd bytes (+1 / +3) to match what the MMX loop
     * stores; offsets +0 / +2 would return the even bytes instead. */
    while(count<0){
        dst0[count]= src[4*count+1];
        dst1[count]= src[4*count+3];
        count++;
    }
}
2838
2839
static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2840
                                      long width, long height,
2841
                                      long lumStride, long chromStride, long srcStride)
2842
{
2843
    long y;
2844
    const long chromWidth= -((-width)>>1);
2845
2846
    for (y=0; y<height; y++){
2847
        RENAME(extract_even)(src, ydst, width);
2848
        if(!(y&1)){
2849
            RENAME(extract_odd2)(src, udst, vdst, chromWidth);
2850
            udst+= chromStride;
2851
            vdst+= chromStride;
2852
        }
2853
2854
        src += srcStride;
2855
        ydst+= lumStride;
2856
    }
2857
#if HAVE_MMX
2858
    __asm__(
2859
        EMMS"       \n\t"
2860
        SFENCE"     \n\t"
2861
        ::: "memory"
2862
        );
2863
#endif
2864
}
2865
2866
static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2867
                                      long width, long height,
2868
                                      long lumStride, long chromStride, long srcStride)
2869
{
2870
    long y;
2871
    const long chromWidth= -((-width)>>1);
2872
2873
    for (y=0; y<height; y++){
2874
        RENAME(extract_even)(src, ydst, width);
2875
        RENAME(extract_odd2)(src, udst, vdst, chromWidth);
2876
2877
        src += srcStride;
2878
        ydst+= lumStride;
2879
        udst+= chromStride;
2880
        vdst+= chromStride;
2881
    }
2882
#if HAVE_MMX
2883
    __asm__(
2884
        EMMS"       \n\t"
2885
        SFENCE"     \n\t"
2886
        ::: "memory"
2887
        );
2888
#endif
2889
}
2890
2891
static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2892
                                      long width, long height,
2893
                                      long lumStride, long chromStride, long srcStride)
2894
{
2895
    long y;
2896
    const long chromWidth= -((-width)>>1);
2897
2898
    for (y=0; y<height; y++){
2899
        RENAME(extract_even)(src+1, ydst, width);
2900
        if(!(y&1)){
2901
            RENAME(extract_even2)(src, udst, vdst, chromWidth);
2902
            udst+= chromStride;
2903
            vdst+= chromStride;
2904
        }
2905
2906
        src += srcStride;
2907
        ydst+= lumStride;
2908
    }
2909
#if HAVE_MMX
2910
    __asm__(
2911
        EMMS"       \n\t"
2912
        SFENCE"     \n\t"
2913
        ::: "memory"
2914
        );
2915
#endif
2916
}
2917
2918
static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2919
                                      long width, long height,
2920
                                      long lumStride, long chromStride, long srcStride)
2921
{
2922
    long y;
2923
    const long chromWidth= -((-width)>>1);
2924
2925
    for (y=0; y<height; y++){
2926
        RENAME(extract_even)(src+1, ydst, width);
2927
        RENAME(extract_even2)(src, udst, vdst, chromWidth);
2928
2929
        src += srcStride;
2930
        ydst+= lumStride;
2931
        udst+= chromStride;
2932
        vdst+= chromStride;
2933
    }
2934
#if HAVE_MMX
2935
    __asm__(
2936
        EMMS"       \n\t"
2937
        SFENCE"     \n\t"
2938
        ::: "memory"
2939
        );
2940
#endif
2941
}
2942
2704
static inline void RENAME(rgb2rgb_init)(void){
2943
static inline void RENAME(rgb2rgb_init)(void){
2705
    rgb15to16       = RENAME(rgb15to16);
2944
    rgb15to16       = RENAME(rgb15to16);
2706
    rgb15tobgr24    = RENAME(rgb15tobgr24);
2945
    rgb15tobgr24    = RENAME(rgb15tobgr24);
2707
    rgb15to32       = RENAME(rgb15to32);
2946
    rgb15to32       = RENAME(rgb15to32);
2708
    rgb16tobgr24    = RENAME(rgb16tobgr24);
2947
    rgb16tobgr24    = RENAME(rgb16tobgr24);
...
...
2723
    yv12toyuy2      = RENAME(yv12toyuy2);
2962
    yv12toyuy2      = RENAME(yv12toyuy2);
2724
    yv12touyvy      = RENAME(yv12touyvy);
2963
    yv12touyvy      = RENAME(yv12touyvy);
2725
    yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
2964
    yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
2726
    yuv422ptouyvy   = RENAME(yuv422ptouyvy);
2965
    yuv422ptouyvy   = RENAME(yuv422ptouyvy);
2727
    yuy2toyv12      = RENAME(yuy2toyv12);
2966
    yuy2toyv12      = RENAME(yuy2toyv12);
2728
//    uyvytoyv12      = RENAME(uyvytoyv12);
2729
//    yvu9toyv12      = RENAME(yvu9toyv12);
2967
//    yvu9toyv12      = RENAME(yvu9toyv12);
2730
    planar2x        = RENAME(planar2x);
2968
    planar2x        = RENAME(planar2x);
2731
    rgb24toyv12     = RENAME(rgb24toyv12);
2969
    rgb24toyv12     = RENAME(rgb24toyv12);
2732
    interleaveBytes = RENAME(interleaveBytes);
2970
    interleaveBytes = RENAME(interleaveBytes);
2733
    vu9_to_vu12     = RENAME(vu9_to_vu12);
2971
    vu9_to_vu12     = RENAME(vu9_to_vu12);
2734
    yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
2972
    yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
2973
2974
    uyvytoyuv420    = RENAME(uyvytoyuv420);
2975
    uyvytoyuv422    = RENAME(uyvytoyuv422);
2976
    yuyvtoyuv420    = RENAME(yuyvtoyuv420);
2977
    yuyvtoyuv422    = RENAME(yuyvtoyuv422);
2735
}
2978
}