Thanks for visiting codestin.com
Credit goes to www.ffmpeg.org

FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <[email protected]>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
/* Two-entry, 8-byte-aligned dither table. Each 64-bit entry repeats a two-byte
 * pattern. The vertical-filter setup code in this file indexes it with the
 * output row parity (dstY & 1) to set c->greenDither when the destination is
 * neither RGB555 nor BGR555. */
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
/* Two-entry, 8-byte-aligned dither table. Each 64-bit entry repeats a two-byte
 * pattern. The vertical-filter setup code in this file indexes it with the
 * output row parity: [dstY & 1] for c->blueDither (and for c->greenDither on
 * RGB555/BGR555 output), [(dstY + 1) & 1] for c->redDither. */
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
/* 64-bit constants with every byte set to 0xF8 / 0xFC.
 * NOTE(review): presumably consumed by the inline-asm template included
 * below; their users are not visible in this file. */
43 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
44 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
45 
/* Three disjoint byte masks, each selecting every third byte of a 64-bit
 * word; OR-ed together they cover all eight bytes. */
46 DECLARE_ASM_CONST(8, uint64_t, M24A) = 0x00FF0000FF0000FFLL;
47 DECLARE_ASM_CONST(8, uint64_t, M24B) = 0xFF0000FF0000FF00LL;
48 DECLARE_ASM_CONST(8, uint64_t, M24C) = 0x0000FF0000FF0000LL;
49 
50 // MMXEXT versions
/* Instantiate swscale_template.c once with COMPILE_TEMPLATE_MMXEXT set to 1.
 * RENAME(a) appends the "_mmxext" suffix, so every symbol the template defines
 * through RENAME() gets that suffix (e.g. the yuv2*_mmxext functions assigned
 * later in this file). */
51 #if HAVE_MMXEXT_INLINE
52 #undef RENAME
53 #undef COMPILE_TEMPLATE_MMXEXT
54 #define COMPILE_TEMPLATE_MMXEXT 1
55 #define RENAME(a) a ## _mmxext
56 #include "swscale_template.c"
57 #endif
58 #endif /* HAVE_INLINE_ASM */
59 
/*
 * NOTE(review): the signature line of this function (listing line 60) is
 * missing from this extract. The body reads a context pointer `c` and an
 * output row index `dstY`.
 *
 * For the given output row the body
 *  - selects c->blueDither / c->greenDither / c->redDither from ff_dither4 and
 *    ff_dither8 by row parity, and
 *  - when dstY < dstH - 2, fills c->lumMmxFilter and c->chrMmxFilter (plus
 *    c->alpMmxFilter when alpha is in use) with source-line pointers and the
 *    vertical filter coefficients for that row.
 */
61 {
62  const int dstH= c->opts.dst_h;
63  const int flags= c->opts.flags;
64 
/* Planes are taken from the second-to-last slice: 0 = luma, 1 = chroma U, 3 = alpha. */
65  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
66  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
67  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
68 
69  int hasAlpha = c->needAlpha;
70  int32_t *vLumFilterPos= c->vLumFilterPos;
71  int32_t *vChrFilterPos= c->vChrFilterPos;
72  int16_t *vLumFilter= c->vLumFilter;
73  int16_t *vChrFilter= c->vChrFilter;
74  int32_t *lumMmxFilter= c->lumMmxFilter;
75  int32_t *chrMmxFilter= c->chrMmxFilter;
76  av_unused int32_t *alpMmxFilter= c->alpMmxFilter;
77  const int vLumFilterSize= c->vLumFilterSize;
78  const int vChrFilterSize= c->vChrFilterSize;
/* Chroma output row: the luma row shifted down by the vertical subsampling. */
79  const int chrDstY= dstY>>c->chrDstVSubSample;
80  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
81  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
82 
/* Dither words alternate with row parity; red uses the opposite parity to blue.
 * Green uses the ff_dither8 table only for RGB555/BGR555 output. */
83  c->blueDither= ff_dither8[dstY&1];
84  if (c->opts.dst_format == AV_PIX_FMT_RGB555 || c->opts.dst_format == AV_PIX_FMT_BGR555)
85  c->greenDither= ff_dither8[dstY&1];
86  else
87  c->greenDither= ff_dither4[dstY&1];
88  c->redDither= ff_dither8[(dstY+1)&1];
/* The filter tables are only rebuilt for rows before the last two. */
89  if (dstY < dstH - 2) {
/* Pointer into each plane's line array, positioned at the first source row of the filter window. */
90  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
91  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
92  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
93 
/* Luma window reaches outside [0, src_h): build a clamped pointer list in the
 * plane's tmp array. Entries for rows before 0 all reuse the pointer at
 * index `neg` (source row 0); entries past the last source row repeat the
 * previous entry. The same clamping is applied to alpha when present. */
94  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->opts.src_h) {
95  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
96 
97  int neg = -firstLumSrcY, i, end = FFMIN(c->opts.src_h - firstLumSrcY, vLumFilterSize);
98  for (i = 0; i < neg; i++)
99  tmpY[i] = lumSrcPtr[neg];
100  for ( ; i < end; i++)
101  tmpY[i] = lumSrcPtr[i];
102  for ( ; i < vLumFilterSize; i++)
103  tmpY[i] = tmpY[i-1];
104  lumSrcPtr = tmpY;
105 
106  if (alpSrcPtr) {
107  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
108  for (i = 0; i < neg; i++)
109  tmpA[i] = alpSrcPtr[neg];
110  for ( ; i < end; i++)
111  tmpA[i] = alpSrcPtr[i];
112  for ( ; i < vLumFilterSize; i++)
113  tmpA[i] = tmpA[i - 1];
114  alpSrcPtr = tmpA;
115  }
116  }
/* Same edge clamping for the chroma window, bounded by c->chrSrcH. */
117  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
118  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
119  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
120  for (i = 0; i < neg; i++) {
121  tmpU[i] = chrUSrcPtr[neg];
122  }
123  for ( ; i < end; i++) {
124  tmpU[i] = chrUSrcPtr[i];
125  }
126  for ( ; i < vChrFilterSize; i++) {
127  tmpU[i] = tmpU[i - 1];
128  }
129  chrUSrcPtr = tmpU;
130  }
131 
/* Accurate-rounding layout: taps are handled in pairs. Each pair stores two
 * line pointers (at offset 0 and APCK_PTR2/4) and, at APCK_COEF/4 and the
 * following slot, both coefficients packed into one 32-bit value (first tap
 * in the low half, second tap shifted left by 16). A filter size of 1 reuses
 * the single line for both pointers and packs a zero second coefficient.
 * NOTE(review): for an odd filter size > 1 the last pair reads index
 * size (one past the last tap) - presumably sizes are 1 or even here; confirm. */
132  if (flags & SWS_ACCURATE_RND) {
133  int s= APCK_SIZE / 8;
134  for (int i = 0; i < vLumFilterSize; i += 2) {
135  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
136  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
137  lumMmxFilter[s*i+APCK_COEF/4 ]=
138  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
139  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
/* Alpha shares the luma coefficients; only the line pointers differ. */
140  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
141  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
142  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
143  alpMmxFilter[s*i+APCK_COEF/4 ]=
144  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
145  }
146  }
147  for (int i = 0; i < vChrFilterSize; i += 2) {
148  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
149  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
150  chrMmxFilter[s*i+APCK_COEF/4 ]=
151  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
152  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
153  }
154  } else {
/* Default layout: four int32 slots per tap. The line pointer is stored at
 * slot 0; slots 2 and 3 both hold the 16-bit coefficient duplicated into the
 * low and high halves (multiplication by 0x10001). */
155  for (int i = 0; i < vLumFilterSize; i++) {
156  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
157  lumMmxFilter[4*i+2]=
158  lumMmxFilter[4*i+3]=
159  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
160  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
161  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
162  alpMmxFilter[4*i+2]=
163  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
164  }
165  }
166  for (int i = 0; i < vChrFilterSize; i++) {
167  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
168  chrMmxFilter[4*i+2]=
169  chrMmxFilter[4*i+3]=
170  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
171  }
172  }
173  }
174 }
175 
/*
 * YUV2YUVX_FUNC(opt, step): declares the external routine ff_yuv2yuvX_<opt>
 * and defines a static wrapper yuv2yuvX_<opt> around it.
 *  - If dest is not 16-byte aligned, the whole call is forwarded to
 *    yuv2yuvX_sse2 and the wrapper returns.
 *  - Otherwise the largest multiple of `step` pixels is handed to
 *    ff_yuv2yuvX_<opt>; any remaining dstW % step pixels are handed to
 *    ff_yuv2yuvX_sse2 with srcOffset = pixelsProcessed.
 * The external routines are called with filterSize - 1, dest - offset and a
 * width enlarged by offset; the wrapper's `src` argument is not passed on.
 * The expansion references yuv2yuvX_sse2 / ff_yuv2yuvX_sse2, so those must be
 * declared before the macro is instantiated.
 */
176 #define YUV2YUVX_FUNC(opt, step) \
177 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
178  uint8_t *dest, int dstW, \
179  const uint8_t *dither, int offset); \
180 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
181  const int16_t **src, uint8_t *dest, int dstW, \
182  const uint8_t *dither, int offset) \
183 { \
184  int remainder = (dstW % step); \
185  int pixelsProcessed = dstW - remainder; \
186  if(((uintptr_t)dest) & 15){ \
187  yuv2yuvX_sse2(filter, filterSize, src, dest, dstW, dither, offset); \
188  return; \
189  } \
190  if(pixelsProcessed > 0) \
191  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
192  if(remainder > 0){ \
193  ff_yuv2yuvX_sse2(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
194  } \
195  return; \
196 }
197 
198 #if HAVE_SSE2_EXTERNAL
/* External SSE2 routine; defined outside this file. */
199 void ff_yuv2yuvX_sse2(const int16_t *filter, int filterSize, int srcOffset,
200  uint8_t *dest, int dstW,
201  const uint8_t *dither, int offset);
/*
 * Wrapper around ff_yuv2yuvX_sse2. Does nothing when dstW <= 0. Otherwise it
 * calls the external routine from source offset 0 with filterSize - 1,
 * dest - offset and dstW + offset. The `src` parameter is accepted but not
 * forwarded.
 */
202 static void yuv2yuvX_sse2(const int16_t *filter, int filterSize,
203  const int16_t **src, uint8_t *dest, int dstW,
204  const uint8_t *dither, int offset)
205 {
206  if (dstW > 0)
207  ff_yuv2yuvX_sse2(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset);
208  return;
209 }
/* Wider variants generated by YUV2YUVX_FUNC: step 32 for SSE3, step 64 for AVX2. */
210 #if HAVE_SSE3_EXTERNAL
211 YUV2YUVX_FUNC(sse3, 32)
212 #endif
213 #if HAVE_AVX2_EXTERNAL
214 YUV2YUVX_FUNC(avx2, 64)
215 #endif
216 #endif
217 
/* SCALE_FUNC expands to the prototype (without trailing semicolon) of
 * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>. */
218 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
219 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
220  SwsInternal *c, int16_t *data, \
221  int dstW, const uint8_t *src, \
222  const int16_t *filter, \
223  const int32_t *filterPos, int filterSize)
224 
/* Declares, for one filter-size variant, every combination of source depth
 * 8/9/10/12/14/16 with target depth 15 and 19. */
225 #define SCALE_FUNCS(filter_n, opt) \
226  SCALE_FUNC(filter_n, 8, 15, opt); \
227  SCALE_FUNC(filter_n, 9, 15, opt); \
228  SCALE_FUNC(filter_n, 10, 15, opt); \
229  SCALE_FUNC(filter_n, 12, 15, opt); \
230  SCALE_FUNC(filter_n, 14, 15, opt); \
231  SCALE_FUNC(filter_n, 16, 15, opt); \
232  SCALE_FUNC(filter_n, 8, 19, opt); \
233  SCALE_FUNC(filter_n, 9, 19, opt); \
234  SCALE_FUNC(filter_n, 10, 19, opt); \
235  SCALE_FUNC(filter_n, 12, 19, opt); \
236  SCALE_FUNC(filter_n, 14, 19, opt); \
237  SCALE_FUNC(filter_n, 16, 19, opt)
238 
/* Filter-size variants 4, 8 and X. Not instantiated in the visible part of this file. */
239 #define SCALE_FUNCS_MMX(opt) \
240  SCALE_FUNCS(4, opt); \
241  SCALE_FUNCS(8, opt); \
242  SCALE_FUNCS(X, opt)
243 
/* Filter-size variants 4, 8, X4 and X8. */
244 #define SCALE_FUNCS_SSE(opt) \
245  SCALE_FUNCS(4, opt); \
246  SCALE_FUNCS(8, opt); \
247  SCALE_FUNCS(X4, opt); \
248  SCALE_FUNCS(X8, opt)
249 
250 SCALE_FUNCS_SSE(sse2);
251 SCALE_FUNCS_SSE(ssse3);
252 SCALE_FUNCS_SSE(sse4);
253 
/* AVX2 only declares the 8-bit to 15-bit scalers, for filter variants 4 and X4. */
254 SCALE_FUNC(4, 8, 15, avx2);
255 SCALE_FUNC(X4, 8, 15, avx2);
256 
/* VSCALEX_FUNC expands to the prototype (without trailing semicolon) of
 * ff_yuv2planeX_<size>_<opt>. */
257 #define VSCALEX_FUNC(size, opt) \
258 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
259  const int16_t **src, uint8_t *dest, int dstW, \
260  const uint8_t *dither, int offset)
/* Declares the 8-, 9- and 10-bit variants for one instruction set. */
261 #define VSCALEX_FUNCS(opt) \
262  VSCALEX_FUNC(8, opt); \
263  VSCALEX_FUNC(9, opt); \
264  VSCALEX_FUNC(10, opt)
265 
/* A 16-bit variant is declared for sse4 only. */
266 VSCALEX_FUNCS(sse2);
267 VSCALEX_FUNCS(sse4);
268 VSCALEX_FUNC(16, sse4);
269 VSCALEX_FUNCS(avx);
270 
/* VSCALE_FUNC expands to the prototype (without trailing semicolon) of
 * ff_yuv2plane1_<size>_<opt>. */
271 #define VSCALE_FUNC(size, opt) \
272 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
273  const uint8_t *dither, int offset)
/* Declares the 8- and 16-bit variants with suffix opt1 and the 9- and 10-bit
 * variants with suffix opt2. */
274 #define VSCALE_FUNCS(opt1, opt2) \
275  VSCALE_FUNC(8, opt1); \
276  VSCALE_FUNC(9, opt2); \
277  VSCALE_FUNC(10, opt2); \
278  VSCALE_FUNC(16, opt1)
279 
280 VSCALE_FUNCS(sse2, sse2);
281 VSCALE_FUNC(16, sse4);
282 VSCALE_FUNCS(avx, avx);
283 
/* INPUT_Y_FUNC expands to the prototype (without trailing semicolon) of the
 * luma reader ff_<fmt>ToY_<opt>. */
284 #define INPUT_Y_FUNC(fmt, opt) \
285 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
286  const uint8_t *unused1, const uint8_t *unused2, \
287  int w, uint32_t *unused, void *opq)
/* INPUT_UV_FUNC expands to the prototype of the chroma reader
 * ff_<fmt>ToUV_<opt>, which writes two destination buffers. */
288 #define INPUT_UV_FUNC(fmt, opt) \
289 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
290  const uint8_t *unused0, \
291  const uint8_t *src1, \
292  const uint8_t *src2, \
293  int w, uint32_t *unused, void *opq)
/* Declares both the Y and the UV reader for one format. */
294 #define INPUT_FUNC(fmt, opt) \
295  INPUT_Y_FUNC(fmt, opt); \
296  INPUT_UV_FUNC(fmt, opt)
/* Full reader set for one instruction set; nv12/nv21 get a UV reader only. */
297 #define INPUT_FUNCS(opt) \
298  INPUT_FUNC(uyvy, opt); \
299  INPUT_FUNC(yuyv, opt); \
300  INPUT_UV_FUNC(nv12, opt); \
301  INPUT_UV_FUNC(nv21, opt); \
302  INPUT_FUNC(rgba, opt); \
303  INPUT_FUNC(bgra, opt); \
304  INPUT_FUNC(argb, opt); \
305  INPUT_FUNC(abgr, opt); \
306  INPUT_FUNC(rgb24, opt); \
307  INPUT_FUNC(bgr24, opt)
308 
309 INPUT_FUNCS(sse2);
310 INPUT_FUNCS(ssse3);
311 INPUT_FUNCS(avx);
/* AVX2 declares readers for the packed RGB formats only. */
312 INPUT_FUNC(rgba, avx2);
313 INPUT_FUNC(bgra, avx2);
314 INPUT_FUNC(argb, avx2);
315 INPUT_FUNC(abgr, avx2);
316 INPUT_FUNC(rgb24, avx2);
317 INPUT_FUNC(bgr24, avx2);
318 
319 #if ARCH_X86_64
/* YUV2NV_DECL expands to the prototype (without trailing semicolon) of
 * ff_yuv2<fmt>cX_<opt>. Only the AVX2 nv12 and nv21 variants are declared. */
320 #define YUV2NV_DECL(fmt, opt) \
321 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
322  const int16_t *filter, int filterSize, \
323  const int16_t **u, const int16_t **v, \
324  uint8_t *dst, int dstWidth)
325 
326 YUV2NV_DECL(nv12, avx2);
327 YUV2NV_DECL(nv21, avx2);
328 
/* YUV2GBRP_FN_DECL expands to the prototype (without trailing semicolon) of
 * ff_yuv2<fmt>_full_X_<opt>. */
329 #define YUV2GBRP_FN_DECL(fmt, opt) \
330 void ff_yuv2##fmt##_full_X_ ##opt(SwsInternal *c, const int16_t *lumFilter, \
331  const int16_t **lumSrcx, int lumFilterSize, \
332  const int16_t *chrFilter, const int16_t **chrUSrcx, \
333  const int16_t **chrVSrcx, int chrFilterSize, \
334  const int16_t **alpSrcx, uint8_t **dest, \
335  int dstW, int y)
336 
/* Declares the full set of planar GBR(A) writers for one instruction set:
 * 8-bit gbrp/gbrap plus the little- and big-endian 9/10/12/14/16-bit and
 * 32-bit float variants listed below. */
337 #define YUV2GBRP_DECL(opt) \
338 YUV2GBRP_FN_DECL(gbrp, opt); \
339 YUV2GBRP_FN_DECL(gbrap, opt); \
340 YUV2GBRP_FN_DECL(gbrp9le, opt); \
341 YUV2GBRP_FN_DECL(gbrp10le, opt); \
342 YUV2GBRP_FN_DECL(gbrap10le, opt); \
343 YUV2GBRP_FN_DECL(gbrp12le, opt); \
344 YUV2GBRP_FN_DECL(gbrap12le, opt); \
345 YUV2GBRP_FN_DECL(gbrp14le, opt); \
346 YUV2GBRP_FN_DECL(gbrp16le, opt); \
347 YUV2GBRP_FN_DECL(gbrap16le, opt); \
348 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
349 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
350 YUV2GBRP_FN_DECL(gbrp9be, opt); \
351 YUV2GBRP_FN_DECL(gbrp10be, opt); \
352 YUV2GBRP_FN_DECL(gbrap10be, opt); \
353 YUV2GBRP_FN_DECL(gbrp12be, opt); \
354 YUV2GBRP_FN_DECL(gbrap12be, opt); \
355 YUV2GBRP_FN_DECL(gbrp14be, opt); \
356 YUV2GBRP_FN_DECL(gbrp16be, opt); \
357 YUV2GBRP_FN_DECL(gbrap16be, opt); \
358 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
359 YUV2GBRP_FN_DECL(gbrapf32be, opt)
360 
361 YUV2GBRP_DECL(sse2);
362 YUV2GBRP_DECL(sse4);
363 YUV2GBRP_DECL(avx2);
364 
/* Prototype generators (no trailing semicolon) for the planar RGB readers:
 * ff_planar_<fmt>_to_y_<opt>, ff_planar_<fmt>_to_uv_<opt> and
 * ff_planar_<fmt>_to_a_<opt>. */
365 #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
366 void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
367  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
368  void *opq)
369 
370 #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
371 void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
372  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
373  void *opq)
374 
375 #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
376 void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
377  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
378  void *opq)
379 
380 
/* The *_RGBXX_* helpers declare the "le" and "be" variants of one format. */
381 #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
382 INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
383 INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
384 
385 #define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
386 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
387 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
388 
389 #define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
390 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
391 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)
392 
/* Combinations of the Y / UV / A declarations for one format. These three
 * combination macros are not instantiated in the visible part of this file. */
393 #define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
394 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
395 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
396 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
397 
398 #define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
399 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
400 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)
401 
402 #define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
403 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
404 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
405 
/* Alpha readers exist for rgb, rgb10, rgb12, rgb16 and rgbf32 only
 * (no 9- or 14-bit entries). */
406 #define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
407 INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \
408 INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \
409 INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \
410 INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \
411 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)
412 
/* Luma readers for rgb and the le/be 9/10/12/14/16-bit and f32 formats. */
413 #define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
414 INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \
415 INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \
416 INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \
417 INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \
418 INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \
419 INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \
420 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)
421 
/* Chroma readers for the same format list as the luma readers. */
422 #define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
423 INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \
424 INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \
425 INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \
426 INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \
427 INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \
428 INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \
429 INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)
430 
/* SSE2: luma readers for rgbf32 only; all chroma and alpha readers. */
431 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
432 INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
433 INPUT_PLANAR_RGB_A_ALL_DECL(sse2);
434 
/* SSE4: all luma and chroma readers; alpha readers for rgbf32 only. */
435 INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
436 INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
437 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);
438 
/* AVX2: the complete luma, chroma and alpha sets. */
439 INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
440 INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
441 INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
442 #endif
443 
/* Installs the luma/chroma range converters on the context `c` (taken from
 * the expansion site): the FromJpeg pair when c->opts.src_range is non-zero,
 * the ToJpeg pair otherwise. `bpc` and `opt` select the function suffixes. */
444 #define RANGE_CONVERT_FUNCS(opt, bpc) do { \
445  if (c->opts.src_range) { \
446  c->lumConvertRange = ff_lumRangeFromJpeg##bpc##_##opt; \
447  c->chrConvertRange = ff_chrRangeFromJpeg##bpc##_##opt; \
448  } else { \
449  c->lumConvertRange = ff_lumRangeToJpeg##bpc##_##opt; \
450  c->chrConvertRange = ff_chrRangeToJpeg##bpc##_##opt; \
451  } \
452 } while (0)
453 
/* Declares the four external converters for one (opt, bpc) pair. Each
 * declaration carries its own semicolon, so the macro is used without one. */
454 #define RANGE_CONVERT_FUNCS_DECL(opt, bpc) \
455 void ff_lumRangeFromJpeg##bpc##_##opt(int16_t *dst, int width, \
456  uint32_t coeff, int64_t offset); \
457 void ff_chrRangeFromJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \
458  uint32_t coeff, int64_t offset); \
459 void ff_lumRangeToJpeg##bpc##_##opt(int16_t *dst, int width, \
460  uint32_t coeff, int64_t offset); \
461 void ff_chrRangeToJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \
462  uint32_t coeff, int64_t offset); \
463 
/* NOTE(review): listing lines 464 and 466 are missing from this extract; the
 * dispatcher that follows also uses (sse2, 8) and (avx2, 8), so their
 * declarations were presumably on those lines - confirm against the original. */
465 RANGE_CONVERT_FUNCS_DECL(sse4, 16)
467 RANGE_CONVERT_FUNCS_DECL(avx2, 16)
468 
/*
 * Selects SIMD range-conversion routines for the context `c` from the CPU
 * flags and c->dstBpc (8-bit variants for dstBpc <= 14, 16-bit otherwise).
 *
 * NOTE(review): listing lines 469 (the function signature) and 472 are
 * missing from this extract. The `} else if` on line 478 implies that line
 * 472 opened an `if (...) {` guarding the AVX2 branch - confirm against the
 * original before relying on this block.
 */
470 {
471  int cpu_flags = av_get_cpu_flags();
473  if (c->dstBpc <= 14) {
474  RANGE_CONVERT_FUNCS(avx2, 8);
475  } else {
476  RANGE_CONVERT_FUNCS(avx2, 16);
477  }
/* Fallbacks: SSE2 handles only dstBpc <= 14, SSE4 only dstBpc > 14. */
478  } else if (EXTERNAL_SSE2(cpu_flags) && c->dstBpc <= 14) {
479  RANGE_CONVERT_FUNCS(sse2, 8);
480  } else if (EXTERNAL_SSE4(cpu_flags) && c->dstBpc > 14) {
481  RANGE_CONVERT_FUNCS(sse4, 16);
482  }
483 }
484 
486 {
487  int cpu_flags = av_get_cpu_flags();
488  enum AVPixelFormat dst_format = c->opts.dst_format;
489 
490  c->use_mmx_vfilter = 0;
491 
492  if (X86_MMXEXT(cpu_flags)) {
493  if (!is16BPS(dst_format) && !isNBPS(dst_format) && !isSemiPlanarYUV(dst_format)
494  && dst_format != AV_PIX_FMT_GRAYF32BE && dst_format != AV_PIX_FMT_GRAYF32LE
495  && !(c->opts.flags & SWS_BITEXACT)) {
496  if (c->opts.flags & SWS_ACCURATE_RND) {
497 #if HAVE_MMXEXT_INLINE
498  if (!(c->opts.flags & SWS_FULL_CHR_H_INT)) {
499  switch (c->opts.dst_format) {
500  case AV_PIX_FMT_RGB32: c->yuv2packedX = yuv2rgb32_X_ar_mmxext; break;
501 #if HAVE_6REGS
502  case AV_PIX_FMT_BGR24: c->yuv2packedX = yuv2bgr24_X_ar_mmxext; break;
503 #endif
504  case AV_PIX_FMT_RGB555: c->yuv2packedX = yuv2rgb555_X_ar_mmxext; break;
505  case AV_PIX_FMT_RGB565: c->yuv2packedX = yuv2rgb565_X_ar_mmxext; break;
506  case AV_PIX_FMT_YUYV422: c->yuv2packedX = yuv2yuyv422_X_ar_mmxext; break;
507  default: break;
508  }
509  }
510 #endif
511  } else {
512 #if HAVE_SSE2_EXTERNAL
513  if (EXTERNAL_SSE2(cpu_flags)) {
514  c->use_mmx_vfilter = 1;
515  c->yuv2planeX = yuv2yuvX_sse2;
516 #if HAVE_SSE3_EXTERNAL
518  c->yuv2planeX = yuv2yuvX_sse3;
519 #endif
520 #if HAVE_AVX2_EXTERNAL
522  c->yuv2planeX = yuv2yuvX_avx2;
523 #endif
524  }
525 #endif /* HAVE_SSE2_EXTERNAL */
526 #if HAVE_MMXEXT_INLINE
527  if (!(c->opts.flags & SWS_FULL_CHR_H_INT)) {
528  switch (c->opts.dst_format) {
529  case AV_PIX_FMT_RGB32: c->yuv2packedX = yuv2rgb32_X_mmxext; break;
530  case AV_PIX_FMT_BGR32: c->yuv2packedX = yuv2bgr32_X_mmxext; break;
531 #if HAVE_6REGS
532  case AV_PIX_FMT_BGR24: c->yuv2packedX = yuv2bgr24_X_mmxext; break;
533 #endif
534  case AV_PIX_FMT_RGB555: c->yuv2packedX = yuv2rgb555_X_mmxext; break;
535  case AV_PIX_FMT_RGB565: c->yuv2packedX = yuv2rgb565_X_mmxext; break;
536  case AV_PIX_FMT_YUYV422: c->yuv2packedX = yuv2yuyv422_X_mmxext; break;
537  default: break;
538  }
539  }
540 #endif
541  }
542 #if HAVE_MMXEXT_INLINE
543  if (!(c->opts.flags & SWS_FULL_CHR_H_INT)) {
544  switch (c->opts.dst_format) {
545  case AV_PIX_FMT_RGB32:
546  c->yuv2packed1 = yuv2rgb32_1_mmxext;
547  c->yuv2packed2 = yuv2rgb32_2_mmxext;
548  break;
549  case AV_PIX_FMT_BGR24:
550  c->yuv2packed1 = yuv2bgr24_1_mmxext;
551  c->yuv2packed2 = yuv2bgr24_2_mmxext;
552  break;
553  case AV_PIX_FMT_RGB555:
554  c->yuv2packed1 = yuv2rgb555_1_mmxext;
555  c->yuv2packed2 = yuv2rgb555_2_mmxext;
556  break;
557  case AV_PIX_FMT_RGB565:
558  c->yuv2packed1 = yuv2rgb565_1_mmxext;
559  c->yuv2packed2 = yuv2rgb565_2_mmxext;
560  break;
561  case AV_PIX_FMT_YUYV422:
562  c->yuv2packed1 = yuv2yuyv422_1_mmxext;
563  c->yuv2packed2 = yuv2yuyv422_2_mmxext;
564  break;
565  default:
566  break;
567  }
568  }
569 #endif
570  }
571 #if HAVE_MMXEXT_INLINE
572  if (c->srcBpc == 8 && c->dstBpc <= 14) {
573  // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
574  if (c->opts.flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
575  c->hyscale_fast = ff_hyscale_fast_mmxext;
576  c->hcscale_fast = ff_hcscale_fast_mmxext;
577  } else {
578  c->hyscale_fast = NULL;
579  c->hcscale_fast = NULL;
580  }
581  }
582 #endif
583  }
584 
/* Assigns a horizontal scaler to `hscalefn` based on c->srcBpc. For each
 * source depth the <depth>to15 routine with suffix opt2 is chosen when
 * c->dstBpc <= 14, otherwise the <depth>to19 routine with suffix opt1.
 * The 14-bit branch is also taken for PAL8 or any-RGB source formats whose
 * first component is less than 16 bits deep. Anything else must be 16-bit
 * (asserted). */
585 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
586  if (c->srcBpc == 8) { \
587  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
588  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
589  } else if (c->srcBpc == 9) { \
590  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
591  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
592  } else if (c->srcBpc == 10) { \
593  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
594  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
595  } else if (c->srcBpc == 12) { \
596  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
597  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
598  } else if (c->srcBpc == 14 || ((c->opts.src_format==AV_PIX_FMT_PAL8||isAnyRGB(c->opts.src_format)) && av_pix_fmt_desc_get(c->opts.src_format)->comp[0].depth<16)) { \
599  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
600  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
601  } else { /* c->srcBpc == 16 */ \
602  av_assert0(c->srcBpc == 16);\
603  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
604  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
605  } \
606 } while (0)
/* Assigns a multi-tap vertical scaler by c->dstBpc. The 16-bit case runs the
 * caller-supplied statement `do_16_case` (may be empty). The 10-bit routine
 * is used only for formats that are not big-endian, not semi-planar and do
 * not keep data in the high bits; the 9-bit routine only for non-big-endian
 * formats; the 8-bit routine only when c->use_mmx_vfilter is 0. Other depths
 * leave `vscalefn` untouched (no default label). */
607 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case) \
608 switch(c->dstBpc){ \
609  case 16: do_16_case; break; \
610  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format) && !isDataInHighBits(c->opts.dst_format)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
611  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
612  case 8: if (!c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
613  }
/* Assigns a single-line vertical scaler by c->dstBpc with the same format
 * restrictions for 16/10/9 bits; 8-bit is assigned unconditionally. Any other
 * depth only asserts that c->dstBpc > 8. */
614 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
615  switch(c->dstBpc){ \
616  case 16: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
617  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format) && !isDataInHighBits(c->opts.dst_format)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
618  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
619  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
620  default: av_assert0(c->dstBpc>8); \
621  }
/* Switch case for one packed RGB source format: always installs the luma
 * reader, and the chroma reader only when c->chrSrcHSubSample is 0. The final
 * `break` has no semicolon; the use site supplies it. */
622 #define case_rgb(x, X, opt) \
623  case AV_PIX_FMT_ ## X: \
624  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
625  if (!c->chrSrcHSubSample) \
626  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
627  break
/* Picks the filter-size variant: 4 and 8 have dedicated routines; any other
 * size uses X4 when bit 2 of the size is set, X8 otherwise. */
628 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
629  switch (filtersize) { \
630  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
631  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
632  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
633  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
634  break; \
635  }
636  if (EXTERNAL_SSE2(cpu_flags)) {
637  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
638  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
639  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, );
640  if (!(c->opts.flags & SWS_ACCURATE_RND))
641  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
642 
643  switch (c->opts.src_format) {
644  case AV_PIX_FMT_YA8:
645  c->lumToYV12 = ff_yuyvToY_sse2;
646  if (c->needAlpha)
647  c->alpToYV12 = ff_uyvyToY_sse2;
648  break;
649  case AV_PIX_FMT_YUYV422:
650  c->lumToYV12 = ff_yuyvToY_sse2;
651  c->chrToYV12 = ff_yuyvToUV_sse2;
652  break;
653  case AV_PIX_FMT_UYVY422:
654  c->lumToYV12 = ff_uyvyToY_sse2;
655  c->chrToYV12 = ff_uyvyToUV_sse2;
656  break;
657  case AV_PIX_FMT_NV12:
658  c->chrToYV12 = ff_nv12ToUV_sse2;
659  break;
660  case AV_PIX_FMT_NV21:
661  c->chrToYV12 = ff_nv21ToUV_sse2;
662  break;
663  case_rgb(rgb24, RGB24, sse2);
664  case_rgb(bgr24, BGR24, sse2);
665  case_rgb(bgra, BGRA, sse2);
666  case_rgb(rgba, RGBA, sse2);
667  case_rgb(abgr, ABGR, sse2);
668  case_rgb(argb, ARGB, sse2);
669  default:
670  break;
671  }
672  }
673  if (EXTERNAL_SSSE3(cpu_flags)) {
674  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
675  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
676  switch (c->opts.src_format) {
677  case_rgb(rgb24, RGB24, ssse3);
678  case_rgb(bgr24, BGR24, ssse3);
679  default:
680  break;
681  }
682  }
683  if (EXTERNAL_SSE4(cpu_flags)) {
684  /* Xto15 don't need special sse4 functions */
685  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
686  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
687  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
688  if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4);
689  if (c->dstBpc == 16 && !isBE(c->opts.dst_format) && !(c->opts.flags & SWS_ACCURATE_RND))
690  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
691  }
692 
693  if (EXTERNAL_AVX(cpu_flags)) {
694  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, );
695  if (!(c->opts.flags & SWS_ACCURATE_RND))
696  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
697 
698  switch (c->opts.src_format) {
699  case AV_PIX_FMT_YUYV422:
700  c->chrToYV12 = ff_yuyvToUV_avx;
701  break;
702  case AV_PIX_FMT_UYVY422:
703  c->chrToYV12 = ff_uyvyToUV_avx;
704  break;
705  case AV_PIX_FMT_NV12:
706  c->chrToYV12 = ff_nv12ToUV_avx;
707  break;
708  case AV_PIX_FMT_NV21:
709  c->chrToYV12 = ff_nv21ToUV_avx;
710  break;
711  case_rgb(rgb24, RGB24, avx);
712  case_rgb(bgr24, BGR24, avx);
713  case_rgb(bgra, BGRA, avx);
714  case_rgb(rgba, RGBA, avx);
715  case_rgb(abgr, ABGR, avx);
716  case_rgb(argb, ARGB, avx);
717  default:
718  break;
719  }
720  }
721 
722 #if ARCH_X86_64
/* Assigns the AVX2 8-bit to 15-bit horizontal scaler: the dedicated routine
 * for filter size 4, the X4 routine for every other size.
 * NOTE(review): the second `break;` after the default case is unreachable. */
723 #define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
724  switch (filtersize) { \
725  case 4: hscalefn = ff_hscale8to15_4_avx2; break; \
726  default: hscalefn = ff_hscale8to15_X4_avx2; break; \
727  break; \
728  }
729 
731  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
732  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
733  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
734  }
735  }
736 
738  if (ARCH_X86_64)
739  switch (c->opts.src_format) {
740  case_rgb(rgb24, RGB24, avx2);
741  case_rgb(bgr24, BGR24, avx2);
742  case_rgb(bgra, BGRA, avx2);
743  case_rgb(rgba, RGBA, avx2);
744  case_rgb(abgr, ABGR, avx2);
745  case_rgb(argb, ARGB, avx2);
746  }
747  if (!(c->opts.flags & SWS_ACCURATE_RND)) // FIXME
748  switch (c->opts.dst_format) {
749  case AV_PIX_FMT_NV12:
750  case AV_PIX_FMT_NV24:
751  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
752  break;
753  case AV_PIX_FMT_NV21:
754  case AV_PIX_FMT_NV42:
755  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
756  break;
757  default:
758  break;
759  }
760  }
761 
762 
763 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
764  case fmt: \
765  c->readAlpPlanar = ff_planar_##name##_to_a_##opt; \
766  av_fallthrough;
767 
768 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
769  case rgba_fmt: \
770  case rgb_fmt: \
771  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
772  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
773  break;
774 
775 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
776  case fmt: \
777  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
778  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
779  break;
780 
781 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
782  case fmt: \
783  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
784  break;
785 
786 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
787  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
788  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
789  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
790  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
791 
792 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
793  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
794  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
795  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
796  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
797 
798 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
799  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
800  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
801 
802 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
803  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
804  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
805 
806 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
807  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
808  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
809 
810 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
811  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
812  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
813  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
814  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
815  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
816  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
817  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
818  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
819 
820 
821  if (EXTERNAL_SSE2(cpu_flags)) {
822  switch (c->opts.src_format) {
823  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
824  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
825  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
826  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
827  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
828  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
829  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
830  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
831  default:
832  break;
833  }
834  }
835 
836  if (EXTERNAL_SSE4(cpu_flags)) {
837  switch (c->opts.src_format) {
838  case AV_PIX_FMT_GBRAP:
839  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
840  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
841  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
842  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
843  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
844  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
845  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
846  default:
847  break;
848  }
849  }
850 
852  switch (c->opts.src_format) {
853  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
854  default:
855  break;
856  }
857  }
858 
859  if(c->opts.flags & SWS_FULL_CHR_H_INT) {
860 
861 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
862  case fmt: \
863  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
864  break;
865 
866 #define YUV2ANYX_GBRAP_CASES(opt) \
867  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
868  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
869  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
870  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
871  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
872  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
873  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
874  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
875  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
876  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
877  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
878  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
879  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
880  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
881  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
882  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
883  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
884  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
885  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
886  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
887  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
888  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
889 
890  if (EXTERNAL_SSE2(cpu_flags)) {
891  switch (c->opts.dst_format) {
892  YUV2ANYX_GBRAP_CASES(sse2)
893  default:
894  break;
895  }
896  }
897 
898  if (EXTERNAL_SSE4(cpu_flags)) {
899  switch (c->opts.dst_format) {
900  YUV2ANYX_GBRAP_CASES(sse4)
901  default:
902  break;
903  }
904  }
905 
907  switch (c->opts.dst_format) {
908  YUV2ANYX_GBRAP_CASES(avx2)
909  default:
910  break;
911  }
912  }
913  }
914 
915 #endif
916 }
ff_hyscale_fast_mmxext
void ff_hyscale_fast_mmxext(SwsInternal *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
Definition: hscale_fast_bilinear_simd.c:192
flags
const SwsFlags flags[]
Definition: swscale.c:72
AV_PIX_FMT_GBRAP16
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:565
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:70
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1110
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:140
mem_internal.h
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:513
av_unused
#define av_unused
Definition: attributes.h:164
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:73
pixdesc.h
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:157
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
AV_PIX_FMT_GRAYF32LE
@ AV_PIX_FMT_GRAYF32LE
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:364
INPUT_FUNC
#define INPUT_FUNC(fmt, opt)
Definition: swscale.c:294
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:297
SWS_FAST_BILINEAR
@ SWS_FAST_BILINEAR
Scaler selection options.
Definition: swscale.h:176
is16BPS
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:745
rgb
Definition: rpzaenc.c:60
RANGE_CONVERT_FUNCS_DECL
#define RANGE_CONVERT_FUNCS_DECL(opt, bpc)
Definition: swscale.c:454
AV_PIX_FMT_GBRP14
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:560
RANGE_CONVERT_FUNCS
#define RANGE_CONVERT_FUNCS(opt, bpc)
Definition: swscale.c:444
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AV_PIX_FMT_GBRP10
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:558
isNBPS
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:759
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:62
avassert.h
av_cold
#define av_cold
Definition: attributes.h:119
AV_PIX_FMT_GBRAP10
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:562
intreadwrite.h
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRAP12
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:563
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:71
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1111
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:218
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:274
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case)
if
if(ret)
Definition: filter_design.txt:179
isSemiPlanarYUV
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:791
ff_hcscale_fast_mmxext
void ff_hcscale_fast_mmxext(SwsInternal *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
Definition: hscale_fast_bilinear_simd.c:282
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:257
ff_sws_init_range_convert_x86
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c)
Definition: swscale.c:469
AV_PIX_FMT_GBRP16
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:561
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:56
SwsPlane
Slice plane.
Definition: swscale_internal.h:1105
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
AV_PIX_FMT_GBRP9
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:557
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...] c
Definition: undefined.txt:32
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:766
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
cpu.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:578
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:532
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:511
offset
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it [...] vf offset
Definition: writing_filters.txt:86
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:53
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
Definition: swscale.c:485
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
Definition: swscale.c:60
AV_PIX_FMT_GBRP12
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:559
DECLARE_ASM_CONST
DECLARE_ASM_CONST(16, double, pd_1)[2]
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:371
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:527
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:97
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:372
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_RGB565
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:526
SwsInternal
Definition: swscale_internal.h:335
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:64
AV_PIX_FMT_GRAYF32BE
@ AV_PIX_FMT_GRAYF32BE
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:363
SWS_FULL_CHR_H_INT
@ SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:133
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:62
U
#define U(x)
Definition: vpx_arith.h:37
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:579
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1108
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:271
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:176
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:156
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:59
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:244
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:72
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:261
src
#define src
Definition: vp8dsp.c:248
swscale.h
X86_MMXEXT
#define X86_MMXEXT(flags)
Definition: cpu.h:26
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33