21 #include "../SDL_internal.h"
25 #ifdef USE_MMX_ASSEMBLY
/*
 * 64-bit constants passed to the inline assembly below as "m" (memory)
 * operands.  Each one is initialised through the .ud member (two 32-bit
 * values) or the .uw member (four 16-bit values) of mmx_t; the mmx_t type
 * itself is declared outside this excerpt.  Every constant repeats a single
 * 16-bit pattern in all four word lanes.
 */

/* 0x0080 (128) in every 16-bit lane. */
33 static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
/* 0x00FF in every 16-bit lane: low byte set, high byte clear. */
34 static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
/* 0xFF00 in every 16-bit lane: high byte set, low byte clear. */
35 static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
/*
 * 0x004A (74) in every lane.  Listed as an operand of
 * Color565DitherYV12MMX1X; presumably the luma scale factor, judging by
 * the name -- confirm against the full source.
 */
37 static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
/*
 * Per-lane multipliers whose names suggest the chroma (U/V) contributions
 * to blue, red and green for the 32-bit RGB converter.  0xFFEA and 0xFFD2
 * are -22 and -46 when read as signed 16-bit values.
 * NOTE(review): the channel meaning is inferred from the identifiers only.
 */
39 static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
40 static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
41 static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
42 static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
/*
 * Counterparts of the multipliers above that appear in the operand list of
 * Color565DitherYV12MMX1X.  0xFFE8 and 0xFFCD are -24 and -51 when read as
 * signed 16-bit values.
 */
44 static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
45 static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
46 static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
47 static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
/*
 * Bit masks replicated in every lane: 0xF800 has the top five bits of a
 * 16-bit word set, 0x07E0 has the six bits below them set.
 */
49 static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
50 static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
/*
 * ColorRGBDitherYV12MMX1X
 *
 * Inline-MMX routine that reads from the lum, cr and cb byte buffers and
 * works with Uint32 output rows derived from out.
 *
 * Parameters as far as the visible lines show:
 *   colortab, rgb_2_pix - not referenced in the visible portion.
 *   lum, cr, cb         - source byte buffers, handed to the asm in
 *                         registers.
 *   out                 - destination buffer, addressed as Uint32.
 *   rows, cols          - used to compute the end pointer y and the
 *                         second-row pointer.
 *   mod                 - extra element count added when locating row2;
 *                         rewritten below into a byte count.
 *
 * NOTE(review): this excerpt is incomplete.  The function braces, the
 * declarations of row1, row2 and x, many asm instructions, the asm
 * output/clobber sections and the end of the operand list are not visible
 * here.
 */
81 void ColorRGBDitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
82 unsigned char *lum,
unsigned char *cr,
83 unsigned char *cb,
unsigned char *out,
84 int rows,
int cols,
int mod )
/* y points cols*rows bytes past the start of lum. */
89 unsigned char*
y = lum +cols*rows;
/*
 * The cast applies to out before the additions, so row2 lies cols+mod
 * Uint32 elements after out.
 */
92 row2 = (
Uint32 *)out+cols+mod;
/* mod becomes (2*mod + cols) * 4, i.e. a count scaled by sizeof(Uint32). */
93 mod = (mod+cols+mod)*4;
95 __asm__ __volatile__ (
/*
 * punpcklbw interleaves the low four bytes of the destination with those
 * of the source; punpckldq x,x copies the low doubleword into both halves.
 */
103 "punpcklbw %%mm7,%%mm1\n"
104 "punpckldq %%mm1,%%mm1\n"
/* Loads 8 bytes from the address %2 + %4 (lum + cols per the list below). */
116 "movq (%2,%4),%%mm3\n"
117 "punpckldq %%mm3,%%mm2\n"
/*
 * paddsw adds 16-bit lanes with signed saturation; packuswb then narrows
 * the words to bytes with unsigned saturation.
 */
126 "paddsw %%mm1, %%mm5\n"
127 "paddsw %%mm1, %%mm6\n"
128 "packuswb %%mm5,%%mm5\n"
129 "packuswb %%mm6,%%mm6\n"
131 "punpcklbw %%mm5,%%mm6\n"
135 "punpcklbw %%mm7,%%mm1\n"
136 "punpckldq %%mm1,%%mm1\n"
149 "paddsw %%mm5, %%mm3\n"
150 "paddsw %%mm5, %%mm7\n"
151 "paddsw %%mm0, %%mm3\n"
152 "paddsw %%mm0, %%mm7\n"
153 "packuswb %%mm3,%%mm3\n"
154 "packuswb %%mm7,%%mm7\n"
155 "punpcklbw %%mm3,%%mm7\n"
160 "paddsw %%mm1, %%mm3\n"
161 "paddsw %%mm1, %%mm5\n"
162 "packuswb %%mm3,%%mm3\n"
163 "packuswb %%mm5,%%mm5\n"
164 "punpcklbw %%mm3,%%mm5\n"
/*
 * Byte- and word-level interleaves (punpck{l,h}bw / punpck{l,h}wd).
 * NOTE(review): presumably these assemble the per-channel bytes into
 * 32-bit pixels; the stores that would confirm this are not visible.
 */
174 "punpcklbw %%mm4,%%mm1\n"
175 "punpcklbw %%mm4,%%mm3\n"
178 "punpcklwd %%mm1,%%mm3\n"
179 "punpckhwd %%mm2,%%mm0\n"
183 "punpcklbw %%mm1,%%mm2\n"
184 "punpcklwd %%mm4,%%mm2\n"
189 "punpcklbw %%mm1,%%mm4\n"
190 "punpckhwd %%mm2,%%mm4\n"
197 "punpckhbw %%mm2,%%mm6\n"
198 "punpckhbw %%mm1,%%mm5\n"
200 "punpcklwd %%mm6,%%mm1\n"
202 "punpckhwd %%mm6,%%mm5\n"
/*
 * Input operands.  Assuming no output operands precede this list (that
 * section is not visible -- TODO confirm), the numbering is:
 *   %0 cr, %1 cb, %2 lum, %3 row1, %4 cols, %5 row2, %6 x, %7 y, %8 mod,
 *   %9 MMX_0080w, %10 MMX_VgrnRGB, %11 MMX_VredRGB, %12 MMX_FF00w,
 *   %13 MMX_00FFw, %14 MMX_UgrnRGB.
 * The list ends on a trailing comma, so at least one further operand and
 * the closing of the asm statement are missing from this excerpt.
 */
224 :
"r" (cr),
"r"(cb),
"r"(lum),
225 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
226 "m"(MMX_0080w),
"m"(MMX_VgrnRGB),
"m"(MMX_VredRGB),
227 "m"(MMX_FF00w),
"m"(MMX_00FFw),
"m"(MMX_UgrnRGB),
/*
 * Color565DitherYV12MMX1X
 *
 * Inline-MMX routine that reads from the lum, cr and cb byte buffers and
 * stores into two Uint16 output rows (row1 and row2).
 *
 * Parameters as far as the visible lines show:
 *   colortab, rgb_2_pix - not referenced in the visible portion.
 *   lum, cr, cb         - source byte buffers, handed to the asm in
 *                         registers.
 *   out                 - destination buffer, addressed as Uint16.
 *   rows, cols          - used to compute the end pointer y and the
 *                         second-row pointer.
 *   mod                 - extra element count added when locating row2;
 *                         rewritten below into a byte count.
 *
 * NOTE(review): this excerpt is incomplete.  The function braces, the
 * declarations of row1, row2 and x, many asm instructions and the asm
 * output/clobber sections are not visible here.
 */
232 void Color565DitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
233 unsigned char *lum,
unsigned char *cr,
234 unsigned char *cb,
unsigned char *out,
235 int rows,
int cols,
int mod )
/* y points cols*rows bytes past the start of lum. */
240 unsigned char* y = lum +cols*rows;
/*
 * The cast applies to out before the additions, so row2 lies cols+mod
 * Uint16 elements after out.
 */
243 row2 = (
Uint16 *)out+cols+mod;
/* mod becomes (2*mod + cols) * 2, i.e. a count scaled by sizeof(Uint16). */
244 mod = (mod+cols+mod)*2;
246 __asm__ __volatile__(
/* Clear mm7 so it can serve as the zero source for byte-to-word widening. */
251 "pxor %%mm7, %%mm7\n"
/* Zero-extend the low four bytes of mm0 and mm1 into 16-bit lanes. */
254 "punpcklbw %%mm7, %%mm0\n"
255 "punpcklbw %%mm7, %%mm1\n"
/*
 * Keep copies in mm2/mm3, then scale all four registers by the constant
 * operands.  With the operand numbering noted at the input list:
 *   mm2 *= MMX_Ugrn565 (%10)   mm0 *= MMX_Ublu5x5 (%11)
 *   mm3 *= MMX_Vgrn565 (%13)   mm1 *= MMX_Vred5x5 (%14)
 * pmullw keeps the low 16 bits of each product.
 */
258 "movq %%mm0, %%mm2\n"
259 "movq %%mm1, %%mm3\n"
260 "pmullw %10, %%mm2\n"
262 "pmullw %11, %%mm0\n"
264 "pmullw %13, %%mm3\n"
266 "pmullw %14, %%mm1\n"
/* mm6 and mm7 are scaled by %15 (MMX_Ycoeff); mm2 += mm3 merges the two
 * products that were multiplied by the *grn565 constants. */
268 "pmullw %15, %%mm6\n"
269 "paddw %%mm3, %%mm2\n"
270 "pmullw %15, %%mm7\n"
/*
 * From the scaled mm6 value form three sums:
 *   mm6 = scaled + mm0,  mm4 = scaled + mm1,  mm5 = scaled + mm2.
 */
272 "movq %%mm6, %%mm4\n"
273 "paddw %%mm0, %%mm6\n"
274 "movq %%mm4, %%mm5\n"
275 "paddw %%mm1, %%mm4\n"
276 "paddw %%mm2, %%mm5\n"
278 "movq %%mm7, %%mm3\n"
280 "paddw %%mm0, %%mm7\n"
/* Narrow words to bytes with unsigned saturation, then duplicate each
 * byte into both halves of a 16-bit lane (punpcklbw x,x). */
282 "packuswb %%mm4, %%mm4\n"
283 "packuswb %%mm5, %%mm5\n"
284 "packuswb %%mm6, %%mm6\n"
285 "punpcklbw %%mm4, %%mm4\n"
286 "punpcklbw %%mm5, %%mm5\n"
290 "punpcklbw %%mm6, %%mm6\n"
/* Same three-sum pattern for the value saved in mm3 (copied from mm7). */
295 "movq %%mm3, %%mm5\n"
296 "paddw %%mm1, %%mm3\n"
297 "paddw %%mm2, %%mm5\n"
/* Load 8 bytes from lum + cols (operands %2 and %4). */
301 "movq (%2, %4), %%mm6\n"
303 "packuswb %%mm3, %%mm3\n"
304 "packuswb %%mm5, %%mm5\n"
305 "packuswb %%mm7, %%mm7\n"
307 "punpcklbw %%mm3, %%mm3\n"
308 "punpcklbw %%mm5, %%mm5\n"
309 "pmullw %15, %%mm6\n"
310 "punpcklbw %%mm7, %%mm7\n"
317 "movq (%2,%4), %%mm7\n"
/* Interleave the words of mm4 and mm3 into low/high halves. */
320 "movq %%mm4, %%mm5\n"
321 "punpcklwd %%mm3, %%mm4\n"
322 "pmullw %15, %%mm7\n"
323 "punpckhwd %%mm3, %%mm5\n"
/* Store the high half 8 bytes into row1 (operand %3). */
326 "movq %%mm5, 8(%3)\n"
/* Repeat the sum/pack/duplicate sequence for the data loaded from
 * lum + cols; mm0 is reused as scratch from "movq %%mm3, %%mm0" onwards. */
328 "movq %%mm6, %%mm4\n"
329 "paddw %%mm0, %%mm6\n"
331 "movq %%mm4, %%mm5\n"
332 "paddw %%mm1, %%mm4\n"
333 "paddw %%mm2, %%mm5\n"
335 "movq %%mm7, %%mm3\n"
337 "paddw %%mm0, %%mm7\n"
339 "movq %%mm3, %%mm0\n"
340 "packuswb %%mm4, %%mm4\n"
341 "paddw %%mm1, %%mm3\n"
342 "packuswb %%mm5, %%mm5\n"
343 "paddw %%mm2, %%mm0\n"
344 "packuswb %%mm6, %%mm6\n"
345 "punpcklbw %%mm4, %%mm4\n"
346 "punpcklbw %%mm5, %%mm5\n"
347 "punpcklbw %%mm6, %%mm6\n"
358 "packuswb %%mm3, %%mm3\n"
359 "packuswb %%mm0, %%mm0\n"
360 "packuswb %%mm7, %%mm7\n"
361 "punpcklbw %%mm3, %%mm3\n"
362 "punpcklbw %%mm0, %%mm0\n"
363 "punpcklbw %%mm7, %%mm7\n"
372 "movq %%mm4, %%mm5\n"
374 "punpcklwd %%mm3, %%mm4\n"
375 "punpckhwd %%mm3, %%mm5\n"
/* Store the high half 8 bytes into row2 (operand %5). */
378 "movq %%mm5, 8(%5)\n"
/*
 * Input operands.  Assuming no output operands precede this list (that
 * section is not visible -- TODO confirm), the numbering is:
 *   %0 cr, %1 cb, %2 lum, %3 row1, %4 cols, %5 row2, %6 x, %7 y, %8 mod,
 *   %9 MMX_0080w, %10 MMX_Ugrn565, %11 MMX_Ublu5x5, %12 MMX_00FFw,
 *   %13 MMX_Vgrn565, %14 MMX_Vred5x5, %15 MMX_Ycoeff, %16 MMX_red565,
 *   %17 MMX_grn565.
 * This agrees with the visible addressing: (%2,%4) reads relative to lum
 * and cols, while 8(%3) and 8(%5) write into row1 and row2.
 */
397 :
"r" (cr),
"r"(cb),
"r"(lum),
398 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
399 "m"(MMX_0080w),
"m"(MMX_Ugrn565),
"m"(MMX_Ublu5x5),
400 "m"(MMX_00FFw),
"m"(MMX_Vgrn565),
"m"(MMX_Vred5x5),
401 "m"(MMX_Ycoeff),
"m"(MMX_red565),
"m"(MMX_grn565)
GLint GLint GLint GLint GLint x
GLint GLint GLint GLint GLint GLint y