|
|
|
@ -158,7 +158,7 @@ static void inline idct_col (int16_t * const block)
|
|
|
|
|
|
|
|
|
|
void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
|
|
|
|
|
{
|
|
|
|
|
uint64_t clamptqmask;
|
|
|
|
|
uint64_t clampmask;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 8; i++)
|
|
|
|
@ -167,18 +167,18 @@ void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
|
|
|
|
|
for (i = 0; i < 8; i++)
|
|
|
|
|
idct_col (block + i);
|
|
|
|
|
|
|
|
|
|
clamptqmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */
|
|
|
|
|
clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */
|
|
|
|
|
do {
|
|
|
|
|
uint64_t shorts0, shorts1;
|
|
|
|
|
|
|
|
|
|
shorts0 = ldq (block);
|
|
|
|
|
shorts0 = maxsw4 (shorts0, 0);
|
|
|
|
|
shorts0 = minsw4 (shorts0, clamptqmask);
|
|
|
|
|
shorts0 = minsw4 (shorts0, clampmask);
|
|
|
|
|
stl (pkwb (shorts0), dest);
|
|
|
|
|
|
|
|
|
|
shorts1 = ldq (block + 4);
|
|
|
|
|
shorts1 = maxsw4 (shorts1, 0);
|
|
|
|
|
shorts1 = minsw4 (shorts1, clamptqmask);
|
|
|
|
|
shorts1 = minsw4 (shorts1, clampmask);
|
|
|
|
|
stl (pkwb (shorts1), dest + 4);
|
|
|
|
|
|
|
|
|
|
stq (0, block);
|
|
|
|
@ -192,8 +192,8 @@ void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
|
|
|
|
|
void mpeg2_idct_add_mvi (const int last, int16_t * block,
|
|
|
|
|
uint8_t * dest, const int stride)
|
|
|
|
|
{
|
|
|
|
|
uint64_t clamptqmask;
|
|
|
|
|
uint64_t signtqmask;
|
|
|
|
|
uint64_t clampmask;
|
|
|
|
|
uint64_t signmask;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
|
|
|
|
@ -201,9 +201,9 @@ void mpeg2_idct_add_mvi (const int last, int16_t * block,
|
|
|
|
|
idct_row (block + 8 * i);
|
|
|
|
|
for (i = 0; i < 8; i++)
|
|
|
|
|
idct_col (block + i);
|
|
|
|
|
clamptqmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */
|
|
|
|
|
signtqmask = zap (-1, 0x33);
|
|
|
|
|
signtqmask ^= signtqmask >> 1; /* 0x8000800080008000 */
|
|
|
|
|
clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */
|
|
|
|
|
signmask = zap (-1, 0x33);
|
|
|
|
|
signmask ^= signmask >> 1; /* 0x8000800080008000 */
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
uint64_t shorts0, pix0, signs0;
|
|
|
|
@ -214,22 +214,22 @@ void mpeg2_idct_add_mvi (const int last, int16_t * block,
|
|
|
|
|
|
|
|
|
|
pix0 = unpkbw (ldl (dest));
|
|
|
|
|
/* signed subword add (MMX paddw). */
|
|
|
|
|
signs0 = shorts0 & signtqmask;
|
|
|
|
|
shorts0 &= ~signtqmask;
|
|
|
|
|
signs0 = shorts0 & signmask;
|
|
|
|
|
shorts0 &= ~signmask;
|
|
|
|
|
shorts0 += pix0;
|
|
|
|
|
shorts0 ^= signs0;
|
|
|
|
|
/* clamp. */
|
|
|
|
|
shorts0 = maxsw4 (shorts0, 0);
|
|
|
|
|
shorts0 = minsw4 (shorts0, clamptqmask);
|
|
|
|
|
shorts0 = minsw4 (shorts0, clampmask);
|
|
|
|
|
|
|
|
|
|
/* next 4. */
|
|
|
|
|
pix1 = unpkbw (ldl (dest + 4));
|
|
|
|
|
signs1 = shorts1 & signtqmask;
|
|
|
|
|
shorts1 &= ~signtqmask;
|
|
|
|
|
signs1 = shorts1 & signmask;
|
|
|
|
|
shorts1 &= ~signmask;
|
|
|
|
|
shorts1 += pix1;
|
|
|
|
|
shorts1 ^= signs1;
|
|
|
|
|
shorts1 = maxsw4 (shorts1, 0);
|
|
|
|
|
shorts1 = minsw4 (shorts1, clamptqmask);
|
|
|
|
|
shorts1 = minsw4 (shorts1, clampmask);
|
|
|
|
|
|
|
|
|
|
stl (pkwb (shorts0), dest);
|
|
|
|
|
stl (pkwb (shorts1), dest + 4);
|
|
|
|
|