Fix invalid memory access and optimise Blit_3or4_to_3or4__*

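Fix an invalid write at the last pixel of the surface:
  it occurs when the surface has no padding (pitch == w * bpp) and bpp is 3,
  for a blit with no colorkey in the NO_ALPHA case, with the same or the
  inversed RGB triplet order: the 32-bit store used for each 3-byte pixel
  writes past the end of the last pixel.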

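Optimise by using 32-bit (Uint32) loads and stores instead of per-byte access
(the last line is processed separately, with byte accesses, to stay in bounds):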

BGR24 -> ARGB8888 :  faster x1.897875   (362405 -> 190953)
RGB24 -> ABGR8888 :  faster x1.660416   (363304 -> 218803)

ABGR8888 -> RGB24 :  faster x1.686319   (334962 -> 198635)
ARGB8888 -> BGR24 :  faster x1.691868   (324524 -> 191814)
BGR24 -> RGB888 :  faster x1.678459   (326811 -> 194709)
BGR888 -> RGB24 :  faster x1.731772   (327724 -> 189242)
RGB24 -> BGR888 :  faster x1.690989   (328916 -> 194511)
RGB888 -> BGR24 :  faster x1.698333   (326175 -> 192056)
This commit is contained in:
Sylvain Becker 2019-02-17 16:20:23 +01:00
parent 1aa2ad2fe8
commit afd1b3dae4
1 changed file with 66 additions and 12 deletions

View File

@ -2938,15 +2938,54 @@ Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
if (dstfmt->Amask) { if (dstfmt->Amask) {
/* SET_ALPHA */ /* SET_ALPHA */
Uint32 mask = info->a << dstfmt->Ashift; Uint32 mask = info->a << dstfmt->Ashift;
int last_line = 0;
if (srcbpp == 3 && height) {
height -= 1;
last_line = 1;
}
while (height--) { while (height--) {
/* *INDENT-OFF* */ /* *INDENT-OFF* */
DUFFS_LOOP( DUFFS_LOOP(
{ {
Uint32 *dst32 = (Uint32*)dst;
Uint32 *src32 = (Uint32*)src;
*dst32 = *src32 | mask;
dst += 4;
src += srcbpp;
}, width);
/* *INDENT-ON* */
src += srcskip;
dst += dstskip;
}
if (last_line) {
while (width--) {
Uint32 *dst32 = (Uint32*)dst; Uint32 *dst32 = (Uint32*)dst;
Uint8 s0 = src[0]; Uint8 s0 = src[0];
Uint8 s1 = src[1]; Uint8 s1 = src[1];
Uint8 s2 = src[2]; Uint8 s2 = src[2];
*dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask; *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
dst += 4;
src += srcbpp;
}
}
} else {
/* NO_ALPHA */
int mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
int last_line = 0;
if ((dstbpp == 3 || srcbpp == 3) && height) {
height -= 1;
last_line = 1;
}
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
Uint32 *dst32 = (Uint32*)dst;
Uint32 *src32 = (Uint32*)src;
*dst32 = *src32 & mask;
dst += dstbpp; dst += dstbpp;
src += srcbpp; src += srcbpp;
}, width); }, width);
@ -2954,23 +2993,18 @@ Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
src += srcskip; src += srcskip;
dst += dstskip; dst += dstskip;
} }
} else {
/* NO_ALPHA */ if (last_line) {
while (height--) { while (width--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
Uint32 *dst32 = (Uint32*)dst;
Uint8 s0 = src[0]; Uint8 s0 = src[0];
Uint8 s1 = src[1]; Uint8 s1 = src[1];
Uint8 s2 = src[2]; Uint8 s2 = src[2];
*dst32 = (s0) | (s1 << 8) | (s2 << 16); dst[0] = s0;
dst[1] = s1;
dst[2] = s2;
dst += dstbpp; dst += dstbpp;
src += srcbpp; src += srcbpp;
}, width); }
/* *INDENT-ON* */
src += srcskip;
dst += dstskip;
} }
} }
return; return;
@ -3036,6 +3070,12 @@ Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
} }
} else { } else {
/* NO_ALPHA */ /* NO_ALPHA */
int last_line = 0;
if (dstbpp == 3 && height) {
height -= 1;
last_line = 1;
}
while (height--) { while (height--) {
/* *INDENT-OFF* */ /* *INDENT-OFF* */
DUFFS_LOOP( DUFFS_LOOP(
@ -3053,6 +3093,20 @@ Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
src += srcskip; src += srcskip;
dst += dstskip; dst += dstskip;
} }
if (last_line) {
while (width--) {
Uint8 s0 = src[0];
Uint8 s1 = src[1];
Uint8 s2 = src[2];
/* inversed, compared to Blit_3or4_to_3or4__same_rgb */
dst[0] = s2;
dst[1] = s1;
dst[2] = s0;
dst += dstbpp;
src += srcbpp;
}
}
} }
return; return;
} }