video: speed up SDL_FillRect[s] slightly (thanks, Jakub!).

"This patch does the following:

* Instead of SDL_FillRects calling SDL_FillRect in a loop the opposite
happens -- SDL_FillRect (a specific case) calls SDL_FillRects (a general case)
with a count of 1

* The switch/case block is moved out of the loop -- it modifies the color
once and stores the fill routine in a pointer which is then used throughout
the loop"

Fixes Bugzilla #4674.
This commit is contained in:
Ryan C. Gordon 2019-10-15 14:03:01 -04:00
parent e9c1e1269c
commit e23067eab2
1 changed files with 48 additions and 38 deletions

View File

@ -243,19 +243,8 @@ SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
return SDL_SetError("Passed NULL destination surface");
}
/* This function doesn't work on surfaces < 8 bpp */
if (dst->format->BitsPerPixel < 8) {
return SDL_SetError("SDL_FillRect(): Unsupported surface format");
}
/* If 'rect' == NULL, then fill the whole surface */
if (rect) {
/* Perform clipping */
if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
return 0;
}
rect = &clipped;
} else {
if (!rect) {
rect = &dst->clip_rect;
/* Don't attempt to fill if the surface's clip_rect is empty */
if (SDL_RectEmpty(rect)) {
@ -263,13 +252,36 @@ SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
}
}
return SDL_FillRects(dst, rect, 1, color);
}
int
SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
Uint32 color)
{
SDL_Rect clipped;
Uint8 *pixels;
const SDL_Rect* rect;
void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h);
int i;
if (!dst) {
return SDL_SetError("Passed NULL destination surface");
}
/* This function doesn't work on surfaces < 8 bpp */
if (dst->format->BitsPerPixel < 8) {
return SDL_SetError("SDL_FillRect(): Unsupported surface format");
}
/* Perform software fill */
if (!dst->pixels) {
return SDL_SetError("SDL_FillRect(): You must lock the surface");
}
pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
rect->x * dst->format->BytesPerPixel;
if (!rects) {
return SDL_SetError("SDL_FillRects() passed NULL rects");
}
switch (dst->format->BytesPerPixel) {
case 1:
@ -278,11 +290,11 @@ SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
color |= (color << 16);
#ifdef __SSE__
if (SDL_HasSSE()) {
SDL_FillRect1SSE(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect1SSE;
break;
}
#endif
SDL_FillRect1(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect1;
break;
}
@ -291,18 +303,18 @@ SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
color |= (color << 16);
#ifdef __SSE__
if (SDL_HasSSE()) {
SDL_FillRect2SSE(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect2SSE;
break;
}
#endif
SDL_FillRect2(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect2;
break;
}
case 3:
/* 24-bit RGB is a slow path, at least for now. */
{
SDL_FillRect3(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect3;
break;
}
@ -310,34 +322,32 @@ SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
{
#ifdef __SSE__
if (SDL_HasSSE()) {
SDL_FillRect4SSE(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect4SSE;
break;
}
#endif
SDL_FillRect4(pixels, dst->pitch, color, rect->w, rect->h);
fill_function = SDL_FillRect4;
break;
}
}
for (i = 0; i < count; ++i) {
rect = &rects[i];
/* Perform clipping */
if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
continue;
}
rect = &clipped;
pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
rect->x * dst->format->BytesPerPixel;
fill_function(pixels, dst->pitch, color, rect->w, rect->h);
}
/* We're done! */
return 0;
}
int
SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
Uint32 color)
{
int i;
int status = 0;
if (!rects) {
return SDL_SetError("SDL_FillRects() passed NULL rects");
}
for (i = 0; i < count; ++i) {
status += SDL_FillRect(dst, &rects[i], color);
}
return status;
}
/* vi: set ts=4 sw=4 expandtab: */