From: Tony Cook Date: Sun, 24 Oct 2010 00:52:33 +0000 (+0000) Subject: the convert() method now optimizes the case where all output X-Git-Tag: Imager-0.79~21 X-Git-Url: http://git.imager.perl.org/imager.git/commitdiff_plain/2a2c791feae353656d92993ca11a2d795fb0ac14 the convert() method now optimizes the case where all output channels are either 0, sourced from a single input channel or 1. This significantly speeds up presets like "addalpha", "green". https://rt.cpan.org/Ticket/Display.html?id=51254 --- diff --git a/Changes b/Changes index 8ded547f..c63250ca 100644 --- a/Changes +++ b/Changes @@ -6,6 +6,11 @@ Imager 0.79 - unreleased - add Imager::Test to the POD coverage tests and document the missing functions. + - the convert() method now optimizes the case where all output + channels are either 0, sourced from a single input channel or 1. + This significantly speeds up presets like "addalpha", "green". + https://rt.cpan.org/Ticket/Display.html?id=51254 + Bug fixes: - treat the co-efficients for convert() as doubles instead of floats. diff --git a/bench/convert.pl b/bench/convert.pl index 71e51f53..47a8ba85 100644 --- a/bench/convert.pl +++ b/bench/convert.pl @@ -49,8 +49,15 @@ __END__ Original: -addalpha: 0.9 /s (1.082000 / iter) -gray: 3.3 /s (0.303529 / iter) -green: 4.1 /s (0.244286 / iter) -noalpha: 1.1 /s (0.876667 / iter) +addalpha: 1.3 /s (0.797143 / iter) +gray: 4.3 /s (0.233636 / iter) +green: 4.9 /s (0.205600 / iter) +noalpha: 1.6 /s (0.608889 / iter) + +convert_via_copy: + +addalpha: 4.9 /s (0.205600 / iter) +gray: 4.2 /s (0.235909 / iter) +green: 8.6 /s (0.115682 / iter) +noalpha: 5.4 /s (0.185556 / iter) diff --git a/convert.im b/convert.im index ada2f266..fce2b2b1 100644 --- a/convert.im +++ b/convert.im @@ -19,6 +19,28 @@ converting from RGBA to greyscale and back. 
#include "imager.h" +struct chan_copy { + /* channels to copy */ + int copy_count; + int from[MAXCHANNELS]; + int to[MAXCHANNELS]; + + /* channels to zero */ + int zero_count; + int zero[MAXCHANNELS]; + + /* channels to set to maxsample */ + int one_count; + int one[MAXCHANNELS]; +}; + +static int +is_channel_copy(i_img *im, const double *coeff, + int outchan, int inchan, + struct chan_copy *info); + +static i_img * +convert_via_copy(i_img *im, i_img *src, struct chan_copy *info); /* =item i_convert(src, coeff, outchan, inchan) @@ -42,10 +64,10 @@ Now handles images with more than 8-bits/sample. i_img * i_convert(i_img *src, const double *coeff, int outchan, int inchan) { + double work[MAXCHANNELS]; int x, y; int i, j; int ilimit; - double work[MAXCHANNELS]; i_img *im = NULL; mm_log((1,"i_convert(src %p, coeff %p,outchan %d, inchan %d)\n",im,src, coeff,outchan, inchan)); @@ -61,37 +83,44 @@ i_convert(i_img *src, const double *coeff, int outchan, int inchan) { } if (src->type == i_direct_type) { + struct chan_copy info; im = i_sametype_chans(src, src->xsize, src->ysize, outchan); -#code src->bits <= i_8_bits - IM_COLOR *vals; - /* we can always allocate a single scanline of i_color */ - vals = mymalloc(sizeof(IM_COLOR) * src->xsize); /* checked 04Jul05 tonyc */ - for (y = 0; y < src->ysize; ++y) { - IM_GLIN(src, 0, src->xsize, y, vals); - for (x = 0; x < src->xsize; ++x) { - for (j = 0; j < outchan; ++j) { - work[j] = 0; - for (i = 0; i < ilimit; ++i) { - work[j] += coeff[i+inchan*j] * vals[x].channel[i]; + if (is_channel_copy(src, coeff, outchan, inchan, &info)) { + return convert_via_copy(im, src, &info); + } + else { +#code src->bits <= i_8_bits + IM_COLOR *vals; + + /* we can always allocate a single scanline of i_color */ + vals = mymalloc(sizeof(IM_COLOR) * src->xsize); /* checked 04Jul05 tonyc */ + for (y = 0; y < src->ysize; ++y) { + IM_GLIN(src, 0, src->xsize, y, vals); + for (x = 0; x < src->xsize; ++x) { + for (j = 0; j < outchan; ++j) { + work[j] = 0; + 
for (i = 0; i < ilimit; ++i) { + work[j] += coeff[i+inchan*j] * vals[x].channel[i]; + } + if (i < inchan) { + work[j] += coeff[i+inchan*j] * IM_SAMPLE_MAX; + } } - if (i < inchan) { - work[j] += coeff[i+inchan*j] * IM_SAMPLE_MAX; + for (j = 0; j < outchan; ++j) { + if (work[j] < 0) + vals[x].channel[j] = 0; + else if (work[j] >= IM_SAMPLE_MAX) + vals[x].channel[j] = IM_SAMPLE_MAX; + else + vals[x].channel[j] = work[j]; } } - for (j = 0; j < outchan; ++j) { - if (work[j] < 0) - vals[x].channel[j] = 0; - else if (work[j] >= IM_SAMPLE_MAX) - vals[x].channel[j] = IM_SAMPLE_MAX; - else - vals[x].channel[j] = work[j]; - } + IM_PLIN(im, 0, src->xsize, y, vals); } - IM_PLIN(im, 0, src->xsize, y, vals); - } - myfree(vals); + myfree(vals); #/code + } } else { int count; @@ -151,6 +180,139 @@ i_convert(i_img *src, const double *coeff, int outchan, int inchan) { return im; } +/* +=item is_channel_copy(im, coeff, outchan, inchan, chan_copy_info) + +Test if the coefficients represent just copying channels around, and +initialize lists of the channels to copy, zero or set to max. + +=cut +*/ + +static +int is_channel_copy(i_img *im, const double *coeff, int outchan, int inchan, + struct chan_copy *info) { + int srcchan[MAXCHANNELS]; + int onechan[MAXCHANNELS]; + int i, j; + int ilimit = im->channels > inchan ? 
inchan : im->channels; + + for (j = 0; j < outchan; ++j) { + srcchan[j] = -1; + onechan[j] = 0; + } + + for (j = 0; j < outchan; ++j) { + for (i = 0; i < ilimit; ++i) { + if (coeff[i+inchan*j] == 1.0) { + if (srcchan[j] != -1) { + /* from two or more channels, not a copy */ + return 0; + } + srcchan[j] = i; + } + else if (coeff[i+inchan*j]) { + /* some other non-zero value, not a copy */ + return 0; + } + } + if (i < inchan) { + if (coeff[i+inchan*j] == 1.0) { + if (srcchan[j] != -1) { + /* can't do both */ + return 0; + } + onechan[j] = 1; + } + else if (coeff[i+inchan*j]) { + /* some other non-zero value, not a copy */ + return 0; + } + } + } + + /* build our working data structures */ + info->copy_count = info->zero_count = info->one_count = 0; + for (j = 0; j < outchan; ++j) { + if (srcchan[j] != -1) { + info->from[info->copy_count] = srcchan[j]; + info->to[info->copy_count] = j; + ++info->copy_count; + } + else if (onechan[j]) { + info->one[info->one_count] = j; + ++info->one_count; + } + else { + info->zero[info->zero_count] = j; + ++info->zero_count; + } + } + +#if 0 + { + for (i = 0; i < info->copy_count; ++i) { + printf("From %d to %d\n", info->from[i], info->to[i]); + } + for (i = 0; i < info->one_count; ++i) { + printf("One %d\n", info->one[i]); + } + for (i = 0; i < info->zero_count; ++i) { + printf("Zero %d\n", info->zero[i]); + } + fflush(stdout); + } +#endif + + return 1; +} + +/* +=item convert_via_copy(im, src, chan_copy_info) + +Perform a convert that only requires channel copies. 
+ +=cut +*/ + +i_img * +convert_via_copy(i_img *im, i_img *src, struct chan_copy *info) { +#code src->bits <= i_8_bits + IM_COLOR *in_line = mymalloc(sizeof(IM_COLOR) * src->xsize); + IM_COLOR *out_line = mymalloc(sizeof(IM_COLOR) * src->xsize); + i_img_dim x, y; + int i; + IM_COLOR *inp, *outp; + + for (y = 0; y < src->ysize; ++y) { + IM_GLIN(src, 0, src->xsize, y, in_line); + + inp = in_line; + outp = out_line; + for (x = 0; x < src->xsize; ++x) { + for (i = 0; i < info->copy_count; ++i) { + outp->channel[info->to[i]] = inp->channel[info->from[i]]; + } + for (i = 0; i < info->one_count; ++i) { + outp->channel[info->one[i]] = IM_SAMPLE_MAX; + } + for (i = 0; i < info->zero_count; ++i) { + outp->channel[info->zero[i]] = 0; + } + ++inp; + ++outp; + } + + IM_PLIN(im, 0, src->xsize, y, out_line); + } + + myfree(in_line); + myfree(out_line); +#/code + + return im; +} + /* =back