From: Tony Cook <tony@develop-help.com>
Date: Sun, 24 Oct 2010 00:52:33 +0000 (+0000)
Subject: the convert() method now optimizes the case where all output
X-Git-Tag: Imager-0.79~21
X-Git-Url: http://git.imager.perl.org/imager.git/commitdiff_plain/2a2c791feae353656d92993ca11a2d795fb0ac14

the convert() method now optimizes the case where all output
   channels are either 0, sourced from a single input channel or 1.
   This significantly speeds up presets like "addalpha", "green".
   https://rt.cpan.org/Ticket/Display.html?id=51254
---

diff --git a/Changes b/Changes
index 8ded547f..c63250ca 100644
--- a/Changes
+++ b/Changes
@@ -6,6 +6,11 @@ Imager 0.79 - unreleased
  - add Imager::Test to the POD coverage tests and document the missing
    functions.
 
+ - the convert() method now optimizes the case where all output
+   channels are either 0, sourced from a single input channel or 1.
+   This significantly speeds up presets like "addalpha", "green".
+   https://rt.cpan.org/Ticket/Display.html?id=51254
+
 Bug fixes:
 
  - treat the co-efficients for convert() as doubles instead of floats.
diff --git a/bench/convert.pl b/bench/convert.pl
index 71e51f53..47a8ba85 100644
--- a/bench/convert.pl
+++ b/bench/convert.pl
@@ -49,8 +49,15 @@ __END__
 
 Original:
 
-addalpha: 0.9 /s (1.082000 / iter)
-gray: 3.3 /s (0.303529 / iter)
-green: 4.1 /s (0.244286 / iter)
-noalpha: 1.1 /s (0.876667 / iter)
+addalpha: 1.3 /s (0.797143 / iter)
+gray: 4.3 /s (0.233636 / iter)
+green: 4.9 /s (0.205600 / iter)
+noalpha: 1.6 /s (0.608889 / iter)
+
+convert_via_copy:
+
+addalpha: 4.9 /s (0.205600 / iter)
+gray: 4.2 /s (0.235909 / iter)
+green: 8.6 /s (0.115682 / iter)
+noalpha: 5.4 /s (0.185556 / iter)
 
diff --git a/convert.im b/convert.im
index ada2f266..fce2b2b1 100644
--- a/convert.im
+++ b/convert.im
@@ -19,6 +19,28 @@ converting from RGBA to greyscale and back.
 
 #include "imager.h"
 
+struct chan_copy { /* per-output-channel plan: filled in by is_channel_copy(), read by convert_via_copy() */
+  /* channels to copy: output channel to[k] takes its value from input channel from[k] */
+  int copy_count; /* number of entries used in from[] and to[] */
+  int from[MAXCHANNELS];
+  int to[MAXCHANNELS];
+
+  /* channels to zero: output channels written with 0 */
+  int zero_count; /* number of entries used in zero[] */
+  int zero[MAXCHANNELS];
+
+  /* channels to set to maxsample: output channels written with IM_SAMPLE_MAX */
+  int one_count; /* number of entries used in one[] */
+  int one[MAXCHANNELS];
+};
+
+static int 
+is_channel_copy(i_img *im, const double *coeff, 
+		int outchan, int inchan, 
+		struct chan_copy *info); /* returns 1 and fills *info when coeff only copies, zeroes or maxes channels; otherwise returns 0 */
+
+static i_img *
+convert_via_copy(i_img *im, i_img *src, struct chan_copy *info); /* applies *info to every row of src, writes the rows to im, returns im */
 
 /*
 =item i_convert(src, coeff, outchan, inchan)
@@ -42,10 +64,10 @@ Now handles images with more than 8-bits/sample.
 
 i_img *
 i_convert(i_img *src, const double *coeff, int outchan, int inchan) {
+  double work[MAXCHANNELS];
   int x, y;
   int i, j;
   int ilimit;
-  double work[MAXCHANNELS];
   i_img *im = NULL;
 
   mm_log((1,"i_convert(src %p, coeff %p,outchan %d, inchan %d)\n",im,src, coeff,outchan, inchan));
@@ -61,37 +83,44 @@ i_convert(i_img *src, const double *coeff, int outchan, int inchan) {
   }
 
   if (src->type == i_direct_type) {
+    struct chan_copy info;
     im = i_sametype_chans(src, src->xsize, src->ysize, outchan);
-#code src->bits <= i_8_bits
-    IM_COLOR *vals;
     
-    /* we can always allocate a single scanline of i_color */
-    vals = mymalloc(sizeof(IM_COLOR) * src->xsize); /* checked 04Jul05 tonyc */
-    for (y = 0; y < src->ysize; ++y) {
-      IM_GLIN(src, 0, src->xsize, y, vals);
-      for (x = 0; x < src->xsize; ++x) {
-	for (j = 0; j < outchan; ++j) {
-	  work[j] = 0;
-	  for (i = 0; i < ilimit; ++i) {
-	    work[j] += coeff[i+inchan*j] * vals[x].channel[i];
+    if (is_channel_copy(src, coeff, outchan, inchan, &info)) {
+      return convert_via_copy(im, src, &info);
+    }
+    else {
+#code src->bits <= i_8_bits
+      IM_COLOR *vals;
+      
+      /* we can always allocate a single scanline of i_color */
+      vals = mymalloc(sizeof(IM_COLOR) * src->xsize); /* checked 04Jul05 tonyc */
+      for (y = 0; y < src->ysize; ++y) {
+	IM_GLIN(src, 0, src->xsize, y, vals);
+	for (x = 0; x < src->xsize; ++x) {
+	  for (j = 0; j < outchan; ++j) {
+	    work[j] = 0;
+	    for (i = 0; i < ilimit; ++i) {
+	      work[j] += coeff[i+inchan*j] * vals[x].channel[i];
+	    }
+	    if (i < inchan) {
+	      work[j] += coeff[i+inchan*j] * IM_SAMPLE_MAX;
+	    }
 	  }
-	  if (i < inchan) {
-	    work[j] += coeff[i+inchan*j] * IM_SAMPLE_MAX;
+	  for (j = 0; j < outchan; ++j) {
+	    if (work[j] < 0)
+	      vals[x].channel[j] = 0;
+	    else if (work[j] >= IM_SAMPLE_MAX)
+	      vals[x].channel[j] = IM_SAMPLE_MAX;
+	    else
+	      vals[x].channel[j] = work[j];
 	  }
 	}
-	for (j = 0; j < outchan; ++j) {
-	  if (work[j] < 0)
-	    vals[x].channel[j] = 0;
-	  else if (work[j] >= IM_SAMPLE_MAX)
-	    vals[x].channel[j] = IM_SAMPLE_MAX;
-	  else
-	    vals[x].channel[j] = work[j];
-	}
+	IM_PLIN(im, 0, src->xsize, y, vals);
       }
-      IM_PLIN(im, 0, src->xsize, y, vals);
-    }
-    myfree(vals);
+      myfree(vals);
 #/code
+    }
   }
   else {
     int count;
@@ -151,6 +180,139 @@ i_convert(i_img *src, const double *coeff, int outchan, int inchan) {
   return im;
 }
 
+/*
+=item is_channel_copy(im, coeff, outchan, inchan, chan_copy_info)
+
+Test if the coefficients represent just copying channels around (every coefficient examined is 0, or a lone exact 1.0 per output channel), and
+initialize lists of the channels to copy, zero or set to max.  Returns 1 with C<chan_copy_info> filled in, or 0 without writing to it.
+
+=cut
+*/
+
+static
+int is_channel_copy(i_img *im, const double *coeff, int outchan, int inchan, 
+		    struct chan_copy *info) {
+  int srcchan[MAXCHANNELS]; /* srcchan[j]: input channel copied to output j, -1 if none */
+  int onechan[MAXCHANNELS]; /* onechan[j]: 1 when output j belongs in info->one */
+  int i, j;
+  int ilimit = im->channels > inchan ? inchan : im->channels; /* min(im->channels, inchan) */
+
+  for (j = 0; j < outchan; ++j) { /* start with no source and no "one" for any output */
+    srcchan[j] = -1;
+    onechan[j] = 0;
+  }
+
+  for (j = 0; j < outchan; ++j) { /* row j of coeff starts at coeff[inchan*j] */
+    for (i = 0; i < ilimit; ++i) {
+      if (coeff[i+inchan*j] == 1.0) { /* exact comparison: only precisely 1.0 counts as a copy */
+	if (srcchan[j] != -1) {
+	  /* from two or more channels, not a copy */
+	  return 0;
+	}
+	srcchan[j] = i;
+      }
+      else if (coeff[i+inchan*j]) {
+	/* some other non-zero value, not a copy */
+	return 0;
+      }
+    }
+    if (i < inchan) { /* i == ilimit here: one coefficient past im's channels; i_convert's general loop scales it by IM_SAMPLE_MAX */
+      if (coeff[i+inchan*j] == 1.0) {
+	if (srcchan[j] != -1) {
+	  /* can't do both */
+	  return 0;
+	}
+	onechan[j] = 1;
+      }
+      else if (coeff[i+inchan*j]) {
+	/* some other non-zero value, not a copy */
+	return 0;
+      }
+    }
+  }
+
+  /* build our working data structures: each output channel goes in exactly one list */
+  info->copy_count = info->zero_count = info->one_count = 0;
+  for (j = 0; j < outchan; ++j) {
+    if (srcchan[j] != -1) {
+      info->from[info->copy_count] = srcchan[j];
+      info->to[info->copy_count] = j;
+      ++info->copy_count;
+    }
+    else if (onechan[j]) {
+      info->one[info->one_count] = j;
+      ++info->one_count;
+    }
+    else { /* neither copied nor set to max: zero it */
+      info->zero[info->zero_count] = j;
+      ++info->zero_count;
+    }
+  }
+
+#if 0
+  { /* disabled debug dump of the three lists */
+    for (i = 0; i < info->copy_count; ++i) {
+      printf("From %d to %d\n", info->from[i], info->to[i]);
+    }
+    for (i = 0; i < info->one_count; ++i) {
+      printf("One %d\n", info->one[i]);
+    }
+    for (i = 0; i < info->zero_count; ++i) {
+      printf("Zero %d\n", info->zero[i]);
+    }
+    fflush(stdout);
+  }
+#endif
+
+  return 1;
+}
+
+/*
+=item convert_via_copy(im, src, chan_copy_info)
+
+Perform a convert that only requires channel copies: for each row of C<src>, build an output row as described by C<chan_copy_info> and write it to C<im>.  Returns C<im>.
+
+=cut
+*/
+
+i_img *
+convert_via_copy(i_img *im, i_img *src, struct chan_copy *info) { /* NOTE(review): the forward declaration is static; this definition omits the keyword */
+#code src->bits <= i_8_bits
+  IM_COLOR *in_line = mymalloc(sizeof(IM_COLOR) * src->xsize); /* one row of src pixels; NOTE(review): IM_COLOR, IM_GLIN etc. presumably depend on the enclosing #code condition - confirm */
+  IM_COLOR *out_line = mymalloc(sizeof(IM_COLOR) * src->xsize); /* one row of output pixels, src->xsize wide */
+  i_img_dim x, y;
+  int i;
+  IM_COLOR *inp, *outp; /* cursors walking in_line and out_line in step */
+
+  for (y = 0; y < src->ysize; ++y) {
+    IM_GLIN(src, 0, src->xsize, y, in_line); /* presumably fetches row y of src into in_line */
+
+    inp = in_line;
+    outp = out_line;
+    for (x = 0; x < src->xsize; ++x) {
+      for (i = 0; i < info->copy_count; ++i) { /* copied channels: to[i] <- from[i] */
+	outp->channel[info->to[i]] = inp->channel[info->from[i]];
+      }
+      for (i = 0; i < info->one_count; ++i) { /* channels forced to the maximum sample */
+	outp->channel[info->one[i]] = IM_SAMPLE_MAX;
+      }
+      for (i = 0; i < info->zero_count; ++i) { /* channels forced to zero */
+	outp->channel[info->zero[i]] = 0;
+      }
+      ++inp;
+      ++outp;
+    }
+    
+    IM_PLIN(im, 0, src->xsize, y, out_line); /* presumably stores out_line as row y of im */
+  }
+  
+  myfree(in_line);
+  myfree(out_line);
+#/code
+      
+  return im;
+}
+
 /*
 =back