CxImage bicubic插值算法优化
CxImage是个好东西,开源,支持图片种类多,操作花样多,如果非要说缺点的话,那估计只有1个,效率低下,尤其某些图形处理算法效率更低,无任何优化。
这里说到的是CxImage的bicubic插值算法的优化。
Bicubic插值中,对于目标图像的每个点,需要其在源图像中对应点周围的16个点参与计算,因此计算量很大,并且都是浮点运算。
就优化而言,有多种途径,浮点转定点,浮点数转整数,sse等。这里说到的优化方法是浮点转定点,浮点数转整数。
基本思想是,用整数近似替代浮点数,精度是1/256,即将任一浮点数舍入到最接近的1/256的整数倍,然后预先计算bicubic函数的权重数据并存到表格里,bicubic插值时查表即可。
CxImage的原bicubic插值函数片段是
// float f_x, f_y, a, b, rr, gg, bb, r1, r2; // int32_t i_x, i_y, xx, yy; // RGBQUAD rgb; // uint8_t* iDst; // for(int32_t y=0; y<newy; y++){ // info.nProgress = (int32_t)(100*y/newy); // if (info.nEscape) break; // f_y = (float) y * yScale - 0.5f; // i_y = (int32_t) floor(f_y); // a = f_y - (float)floor(f_y); // for(int32_t x=0; x<newx; x++){ // f_x = (float) x * xScale - 0.5f; // i_x = (int32_t) floor(f_x); // b = f_x - (float)floor(f_x); // // rr = gg = bb = 0.0f; // for(int32_t m=-1; m<3; m++) { // r1 = KernelBSpline((float) m - a); // yy = i_y+m; // if (yy<0) yy=0; // if (yy>=head.biHeight) yy = head.biHeight-1; // for(int32_t n=-1; n<3; n++) { // r2 = r1 * KernelBSpline(b - (float)n); // xx = i_x+n; // if (xx<0) xx=0; // if (xx>=head.biWidth) xx=head.biWidth-1; // // if (head.biClrUsed){ // rgb = GetPixelColor(xx,yy); // } else { // iDst = info.pImage + yy*info.dwEffWidth + xx*3; // rgb.rgbBlue = *iDst++; // rgb.rgbGreen= *iDst++; // rgb.rgbRed = *iDst; // } // // rr += rgb.rgbRed * r2; // gg += rgb.rgbGreen * r2; // bb += rgb.rgbBlue * r2; // } // } // // if (head.biClrUsed) // newImage.SetPixelColor(x,y,RGB(rr,gg,bb)); // else { // iDst = newImage.info.pImage + y*newImage.info.dwEffWidth + x*3; // *iDst++ = (uint8_t)bb; // *iDst++ = (uint8_t)gg; // *iDst = (uint8_t)rr; // } // // } // } // break;
以下是经过初步优化的代码,其中s_BicubicTblX,s_BicubicTblY是两个预先计算好的权重表
//float f_x, f_y, a, b, rr, gg, bb, r1, r2; int32_t i_x, i_y, xx, yy, a, b, r_x, r_y; int32_t r1,r2; int32_t rr,gg,bb; uint8_t r_r,g_g,b_b; RGBQUAD rgb; uint8_t* iDst; uint8_t* iSrc; r_x = (int32_t)((float)(head.biWidth << 8)/((float)newx)+0.5); r_y = (int32_t)((float)(head.biHeight << 8)/((float)newy)+0.5); for(int32_t y=0; y<newy; y++){ info.nProgress = (int32_t)(100*y/newy); if (info.nEscape) break; i_y = (y*r_y) >> 8; a = (uint8_t)(y*r_y); for(int32_t x=0; x<newx; x++){ i_x = (x*r_x) >> 8; b = (uint8_t)(x*r_x); rr = gg = bb = 0; for(int32_t m=0; m<4; m++) { r1 = s_BicubicTblY[m][a]; yy = i_y+m; if (yy<0) yy=0; if (yy>=head.biHeight) yy = head.biHeight-1; for(int32_t n=0; n<4; n++) { r2 = s_BicubicTblX[n][b]; xx = i_x+n; if (xx<0) xx=0; if (xx>=head.biWidth) xx=head.biWidth-1; if (head.biClrUsed){ rgb = GetPixelColor(xx,yy); } else { iSrc = info.pImage + yy*info.dwEffWidth + xx*3; //memcpy(&rgb,iSrc,3); rgb.rgbBlue = *iSrc++; rgb.rgbGreen= *iSrc++; rgb.rgbRed = *iSrc; } rr += rgb.rgbRed * r1 * r2; gg += rgb.rgbGreen * r1 * r2; bb += rgb.rgbBlue * r1 * r2; } } bb = ((bb<=0)-1) & bb; gg = ((gg<=0)-1) & gg; rr = ((rr<=0)-1) & rr; if (head.biClrUsed) newImage.SetPixelColor(x,y,RGB(rr,gg,bb)); else { iDst = newImage.info.pImage + y*newImage.info.dwEffWidth + x*3; b_b = ((bb & 0xFFFF0000) >= 0xFF0000) ? (0xFF) : ((uint8_t)(bb>>16)); g_g = ((gg & 0xFFFF0000) >= 0xFF0000) ? (0xFF) : ((uint8_t)(gg>>16)); r_r = ((rr & 0xFFFF0000) >= 0xFF0000) ? (0xFF) : ((uint8_t)(rr>>16)); *iDst++ = (uint8_t)b_b; *iDst++ = (uint8_t)g_g; *iDst = (uint8_t)r_r; } } } break;
优化后,速度大约提升1倍。本人对SSE不熟,不知是否还有利用SSE进一步优化的空间。
最新评论