Here above are target images come from patterned glass - original image is around 1000 pixels in short dimension but are reduced to blur any specific feature.
There can be number of methods but in here, optimization - coordinate search - is tried and here is related code snippet. https://en.wikipedia.org/wiki/Coordinate_descent. Code is based on the opencv Mat class. The usage of unit matrix u and integer variable i and j are beautiful. Refer the comment around the usage.
void Optimize(InputArray x0, OutputArray xn)
{
int maxIter = GetValue("MaxIteration");
double delta = GetValue("Delta");
double eps = GetValue("TerminalEpsilon");
Mat xk = x0.getMat().t();
int N = xk.rows;
Mat u = Mat::eye(N, N, CV_64FC1);
hconcat(u, -u, u); // for each dimension, search in positive and negative direction
int i = 0;
double fxk = m_Callback->Function(xk);
int iter = 0;
while (iter++ < maxIter)
{
int j = 0;
for (; j < u.cols; ++j)
{
i = (i + j) % u.cols;
Mat xki = xk + u.col(i) * delta;
double fxki = m_Callback->Function(xki); // user says score of the move
if (fxki < fxk)
{
xk = xki;
fxk = fxki;
break; // if found better score, try next dimension
}
}
if (j == u.cols)
{
delta /= 2; // make small delta as there is no better score found with current delta
}
if (delta < eps)
break;
}
xn.create(xk.rows, xk.cols, xk.type());
xk.copyTo(xn);
SetValue("IterationCount", iter);
SetValue("Minimum", fxk);
}
To say the score of each move, sub-pixel image difference can be calculated; If two image are matched, the difference will be near zero. If not, it will go up.
Going through every pixel of image and calculate sub-pixel difference is an expensive operation. Here is code snippet that does the comparison in SIMD using intrinsic code. Every loop, 8 pixels are differenced in fixed-point arithmatic.
struct TranslationInSub
{
int NX, NY;
double FX, FY;
static TranslationInSub FromPoint(const cv::Point2d& t)
{
TranslationInSub r = { (int)floor(t.x), (int)floor(t.y), 0, 0 };
r.FX = t.x - r.NX;
r.FY = t.y - r.NY;
return r;
}
};
template< typename T >
struct WeightCompass
{
T NW, NE, SW, SE;
static WeightCompass< T > FromTranslation(const TranslationInSub& t, double scale )
{
WeightCompass< T > r =
{
(T)boost::math::round((1.0 - t.FX)*(1.0 - t.FY) * scale),
(T)boost::math::round(t.FX*(1.0 - t.FY) * scale),
(T)boost::math::round((1.0 - t.FX)*t.FY * scale),
(T)boost::math::round(t.FX*t.FY * scale)
};
return r;
}
};
struct WeightCompassSMM
{
__m128i NW, NE, SW, SE;
static WeightCompassSMM FromWeightCompass( const WeightCompass< uint16_t >& f)
{
WeightCompassSMM r;
uint16_t fNWSE[8];
fill(fNWSE, fNWSE + 8, f.NW);
r.NW = _mm_loadu_si128((__m128i *)fNWSE);
fill(fNWSE, fNWSE + 8, f.NE);
r.NE = _mm_loadu_si128((__m128i *)fNWSE);
fill(fNWSE, fNWSE + 8, f.SW);
r.SW = _mm_loadu_si128((__m128i *)fNWSE);
fill(fNWSE, fNWSE + 8, f.SE);
r.SE = _mm_loadu_si128((__m128i *)fNWSE);
return r;
}
};
int Details::DiffSumBySIMD( const cv::Mat& ref, const cv::Mat& tar, cv::Rect roi, cv::Point2d t)
{
const uint16_t SCALE = 0x40; // scale to convert 8 bits gray level to 16bit fixed point float
TranslationInSub tN = TranslationInSub::FromPoint(t);
const WeightCompass< uint16_t > fN = WeightCompass< uint16_t >::FromTranslation(tN, SCALE);
const WeightCompassSMM mfN = WeightCompassSMM::FromWeightCompass(fN);
int sum = 0;
const __m128i mZero = _mm_setzero_si128();
uint16_t ps[8];
for (int y = roi.y; y < (roi.y + roi.height); y++)
{
const uint8_t* PC_C = ref.ptr(y, roi.x); // reference image
const uint8_t* PN_NW = tar.ptr(y + tN.NY, roi.x + tN.NX); // target image at translation offset - integer
for (int x = roi.x; x < (roi.x+roi.width); x += 8, PC_C += 8, PN_NW += 8)
{
__m128i mC = _mm_loadu_si128((__m128i *)PC_C);
mC = _mm_unpacklo_epi8(mC, mZero);
mC = _mm_slli_epi16(mC, 6); // load 8 pixels as 16 bits fixed point float
__m128i mPN = mZero;
__m128i mP = _mm_loadu_si128((__m128i *)PN_NW); // North West
mP = _mm_unpacklo_epi8(mP, mZero);
mP = _mm_mullo_epi16(mP, mfN.NW);
mPN = _mm_adds_epu16(mPN, mP);
mP = _mm_loadu_si128((__m128i *)(PN_NW + 1)); // North East
mP = _mm_unpacklo_epi8(mP, mZero);
mP = _mm_mullo_epi16(mP, mfN.NE);
mPN = _mm_adds_epu16(mPN, mP);
mP = _mm_loadu_si128((__m128i *)(PN_NW + tar.cols)); // South West
mP = _mm_unpacklo_epi8(mP, mZero);
mP = _mm_mullo_epi16(mP, mfN.SW);
mPN = _mm_adds_epu16(mPN, mP);
mP = _mm_loadu_si128((__m128i *)(PN_NW + tar.cols + 1)); // South East
mP = _mm_unpacklo_epi8(mP, mZero);
mP = _mm_mullo_epi16(mP, mfN.SE);
mPN = _mm_adds_epu16(mPN, mP);
__m128i mDiff = _mm_abs_epi16(_mm_subs_epi16(mC, mPN));
mDiff = _mm_srai_epi16(mDiff, 6);
__m128i mPartialSum = _mm_sad_epu8(mDiff, mZero);
_mm_storel_epi64((__m128i *)(ps), mPartialSum);
sum += (ps[0] + ps[3]);
}
}
return sum;
}
Above methodology were at a proposal on a reserch project on some company but didn't launched up so just open in here.
No comments:
Post a Comment