essay on programming languages, computer science, information techonlogies and all.

Wednesday, January 10, 2018

Find translation between two images in sub pixel

To find a translation - x, y offset - between two images is called image registration - https://en.wikipedia.org/wiki/Image_registration. After finding out the translation, image difference can be applied to see if there is any defect on the object.

Here above are target images come from patterned glass - original image is around 1000 pixels in short dimension but are reduced to blur any specific feature.

There can be number of methods but in here, optimization - coordinate search - is tried and here is related code snippet. https://en.wikipedia.org/wiki/Coordinate_descent. Code is based on the opencv Mat class. The usage of unit matrix u and integer variable i and j are beautiful. Refer the comment around the usage.


 void Optimize(InputArray x0, OutputArray xn)
 {
  int maxIter = GetValue("MaxIteration");
  double delta = GetValue("Delta");
  double eps = GetValue("TerminalEpsilon");

  Mat xk = x0.getMat().t();
  int N = xk.rows;
  Mat u = Mat::eye(N, N, CV_64FC1);
  hconcat(u, -u, u);  // for each dimension, search in positive and negative direction
  int i = 0;
  double fxk = m_Callback->Function(xk);

  int iter = 0;
  while (iter++ < maxIter)
  {
   int j = 0;
   for (; j < u.cols; ++j)
   {
    i = (i + j) % u.cols;
    Mat xki = xk + u.col(i) * delta;
    double fxki = m_Callback->Function(xki);  // user says score of the move

    if (fxki < fxk)  
    {
     xk = xki;
     fxk = fxki;
     break;  // if found better score, try next dimension
    }
   }

   if (j == u.cols)
   {
    delta /= 2;   // make small delta as there is no better score found with current delta 
   }

   if (delta < eps)
    break;
  }

  xn.create(xk.rows, xk.cols, xk.type());
  xk.copyTo(xn);

  SetValue("IterationCount", iter);
  SetValue("Minimum", fxk);
 }



To say the score of each move, sub-pixel image difference can be calculated; If two image are matched, the difference will be near zero. If not, it will go up.
Going through every pixel of image and calculate sub-pixel difference is an expensive operation. Here is code snippet that does the comparison in SIMD using intrinsic code. Every loop, 8 pixels are differenced in fixed-point arithmatic.


struct TranslationInSub
{
 int NX, NY;
 double FX, FY;

 static TranslationInSub FromPoint(const cv::Point2d& t)
 {
  TranslationInSub r = { (int)floor(t.x), (int)floor(t.y), 0, 0 };
  r.FX = t.x - r.NX;
  r.FY = t.y - r.NY;
  return r;
 }
};

template< typename T >
struct WeightCompass
{
 T NW, NE, SW, SE;

 static WeightCompass< T > FromTranslation(const TranslationInSub& t, double scale )
 {
  WeightCompass< T > r =
  {
   (T)boost::math::round((1.0 - t.FX)*(1.0 - t.FY) * scale),
   (T)boost::math::round(t.FX*(1.0 - t.FY) * scale),
   (T)boost::math::round((1.0 - t.FX)*t.FY * scale),
   (T)boost::math::round(t.FX*t.FY * scale)
  };

  return r;
 }
};

struct WeightCompassSMM
{
 __m128i NW, NE, SW, SE;

 static WeightCompassSMM FromWeightCompass( const WeightCompass< uint16_t >& f)
 {
  WeightCompassSMM  r;
  
  uint16_t fNWSE[8];
  fill(fNWSE, fNWSE + 8, f.NW);
  r.NW = _mm_loadu_si128((__m128i *)fNWSE);
  fill(fNWSE, fNWSE + 8, f.NE);
  r.NE = _mm_loadu_si128((__m128i *)fNWSE);
  fill(fNWSE, fNWSE + 8, f.SW);
  r.SW = _mm_loadu_si128((__m128i *)fNWSE);
  fill(fNWSE, fNWSE + 8, f.SE);
  r.SE = _mm_loadu_si128((__m128i *)fNWSE);

  return r;
 }
};

int Details::DiffSumBySIMD( const cv::Mat& ref, const cv::Mat& tar, cv::Rect roi, cv::Point2d t)
{
 const uint16_t SCALE = 0x40; // scale to convert 8 bits gray level to 16bit fixed point float  

 TranslationInSub tN = TranslationInSub::FromPoint(t);
 const WeightCompass< uint16_t > fN = WeightCompass< uint16_t >::FromTranslation(tN, SCALE);
 const WeightCompassSMM mfN = WeightCompassSMM::FromWeightCompass(fN);

 int sum = 0;
 const __m128i mZero = _mm_setzero_si128();

 uint16_t ps[8];

 for (int y = roi.y; y < (roi.y + roi.height); y++)
 {
  const uint8_t* PC_C = ref.ptr(y, roi.x);  // reference image 
  const uint8_t* PN_NW = tar.ptr(y + tN.NY, roi.x + tN.NX);  // target image at translation offset - integer

  for (int x = roi.x; x < (roi.x+roi.width); x += 8, PC_C += 8, PN_NW += 8)
  {
   __m128i mC = _mm_loadu_si128((__m128i *)PC_C);  
   mC = _mm_unpacklo_epi8(mC, mZero);
   mC = _mm_slli_epi16(mC, 6);   // load 8 pixels as 16 bits fixed point float

   __m128i mPN = mZero;
   __m128i mP = _mm_loadu_si128((__m128i *)PN_NW);   // North West
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.NW);
   mPN = _mm_adds_epu16(mPN, mP);

   mP = _mm_loadu_si128((__m128i *)(PN_NW + 1));    // North East
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.NE);
   mPN = _mm_adds_epu16(mPN, mP);

   mP = _mm_loadu_si128((__m128i *)(PN_NW + tar.cols));  // South West
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.SW);
   mPN = _mm_adds_epu16(mPN, mP);

   mP = _mm_loadu_si128((__m128i *)(PN_NW + tar.cols + 1));  // South East
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.SE);
   mPN = _mm_adds_epu16(mPN, mP);

   __m128i mDiff = _mm_abs_epi16(_mm_subs_epi16(mC, mPN));
   mDiff = _mm_srai_epi16(mDiff, 6);

   __m128i mPartialSum = _mm_sad_epu8(mDiff, mZero);
   _mm_storel_epi64((__m128i *)(ps), mPartialSum);

   sum += (ps[0] + ps[3]);
  }
 }

 return sum;
}



Above methodology were at a proposal on a reserch project on some company but didn't launched up so just open in here.

No comments: