essay on programming languages, computer science, information techonlogies and all.

Thursday, January 18, 2018

Aritificial neural network tutorial with C++ and OpenCV

Here is a tutorial of ANN with C++ and OpenCV. This is based on the book 'An Introduction to Machine Learning' by Miroslav Kubat.

ANN consists of nodes, connection and bias. Nodes fires input signal nonlinearly. Connections are weights between nodes which increase or decrese signal. And there are bias that charaterize the network - what a biased comment on bias !


With inputs on the network, the output generated by going through the network - called forward propagation.

Mat ForwardPropagation(const Mat& X, const Mat& HN, const Mat& ON, const Mat& HB, const Mat& OB, Mat& H)
{
 auto sigmoid = [](double &v, const int* pos) {
  v = 1.0 / (1.0 + exp(-v)); 
 };

 H = HN * X;
 H.forEach([=](double &v, const int* pos) { v += HB.at(pos[0]); });
 H.forEach(sigmoid);
 Mat O = ON * H;
 O.forEach([=](double &v, const int* pos) { v += OB.at(pos[0]); });
 O.forEach(sigmoid);

 return O;
}


After forward propagation , network weight and bias can be adjusted by backward-propagation.

void BackwardPropagation(
 const Mat& X, Mat& HN, Mat& ON, Mat& HB, Mat& OB, const Mat& H, 
 const Mat& Y, const Mat& T, double nu )
{
 Mat D1 = Y.clone();
 D1.forEach([=](double &yi, const int* i) {
  double ti = T.at(i);
  yi = yi * (1 - yi) * (ti - yi);
 });

 Mat P = ON.t() * D1;
 Mat D2 = H.clone();
 D2.forEach([=](double &hi, const int*i) {
  double pi = P.at(i);
  hi = hi * (1 - hi) * pi;
 });

 for (int x = 0; x < H.cols; ++x)
 {
  ON += nu * D1.col(x) * H.col(x).t();
  OB += nu * D1.col(x);
  HN += nu * D2.col(x) * X.col(x).t();
  HB += nu * D2.col(x);
 }
}


To train, one of data at web can be used. Here http://archive.ics.uci.edu/ml/datasets/steel+plates+faults is the one used in here.

enum class Direction {
 Row = 0,
 Column = 1,
};

template< typename T> 
void Normalize(Mat& m, Direction dir = Direction::Row )
{
 int count = dir == Direction::Row ? m.rows : m.cols;
 auto getVector = [dir](Mat& m, int i) { return dir == Direction::Row ? m.row(i) : m.col(i); };
  
 for (int i = 0; i < count; ++i)
 {
  Mat X = getVector( m, i );
  auto r = minmax_element(X.begin(), X.end());
  T minv = *r.first, maxv = *r.second;

  X.forEach([=](T &x, const int* pos) { x = (T)(2*(x-minv) / (maxv-minv) - 1.0); });
 }
}

double AverageDistance(const Mat& E )
{
 double sum = 0.0;
 for (int j = 0; j < E.cols; ++j)
 {
  Mat ej = E.col(j);
  sum += sqrt(ej.dot(ej));
 }
 return sum / E.cols;
}


Mat D = ReadMat("SteelPlateFaults.txt", " \t");
D = D.rowRange(0, 340);

Mat X = D.colRange(4, 27).t();
Normalize(X);
Mat T = D.colRange(27, 29).t();

int M = T.rows, N = X.rows;
Mat HN(N + 1, N, CV_64FC1, Scalar(0.01));
Mat HB(N + 1, 1, CV_64FC1, Scalar(0.0));
Mat ON(M, N + 1, CV_64FC1, Scalar(0.01));
Mat OB(M, 1, CV_64FC1, Scalar(0.0));

Mat H, Y;
for (int i = 0; i < 100; ++i)
{
 Y = ForwardPropagation(X, HN, ON, HB, OB, H);
 BackwardPropagation(X, HN, ON, HB, OB, H, Y, T, 0.1);

 printf("iteration %d, Error %.3f\n", i, AverageDistance(T-Y));
}

printf( "Trained Error %.3f\n", AverageDistance(T - Y));

iteration 0, Error 0.708
iteration 1, Error 0.748
iteration 2, Error 0.814
…
iteration 98, Error 0.095
iteration 99, Error 0.090
Trained Error 0.090


There is Machine Learning module in OpenCV. Here above code can be simplified like below. The forward and backward propagation are done in the ANN_MLP::train(). ANN_MLP::predict() does forward propagation only.

int BestIndex(const Mat& x )
{
 auto mpi = max_element( x.begin(), x.end());
 return (int)distance( x.begin(), mpi);
}

using namespace cv::ml;

...
Ptr mlp = ANN_MLP::create();

Mat layer = (Mat_(3, 1) << X.cols, 60, T.cols);
mlp->setLayerSizes(layer);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
mlp->setTermCriteria(TermCriteria( 
  TermCriteria::Type::COUNT + TermCriteria::Type::EPS, 100000, 0.01 ));
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);

Ptr trainingData = TrainData::create( X, SampleTypes::ROW_SAMPLE, T );
bool isTrained = mlp->train(trainingData);

int sum = 0;
for (int i = 0; i < X.rows; i++) {
 Mat result; mlp->predict( X.row(i), result);
 int pi = BestIndex(result);
 int ti = BestIndex(T.row(i));
 sum += (pi == ti ? 1 : 0);
}

printf("%d/%d matched : %.1f accuray\n", sum, T.rows, ((double)sum / T.rows) * 100.0);

335/340 matched : 98.5 accuray

Wednesday, January 10, 2018

Find translation between two images in sub pixel

To find a translation - x, y offset - between two images is called image registration - https://en.wikipedia.org/wiki/Image_registration. After finding out the translation, image difference can be applied to see if there is any defect on the object.

Here above are target images come from patterned glass - original image is around 1000 pixels in short dimension but are reduced to blur any specific feature.

There can be number of methods but in here, optimization - coordinate search - is tried and here is related code snippet. https://en.wikipedia.org/wiki/Coordinate_descent. Code is based on the opencv Mat class. The usage of unit matrix u and integer variable i and j are beautiful. Refer the comment around the usage.


 void Optimize(InputArray x0, OutputArray xn)
 {
  int maxIter = GetValue("MaxIteration");
  double delta = GetValue("Delta");
  double eps = GetValue("TerminalEpsilon");

  Mat xk = x0.getMat().t();
  int N = xk.rows;
  Mat u = Mat::eye(N, N, CV_64FC1);
  hconcat(u, -u, u);  // for each dimension, search in positive and negative direction
  int i = 0;
  double fxk = m_Callback->Function(xk);

  int iter = 0;
  while (iter++ < maxIter)
  {
   int j = 0;
   for (; j < u.cols; ++j)
   {
    i = (i + j) % u.cols;
    Mat xki = xk + u.col(i) * delta;
    double fxki = m_Callback->Function(xki);  // user says score of the move

    if (fxki < fxk)  
    {
     xk = xki;
     fxk = fxki;
     break;  // if found better score, try next dimension
    }
   }

   if (j == u.cols)
   {
    delta /= 2;   // make small delta as there is no better score found with current delta 
   }

   if (delta < eps)
    break;
  }

  xn.create(xk.rows, xk.cols, xk.type());
  xk.copyTo(xn);

  SetValue("IterationCount", iter);
  SetValue("Minimum", fxk);
 }



To say the score of each move, sub-pixel image difference can be calculated; If two image are matched, the difference will be near zero. If not, it will go up.
Going through every pixel of image and calculate sub-pixel difference is an expensive operation. Here is code snippet that does the comparison in SIMD using intrinsic code. Every loop, 8 pixels are differenced in fixed-point arithmatic.


struct TranslationInSub
{
 int NX, NY;
 double FX, FY;

 static TranslationInSub FromPoint(const cv::Point2d& t)
 {
  TranslationInSub r = { (int)floor(t.x), (int)floor(t.y), 0, 0 };
  r.FX = t.x - r.NX;
  r.FY = t.y - r.NY;
  return r;
 }
};

template< typename T >
struct WeightCompass
{
 T NW, NE, SW, SE;

 static WeightCompass< T > FromTranslation(const TranslationInSub& t, double scale )
 {
  WeightCompass< T > r =
  {
   (T)boost::math::round((1.0 - t.FX)*(1.0 - t.FY) * scale),
   (T)boost::math::round(t.FX*(1.0 - t.FY) * scale),
   (T)boost::math::round((1.0 - t.FX)*t.FY * scale),
   (T)boost::math::round(t.FX*t.FY * scale)
  };

  return r;
 }
};

struct WeightCompassSMM
{
 __m128i NW, NE, SW, SE;

 static WeightCompassSMM FromWeightCompass( const WeightCompass< uint16_t >& f)
 {
  WeightCompassSMM  r;
  
  uint16_t fNWSE[8];
  fill(fNWSE, fNWSE + 8, f.NW);
  r.NW = _mm_loadu_si128((__m128i *)fNWSE);
  fill(fNWSE, fNWSE + 8, f.NE);
  r.NE = _mm_loadu_si128((__m128i *)fNWSE);
  fill(fNWSE, fNWSE + 8, f.SW);
  r.SW = _mm_loadu_si128((__m128i *)fNWSE);
  fill(fNWSE, fNWSE + 8, f.SE);
  r.SE = _mm_loadu_si128((__m128i *)fNWSE);

  return r;
 }
};

int Details::DiffSumBySIMD( const cv::Mat& ref, const cv::Mat& tar, cv::Rect roi, cv::Point2d t)
{
 const uint16_t SCALE = 0x40; // scale to convert 8 bits gray level to 16bit fixed point float  

 TranslationInSub tN = TranslationInSub::FromPoint(t);
 const WeightCompass< uint16_t > fN = WeightCompass< uint16_t >::FromTranslation(tN, SCALE);
 const WeightCompassSMM mfN = WeightCompassSMM::FromWeightCompass(fN);

 int sum = 0;
 const __m128i mZero = _mm_setzero_si128();

 uint16_t ps[8];

 for (int y = roi.y; y < (roi.y + roi.height); y++)
 {
  const uint8_t* PC_C = ref.ptr(y, roi.x);  // reference image 
  const uint8_t* PN_NW = tar.ptr(y + tN.NY, roi.x + tN.NX);  // target image at translation offset - integer

  for (int x = roi.x; x < (roi.x+roi.width); x += 8, PC_C += 8, PN_NW += 8)
  {
   __m128i mC = _mm_loadu_si128((__m128i *)PC_C);  
   mC = _mm_unpacklo_epi8(mC, mZero);
   mC = _mm_slli_epi16(mC, 6);   // load 8 pixels as 16 bits fixed point float

   __m128i mPN = mZero;
   __m128i mP = _mm_loadu_si128((__m128i *)PN_NW);   // North West
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.NW);
   mPN = _mm_adds_epu16(mPN, mP);

   mP = _mm_loadu_si128((__m128i *)(PN_NW + 1));    // North East
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.NE);
   mPN = _mm_adds_epu16(mPN, mP);

   mP = _mm_loadu_si128((__m128i *)(PN_NW + tar.cols));  // South West
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.SW);
   mPN = _mm_adds_epu16(mPN, mP);

   mP = _mm_loadu_si128((__m128i *)(PN_NW + tar.cols + 1));  // South East
   mP = _mm_unpacklo_epi8(mP, mZero);
   mP = _mm_mullo_epi16(mP, mfN.SE);
   mPN = _mm_adds_epu16(mPN, mP);

   __m128i mDiff = _mm_abs_epi16(_mm_subs_epi16(mC, mPN));
   mDiff = _mm_srai_epi16(mDiff, 6);

   __m128i mPartialSum = _mm_sad_epu8(mDiff, mZero);
   _mm_storel_epi64((__m128i *)(ps), mPartialSum);

   sum += (ps[0] + ps[3]);
  }
 }

 return sum;
}



Above methodology were at a proposal on a reserch project on some company but didn't launched up so just open in here.