An ANN (artificial neural network) consists of nodes, connections and biases. Nodes fire on their input signal nonlinearly. Connections are weights between nodes which increase or decrease the signal. And there are biases that characterize the network - what a biased comment on bias!
Given inputs to the network, the output is generated by passing them through the network - this is called forward propagation.
// Runs one forward pass through a network with a single hidden layer and
// logistic (sigmoid) activations on both layers.
//
//   X  : network input; it is multiplied as HN * X.
//   HN : hidden-layer weights.
//   ON : output-layer weights.
//   HB : hidden-layer biases, looked up by the row index of each element of H.
//   OB : output-layer biases, looked up by the row index of each output element.
//   H  : [out] overwritten with the hidden activations sigmoid(HN * X + HB).
//
// Returns the output activations sigmoid(ON * H + OB).
// Every matrix is accessed as double, so all of them must be CV_64F.
Mat ForwardPropagation(const Mat& X, const Mat& HN, const Mat& ON, const Mat& HB, const Mat& OB, Mat& H)
{
    // Logistic function, applied in place; the element position is not needed.
    auto sigmoid = [](double &v, const int*) {
        v = 1.0 / (1.0 + exp(-v));
    };

    H = HN * X;
    // pos[0] is the row of the element, i.e. the node whose bias applies.
    H.forEach<double>([&HB](double &v, const int* pos) { v += HB.at<double>(pos[0]); });
    H.forEach<double>(sigmoid);

    Mat O = ON * H;
    O.forEach<double>([&OB](double &v, const int* pos) { v += OB.at<double>(pos[0]); });
    O.forEach<double>(sigmoid);
    return O;
}
After forward propagation, the network weights and biases can be adjusted by backward propagation.
void BackwardPropagation(
const Mat& X, Mat& HN, Mat& ON, Mat& HB, Mat& OB, const Mat& H,
const Mat& Y, const Mat& T, double nu )
{
Mat D1 = Y.clone();
D1.forEach([=](double &yi, const int* i) {
double ti = T.at(i);
yi = yi * (1 - yi) * (ti - yi);
});
Mat P = ON.t() * D1;
Mat D2 = H.clone();
D2.forEach([=](double &hi, const int*i) {
double pi = P.at(i);
hi = hi * (1 - hi) * pi;
});
for (int x = 0; x < H.cols; ++x)
{
ON += nu * D1.col(x) * H.col(x).t();
OB += nu * D1.col(x);
HN += nu * D2.col(x) * X.col(x).t();
HB += nu * D2.col(x);
}
}
To train the network, one of the data sets available on the web can be used. The one used here is http://archive.ics.uci.edu/ml/datasets/steel+plates+faults.
// Selects which vectors of a matrix an operation walks over.
enum class Direction
{
    Row,    // value 0: operate on each row
    Column  // value 1: operate on each column
};
// Rescales every row (or every column) of m in place so that its smallest
// element maps to -1 and its largest to +1.
//
//   T   : element type stored in m; it must match m's depth and has to be
//         given explicitly, e.g. Normalize<double>(m).
//   m   : matrix to rescale; modified in place.
//   dir : Direction::Row rescales each row independently, Direction::Column
//         each column.
//
// A vector whose elements are all equal has no range to scale by; it is set
// to 0 (the midpoint of [-1, 1]) instead of dividing by zero. Empty vectors
// are skipped.
template< typename T>
void Normalize(Mat& m, Direction dir = Direction::Row )
{
    const int count = dir == Direction::Row ? m.rows : m.cols;
    for (int i = 0; i < count; ++i)
    {
        // row()/col() return a header that shares m's data, so writing
        // through X updates m itself.
        Mat X = dir == Direction::Row ? m.row(i) : m.col(i);
        if (X.empty())
            continue;   // nothing to scan; dereferencing end() would be invalid

        const auto r = std::minmax_element(X.begin<T>(), X.end<T>());
        const T minv = *r.first, maxv = *r.second;
        if (maxv == minv)
        {
            X.forEach<T>([](T &x, const int*) { x = static_cast<T>(0); });
            continue;
        }
        X.forEach<T>([minv, maxv](T &x, const int*) {
            x = static_cast<T>(2*(x-minv) / (maxv-minv) - 1.0);
        });
    }
}
double AverageDistance(const Mat& E )
{
double sum = 0.0;
for (int j = 0; j < E.cols; ++j)
{
Mat ej = E.col(j);
sum += sqrt(ej.dot(ej));
}
return sum / E.cols;
}
Mat D = ReadMat("SteelPlateFaults.txt", " \t");
D = D.rowRange(0, 340);
Mat X = D.colRange(4, 27).t();
Normalize(X);
Mat T = D.colRange(27, 29).t();
int M = T.rows, N = X.rows;
Mat HN(N + 1, N, CV_64FC1, Scalar(0.01));
Mat HB(N + 1, 1, CV_64FC1, Scalar(0.0));
Mat ON(M, N + 1, CV_64FC1, Scalar(0.01));
Mat OB(M, 1, CV_64FC1, Scalar(0.0));
Mat H, Y;
for (int i = 0; i < 100; ++i)
{
Y = ForwardPropagation(X, HN, ON, HB, OB, H);
BackwardPropagation(X, HN, ON, HB, OB, H, Y, T, 0.1);
printf("iteration %d, Error %.3f\n", i, AverageDistance(T-Y));
}
printf( "Trained Error %.3f\n", AverageDistance(T - Y));
iteration 0, Error 0.708
iteration 1, Error 0.748
iteration 2, Error 0.814
…
iteration 98, Error 0.095
iteration 99, Error 0.090
Trained Error 0.090
OpenCV has a Machine Learning module. With it, the code above can be simplified as shown below. Forward and backward propagation are both done inside ANN_MLP::train(); ANN_MLP::predict() does forward propagation only.
int BestIndex(const Mat& x )
{
auto mpi = max_element( x.begin(), x.end());
return (int)distance( x.begin(), mpi);
}
using namespace cv::ml;
...
Ptr mlp = ANN_MLP::create();
Mat layer = (Mat_(3, 1) << X.cols, 60, T.cols);
mlp->setLayerSizes(layer);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
mlp->setTermCriteria(TermCriteria(
TermCriteria::Type::COUNT + TermCriteria::Type::EPS, 100000, 0.01 ));
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);
Ptr trainingData = TrainData::create( X, SampleTypes::ROW_SAMPLE, T );
bool isTrained = mlp->train(trainingData);
int sum = 0;
for (int i = 0; i < X.rows; i++) {
Mat result; mlp->predict( X.row(i), result);
int pi = BestIndex(result);
int ti = BestIndex(T.row(i));
sum += (pi == ti ? 1 : 0);
}
printf("%d/%d matched : %.1f accuray\n", sum, T.rows, ((double)sum / T.rows) * 100.0);
335/340 matched : 98.5 accuray