
Fatal Error detected when trying to test a trained net.

Help · 2016-10-14 (created) · 2020-03-18 (last post)
  • Catalin Moldovan

    Hi,
    I'm getting an error at the testing step, after training has completed. I'm training a net for neck recognition for an AR project, using the dog hipsterizer and mmod examples as a starting point. I'm running dlib with CUDA support on Ubuntu 16.04 on a GTX 660, with the cropper's samples per mini-batch reduced from 150 to 25 to get rid of out-of-memory issues on my card.
    When the program reaches this line:

    cout << "training results: " << test_object_detection_function(net, images_train, neck_boxes_train) << endl;
    

    I get this error message:

    **************************** FATAL ERROR DETECTED ****************************
    
    Error detected at line 591.
    Error detected in file /home/cata/Workspace/dlib-19.2/dlib/dnn/cuda_dlib.cu.
    Error detected in function void dlib::cuda::affine_transform(dlib::tensor&, const dlib::tensor&, const dlib::tensor&, const dlib::tensor&).
    
    Failing expression was A.nr()==B.nr() && B.nr()==src.nr() && A.nc()==B.nc() && B.nc()==src.nc() && A.k() ==B.k() && B.k()==src.k().
    
    A.nr(): 556
    B.nr(): 556
    src.nr(): 807
    A.nc(): 148
    B.nc(): 148
    src.nc(): 145
    A.k(): 16
    B.k(): 16
    src.k(): 16
    
    ******************************************************************************
    

    I'm only starting to learn dlib and machine learning in general, so most probably I'm doing something wrong. I can see from the error that some of the values are not what the failing expression expects, but I have no idea what that means.

    Cheers,
    Catalin

     
  • Catalin Moldovan

    Below is the program listing as well. I can also provide the training set and the serialized net file if needed.

    #include <iostream>
    #include <string>
    #include <vector>
    #include <dlib/dnn.h>
    #include <dlib/data_io.h>
    #include <dlib/image_processing.h>
    
    using namespace std;
    using namespace dlib;
    
    void show_usage()
    {
        cout << "Usage:" << endl << "Nectracking <option> <path>" << endl;
        cout << "Options:" << endl << "\t-train\t\tTrains the detector using the set found at <path>" << endl;
        cout << "\t-track\t\tStarts the detector using the test images found at <path>" << endl;
    }
    
    // ----------------------------------------------------------------------------------------
    
    std::vector<std::vector<double> > get_interocular_distances (
        const std::vector<std::vector<full_object_detection> >& objects
    );
    /*!
        ensures
            - returns an object D such that:
                - D[i][j] == the distance, in pixels, between the eyes for the face represented
                  by objects[i][j].
    !*/
    
    // ----------------------------------------------------------------------------------------
    
    template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
    template <long num_filters, typename SUBNET> using con5  = con<num_filters,5,5,1,1,SUBNET>;
    
    template <typename SUBNET> using downsampler  = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
    template <typename SUBNET> using rcon5  = relu<affine<con5<45,SUBNET>>>;
    
    using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
    
    // ----------------------------------------------------------------------------------------
    
    int main(int argc, char *argv[])
    {
        bool train = false;
        string path;
    
        if (argc < 3)
        {
            show_usage();
            return 1;   // both an option and a path are required
        }
        else
        {
            std::vector<string> args;
            for (auto i = 1; i < argc; ++i)
                args.push_back(argv[i]);
    
            path = args[1];
            // args[0] selects the mode, args[1] is the dataset path.
            if (args[0] == "-train")
            {
                train = true;
            }
            else if (args[0] != "-track")
            {
                show_usage();
                return 1;
            }
    
        }
    
        if (train)
        {
            std::vector<matrix<rgb_pixel>> images_train;
            std::vector<std::vector<mmod_rect>> neck_boxes_train;
            load_image_dataset(images_train, neck_boxes_train, path);
    
            cout << "num training images: " << images_train.size() << endl;
    
            mmod_options options(neck_boxes_train, 80*80);
            cout << "detection window width,height:      " << options.detector_width << "," << options.detector_height << endl;
            cout << "overlap NMS IOU thresh:             " << options.overlaps_nms.get_iou_thresh() << endl;
            cout << "overlap NMS percent covered thresh: " << options.overlaps_nms.get_percent_covered_thresh() << endl;
    
            // Now we are ready to create our network and trainer.
            net_type net(options);
            dnn_trainer<net_type> trainer(net);
            trainer.set_learning_rate(0.1);
            trainer.be_verbose();
            trainer.set_synchronization_file("neck_track_sync", std::chrono::minutes(5));
            trainer.set_iterations_without_progress_threshold(300);
    
            // Now let's train the network.  We are going to use mini-batches of 150
            // images.   The images are random crops from our training set (see
            // random_cropper_ex.cpp for a discussion of the random_cropper).
            std::vector<matrix<rgb_pixel>> mini_batch_samples;
            std::vector<std::vector<mmod_rect>> mini_batch_labels;
            random_cropper cropper;
            dlib::rand rnd;
            // Run the trainer until the learning rate gets small.  This will probably take several
            // hours.
            while(trainer.get_learning_rate() >= 1e-4)
            {
                cropper(25, images_train, neck_boxes_train, mini_batch_samples, mini_batch_labels);
                // We can also randomly jitter the colors and that often helps a detector
                // generalize better to new images.
                for (auto&& img : mini_batch_samples)
                    disturb_colors(img, rnd);
    
                trainer.train_one_step(mini_batch_samples, mini_batch_labels);
            }
            // wait for training threads to stop
            trainer.get_net();
            cout << "done training" << endl;
    
            // Save the network to disk
            net.clean();
            serialize("neck_network.dat") << net;
    
            // Now that we have a neck detector we can test it.  The first statement tests it
            // on the training data.  It will print the precision, recall, and then average precision.
            // This statement should indicate that the network works perfectly on the
            // training data.
            cout << "training results: " << test_object_detection_function(net, images_train, neck_boxes_train) << endl;
    
            cout << "Now let's train the shape predictor:" << endl;
    
            std::vector<std::vector<full_object_detection> > shapes_train;
            load_image_dataset(images_train, shapes_train, path);
    
            // Now make the object responsible for training the model.
            shape_predictor_trainer sp_trainer;
            // This algorithm has a bunch of parameters you can mess with.  The
            // documentation for the shape_predictor_trainer explains all of them.
            // You should also read Kazemi's paper which explains all the parameters
            // in great detail.  However, here I'm just setting three of them
            // differently than their default values.  I'm doing this because we
            // have a very small dataset.  In particular, setting the oversampling
            // to a high amount (300) effectively boosts the training set size, so
            // that helps this example.
            sp_trainer.set_oversampling_amount(300);
            // I'm also reducing the capacity of the model by explicitly increasing
            // the regularization (making nu smaller) and by using trees with
            // smaller depths.
            sp_trainer.set_nu(0.05);
            sp_trainer.set_tree_depth(5);
            sp_trainer.set_cascade_depth(20);
            sp_trainer.set_feature_pool_region_padding(0.2);
    
            // some parts of training process can be parallelized.
            // Trainer will use this count of threads when possible
            sp_trainer.set_num_threads(2);
    
            // Tell the trainer to print status messages to the console so we can
            // see how long the training will take.
            sp_trainer.be_verbose();
    
            // Now finally generate the shape model
            shape_predictor sp = sp_trainer.train(images_train, shapes_train);
    
            // Now that we have a model we can test it.  This function measures the
            // average distance between a face landmark output by the
            // shape_predictor and where it should be according to the truth data.
            // Note that there is an optional 4th argument that lets us rescale the
            // distances.  Here we are causing the output to scale each face's
            // distances by the interocular distance, as is customary when
            // evaluating face landmarking systems.
            cout << "mean training error: "<<
                test_shape_predictor(sp, images_train, shapes_train, get_interocular_distances(shapes_train)) << endl;
    
            // Finally, we save the model to disk so we can use it later.
            serialize("sp.dat") << sp;
        }
        else
        {
        std::vector<matrix<rgb_pixel>> images_test;
        std::vector<std::vector<mmod_rect>> neck_boxes_test;
        load_image_dataset(images_test, neck_boxes_test, path);

        cout << "num testing images: " << images_test.size() << endl;

        net_type net;
        deserialize("neck_network.dat") >> net;

        // Test the neck detector on the images loaded above.  This will print
        // the precision, recall, and then average precision.
        cout << "testing results: " << test_object_detection_function(net, images_test, neck_boxes_test) << endl;
        }
    
        return 0;
    }
    
    // ----------------------------------------------------------------------------------------
    
    double interocular_distance (
        const full_object_detection& det
    )
    {
        dlib::vector<double,2> l, r;
        double cnt = 0;
        // Find the center of the left eye by averaging the points around
        // the eye.
        for (unsigned long i = 36; i <= 41; ++i)
        {
            l += det.part(i);
            ++cnt;
        }
        l /= cnt;
    
        // Find the center of the right eye by averaging the points around
        // the eye.
        cnt = 0;
        for (unsigned long i = 42; i <= 47; ++i)
        {
            r += det.part(i);
            ++cnt;
        }
        r /= cnt;
    
        // Now return the distance between the centers of the eyes
        return length(l-r);
    }
    
    std::vector<std::vector<double> > get_interocular_distances (
        const std::vector<std::vector<full_object_detection> >& objects
    )
    {
        std::vector<std::vector<double> > temp(objects.size());
        for (unsigned long i = 0; i < objects.size(); ++i)
        {
            for (unsigned long j = 0; j < objects[i].size(); ++j)
            {
                temp[i].push_back(interocular_distance(objects[i][j]));
            }
        }
        return temp;
    }
    
    // ----------------------------------------------------------------------------------------
    
     

    Last edit: Catalin Moldovan 2016-10-14
    • Davis

      Davis - 2016-10-14

      You can't use affine layers like that during training. And really you
      shouldn't ever have affine layers in training, because it doesn't make
      sense mathematically. Use bn layers like the dnn_mmod_ex.cpp example
      shows.
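
      For reference, here is a sketch of what the training-time network
      definition from the listing above looks like with bn_con in place of
      affine, in the style of dnn_mmod_ex.cpp (everything else in the program
      can stay the same):

      // Training-time definition: bn_con layers learn batch-norm statistics
      // during training.  The affine version is only appropriate for running
      // an already-trained network.
      template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
      template <long num_filters, typename SUBNET> using con5  = con<num_filters,5,5,1,1,SUBNET>;

      template <typename SUBNET> using downsampler  = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<16,SUBNET>>>>>>>>>;
      template <typename SUBNET> using rcon5  = relu<bn_con<con5<45,SUBNET>>>;

      using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;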

       
  • Catalin Moldovan

    Thanks for the answer Davis, but being new to ML I have to ask: isn't the net used in training the same net used for detection? I mean, if I serialize a net, shouldn't the net I deserialize into be exactly the same one? That's why I used the net definition from dnn_mmod_dog_hipsterizer to train on my images: my assumption was that the serialized net must have exactly the same definition as the one I deserialize into. But the definitions in dnn_mmod_ex and dnn_mmod_dog_hipsterizer differ in their use of bn_con and affine layers.
    Also, if it's not too much to ask, could you recommend good reading for someone who is just starting to learn about machine learning and who, as you might have noticed, definitely doesn't understand the concepts yet?

     

    Last edit: Catalin Moldovan 2016-10-17
    • Davis

      Davis - 2016-10-17

      You should read the introductory dnn example programs that come with dlib.
      They talk about this subject. Beyond that, you should read the papers
      cited in the dlib documentation for deep learning.
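
      To connect this to your serialization question: the usual dlib pattern
      is to keep two otherwise-identical network type aliases, one built with
      bn_con for training and one built with affine for running the trained
      detector.  A minimal sketch (train_net_type and deploy_net_type here are
      hypothetical names for such a pair):

      // The two aliases differ only in bn_con (training) vs affine (deployment).
      // dlib can deserialize a network saved with bn_con layers into the matching
      // affine-layer network type, since an affine layer knows how to construct
      // itself from the learned batch-norm statistics.
      train_net_type tnet(options);
      // ... run the training loop from the listing above on tnet ...
      tnet.clean();
      serialize("neck_network.dat") << tnet;

      // Later, e.g. in the -track branch:
      deploy_net_type dnet;
      deserialize("neck_network.dat") >> dnet;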

       
  • Catalin Moldovan

    Hi Davis,
    your advice really helped me a lot. I'm grateful. I had read the examples before, but in a hurry, and I missed some essential parts. Now I'm like a kid with a shiny new toy who wants to show it to everyone. I'm really starting to get a feel for just how powerful your library is. Thanks for sharing it with the community.

     
    • Davis

      Davis - 2016-10-21

      Sweet :)

       
  • lea fava

    lea fava - 2020-03-17

    @davisking What's the difference between bn_con and affine layers? Why doesn't using affine layers in training make sense mathematically?

     
    • Davis

      Davis - 2020-03-18

      One layer does batch norm and the other just linearly transforms the
      outputs. The latter is not useful since it will be stacked adjacent to
      some other linear transformation, like a convolution, and two linear
      things combine into another linear thing, so no additional modeling
      capacity is gained.
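
      In symbols: if the convolution computes Wx + b and the affine layer
      computes Ay + c, then stacking them gives

          A(Wx + b) + c = (AW)x + (Ab + c)

      which is just one affine map again, so the extra layer adds parameters
      but no modeling capacity.  bn_con is different during training because
      it normalizes with mini-batch statistics, which depend nonlinearly on
      the whole batch; only at test time, when those statistics are frozen,
      does it collapse to a fixed affine transform.  That is why dlib swaps
      bn_con for affine at deployment.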

       
