azhe198827 / retinaface_tensorrt Goto Github PK
View Code? Open in Web Editor NEWtensorRT retinaface mobilenet
tensorRT retinaface mobilenet
I used your wonderful caffe2onnx tool to convert my own retinaface caffemodel, which was very useful for me. However, something is wrong with the Crop layer and the upsample layer — could you please tell me how to deal with these two layers?
我用和https://github.com/clancylian/retinaface/blob/master/model/mnet25.prototxt 一模一样的结构进行转换(除了输入大小为640),转换之后的onnx上采样层还是没有连接到网络中.
Great work!
I am doing related work recently. I would like to ask how the operation of group deconvolution is handled. I don't see support in src/OPs.
你好,谢谢你上传了这么好的代码。请问你试过将mxnet的模型直接转onnx的模型这样方式么?
Changed this line:
auto parser = nvonnxparser::createParser(*network, gLogger);
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <sys/stat.h>
#include "sys/time.h"
#include "opencv2/opencv.hpp"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvOnnxParserRuntime.h"
#include "NvOnnxConfig.h"
#include <time.h>
using namespace nvinfer1;
// NOTE(review): INPUT_H/INPUT_W/OUTPUT_SIZE are not referenced anywhere in
// this file — presumably leftovers from the TensorRT MNIST sample; confirm
// before removing.
static const int INPUT_H = 28;
static const int INPUT_W = 28;
static const int OUTPUT_SIZE = 10;
// DLA core to use (-1 = GPU only).  NOTE(review): load_onnx() declares a
// local of the same name, so this global is currently never read.
static int gUseDLACore{-1};
// Name, shape, byte size and binding index of one network output tensor.
struct LayerInfo
{
std::vector<int> dim;   // per-axis extents as reported by TensorRT
std::string name;       // tensor name
int index;              // engine binding index
int size;               // total byte size (product of dims * sizeof(float))
};
// TensorRT runtime state shared between load_onnx() and doInference().
nvinfer1::IExecutionContext* context;
nvinfer1::IRuntime* runtime;
nvinfer1::ICudaEngine* engine;
cudaStream_t stream;
std::vector<LayerInfo> output_layer;   // one entry per network output
int input_size;                        // input tensor byte size
//std::vector<int> m_output_size;
void* buffers[10];                     // device buffers, indexed by binding
int inputIndex;                        // binding index of the input tensor
float m_nms_threshold = 0.4;           // IoU threshold for NMS
// Preset anchor corners (two x1,y1,x2,y2 quadruples) per stride 32/16/8.
// NOTE(review): main() re-declares identical local arrays, so these globals
// are shadowed and unused at runtime.
float data0[8] = { -248,-248,263,263,-120,-120,135,135 };
float data1[8] = { -56,-56,71,71,-24,-24,39,39 };
float data2[8] = { -8,-8,23,23,0,0,15,15 };
// Detection candidate: anchor box, regression offsets, score, landmarks and
// the final decoded box.  operator[] exposes finalbox as (x1, y1, x2, y2):
// note that the decoder stores x2/y2 into the width/height fields.
class Anchor {
public:
    bool operator<(const Anchor &t) const {
        return score < t.score;
    }
    bool operator>(const Anchor &t) const {
        return score > t.score;
    }
    // Mutable access to the decoded box coordinates; valid indices are 0..3.
    float& operator[](int i) {
        // Was `assert(0 <= i && i <= 4)`: i == 4 passed the assert but then
        // fell off the end of the function (undefined behavior).  The valid
        // range is 0..3 and every path now returns.
        assert(0 <= i && i < 4);
        switch (i) {
        case 0:  return finalbox.x;
        case 1:  return finalbox.y;
        case 2:  return finalbox.width;
        default: return finalbox.height; // i == 3
        }
    }
    // Read-only access to the decoded box coordinates; valid indices are 0..3.
    float operator[](int i) const {
        assert(0 <= i && i < 4); // was `i <= 4`, which allowed a no-return path
        switch (i) {
        case 0:  return finalbox.x;
        case 1:  return finalbox.y;
        case 2:  return finalbox.width;
        default: return finalbox.height; // i == 3
        }
    }
    cv::Rect_< float > anchor;      // anchor box as x1,y1,x2,y2
    float reg[4];                   // offset reg
    cv::Point center;               // anchor feat center
    float score;                    // cls score
    std::vector<cv::Point2f> pts;   // pred pts
    cv::Rect_< float > finalbox;    // final box res (width/height hold x2/y2)
};
void nms_cpu(std::vector<Anchor>& boxes, float threshold, std::vector<Anchor>& filterOutBoxes) {
filterOutBoxes.clear();
if (boxes.size() == 0)
return;
std::vector<size_t> idx(boxes.size());
for (unsigned i = 0; i < idx.size(); i++)
{
idx[i] = i;
}
//descending sort
sort(boxes.begin(), boxes.end(), std::greater<Anchor>());
while (idx.size() > 0)
{
int good_idx = idx[0];
filterOutBoxes.push_back(boxes[good_idx]);
std::vector<size_t> tmp = idx;
idx.clear();
for (unsigned i = 1; i < tmp.size(); i++)
{
int tmp_i = tmp[i];
float inter_x1 = std::max(boxes[good_idx][0], boxes[tmp_i][0]);
float inter_y1 = std::max(boxes[good_idx][1], boxes[tmp_i][1]);
float inter_x2 = std::min(boxes[good_idx][2], boxes[tmp_i][2]);
float inter_y2 = std::min(boxes[good_idx][3], boxes[tmp_i][3]);
float w = std::max((inter_x2 - inter_x1 + 1), 0.0F);
float h = std::max((inter_y2 - inter_y1 + 1), 0.0F);
float inter_area = w * h;
float area_1 = (boxes[good_idx][2] - boxes[good_idx][0] + 1) * (boxes[good_idx][3] - boxes[good_idx][1] + 1);
float area_2 = (boxes[tmp_i][2] - boxes[tmp_i][0] + 1) * (boxes[tmp_i][3] - boxes[tmp_i][1] + 1);
float o = inter_area / (area_1 + area_2 - inter_area);
if (o <= threshold)
idx.push_back(tmp_i);
}
}
}
// Lightweight 4-float rectangle stored as (x1, y1, x2, y2) with indexed
// access to the individual coordinates.
class CRect2f {
public:
    CRect2f(float x1, float y1, float x2, float y2) : val{x1, y1, x2, y2} {}

    // Indexed coordinate access; no bounds checking is performed.
    float& operator[](int i) { return val[i]; }
    float operator[](int i) const { return val[i]; }

    float val[4];

    // Debug dump of the four coordinates to stdout.
    void print() {
        printf("rect %f %f %f %f\n", val[0], val[1], val[2], val[3]);
    }
};
// Decodes the raw cls/reg/pts feature maps of one detection stride into
// scored Anchor candidates with decoded boxes and landmarks.
class AnchorGenerator {
public:
// Configure this stride.  `stride` is the feature-map cell size in input
// pixels, `num` the number of anchor shapes per cell, and `data` holds the
// preset anchor corners as two (x1,y1,x2,y2) quadruples.
// NOTE(review): exactly two presets are read from `data` regardless of
// `num` — this assumes num == 2; confirm against the callers.
void Init(int stride, int num, float* data)
{
anchor_stride = stride; // anchor tile stride
preset_anchors.push_back(CRect2f(data[0], data[1], data[2], data[3]));
preset_anchors.push_back(CRect2f(data[4], data[5], data[6], data[7]));
anchor_num = num; // anchor type num
}
// filter anchors and return valid anchors
// cls/reg/pts are CHW-ordered planes over a w x h feature map; `c` is the
// channel count of the landmark (pts) output.  Candidates whose score
// passes m_cls_threshold are decoded and appended to `result`.
// Always returns 0.
int FilterAnchor(float* cls, float* reg, float* pts, int w, int h, int c, std::vector<Anchor>& result)
{
// landmark points per anchor: pts channels = anchor_num * pts_length * 2
int pts_length = 0;
pts_length = c / anchor_num / 2;
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
int id = i * w + j;
for (int a = 0; a < anchor_num; ++a)
{
// score read from channel (anchor_num + a) — presumably the positive
// class, with the first anchor_num channels being the negative class;
// confirm against the model's cls output layout.
float score = cls[(anchor_num + a)*w*h + id];
if (score >= m_cls_threshold) {
// anchor box for this cell: cell origin shifted by the preset corners
CRect2f box(j * anchor_stride + preset_anchors[a][0],
i * anchor_stride + preset_anchors[a][1],
j * anchor_stride + preset_anchors[a][2],
i * anchor_stride + preset_anchors[a][3]);
//printf("%f %f %f %f\n", box[0], box[1], box[2], box[3]);
// regression deltas (dx, dy, dw, dh) for this anchor at this cell
CRect2f delta(reg[(a * 4 + 0)*w*h + id],
reg[(a * 4 + 1)*w*h + id],
reg[(a * 4 + 2)*w*h + id],
reg[(a * 4 + 3)*w*h + id]);
Anchor res;
res.anchor = cv::Rect_< float >(box[0], box[1], box[2], box[3]);
bbox_pred(box, delta, res.finalbox);
//printf("bbox pred\n");
res.score = score;
res.center = cv::Point(j, i);
//printf("center %d %d\n", j, i);
if (1) {
// gather the per-point landmark offsets for this anchor
std::vector<cv::Point2f> pts_delta(pts_length);
for (int p = 0; p < pts_length; ++p) {
pts_delta[p].x = pts[(a*pts_length * 2 + p * 2)*w*h + id];
pts_delta[p].y = pts[(a*pts_length * 2 + p * 2 + 1)*w*h + id];
}
//printf("ready landmark_pred\n");
landmark_pred(box, pts_delta, res.pts);
//printf("landmark_pred\n");
}
result.push_back(res);
}
}
}
}
return 0;
}
private:
// Apply (dx, dy, dw, dh) regression deltas to `anchor` and write the decoded
// box into `box`.  NOTE: the result is built as cv::Rect_(x1, y1, x2, y2),
// so the width/height fields actually hold x2/y2 — downstream code (NMS,
// drawing) relies on this convention.
void bbox_pred(const CRect2f& anchor, const CRect2f& delta, cv::Rect_< float >& box)
{
float w = anchor[2] - anchor[0] + 1;
float h = anchor[3] - anchor[1] + 1;
float x_ctr = anchor[0] + 0.5 * (w - 1);
float y_ctr = anchor[1] + 0.5 * (h - 1);
float dx = delta[0];
float dy = delta[1];
float dw = delta[2];
float dh = delta[3];
float pred_ctr_x = dx * w + x_ctr;
float pred_ctr_y = dy * h + y_ctr;
float pred_w = std::exp(dw) * w;
float pred_h = std::exp(dh) * h;
box = cv::Rect_< float >(pred_ctr_x - 0.5 * (pred_w - 1.0),
pred_ctr_y - 0.5 * (pred_h - 1.0),
pred_ctr_x + 0.5 * (pred_w - 1.0),
pred_ctr_y + 0.5 * (pred_h - 1.0));
}
// Translate normalized landmark offsets into absolute coordinates relative
// to the anchor's center and size.
void landmark_pred(const CRect2f anchor, const std::vector<cv::Point2f>& delta, std::vector<cv::Point2f>& pts)
{
float w = anchor[2] - anchor[0] + 1;
float h = anchor[3] - anchor[1] + 1;
float x_ctr = anchor[0] + 0.5 * (w - 1);
float y_ctr = anchor[1] + 0.5 * (h - 1);
pts.resize(delta.size());
for (int i = 0; i < delta.size(); ++i) {
pts[i].x = delta[i].x*w + x_ctr;
pts[i].y = delta[i].y*h + y_ctr;
}
}
int anchor_stride; // anchor tile stride
std::vector<CRect2f> preset_anchors;
int anchor_num; // anchor type num
float m_cls_threshold = 0.8; // minimum score for a candidate to be kept
};
// Per-stride output pointers (into doInference's host buffers) and the
// three anchor generators for strides 32 / 16 / 8.
float* cls[3];
float* reg[3];
float* pts[3];
AnchorGenerator ac[3];
// Collect the extent of every axis of a TensorRT Dims into a vector<int>.
std::vector<int> get_dim_size(Dims dim)
{
    std::vector<int> extents;
    extents.reserve(dim.nbDims);
    for (int axis = 0; axis < dim.nbDims; ++axis)
        extents.push_back(dim.d[axis]);
    return extents;
}
int total_size(std::vector<int> dim)
{
int size = 1 * sizeof(float);
for (auto d : dim)
size *= d;
return size;
}
// Minimal TensorRT logger: prints every message at or above the configured
// severity to stderr, prefixed with the severity name.
class Logger : public nvinfer1::ILogger
{
public:
    Logger(Severity severity = Severity::kINFO)
        : reportableSeverity(severity)
    {
    }

    // Print `msg` unless it is less severe than the threshold.
    void log(Severity severity, const char* msg) override
    {
        // Severity enum values grow as importance drops, so "greater than
        // the threshold" means "too verbose to report".
        if (severity > reportableSeverity)
            return;

        const char* prefix;
        if (severity == Severity::kINTERNAL_ERROR)
            prefix = "INTERNAL_ERROR: ";
        else if (severity == Severity::kERROR)
            prefix = "ERROR: ";
        else if (severity == Severity::kWARNING)
            prefix = "WARNING: ";
        else if (severity == Severity::kINFO)
            prefix = "INFO: ";
        else
            prefix = "UNKNOWN: ";
        std::cerr << prefix << msg << std::endl;
    }

    Severity reportableSeverity;
};
void load_onnx(std::string model)
{
Logger gLogger;
IBuilder* builder = createInferBuilder(gLogger);
// parse the onnx model to populate the network, then set the outputs
INetworkDefinition* network = builder->createNetwork();
auto parser = nvonnxparser::createParser(*network, gLogger);
std::ifstream onnx_file(model.c_str(), std::ios::binary | std::ios::ate);
std::streamsize file_size = onnx_file.tellg();
onnx_file.seekg(0, std::ios::beg);
std::vector<char> onnx_buf(file_size);
onnx_file.read(onnx_buf.data(), onnx_buf.size());
if (!parser->parse(onnx_buf.data(), onnx_buf.size()))
{
int nerror = parser->getNbErrors();
for (int i = 0; i < nerror; ++i)
{
nvonnxparser::IParserError const* error = parser->getError(i);
std::cerr << "ERROR: "
<< error->file() << ":" << error->line()
<< " In function " << error->func() << ":\n"
<< "[" << static_cast<int>(error->code()) << "] " << error->desc()
<< std::endl;
}
}
ITensor* tensor_input = network->getInput(0);
Dims dim_input = tensor_input->getDimensions();
input_size = total_size(get_dim_size(dim_input));
int outnode_size = network->getNbOutputs();
//m_output_size.resize(m_outnode_size);
for (int i = 0; i < outnode_size; ++i)
{
LayerInfo l;
ITensor* tensor_output = network->getOutput(i);
l.name = tensor_output->getName();
Dims dim_output = tensor_output->getDimensions();
l.dim = get_dim_size(dim_output);
l.size = total_size(l.dim);
output_layer.emplace_back(l);
}
int num_layer = network->getNbLayers();
builder->setMaxBatchSize(1);
builder->setMaxWorkspaceSize(1 << 20);
//builder->allowGPUFallback(true);
//builder->setDebugSync(true);
engine = builder->buildCudaEngine(*network);
runtime = createInferRuntime(gLogger);
int gUseDLACore = -1;
if (gUseDLACore >= 0)
{
// m_runtime->setDLACore(gUseDLACore);
}
context = engine->createExecutionContext();
for (int b = 0; b < engine->getNbBindings(); ++b)
{
if (engine->bindingIsInput(b))
inputIndex = b;
else
output_layer[b - 1].index = b;
}
cudaStreamCreate(&stream);
cudaMalloc(&buffers[inputIndex], input_size); // data
for (int i = 0; i < output_layer.size(); ++i)
cudaMalloc(&buffers[output_layer[i].index], output_layer[i].size); // bbox_pred
network->destroy();
builder->destroy();
std::cout << "RT init done!" << std::endl;
}
// Run the detector on one BGR frame: letterbox it into a 640x640 RGB buffer,
// run TensorRT inference, decode anchors at the three strides, apply NMS,
// and rescale the surviving boxes/landmarks back to the original image.
void doInference(cv::Mat img, std::vector<Anchor>& faces)
{
    cv::Mat image = img.clone();
    cv::Mat image_temp;
    cv::cvtColor(image, image, CV_BGR2RGB);
    // Zero-fill so the letterbox padding is black, not uninitialized memory.
    cv::Mat image_resize = cv::Mat::zeros(cv::Size(640, 640), CV_8UC3);
    float resize_scale = 1;
    // BUG FIX: the scale must be computed in floating point.  The original
    // `640 / image.cols` is integer division and yields 0 for any frame
    // larger than 640, which made cv::resize abort with
    // "dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0)".
    if (image.cols >= image.rows && image.cols > 640)
    {
        resize_scale = 640.0f / image.cols;
        cv::resize(image, image_temp, cv::Size(0, 0), resize_scale, resize_scale);
    }
    else if (image.cols < image.rows && image.rows > 640)
    {
        resize_scale = 640.0f / image.rows;
        cv::resize(image, image_temp, cv::Size(0, 0), resize_scale, resize_scale);
    }
    else
    {
        image_temp = image.clone();
    }
    // Paste the (possibly shrunken) frame into the top-left corner.
    cv::Mat imageROI0(image_resize(cv::Rect(0, 0, image_temp.cols, image_temp.rows)));
    image_temp.copyTo(imageROI0);

    // Convert HWC uint8 to CHW float (no mean subtraction or scaling).
    int total_size = image_resize.rows * image_resize.cols * image_resize.channels();
    std::vector<float> input;
    input.resize(total_size);
    for (int k = 0; k < 3; k++)
        for (int i = 0; i < image_resize.rows; i++)
            for (int j = 0; j < image_resize.cols; j++)
            {
                input[i * image_resize.cols + j + k * image_resize.cols * image_resize.rows] =
                    (float)image_resize.data[(i * image_resize.cols + j) * 3 + k];
            }

    std::vector<std::vector<float>> output;
    output.resize(output_layer.size());
    for (size_t i = 0; i < output_layer.size(); ++i)
        output[i].resize(output_layer[i].size / sizeof(float));

    cudaMemcpyAsync(buffers[inputIndex], input.data(), input_size, cudaMemcpyHostToDevice, stream);
    context->enqueue(1, buffers, stream, nullptr);
    for (size_t i = 0; i < output_layer.size(); ++i)
        cudaMemcpyAsync(output[i].data(), buffers[output_layer[i].index], output_layer[i].size, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    // Outputs come in groups of three per stride: reg, pts, cls.
    for (int i = 0; i < 3; ++i)
    {
        reg[i] = output[i * 3 + 0].data();
        pts[i] = output[i * 3 + 1].data();
        cls[i] = output[i * 3 + 2].data();
    }

    std::vector<Anchor> proposals;
    for (int i = 0; i < 3; i++)
    {
        // Passes dim[2], dim[1], dim[0] as W, H, C — assumes the output dims
        // are ordered {C, H, W}; confirm against the engine's bindings.
        ac[i].FilterAnchor(cls[i], reg[i], pts[i], output_layer[i * 3 + 1].dim[2],
            output_layer[i * 3 + 1].dim[1], output_layer[i * 3 + 1].dim[0], proposals);
    }

    faces.clear();
    nms_cpu(proposals, m_nms_threshold, faces);
    std::sort(faces.begin(), faces.end(), [&](Anchor a, Anchor b)
    {
        return a.finalbox.area() > b.finalbox.area();
    });

    // Undo the letterbox scaling.  finalbox.width/height actually hold x2/y2
    // here, so dividing them by the scale is correct.
    for (auto &face : faces)
    {
        face.finalbox.width /= resize_scale;
        face.finalbox.x /= resize_scale;
        face.finalbox.height /= resize_scale;
        face.finalbox.y /= resize_scale;
        // Iterate over the landmarks that actually exist instead of
        // hard-coding 5 (the original indexed pts[0..4] unconditionally).
        for (size_t i = 0; i < face.pts.size(); ++i)
        {
            face.pts[i].x /= resize_scale;
            face.pts[i].y /= resize_scale;
        }
    }
}
int main(int argc, char** argv)
{
float data0[8] = { -248,-248,263,263,-120,-120,135,135 };
float data1[8] = { -56,-56,71,71,-24,-24,39,39 };
float data2[8] = { -8,-8,23,23,0,0,15,15 };
ac[0].Init(32, 2, data0);
ac[1].Init(16, 2, data1);
ac[2].Init(8, 2, data2);
// create a TensorRT model from the onnx model and serialize it to a stream
IHostMemory* trtModelStream{nullptr};
load_onnx("./retina.onnx");
std::vector<Anchor> faces;
cv::VideoCapture cap(0);
if(!cap.isOpened()){
std::cout << "Error opening video stream or file" << std::endl;
return -1;
}
cv::Mat frame;
while(1){
cap >> frame;
if (frame.empty())
break;
doInference(frame, faces);
std::cout<<"No of faces::"<<faces.size()<<std::endl;
for (int i = 0; i < faces.size(); i++)
{
cv::rectangle(frame, cv::Point((int)faces[i].finalbox.x, (int)faces[i].finalbox.y), cv::Point((int)faces[i].finalbox.width, (int)faces[i].finalbox.height), cv::Scalar(0, 255, 255), 2, 8, 0);
for (int j = 0; j < faces[i].pts.size(); ++j) {
cv::circle(frame, cv::Point((int)faces[i].pts[j].x, (int)faces[i].pts[j].y), 1, cv::Scalar(225, 0, 225), 2, 8);
}
}
cv::imshow( "Detection", frame );
cv::waitKey(1);
}
cap.release();
cv::destroyAllWindows();
return 0;
}
Error:
INFO: Total Activation Memory: 13541376
INFO: Data initialization and engine generation completed in 0.013506 seconds.
RT init done!
OpenCV Error: Assertion failed (dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0)) in resize, file /opt/opencv/modules/imgproc/src/resize.cpp, line 4045
terminate called after throwing an instance of 'cv::Exception'
what(): /opt/opencv/modules/imgproc/src/resize.cpp:4045: error: (-215) dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) in function resize
Aborted (core dumped)
Firstly, I build the src demo:
cd src/ && mkdir build && cd build
cmake ..
make -j3
some error occurred:
Then, I changed the src code at line 320:
nvonnxparser::IParser* parser = nvonnxparser::createParser(network, gLogger);
To:
nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
compile completed.
But when I run the main file, no face detected.
My operating system is a Jetson Nano. I want to know how to solve this problem — can you give me some advice? Thanks a lot.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.