
retinaface_tensorrt's People

Contributors

azhe198827


retinaface_tensorrt's Issues

About caffe2onnx

I used your wonderful caffe2onnx tool to convert my own RetinaFace caffemodel, and it was very useful for me. However, something goes wrong with the Crop layer and the Upsample layer. Could you please tell me how to deal with these two layers?

Did you try mxnet2onnx?

Hello, thank you for uploading such good code. Have you tried converting the MXNet model directly to an ONNX model?

Issues on Video

I changed this line:

auto parser = nvonnxparser::createParser(*network, gLogger);

The full source I am using follows:

#include <algorithm>
#include <assert.h>
#include <cmath>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <sys/stat.h>
#include "sys/time.h"
#include "opencv2/opencv.hpp"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvOnnxParserRuntime.h"
#include "NvOnnxConfig.h"
#include <time.h>
using namespace nvinfer1;

static const int INPUT_H = 28;
static const int INPUT_W = 28;
static const int OUTPUT_SIZE = 10;
static int gUseDLACore{-1};

struct LayerInfo
{
	std::vector<int> dim;
	std::string name;
	int index;
	int size;
};
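// Global TensorRT objects and device buffers shared between load_onnx() and doInference()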
nvinfer1::IExecutionContext* context;
nvinfer1::IRuntime* runtime;
nvinfer1::ICudaEngine* engine;
cudaStream_t stream;
std::vector<LayerInfo> output_layer;
int input_size;
//std::vector<int> m_output_size;
void* buffers[10];
int inputIndex;


float m_nms_threshold = 0.4;
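// RetinaFace anchor presets (two x1,y1,x2,y2 boxes each) for strides 32, 16 and 8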
float data0[8] = { -248,-248,263,263,-120,-120,135,135 };
float data1[8] = { -56,-56,71,71,-24,-24,39,39 };
float data2[8] = { -8,-8,23,23,0,0,15,15 };




class Anchor {
public:
	bool operator<(const Anchor &t) const {
		return score < t.score;
	}

	bool operator>(const Anchor &t) const {
		return score > t.score;
	}

	float& operator[](int i) {
		assert(0 <= i && i < 4);

		if (i == 0)
			return finalbox.x;
		if (i == 1)
			return finalbox.y;
		if (i == 2)
			return finalbox.width;
		return finalbox.height;
	}

	float operator[](int i) const {
		assert(0 <= i && i < 4);

		if (i == 0)
			return finalbox.x;
		if (i == 1)
			return finalbox.y;
		if (i == 2)
			return finalbox.width;
		return finalbox.height;
	}

	cv::Rect_< float > anchor; // x1,y1,x2,y2
	float reg[4]; // offset reg
	cv::Point center; // anchor feat center
	float score; // cls score
	std::vector<cv::Point2f> pts; // pred pts

	cv::Rect_< float > finalbox; // final box res
};

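// Greedy CPU non-maximum suppression: repeatedly keep the highest-scoring box and drop boxes whose IoU with it exceeds the threshold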
void nms_cpu(std::vector<Anchor>& boxes, float threshold, std::vector<Anchor>& filterOutBoxes) {
	filterOutBoxes.clear();
	if (boxes.size() == 0)
		return;
	std::vector<size_t> idx(boxes.size());

	for (unsigned i = 0; i < idx.size(); i++)
	{
		idx[i] = i;
	}

	//descending sort
	sort(boxes.begin(), boxes.end(), std::greater<Anchor>());

	while (idx.size() > 0)
	{
		int good_idx = idx[0];
		filterOutBoxes.push_back(boxes[good_idx]);

		std::vector<size_t> tmp = idx;
		idx.clear();
		for (unsigned i = 1; i < tmp.size(); i++)
		{
			int tmp_i = tmp[i];
			float inter_x1 = std::max(boxes[good_idx][0], boxes[tmp_i][0]);
			float inter_y1 = std::max(boxes[good_idx][1], boxes[tmp_i][1]);
			float inter_x2 = std::min(boxes[good_idx][2], boxes[tmp_i][2]);
			float inter_y2 = std::min(boxes[good_idx][3], boxes[tmp_i][3]);

			float w = std::max((inter_x2 - inter_x1 + 1), 0.0F);
			float h = std::max((inter_y2 - inter_y1 + 1), 0.0F);

			float inter_area = w * h;
			float area_1 = (boxes[good_idx][2] - boxes[good_idx][0] + 1) * (boxes[good_idx][3] - boxes[good_idx][1] + 1);
			float area_2 = (boxes[tmp_i][2] - boxes[tmp_i][0] + 1) * (boxes[tmp_i][3] - boxes[tmp_i][1] + 1);
			float o = inter_area / (area_1 + area_2 - inter_area);
			if (o <= threshold)
				idx.push_back(tmp_i);
		}
	}
}

class CRect2f {
public:
	CRect2f(float x1, float y1, float x2, float y2) {
		val[0] = x1;
		val[1] = y1;
		val[2] = x2;
		val[3] = y2;
	}

	float& operator[](int i) {
		return val[i];
	}

	float operator[](int i) const {
		return val[i];
	}

	float val[4];

	void print() {
		printf("rect %f %f %f %f\n", val[0], val[1], val[2], val[3]);
	}
};

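// Decodes the raw network outputs for one stride into scored boxes and landmarks using the preset anchors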
class AnchorGenerator {
public:
	void Init(int stride, int num, float* data)
	{
		anchor_stride = stride; // anchor tile stride
		preset_anchors.push_back(CRect2f(data[0], data[1], data[2], data[3]));
		preset_anchors.push_back(CRect2f(data[4], data[5], data[6], data[7]));
		anchor_num = num; // anchor type num
	}
	// filter anchors and return valid anchors
	int FilterAnchor(float* cls, float* reg, float* pts, int w, int h, int c, std::vector<Anchor>& result)
	{
		int pts_length = 0;

		pts_length = c / anchor_num / 2;

		for (int i = 0; i < h; ++i) {
			for (int j = 0; j < w; ++j) {
				int id = i * w + j;
				for (int a = 0; a < anchor_num; ++a)
				{
					float score = cls[(anchor_num + a)*w*h + id];
					if (score >= m_cls_threshold) {
						CRect2f box(j * anchor_stride + preset_anchors[a][0],
							i * anchor_stride + preset_anchors[a][1],
							j * anchor_stride + preset_anchors[a][2],
							i * anchor_stride + preset_anchors[a][3]);
						//printf("%f %f %f %f\n", box[0], box[1], box[2], box[3]);
						CRect2f delta(reg[(a * 4 + 0)*w*h + id],
							reg[(a * 4 + 1)*w*h + id],
							reg[(a * 4 + 2)*w*h + id],
							reg[(a * 4 + 3)*w*h + id]);

						Anchor res;
						res.anchor = cv::Rect_< float >(box[0], box[1], box[2], box[3]);
						bbox_pred(box, delta, res.finalbox);
						//printf("bbox pred\n");
						res.score = score;
						res.center = cv::Point(j, i);

						//printf("center %d %d\n", j, i);

						if (1) {
							std::vector<cv::Point2f> pts_delta(pts_length);
							for (int p = 0; p < pts_length; ++p) {
								pts_delta[p].x = pts[(a*pts_length * 2 + p * 2)*w*h + id];
								pts_delta[p].y = pts[(a*pts_length * 2 + p * 2 + 1)*w*h + id];
							}
							//printf("ready landmark_pred\n");
							landmark_pred(box, pts_delta, res.pts);
							//printf("landmark_pred\n");
						}
						result.push_back(res);
					}
				}
			}
		}
		return 0;
	}

private:
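	// Apply Faster R-CNN style regression deltas (dx, dy, dw, dh) to an anchor box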
	void bbox_pred(const CRect2f& anchor, const CRect2f& delta, cv::Rect_< float >& box)
	{
		float w = anchor[2] - anchor[0] + 1;
		float h = anchor[3] - anchor[1] + 1;
		float x_ctr = anchor[0] + 0.5 * (w - 1);
		float y_ctr = anchor[1] + 0.5 * (h - 1);

		float dx = delta[0];
		float dy = delta[1];
		float dw = delta[2];
		float dh = delta[3];

		float pred_ctr_x = dx * w + x_ctr;
		float pred_ctr_y = dy * h + y_ctr;
		float pred_w = std::exp(dw) * w;
		float pred_h = std::exp(dh) * h;

		box = cv::Rect_< float >(pred_ctr_x - 0.5 * (pred_w - 1.0),
			pred_ctr_y - 0.5 * (pred_h - 1.0),
			pred_ctr_x + 0.5 * (pred_w - 1.0),
			pred_ctr_y + 0.5 * (pred_h - 1.0));
	}

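	// Decode landmark offsets relative to the anchor centre and size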
	void landmark_pred(const CRect2f anchor, const std::vector<cv::Point2f>& delta, std::vector<cv::Point2f>& pts)
	{
		float w = anchor[2] - anchor[0] + 1;
		float h = anchor[3] - anchor[1] + 1;
		float x_ctr = anchor[0] + 0.5 * (w - 1);
		float y_ctr = anchor[1] + 0.5 * (h - 1);

		pts.resize(delta.size());
		for (int i = 0; i < delta.size(); ++i) {
			pts[i].x = delta[i].x*w + x_ctr;
			pts[i].y = delta[i].y*h + y_ctr;
		}
	}

	int anchor_stride; // anchor tile stride
	std::vector<CRect2f> preset_anchors;
	int anchor_num; // anchor type num
	float m_cls_threshold = 0.8;
};



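// Per-stride output pointers (cls scores, box regressions, landmarks) and one anchor generator per stride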
float* cls[3];
float* reg[3];
float* pts[3];
AnchorGenerator ac[3];
std::vector<int> get_dim_size(Dims dim)
{
	std::vector<int> size;
	for (int i = 0; i < dim.nbDims; ++i)
		size.emplace_back(dim.d[i]);
	return size;
}

int total_size(std::vector<int> dim)
{
	int size = 1 * sizeof(float);
	for (auto d : dim)
		size *= d;
	return size;
}
class Logger : public nvinfer1::ILogger
{
public:
	Logger(Severity severity = Severity::kINFO)
		: reportableSeverity(severity)
	{
	}

	void log(Severity severity, const char* msg) override
	{
		// suppress messages with severity enum value greater than the reportable
		if (severity > reportableSeverity)
			return;

		switch (severity)
		{
		case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
		case Severity::kERROR: std::cerr << "ERROR: "; break;
		case Severity::kWARNING: std::cerr << "WARNING: "; break;
		case Severity::kINFO: std::cerr << "INFO: "; break;
		default: std::cerr << "UNKNOWN: "; break;
		}
		std::cerr << msg << std::endl;
	}

	Severity reportableSeverity;
};

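// Parse the ONNX model, build a TensorRT engine, query the input/output tensor sizes, and allocate device buffers for all bindings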
void load_onnx(std::string model)
{
	Logger gLogger;
	IBuilder* builder = createInferBuilder(gLogger);
	// parse the onnx model to populate the network, then set the outputs
	INetworkDefinition* network = builder->createNetwork();
	auto parser = nvonnxparser::createParser(*network, gLogger);

	std::ifstream onnx_file(model.c_str(), std::ios::binary | std::ios::ate);
	std::streamsize file_size = onnx_file.tellg();
	onnx_file.seekg(0, std::ios::beg);
	std::vector<char> onnx_buf(file_size);
	onnx_file.read(onnx_buf.data(), onnx_buf.size());
	if (!parser->parse(onnx_buf.data(), onnx_buf.size()))
	{
		int nerror = parser->getNbErrors();
		for (int i = 0; i < nerror; ++i)
		{
			nvonnxparser::IParserError const* error = parser->getError(i);
			std::cerr << "ERROR: "
				<< error->file() << ":" << error->line()
				<< " In function " << error->func() << ":\n"
				<< "[" << static_cast<int>(error->code()) << "] " << error->desc()
				<< std::endl;
		}
	}
	ITensor* tensor_input = network->getInput(0);
	Dims dim_input = tensor_input->getDimensions();
	input_size = total_size(get_dim_size(dim_input));
	int outnode_size = network->getNbOutputs();
	//m_output_size.resize(m_outnode_size);
	for (int i = 0; i < outnode_size; ++i)
	{
		LayerInfo l;
		ITensor* tensor_output = network->getOutput(i);
		l.name = tensor_output->getName();
		Dims dim_output = tensor_output->getDimensions();
		l.dim = get_dim_size(dim_output);
		l.size = total_size(l.dim);
		output_layer.emplace_back(l);
	}
	int num_layer = network->getNbLayers();
	builder->setMaxBatchSize(1);
	builder->setMaxWorkspaceSize(1 << 20);
	//builder->allowGPUFallback(true);
	//builder->setDebugSync(true);
	engine = builder->buildCudaEngine(*network);
	runtime = createInferRuntime(gLogger);
	int gUseDLACore = -1;
	if (gUseDLACore >= 0)
	{
		//	m_runtime->setDLACore(gUseDLACore);
	}
	context = engine->createExecutionContext();
	for (int b = 0; b < engine->getNbBindings(); ++b)
	{
		if (engine->bindingIsInput(b))
			inputIndex = b;
		else
			output_layer[b - 1].index = b;
	}

	cudaStreamCreate(&stream);
	cudaMalloc(&buffers[inputIndex], input_size);   // data
	for (int i = 0; i < output_layer.size(); ++i)
		cudaMalloc(&buffers[output_layer[i].index], output_layer[i].size); // bbox_pred
	network->destroy();
	builder->destroy();

	std::cout << "RT init done!" << std::endl;
}




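// Run one frame through the engine: scale so the longer side fits 640, paste into a 640x640 canvas, convert to CHW float, run inference, then decode and NMS the outputs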
void doInference(cv::Mat img, std::vector<Anchor>& faces)
{
	cv::Mat image = img.clone();
	cv::Mat image_temp;
	cv::cvtColor(image, image, CV_BGR2RGB);
	cv::Mat image_resize(cv::Size(640, 640), CV_8UC3);
	float resize_scale = 1;
	if (image.cols >= image.rows&&image.cols > 640)
	{
		resize_scale = 640 / image.cols;
		cv::resize(image, image_temp, cv::Size(0, 0), resize_scale, resize_scale);
	}
	else if (image.cols < image.rows&&image.rows>640)
	{
		resize_scale = 640 / image.rows;
		cv::resize(image, image_temp, cv::Size(0, 0), resize_scale, resize_scale);
	}
	else
	{
		image_temp = image.clone();
	}
	cv::Mat imageROI0(image_resize(cv::Rect(0, 0, image_temp.cols, image_temp.rows)));
	image_temp.copyTo(imageROI0);
	int total_size = image_resize.rows*image_resize.cols*image_resize.channels();
	std::vector<float> input;
	input.resize(total_size);
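	// HWC uint8 -> CHW float conversion (no mean subtraction or scaling is applied)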
	for (int k = 0; k < 3; k++)
		for (int i = 0; i < image_resize.rows; i++)
			for (int j = 0; j < image_resize.cols; j++)
			{
				input[i * image_resize.cols + j + k * image_resize.cols * image_resize.rows] =
					(float)image_resize.data[(i * image_resize.cols + j) * 3 + k];
			}
	std::vector<std::vector<float>> output;
	output.resize(output_layer.size());
	for (int i = 0; i < output_layer.size(); ++i)
		output[i].resize(output_layer[i].size / sizeof(float));
	cudaMemcpyAsync(buffers[inputIndex], input.data(),  input_size, cudaMemcpyHostToDevice, stream);
	context->enqueue(1, buffers, stream, nullptr);
	for (int i = 0; i < output_layer.size(); ++i)
		cudaMemcpyAsync(output[i].data(), buffers[output_layer[i].index], output_layer[i].size, cudaMemcpyDeviceToHost, stream);
	cudaStreamSynchronize(stream);




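	// Assumes the engine's output bindings come in per-stride triplets ordered (bbox reg, landmarks, cls scores)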
	for (int i = 0; i < 3; ++i)
	{
		reg[i] = output[i * 3 + 0].data();
		pts[i] = output[i * 3 + 1].data();
		cls[i] = output[i * 3 + 2].data();
	}
	std::vector<Anchor> proposals;
	for (int i = 0; i < 3; i++)
	{
		ac[i].FilterAnchor(cls[i], reg[i], pts[i], output_layer[i * 3 + 1].dim[2],
			output_layer[i * 3 + 1].dim[1], output_layer[i * 3 + 1].dim[0], proposals);
	}
	faces.clear();
	nms_cpu(proposals, m_nms_threshold, faces);
	std::sort(faces.begin(), faces.end(), [&](Anchor a, Anchor b)
	{
		return a.finalbox.area() > b.finalbox.area();
	});
	for (auto &face : faces)
	{
		face.finalbox.width /= resize_scale;
		face.finalbox.x /= resize_scale;
		face.finalbox.height /= resize_scale;
		face.finalbox.y /= resize_scale;
		for (int i = 0; i < 5; ++i)
		{
			face.pts[i].x /= resize_scale;
			face.pts[i].y /= resize_scale;
		}
	}

}


int main(int argc, char** argv)
{

	float data0[8] = { -248,-248,263,263,-120,-120,135,135 };
	float data1[8] = { -56,-56,71,71,-24,-24,39,39 };
	float data2[8] = { -8,-8,23,23,0,0,15,15 };
	ac[0].Init(32, 2, data0);
	ac[1].Init(16, 2, data1);
	ac[2].Init(8, 2, data2);
    // create a TensorRT model from the onnx model and serialize it to a stream
    IHostMemory* trtModelStream{nullptr};
    load_onnx("./retina.onnx");
	std::vector<Anchor> faces;

    cv::VideoCapture cap(0);
      if(!cap.isOpened()){
        std::cout << "Error opening video stream or file" << std::endl;
        return -1;
      }

      cv::Mat frame;

      while(1){
        cap >> frame;
        if (frame.empty())
          break;

        doInference(frame, faces);
        std::cout<<"No of faces::"<<faces.size()<<std::endl;

        for (int i = 0; i < faces.size(); i++)
        {
            cv::rectangle(frame, cv::Point((int)faces[i].finalbox.x, (int)faces[i].finalbox.y), cv::Point((int)faces[i].finalbox.width, (int)faces[i].finalbox.height), cv::Scalar(0, 255, 255), 2, 8, 0);
            for (int j = 0; j < faces[i].pts.size(); ++j) {
                cv::circle(frame, cv::Point((int)faces[i].pts[j].x, (int)faces[i].pts[j].y), 1, cv::Scalar(225, 0, 225), 2, 8);
            }
        }

        cv::imshow( "Detection", frame );
        cv::waitKey(1);
      }
  cap.release();
  cv::destroyAllWindows();

  return 0;
}

Error:
INFO: Total Activation Memory: 13541376
INFO: Data initialization and engine generation completed in 0.013506 seconds.
RT init done!
OpenCV Error: Assertion failed (dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0)) in resize, file /opt/opencv/modules/imgproc/src/resize.cpp, line 4045
terminate called after throwing an instance of 'cv::Exception'
what(): /opt/opencv/modules/imgproc/src/resize.cpp:4045: error: (-215) dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) in function resize

Aborted (core dumped)
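This assertion almost certainly comes from the scale computation in doInference(): 640 / image.cols and 640 / image.rows are integer divisions, so for any frame larger than 640 pixels the result truncates to 0 and cv::resize is called with zero scale factors (exactly the dsize.area() / inv_scale assertion above). A minimal sketch of the fix, using float division:

	if (image.cols >= image.rows && image.cols > 640)
	{
		resize_scale = 640.0f / image.cols;   // float division; 640 / image.cols truncated to 0
		cv::resize(image, image_temp, cv::Size(0, 0), resize_scale, resize_scale);
	}
	else if (image.cols < image.rows && image.rows > 640)
	{
		resize_scale = 640.0f / image.rows;
		cv::resize(image, image_temp, cv::Size(0, 0), resize_scale, resize_scale);
	}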

No face detected?

First, I built the src demo:

cd src/ && mkdir build && cd build
cmake ..
make -j3
Some errors occurred:
[screenshot]
Then I changed the source code at line 320 from:
nvonnxparser::IParser* parser = nvonnxparser::createParser(network, gLogger);
to:
nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
and compilation succeeded.
[screenshot]
But when I run the main program, no face is detected.
[screenshot]
My platform is a Jetson Nano. How can I solve this problem? Can you give me some advice? Thanks a lot.
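One thing worth checking (this is an assumption, since the actual binding order is not shown in the issue): doInference() reads the outputs as per-stride triplets in the order (bbox reg, landmarks, cls scores). If the ONNX export orders its outputs differently, FilterAnchor reads garbage scores and nothing passes the 0.8 threshold, so no faces are reported. A short debugging sketch that prints the binding names and shapes after the engine is built, so the assumed grouping can be verified:

	// Hypothetical debugging snippet: list binding names/shapes to verify the
	// (bbox reg, landmarks, cls scores) grouping assumed in doInference()
	for (int b = 0; b < engine->getNbBindings(); ++b)
	{
		Dims d = engine->getBindingDimensions(b);
		std::cout << (engine->bindingIsInput(b) ? "input  " : "output ")
			<< b << ": " << engine->getBindingName(b) << " [";
		for (int i = 0; i < d.nbDims; ++i)
			std::cout << d.d[i] << (i + 1 < d.nbDims ? "x" : "");
		std::cout << "]" << std::endl;
	}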
