First, thank you for doing this. It was immensely helpful to see how to use the texture rendering chain to parse the outputs of UltraFace. I have a version of your work running, but I have modified it to work at runtime on a HoloLens 2, using Microsoft's MediaCapture APIs to capture the RGB camera frames from the front of the HL2. From these frames, I create a SoftwareBitmap and convert it to a Unity Texture for use as an input into your rendering chain. When I do this, the model returns 0 faces, for reasons I cannot understand. Can you think of a way around this?
// ---- Preprocessing: convert/resize the camera texture into the NN input buffer ----
var pre = _resources.preprocess;
pre.SetInts("ImageSize", _config.InputWidth, _config.InputHeight);
pre.SetTexture(0, "Input", (Texture)imageMediaTexture);
pre.SetBuffer(0, "Output", _buffers.preprocess);
pre.DispatchThreads(0, _config.InputWidth, _config.InputHeight, 1);

// ---- NN worker invocation ----
using (var t = new Tensor(_config.InputShape, _buffers.preprocess))
    _worker.Execute(t);

// ---- NN output retrieval ----
_worker.CopyOutput("scores", _buffers.scores);
_worker.CopyOutput("boxes", _buffers.boxes);

// ---- Counter buffer reset (append-buffer counters must be zeroed each frame) ----
_buffers.post2.SetCounterValue(0);
_buffers.counter.SetCounterValue(0);

// ---- First stage postprocessing: detection data aggregation (score threshold) ----
var post1 = _resources.postprocess1;
post1.SetTexture(0, "Scores", _buffers.scores);
post1.SetTexture(0, "Boxes", _buffers.boxes);
post1.SetDimensions("InputSize", _buffers.boxes);
post1.SetFloat("Threshold", .5f); // NOTE(review): hard-coded; consider exposing via _config
post1.SetBuffer(0, "Output", _buffers.post1);
post1.SetBuffer(0, "OutputCount", _buffers.counter);
post1.DispatchThreadPerPixel(0, _buffers.boxes);

// ---- Second stage postprocessing: overlap removal (NMS) ----
var post2 = _resources.postprocess2;
post2.SetFloat("Threshold", 0.5f);
post2.SetBuffer(0, "Input", _buffers.post1);
post2.SetBuffer(0, "InputCount", _buffers.counter);
post2.SetBuffer(0, "Output", _buffers.post2);
post2.Dispatch(0, 1, 1, 1);

// Copy the append-buffer count so the CPU side knows how many detections survived NMS.
ComputeBuffer.CopyCount(_buffers.post2, _buffers.countRead, 0);

// ---- CPU-side conversion of GPU detections into DetectedFace records ----
// NOTE(review): _readCache.Cached is consumed immediately after CopyCount; if the
// GPU readback behind it is asynchronous, this may still hold the previous frame's
// data (or be empty on the first frame) — verify the readback has completed.
var tempFaces = new List<DetectedFace>();
foreach (var detection in _readCache.Cached)
{
    // BUG FIX: Height previously used the x-extent (x2 - x1), so every box was
    // forced square with the wrong vertical size; use the y-extent instead.
    // BUG FIX: X/Y previously held half the box extent ((x2 - x1) / 2), which
    // anchors every box near the image origin regardless of where the face is;
    // use the detection's top-left corner instead.
    // NOTE(review): if this Rect's X/Y are center coordinates, use
    // (x1 + x2) / 2 and (y1 + y2) / 2 instead — confirm the Rect semantics.
    // NOTE(review): the uint casts assume pixel-space coordinates; if the model
    // outputs normalized [0,1] values, these truncate to 0 — confirm and scale.
    var bbox = new Rect
    {
        X = (uint)detection.x1,
        Y = (uint)detection.y1,
        Width = (uint)(detection.x2 - detection.x1),
        Height = (uint)(detection.y2 - detection.y1),
    };
    tempFaces.Add(new DetectedFace { bbox = bbox, confidence = detection.score });
}
_readCache.Invalidate();

return new DetectedFaces
{
    originalImageBitmap = returnFrame.bitmap,
    Faces = tempFaces.ToArray(),
};
}