#!/usr/bin/python
import os.path
import math
import sys
import timeit
import xdnn, xdnn_io
import numpy as np
import types
# Images classified on successive iterations, indexed by ``i % 3``
# (0 -> dog, 1 -> flower, 2 -> cat); used for the *next* iteration's input.
_IMAGE_CYCLE = ('./dog.jpg', './flower.jpg', './cat.jpg')


def _report(stage, start_time):
    """Print the wall-clock milliseconds elapsed since *start_time* for *stage*."""
    elapsed = timeit.default_timer() - start_time
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("\nAfter %s (%f ms)" % (stage, elapsed * 1000))


def main(num_iterations=300):
    """Repeatedly run inference on the FPGA and print the classification.

    Parses the command line via ``xdnn_io.processCommandLine``, opens the
    xdnn handle, loads the weights once, then for each iteration prepares
    the input/output buffers, executes the network, applies the optional
    fully-connected layer and softmax on the host, and prints the result.
    After every iteration ``args["images"]`` is switched to the next entry
    of ``_IMAGE_CYCLE``, so the first iteration uses the command-line images.

    Args:
        num_iterations: Number of inference iterations to run (default 300).

    Exits with status 1 when ``xdnn.createHandle`` returns a non-zero code.
    """
    args = xdnn_io.processCommandLine()
    if xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", args['xlnxlib']) != 0:
        sys.exit(1)

    # The handle is open from here on: always close it, even when an
    # iteration raises, so it is not leaked.
    try:
        weightsBlob, fcWeight, fcBias = xdnn_io.loadWeights(args)

        for i in range(num_iterations):
            # NOTE(review): fresh input/output buffers are created on every
            # iteration. The "_numFreeMemSlots > 0" assertion reported with
            # this script is presumably caused by the runtime mapping each
            # new host buffer to a device memory slot -- TODO confirm against
            # the xdnn runtime and reuse buffers if so.
            fpgaInputs, batch_sz = xdnn_io.prepareInput(args)
            fpgaOutput = xdnn_io.prepareOutput(args['fpgaoutsz'], batch_sz)

            start = timeit.default_timer()
            xdnn.execute(args['netcfg'],
                         weightsBlob, fpgaInputs, fpgaOutput,
                         batch_sz,  # num batches
                         args['quantizecfg'], args['scaleB'], args['PE'])
            _report("FPGA", start)

            if fcWeight is None:
                # No FC weights were loaded: apply softmax to the FPGA
                # output directly.
                start = timeit.default_timer()
                softmaxOut = xdnn.computeSoftmax(fpgaOutput, batch_sz)
                _report("Softmax", start)
            else:
                start = timeit.default_timer()
                fcOut = xdnn.computeFC(fcWeight, fcBias, fpgaOutput,
                                       batch_sz, args['outsz'],
                                       args['fpgaoutsz'], args['useblas'])
                _report("FC", start)
                softmaxOut = xdnn.computeSoftmax(fcOut, batch_sz)

            xdnn_io.printClassification(softmaxOut, args)

            # Select the image for the next iteration.
            args["images"] = [_IMAGE_CYCLE[i % 3]]
    finally:
        xdnn.closeHandle()
# Run the benchmark loop only when executed as a script, not on import.
if "__main__" == __name__:
    main()
I check whether fcWeight is None because some models do not have an FC layer (it is implemented by a Conv layer instead).
python: xmlrt.cpp:106: std::pair<_cl_mem*, int> XComputeUnit::getCreateBuffer(void*, int, cl_mem_flags): Assertion `_numFreeMemSlots > 0' failed.