Skip to content

Commit

Permalink
Merge pull request #338 from Wwupup/master
Browse files Browse the repository at this point in the history
version -> v0.0.3
  • Loading branch information
ShiqiYu committed Sep 25, 2022
2 parents 14abaa7 + 93992f9 commit 24bc540
Show file tree
Hide file tree
Showing 6 changed files with 680 additions and 840 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ if (BUILD_SHARED_LIBS)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()

SET(BUILD_VERSION "v0.0.1")
SET(BUILD_VERSION "v0.0.3")
# Find Git Version Patch
IF(EXISTS "${CMAKE_SOURCE_DIR}/.git")
if(NOT GIT)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ You can also compile the source code to a static or dynamic library, and then us


## Performance on WIDER Face
Run on default settings: scales=[1.], confidence_threshold=0.3, floating point:
Run on default settings: scales=[1.], confidence_threshold=0.02, floating point:
```
AP_easy=0.856, AP_medium=0.842, AP_hard=0.727
AP_easy=0.887, AP_medium=0.871, AP_hard=0.768
```

## Author
Expand Down
319 changes: 165 additions & 154 deletions src/facedetectcnn-data.cpp

Large diffs are not rendered by default.

239 changes: 65 additions & 174 deletions src/facedetectcnn-model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ are permitted provided that the following conditions are met:
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
Expand All @@ -38,9 +38,11 @@ the use of this software, even if advised of the possibility of such damage.


#include "facedetectcnn.h"
#include <iostream>
#include <stdio.h>
#include <string.h>


#if 0
#include <opencv2/opencv.hpp>
cv::TickMeter cvtm;
Expand All @@ -52,7 +54,7 @@ cv::TickMeter cvtm;
#endif


#define NUM_CONV_LAYER 43
#define NUM_CONV_LAYER 53

extern ConvInfoStruct param_pConvInfo[NUM_CONV_LAYER];
Filters<float> g_pFilters[NUM_CONV_LAYER];
Expand All @@ -63,27 +65,12 @@ void init_parameters()
{
for(int i = 0; i < NUM_CONV_LAYER; i++)
g_pFilters[i] = param_pConvInfo[i];
transFilter(g_pFilters[0]);

}

vector<FaceRect> objectdetect_cnn(unsigned char * rgbImageData, int width, int height, int step)
{
CDataBlob<float> dataBlobs[21];
CDataBlob<float> conv3priorbox, conv4priorbox, conv5priorbox, conv6priorbox;
CDataBlob<float> conv3priorbox_flat, conv4priorbox_flat, conv5priorbox_flat, conv6priorbox_flat, mbox_priorbox;

CDataBlob<float> conv3_loc, conv3_conf, conv3_iou;
CDataBlob<float> conv3_loc_flat, conv3_conf_flat, conv3_iou_flat;

CDataBlob<float> conv4_loc, conv4_conf, conv4_iou;
CDataBlob<float> conv4_loc_flat, conv4_conf_flat, conv4_iou_flat;

CDataBlob<float> conv5_loc, conv5_conf, conv5_iou;
CDataBlob<float> conv5_loc_flat, conv5_conf_flat, conv5_iou_flat;

CDataBlob<float> conv6_loc, conv6_conf, conv6_iou;
CDataBlob<float> conv6_loc_flat, conv6_conf_flat, conv6_iou_flat;

CDataBlob<float> mbox_loc, mbox_conf, mbox_iou;

TIME_START;
if (!param_initialized)
Expand All @@ -93,228 +80,132 @@ vector<FaceRect> objectdetect_cnn(unsigned char * rgbImageData, int width, int h
}
TIME_END("init");


TIME_START;
dataBlobs[0].setDataFrom3x3S2P1to1x1S1P0FromImage(rgbImageData, width, height, 3, step);
auto fx = setDataFrom3x3S2P1to1x1S1P0FromImage(rgbImageData, width, height, 3, step);
TIME_END("convert data");

/***************CONV0*********************/
TIME_START;
convolution(dataBlobs[0], g_pFilters[0], dataBlobs[1]);
fx = convolution(fx, g_pFilters[0]);
TIME_END("conv_head");

TIME_START;
convolutionDP(dataBlobs[1], g_pFilters[1], g_pFilters[2], dataBlobs[2]);
fx = convolutionDP(fx, g_pFilters[1], g_pFilters[2]);
TIME_END("conv0");

TIME_START;
maxpooling2x2S2(dataBlobs[2], dataBlobs[3]);
fx = maxpooling2x2S2(fx);
TIME_END("pool0");

/***************CONV1*********************/
TIME_START;
convolution4layerUnit(dataBlobs[3], g_pFilters[3], g_pFilters[4], g_pFilters[5], g_pFilters[6], dataBlobs[4]);
fx = convolution4layerUnit(fx, g_pFilters[3], g_pFilters[4], g_pFilters[5], g_pFilters[6]);
TIME_END("conv1");

/***************CONV2*********************/
TIME_START;
convolution4layerUnit(dataBlobs[4], g_pFilters[7], g_pFilters[8], g_pFilters[9], g_pFilters[10], dataBlobs[5]);
fx = convolution4layerUnit(fx, g_pFilters[7], g_pFilters[8], g_pFilters[9], g_pFilters[10]);
TIME_END("conv2");

/***************CONV3*********************/
TIME_START;
maxpooling2x2S2(dataBlobs[5], dataBlobs[6]);
fx = maxpooling2x2S2(fx);
TIME_END("pool3");

TIME_START;
convolution4layerUnit(dataBlobs[6], g_pFilters[11], g_pFilters[12], g_pFilters[13], g_pFilters[14], dataBlobs[7]);
auto fb1 = convolution4layerUnit(fx, g_pFilters[11], g_pFilters[12], g_pFilters[13], g_pFilters[14]);
TIME_END("conv3");

/***************CONV4*********************/
TIME_START;
maxpooling2x2S2(dataBlobs[7], dataBlobs[8]);
fx = maxpooling2x2S2(fb1);
TIME_END("pool4");

TIME_START;
convolution4layerUnit(dataBlobs[8], g_pFilters[15], g_pFilters[16], g_pFilters[17], g_pFilters[18], dataBlobs[9]);
auto fb2 = convolution4layerUnit(fx, g_pFilters[15], g_pFilters[16], g_pFilters[17], g_pFilters[18]);
TIME_END("conv4");

/***************CONV5*********************/
TIME_START;
maxpooling2x2S2(dataBlobs[9], dataBlobs[10]);
fx = maxpooling2x2S2(fb2);
TIME_END("pool5");
TIME_START;
convolution4layerUnit(dataBlobs[10], g_pFilters[19], g_pFilters[20], g_pFilters[21], g_pFilters[22], dataBlobs[11]);
TIME_END("conv5");

/***************CONV6*********************/
TIME_START;
maxpooling2x2S2(dataBlobs[11], dataBlobs[12]);
TIME_END("pool6");
TIME_START;
convolution4layerUnit(dataBlobs[12], g_pFilters[23], g_pFilters[24], g_pFilters[25], g_pFilters[26], dataBlobs[13]);
TIME_END("conv6");

/***************branch6*********************/
TIME_START;
convolutionDP(dataBlobs[13], g_pFilters[39], g_pFilters[40], dataBlobs[14]);
convolutionDP(dataBlobs[14], g_pFilters[41], g_pFilters[42], dataBlobs[15], false);
// convolution4layerUnit(dataBlobs[7], g_pFilters[27], g_pFilters[28], g_pFilters[29], g_pFilters[30], dataBlobs[14], false);
TIME_END("branch6");

/*****************add6*********************/
TIME_START;
upsamplex2withadd(dataBlobs[14], dataBlobs[11]);
TIME_END("add6");
auto fb3 = convolution4layerUnit(fx, g_pFilters[19], g_pFilters[20], g_pFilters[21], g_pFilters[22]);
TIME_END("conv5");

CDataBlob<float> pred_reg[3], pred_cls[3], pred_kps[3], pred_obj[3];
/***************branch5*********************/
TIME_START;
convolutionDP(dataBlobs[11], g_pFilters[35], g_pFilters[36], dataBlobs[16]);
convolutionDP(dataBlobs[16], g_pFilters[37], g_pFilters[38], dataBlobs[17], false);
fb3 = convolutionDP(fb3, g_pFilters[27], g_pFilters[28]);
pred_cls[2] = convolutionDP(fb3, g_pFilters[33], g_pFilters[34], false);
pred_reg[2] = convolutionDP(fb3, g_pFilters[39], g_pFilters[40], false);
pred_kps[2] = convolutionDP(fb3, g_pFilters[51], g_pFilters[52], false);
pred_obj[2] = convolutionDP(fb3, g_pFilters[45], g_pFilters[46], false);
TIME_END("branch5");

/*****************add5*********************/
/*****************add5*********************/
TIME_START;
upsamplex2withadd(dataBlobs[16], dataBlobs[9]);
fb2 = elementAdd(upsampleX2(fb3), fb2);
TIME_END("add5");

/***************branch4*********************/
/***************branch4*********************/
TIME_START;
convolutionDP(dataBlobs[9], g_pFilters[31], g_pFilters[32], dataBlobs[18]);
convolutionDP(dataBlobs[18], g_pFilters[33], g_pFilters[34], dataBlobs[19], false);
fb2 = convolutionDP(fb2, g_pFilters[25], g_pFilters[26]);
pred_cls[1] = convolutionDP(fb2, g_pFilters[31], g_pFilters[32], false);
pred_reg[1] = convolutionDP(fb2, g_pFilters[37], g_pFilters[38], false);
pred_kps[1] = convolutionDP(fb2, g_pFilters[49], g_pFilters[50], false);
pred_obj[1] = convolutionDP(fb2, g_pFilters[43], g_pFilters[44], false);
TIME_END("branch4");

/*****************add4*********************/
TIME_START;
upsamplex2withadd(dataBlobs[18], dataBlobs[7]);
fb1 = elementAdd(upsampleX2(fb2), fb1);
TIME_END("add4");

/***************branch3*********************/
TIME_START;
convolution4layerUnit(dataBlobs[7], g_pFilters[27], g_pFilters[28], g_pFilters[29], g_pFilters[30], dataBlobs[20], false);
fb1 = convolutionDP(fb1, g_pFilters[23], g_pFilters[24]);
pred_cls[0] = convolutionDP(fb1, g_pFilters[29], g_pFilters[30], false);
pred_reg[0] = convolutionDP(fb1, g_pFilters[35], g_pFilters[36], false);
pred_kps[0] = convolutionDP(fb1, g_pFilters[47], g_pFilters[48], false);
pred_obj[0] = convolutionDP(fb1, g_pFilters[41], g_pFilters[42], false);
TIME_END("branch3");


/***************PRIORBOX*********************/
TIME_START;
float pSizes3[3] = {10, 16, 24};
priorbox(dataBlobs[7].cols, dataBlobs[7].rows, width, height, 8, 3, pSizes3, conv3priorbox);
TIME_END("prior3");
auto prior3 = meshgrid(fb1.cols, fb1.rows, 8);
auto prior4 = meshgrid(fb2.cols, fb2.rows, 16);
auto prior5 = meshgrid(fb3.cols, fb3.rows, 32);
TIME_END("prior");
/***************PRIORBOX*********************/

TIME_START;
float pSizes4[2] = { 32, 48};
priorbox(dataBlobs[9].cols, dataBlobs[9].rows, width, height, 16, 2, pSizes4, conv4priorbox);
TIME_END("prior4");
bbox_decode(pred_reg[0], prior3, 8);
bbox_decode(pred_reg[1], prior4, 16);
bbox_decode(pred_reg[2], prior5, 32);

TIME_START;
float pSizes5[2] = { 64, 96 };
priorbox(dataBlobs[11].cols, dataBlobs[11].rows, width, height, 32, 2, pSizes5, conv5priorbox);
TIME_END("prior5");
kps_decode(pred_kps[0], prior3, 8);
kps_decode(pred_kps[1], prior4, 16);
kps_decode(pred_kps[2], prior5, 32);

TIME_START;
float pSizes6[3] = { 128, 192, 256 };
priorbox(dataBlobs[13].cols, dataBlobs[13].rows, width, height, 64, 3, pSizes6, conv6priorbox);
TIME_END("prior6");
auto cls = concat3(blob2vector(pred_cls[0]), blob2vector(pred_cls[1]), blob2vector(pred_cls[2]));
auto reg = concat3(blob2vector(pred_reg[0]), blob2vector(pred_reg[1]), blob2vector(pred_reg[2]));
auto kps = concat3(blob2vector(pred_kps[0]), blob2vector(pred_kps[1]), blob2vector(pred_kps[2]));
auto obj = concat3(blob2vector(pred_obj[0]), blob2vector(pred_obj[1]), blob2vector(pred_obj[2]));

/***************PRIORBOX*********************/
TIME_START;
blob2vector(conv3priorbox, conv3priorbox_flat);
extract(dataBlobs[20], conv3_loc, conv3_conf, conv3_iou, 3);
blob2vector(conv3_loc, conv3_loc_flat);
blob2vector(conv3_conf, conv3_conf_flat);
blob2vector(conv3_iou, conv3_iou_flat);

blob2vector(conv4priorbox, conv4priorbox_flat);
extract(dataBlobs[19], conv4_loc, conv4_conf, conv4_iou, 2);
blob2vector(conv4_loc, conv4_loc_flat);
blob2vector(conv4_conf, conv4_conf_flat);
blob2vector(conv4_iou, conv4_iou_flat);

blob2vector(conv5priorbox, conv5priorbox_flat);
extract(dataBlobs[17], conv5_loc, conv5_conf, conv5_iou, 2);
blob2vector(conv5_loc, conv5_loc_flat);
blob2vector(conv5_conf, conv5_conf_flat);
blob2vector(conv5_iou, conv5_iou_flat);

blob2vector(conv6priorbox, conv6priorbox_flat);
extract(dataBlobs[15], conv6_loc, conv6_conf, conv6_iou, 3);
blob2vector(conv6_loc, conv6_loc_flat);
blob2vector(conv6_conf, conv6_conf_flat);
blob2vector(conv6_iou, conv6_iou_flat);
TIME_END("prior flat");


TIME_START
concat4(conv3priorbox_flat, conv4priorbox_flat, conv5priorbox_flat, conv6priorbox_flat, mbox_priorbox);
concat4(conv3_loc_flat, conv4_loc_flat, conv5_loc_flat, conv6_loc_flat, mbox_loc);
concat4(conv3_conf_flat, conv4_conf_flat, conv5_conf_flat, conv6_conf_flat, mbox_conf);
concat4(conv3_iou_flat, conv4_iou_flat, conv5_iou_flat, conv6_iou_flat, mbox_iou);
TIME_END("concat prior")

TIME_START
softmax1vector2class(mbox_conf);
clamp1vector(mbox_iou);
TIME_END("softmax")

CDataBlob<float> facesInfo;
TIME_START;
detection_output(mbox_priorbox, mbox_loc, mbox_conf, mbox_iou, 0.3f, 0.5f, 1000, 100, facesInfo);
TIME_END("detection output")
sigmoid(cls);
sigmoid(obj);
TIME_END("decode")

TIME_START;
std::vector<FaceRect> faces;
for (int i = 0; i < facesInfo.cols; i++)
{
float * pFaceData = facesInfo.ptr(0,i);
float score = pFaceData[0];
float bbxmin = pFaceData[1];
float bbymin = pFaceData[2];
float bbxmax = pFaceData[3];
float bbymax = pFaceData[4];
FaceRect r;
r.score = score;
r.x = int(bbxmin * width + 0.5f);
r.y = int(bbymin * height + 0.5f);
r.w = int((bbxmax - bbxmin) * width + 0.5f);
r.h = int((bbymax - bbymin) * height + 0.5f);

//// convert the rectangle to a square
//r.w = int( ((bbxmax - bbxmin) * width + (bbymax - bbymin) * height + 1) / 2);
//r.h = r.w;
//r.x = int(((bbxmin + bbxmax) * width - r.w + 0.5f) / 2);
//r.y = int(((bbymin + bbymax) * height - r.h + 0.5f) / 2);

for (int lmidx = 0; lmidx < 5; lmidx++)
{
r.lm[lmidx * 2] = int(pFaceData[5 + lmidx * 2] * width + 0.5f);
r.lm[lmidx * 2 + 1] = int(pFaceData[5 + lmidx * 2 + 1] * height + 0.5f);
}

faces.push_back(r);
}
TIME_END("copy result");

// int ii = 2;
// cv::Mat m1(dataBlobs[ii].rows, dataBlobs[ii].cols, CV_32FC1);
// for(int r=0; r < m1.rows; r++)
// {
// float * p = (float*)m1.ptr(r);
// for(int c=0; c < m1.cols; c++)
// p[c]=(dataBlobs[ii].getElement(r, c, 0));
// }
// cv::imshow("x1", m1);
// cv::Mat m2(dataBlobs[ii].rows, dataBlobs[ii].cols, CV_32FC1);
// for(int r=0; r < m2.rows; r++)
// {
// float * p = (float*)m2.ptr(r);
// for(int c=0; c < m2.cols; c++)
// p[c]=(dataBlobs[ii].getElement(r, c, 31));
// }
// cv::imshow("x2", m2);
// cv::waitKey(0);

return faces;
std::vector<FaceRect> facesInfo = detection_output(cls, reg, kps, obj, 0.3f, 0.5f, 1000, 100);
TIME_END("detection output")
return facesInfo;
}

int * facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing face detection results, !!its size must be 0x20000 Bytes!!
int* facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing face detection results, !!its size must be 0x20000 Bytes!!
unsigned char * rgb_image_data, int width, int height, int step) //input image, it must be RGB (three-channel) image!
{

Expand Down Expand Up @@ -342,7 +233,7 @@ int * facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing
{
//copy data
short * p = ((short*)(result_buffer + 4)) + 142 * size_t(i);
p[0] = (short)(faces[i].score * faces[i].score * 100);
p[0] = (short)(faces[i].score * 100);
p[1] = (short)faces[i].x;
p[2] = (short)faces[i].y;
p[3] = (short)faces[i].w;
Expand Down
Loading

0 comments on commit 24bc540

Please sign in to comment.