Skip to content

Commit a1883a8

Browse files
committed
add yolov3-spp tensorrt implement
1 parent a68ec82 commit a1883a8

18 files changed

Lines changed: 1702 additions & 0 deletions
40 KB
Binary file not shown.

yolov3-spp/CMakeLists.txt

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Build script for the yolov3-spp TensorRT demo: builds the `yololayer` CUDA
# plugin as a shared library and the `yolov3-spp` executable that links it
# together with TensorRT, the CUDA runtime and OpenCV.
cmake_minimum_required(VERSION 2.6)

project(yolov3-spp)

add_definitions(-std=c++11)

# option() is option(<variable> "<help text>" [value]). The help text was
# missing, so OFF was taken as the description and the default only happened to
# be OFF as well. Spell it out explicitly.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library" OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

find_package(CUDA REQUIRED)

# FindCUDA reads CUDA_NVCC_FLAGS. This variable used to be spelled
# CUDA_NVCC_PLAGS, so none of the flags listed on that line ever reached nvcc.
# The name is corrected here, but only -std=c++11 is kept active, because the
# remaining flags would change the (working) build as soon as they take effect:
#   -g;-G                                 -G turns off device-code optimisation
#   -gencode;arch=compute_30;code=sm_30   the ';' separators split this into three
#                                         separate arguments, and it pins one GPU
#                                         architecture
# Re-enable them deliberately if wanted, e.g.
#   -gencode;arch=compute_XX,code=sm_XX   (XX = compute capability of the target GPU)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11)

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(/usr/local/cuda/targets/aarch64-linux/include)
link_directories(/usr/local/cuda/targets/aarch64-linux/lib)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

#cuda_add_library(leaky ${PROJECT_SOURCE_DIR}/leaky.cu)
cuda_add_library(yololayer SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu)

find_package(OpenCV)
# The variable must be expanded; the bare word OpenCV_INCLUDE_DIRS only added a
# directory literally named "OpenCV_INCLUDE_DIRS" to the include path.
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(yolov3-spp ${PROJECT_SOURCE_DIR}/plugin_factory.cpp ${PROJECT_SOURCE_DIR}/yolov3-spp.cpp)
target_link_libraries(yolov3-spp nvinfer nvinfer_plugin)
target_link_libraries(yolov3-spp cudart)
target_link_libraries(yolov3-spp yololayer)
target_link_libraries(yolov3-spp ${OpenCV_LIBS})

add_definitions(-O2 -pthread)
34+

yolov3-spp/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# yolov3-spp
2+
3+
For the Pytorch implementation, you can refer to [ultralytics/yolov3](https://github.com/ultralytics/yolov3)
4+
5+
The following tricks are used in this yolov3-spp:
6+
7+
- The yololayer plugin is different from the plugin used in [yolov3](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov3). In this version, the calculations of the three yololayer plugins are reimplemented as a single plugin to improve speed. The yololayer detection outputs are limited to a maximum of 1000, and the first number of the output is the number of targets in the current image.
8+
- The batchnorm layer is implemented with a scale layer.
9+
10+
11+
12+
## Execute:
13+
14+
```
15+
// 1. generate yolov3-spp_ultralytics68.wts from pytorch implementation with yolov3-spp.cfg and ultralytics68.pt
16+
17+
// 2. put yolov3-spp_ultralytics68.wts into yolov3-spp
18+
19+
// 3. build and run
20+
21+
cd yolov3-spp
22+
23+
mkdir build
24+
25+
cd build
26+
27+
cmake ..
28+
29+
make
30+
31+
sudo ./yolov3-spp -s ../samples // serialize model to plan file i.e. 'yolov3-spp.engine'
32+
33+
sudo ./yolov3-spp -d ../samples // deserialize plan file and run inference
34+
35+
// 4. see if the output is same as pytorch implementation, and see the detect result in build
36+
```

yolov3-spp/Utils.h

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#ifndef __TRT_UTILS_H_
2+
#define __TRT_UTILS_H_
3+
4+
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <cudnn.h>
8+
9+
#ifndef CUDA_CHECK

// CUDA_CHECK(call)
//
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints the
// numeric error code together with the file and line of the call site to
// std::cerr and terminates the process with std::abort().
//
// The failure path deliberately does not use assert(): assert() is compiled out
// when NDEBUG is defined, which would let a failed CUDA call pass unnoticed.
// The macro still expands to a plain brace block, so existing call sites work
// with or without a trailing semicolon.
#define CUDA_CHECK(callstr)                                                          \
    {                                                                                \
        const cudaError_t cuda_check_status_ = (callstr);                            \
        if (cuda_check_status_ != cudaSuccess) {                                     \
            std::cerr << "CUDA error " << cuda_check_status_ << " at " << __FILE__   \
                      << ":" << __LINE__ << std::endl;                               \
            std::abort();                                                            \
        }                                                                            \
    }

#endif
21+
22+
namespace Tn
23+
{
24+
class Profiler : public nvinfer1::IProfiler
25+
{
26+
public:
27+
void printLayerTimes(int itrationsTimes)
28+
{
29+
float totalTime = 0;
30+
for (size_t i = 0; i < mProfile.size(); i++)
31+
{
32+
printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
33+
totalTime += mProfile[i].second;
34+
}
35+
printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
36+
}
37+
private:
38+
typedef std::pair<std::string, float> Record;
39+
std::vector<Record> mProfile;
40+
41+
virtual void reportLayerTime(const char* layerName, float ms)
42+
{
43+
auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
44+
if (record == mProfile.end())
45+
mProfile.push_back(std::make_pair(layerName, ms));
46+
else
47+
record->second += ms;
48+
}
49+
};
50+
51+
//Logger for TensorRT info/warning/errors
52+
class Logger : public nvinfer1::ILogger
53+
{
54+
public:
55+
56+
Logger(): Logger(Severity::kWARNING) {}
57+
58+
Logger(Severity severity): reportableSeverity(severity) {}
59+
60+
void log(Severity severity, const char* msg) override
61+
{
62+
// suppress messages with severity enum value greater than the reportable
63+
if (severity > reportableSeverity) return;
64+
65+
switch (severity)
66+
{
67+
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
68+
case Severity::kERROR: std::cerr << "ERROR: "; break;
69+
case Severity::kWARNING: std::cerr << "WARNING: "; break;
70+
case Severity::kINFO: std::cerr << "INFO: "; break;
71+
default: std::cerr << "UNKNOWN: "; break;
72+
}
73+
std::cerr << msg << std::endl;
74+
}
75+
76+
Severity reportableSeverity{Severity::kWARNING};
77+
};
78+
79+
// Copies the object representation of `val` to the position `buffer` points at
// and advances `buffer` by sizeof(T).
//
// The bytes are copied with std::memcpy instead of storing through a
// reinterpret_cast<T*>: the cursor is an arbitrary offset into a char buffer,
// so it is not guaranteed to be suitably aligned for T, and a typed store
// through it is undefined behaviour.
//
// The caller must ensure at least sizeof(T) writable bytes remain at `buffer`.
// T is expected to be a trivially copyable type.
template<typename T>
void write(char*& buffer, const T& val)
{
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}
85+
86+
// Fills `val` from the sizeof(T) bytes `buffer` points at and advances
// `buffer` by sizeof(T).
//
// The bytes are copied with std::memcpy instead of loading through a
// reinterpret_cast<const T*>: the cursor is an arbitrary offset into a char
// buffer, so it is not guaranteed to be suitably aligned for T, and a typed
// load through it is undefined behaviour.
//
// The caller must ensure at least sizeof(T) readable bytes remain at `buffer`.
// T is expected to be a trivially copyable type.
template<typename T>
void read(const char*& buffer, T& val)
{
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
}
92+
}
93+
94+
#endif

yolov3-spp/YoloConfigs.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#ifndef _YOLO_CONFIGS_H_
2+
#define _YOLO_CONFIGS_H_
3+
4+
5+
// Compile-time configuration of the YOLO detection layers.
namespace Yolo
{
    // Number of anchors stored per YoloKernel (anchors holds CHECK_COUNT * 2 values).
    static constexpr int CHECK_COUNT = 3;
    static constexpr float IGNORE_THRESH = 0.5f;
    static constexpr int CLASS_NUM = 80;
    static constexpr int INPUT_H = 256;
    static constexpr int INPUT_W = 416;

    // Grid size and anchor values of one YOLO output layer.
    struct YoloKernel
    {
        int width;
        int height;
        float anchors[CHECK_COUNT*2];  // presumably (width, height) pairs -- confirm against the kernel
    };

    // The three output grids are the input size divided by 32, 16 and 8.
    // With the 416x256 input configured above this gives 13x8, 26x16 and 52x32,
    // the values that used to be hard-coded here. Deriving them keeps the grids
    // consistent when INPUT_W / INPUT_H change (e.g. INPUT_H = 416 yields the
    // 13x13, 26x26 and 52x52 grids of the square 416 configuration).
    //
    // The objects are `static` (internal linkage) so that including this header
    // from more than one translation unit does not produce duplicate-symbol
    // link errors; each translation unit gets its own copy.
    static YoloKernel yolo1 = {
        INPUT_W / 32,
        INPUT_H / 32,
        {116,90,  156,198,  373,326}
    };
    static YoloKernel yolo2 = {
        INPUT_W / 16,
        INPUT_H / 16,
        {30,61,  62,45,  59,119}
    };
    static YoloKernel yolo3 = {
        INPUT_W / 8,
        INPUT_H / 8,
        {10,13,  16,30,  33,23}
    };
}
54+
55+
#endif

0 commit comments

Comments
 (0)