Skip to content

Commit 1385fd8

Browse files
committed
update readme, yolov3 migrated to trt7
1 parent f4e9063 commit 1385fd8

17 files changed

Lines changed: 1251 additions & 916 deletions

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ All the models are implemented in pytorch first, and export a weights file xxx.w
1212

1313
- `22 May 2020`. A new branch [trt4](https://github.com/wang-xinyu/tensorrtx/tree/trt4) created, which is using TensorRT 4 API. Now the master branch is using TensorRT 7 API. But only `yolov4` has been migrated to TensorRT 7 API for now. The rest will be migrated soon. And a tutorial for `migrating from TensorRT 4 to 7` provided.
1414
- `28 May 2020`. arcface LResNet50E-IR model from [deepinsight/insightface](https://github.com/deepinsight/insightface) implemented. We got 333fps on GTX1080.
15+
- `2 June 2020`. yolov3 and yolov3-spp migrated to TensorRT 7 API. The new yolov3 is using pytorch implementation [ultralytics/yolov3](https://github.com/ultralytics/yolov3), the yolov3 in branch `trt4` was using pytorch implementation [ayooshkathuria/pytorch-yolo-v3](https://github.com/ayooshkathuria/pytorch-yolo-v3).
1516

1617
## Tutorials
1718

@@ -44,8 +45,8 @@ Following models are implemented.
4445
|[shufflenet](./shufflenetv2)| ShuffleNetV2 with 0.5x output channels |
4546
|[squeezenet](./squeezenet)| SqueezeNet 1.1 model |
4647
|[vgg](./vgg)| VGG 11-layer model |
47-
|[yolov3](./yolov3)| darknet-53, weights from yolov3 authors, pytorch implementation from [ayooshkathuria/pytorch-yolo-v3](https://github.com/ayooshkathuria/pytorch-yolo-v3) |
48-
|[yolov3-spp](./yolov3-spp)| darknet-53, weights from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
48+
|[yolov3](./yolov3)| darknet-53, weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
49+
|[yolov3-spp](./yolov3-spp)| darknet-53, weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
4950
|[yolov4](./yolov4)| CSPDarknet53, weights from [AlexeyAB/darknet](https://github.com/AlexeyAB/darknet#pre-trained-models), pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
5051
|[retinaface](./retinaface)| resnet-50, weights from [biubug6/Pytorch_Retinaface](https://github.com/biubug6/Pytorch_Retinaface) |
5152
|[arcface](./arcface)| LResNet50E-IR, weights from [deepinsight/insightface](https://github.com/deepinsight/insightface) |
@@ -63,8 +64,8 @@ Some tricky operations encountered in these models, already solved, but might ha
6364
|torch.chunk()| implement the 'chunk(2, dim=C)' by tensorrt plugin, see shufflenet. |
6465
|channel shuffle| use two shuffle layers to implement `channel_shuffle`, see shufflenet. |
6566
|adaptive pool| use fixed input dimension, and use regular average pooling, see shufflenet. |
66-
|leaky relu| I wrote a leaky relu plugin, but PRelu in `NvInferPlugin.h` can be used, see yolov3. |
67-
|yolo layer v1| yolo layer is implemented as a plugin, see yolov3. |
67+
|leaky relu| I wrote a leaky relu plugin, but PRelu in `NvInferPlugin.h` can be used, see yolov3 in branch `trt4`. |
68+
|yolo layer v1| yolo layer is implemented as a plugin, see yolov3 in branch `trt4`. |
6869
|yolo layer v2| three yolo layers implemented in one plugin, see yolov3-spp. |
6970
|upsample| replaced by a deconvolution layer, see yolov3. |
7071
|hsigmoid| hard sigmoid is implemented as a plugin, hsigmoid and hswish are used in mobilenetv3 |
@@ -76,7 +77,7 @@ Some tricky operations encountered in these models, already solved, but might ha
7677

7778
| Models | Device | BatchSize | Mode | Input Shape(HxW) | FPS |
7879
|-|-|:-:|:-:|:-:|:-:|
79-
| YOLOv3(darknet53) | Xavier | 1 | FP16 | 320x320 | 55 |
80+
| YOLOv3(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP16 | 608x608 | 39.2 |
8081
| YOLOv3-spp(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 256x416 | 94 |
8182
| YOLOv3-spp(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP16 | 608x608 | 38.5 |
8283
| YOLOv4(CSPDarknet53) | Xeon E5-2620/GTX1080 | 1 | FP16 | 608x608 | 35.7 |

yolov3-spp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,5 +56,5 @@ sudo ./yolov3-spp -d ../samples // deserialize plan file and run inference, the
5656

5757
## More Information
5858

59-
See the [readme](../README.md) in home page
59+
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx)
6060

yolov3/CMakeLists.txt

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,30 @@ find_package(CUDA REQUIRED)
1313
set(CUDA_NVCC_PLAGS ${CUDA_NVCC_PLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)
1414

1515
include_directories(${PROJECT_SOURCE_DIR}/include)
16-
include_directories(/usr/local/cuda-9.0/targets/aarch64-linux/include)
17-
link_directories(/usr/local/cuda-9.0/targets/aarch64-linux/lib)
16+
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
17+
message("embed_platform on")
18+
include_directories(/usr/local/cuda/targets/aarch64-linux/include)
19+
link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
20+
else()
21+
message("embed_platform off")
22+
include_directories(/usr/local/cuda/include)
23+
link_directories(/usr/local/cuda/lib64)
24+
endif()
25+
1826

1927
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
2028

2129
#cuda_add_library(leaky ${PROJECT_SOURCE_DIR}/leaky.cu)
22-
cuda_add_library(yololayer ${PROJECT_SOURCE_DIR}/yololayer.cu)
30+
cuda_add_library(yololayer SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu)
2331

2432
find_package(OpenCV)
2533
# Expand the variable populated by find_package(OpenCV); the bare name would be taken as a literal directory.
include_directories(${OpenCV_INCLUDE_DIRS})
2634

27-
add_executable(yolov3 ${PROJECT_SOURCE_DIR}/plugin_factory.cpp ${PROJECT_SOURCE_DIR}/yolov3.cpp)
28-
target_link_libraries(yolov3 nvinfer nvinfer_plugin)
35+
add_executable(yolov3 ${PROJECT_SOURCE_DIR}/yolov3.cpp)
36+
target_link_libraries(yolov3 nvinfer)
2937
target_link_libraries(yolov3 cudart)
3038
target_link_libraries(yolov3 yololayer)
31-
target_link_libraries(yolov3 ${OpenCV_LIBRARIES})
39+
target_link_libraries(yolov3 ${OpenCV_LIBS})
3240

3341
add_definitions(-O2 -pthread)
3442

yolov3/README.md

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,55 @@
1-
# yolo v3
1+
# yolov3
22

3-
Thanks to Ayoosh Kathuria, for his remarkable tutorials of yolov3. The github link is [ayooshkathuria/pytorch-yolo-v3](https://github.com/ayooshkathuria/pytorch-yolo-v3).
3+
The Pytorch implementation is [ultralytics/yolov3](https://github.com/ultralytics/yolov3). It provides two sets of trained yolov3 weights: `yolov3.weights` and `yolov3.pt`.
44

5-
I forked his github repo, and implement inference_on_one_pic and export weights for tensorrt. You can refer to
5+
This branch uses the TensorRT 7 API. There is also a yolov3 implementation using the TensorRT 4 API in [branch trt4/yolov3](https://github.com/wang-xinyu/tensorrtx/tree/trt4/yolov3), which is based on [ayooshkathuria/pytorch-yolo-v3](https://github.com/ayooshkathuria/pytorch-yolo-v3).
66

7-
[pytorchx/pytorch-yolo-v3](https://github.com/wang-xinyu/pytorchx/tree/master/pytorch-yolo-v3)
8-
9-
Following tricks are used in this yolov3,
10-
11-
- I wrote a leaky relu plugin(leaky.cu leaky.cuh leakyplugin.cpp leakyplugin.h) in the beginning, but I found there is PRelu in `NvInferPlugin.h`.
12-
- yolo layer is implemented as a plugin. I learn a lot from [lewes6369/TensorRT-Yolov3](https://github.com/lewes6369/TensorRT-Yolov3).
13-
- upsample layer is replaced by a deconvolution layer.
14-
- Batchnorm layer, implemented by scale layer.
15-
16-
For FP16 mode, just need add one line `builder->setFp16Mode(true);`. On my TX1, it's 115ms in fp16, while 145ms in fp32.
7+
## Execute:
178

189
```
19-
// 1. generate yolov3.wts from [pytorchx/pytorch-yolo-v3](https://github.com/wang-xinyu/pytorchx/tree/master/pytorch-yolo-v3)
20-
21-
// 2. put yolov3.wts into tensorrtx/yolov3
10+
1. generate yolov3.wts from pytorch implementation with yolov3.cfg and yolov3.weights
2211
23-
// 3. build and run
12+
git clone https://github.com/wang-xinyu/tensorrtx.git
13+
git clone https://github.com/ultralytics/yolov3.git
14+
// download its weights 'yolov3.pt' or 'yolov3.weights'
15+
cd yolov3
16+
cp ../tensorrtx/yolov3/gen_wts.py .
17+
python gen_wts.py yolov3.weights
18+
// a file 'yolov3.wts' will be generated.
19+
// the master branch of yolov3 should work, if not, you can checkout cf7a4d31d37788023a9186a1a143a2dab0275ead
2420
25-
cd tensorrtx/yolov3
21+
2. put yolov3.wts into tensorrtx/yolov3, build and run
2622
23+
mv yolov3.wts ../tensorrtx/yolov3/
24+
cd ../tensorrtx/yolov3
2725
mkdir build
28-
2926
cd build
30-
3127
cmake ..
32-
3328
make
29+
sudo ./yolov3 -s // serialize model to plan file i.e. 'yolov3.engine'
30+
sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
3431
35-
sudo ./yolov3 -s // serialize model to plan file i.e. 'yolov3.engine'
36-
sudo ./yolov3 -d // deserialize plan file and run inference
37-
38-
// 4. see if the output is same as pytorchx/pytorch-yolo-v3
32+
3. check the images generated, as follows. _zidane.jpg and _bus.jpg
3933
```
4034

35+
<p align="center">
36+
<img src="https://user-images.githubusercontent.com/15235574/78247927-4d9fac00-751e-11ea-8b1b-704a0aeb3fcf.jpg">
37+
</p>
38+
39+
<p align="center">
40+
<img src="https://user-images.githubusercontent.com/15235574/78247970-60b27c00-751e-11ea-88df-41473fed4823.jpg">
41+
</p>
42+
43+
## Config
44+
45+
- Input shape defined in yololayer.h
46+
- Number of classes defined in yololayer.h
47+
- FP16/FP32 can be selected by the macro in yolov3.cpp
48+
- GPU id can be selected by the macro in yolov3.cpp
49+
- NMS thresh in yolov3.cpp
50+
- BBox confidence thresh in yolov3.cpp
51+
52+
## More Information
53+
54+
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx)
4155

yolov3/Utils.h

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#ifndef __TRT_UTILS_H_
2+
#define __TRT_UTILS_H_
3+
4+
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <cudnn.h>
8+
9+
#ifndef CUDA_CHECK

// Evaluates a CUDA call exactly once; if the result differs from cudaSuccess,
// prints the numeric error code with file and line to stderr and trips
// assert(0).
//
// The do { ... } while (0) wrapper makes the macro expand to one statement,
// so `if (cond) CUDA_CHECK(call); else ...` parses as intended. Call sites
// must terminate the macro with a semicolon.
//
// NOTE(review): assert(0) is a no-op when NDEBUG is defined, so in such
// builds a failing call is only logged.
#define CUDA_CHECK(callstr)                                                  \
    do {                                                                     \
        cudaError_t error_code = (callstr);                                  \
        if (error_code != cudaSuccess) {                                     \
            std::cerr << "CUDA error " << error_code << " at " << __FILE__   \
                      << ":" << __LINE__ << std::endl;                       \
            assert(0);                                                       \
        }                                                                    \
    } while (0)

#endif
21+
22+
namespace Tn
23+
{
24+
class Profiler : public nvinfer1::IProfiler
25+
{
26+
public:
27+
void printLayerTimes(int itrationsTimes)
28+
{
29+
float totalTime = 0;
30+
for (size_t i = 0; i < mProfile.size(); i++)
31+
{
32+
printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
33+
totalTime += mProfile[i].second;
34+
}
35+
printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
36+
}
37+
private:
38+
typedef std::pair<std::string, float> Record;
39+
std::vector<Record> mProfile;
40+
41+
virtual void reportLayerTime(const char* layerName, float ms)
42+
{
43+
auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
44+
if (record == mProfile.end())
45+
mProfile.push_back(std::make_pair(layerName, ms));
46+
else
47+
record->second += ms;
48+
}
49+
};
50+
51+
//Logger for TensorRT info/warning/errors
52+
class Logger : public nvinfer1::ILogger
53+
{
54+
public:
55+
56+
Logger(): Logger(Severity::kWARNING) {}
57+
58+
Logger(Severity severity): reportableSeverity(severity) {}
59+
60+
void log(Severity severity, const char* msg) override
61+
{
62+
// suppress messages with severity enum value greater than the reportable
63+
if (severity > reportableSeverity) return;
64+
65+
switch (severity)
66+
{
67+
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
68+
case Severity::kERROR: std::cerr << "ERROR: "; break;
69+
case Severity::kWARNING: std::cerr << "WARNING: "; break;
70+
case Severity::kINFO: std::cerr << "INFO: "; break;
71+
default: std::cerr << "UNKNOWN: "; break;
72+
}
73+
std::cerr << msg << std::endl;
74+
}
75+
76+
Severity reportableSeverity{Severity::kWARNING};
77+
};
78+
79+
// Copies the object representation of `val` into the byte buffer at `buffer`
// and advances `buffer` past the bytes written.
//
// std::memcpy is used instead of storing through a reinterpret_cast<T*>:
// the destination is a char buffer with no alignment guarantee for T, so a
// typed store would be undefined behaviour (misaligned access / aliasing).
// T is expected to be trivially copyable; the caller must provide at least
// sizeof(T) writable bytes.
template<typename T>
void write(char*& buffer, const T& val)
{
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}
85+
86+
// Fills `val` from the next sizeof(T) bytes at `buffer` and advances
// `buffer` past the bytes consumed.
//
// std::memcpy is used instead of loading through a
// reinterpret_cast<const T*>: the source is a char buffer with no alignment
// guarantee for T, so a typed load would be undefined behaviour
// (misaligned access / aliasing). T is expected to be trivially copyable;
// the caller must provide at least sizeof(T) readable bytes.
template<typename T>
void read(const char*& buffer, T& val)
{
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
}
92+
}
93+
94+
#endif

0 commit comments

Comments
 (0)