float123
diff --git a/‎yolov3-spp/.yolov3-spp-1cls.cpp.swp‎
40 KB b/‎yolov3-spp/.yolov3-spp-1cls.cpp.swp‎
40 KB
diff --git a/‎yolov3-spp/CMakeLists.txt‎
Lines changed: 34 additions & 0 deletions b/‎yolov3-spp/CMakeLists.txt‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎yolov3-spp/README.md‎
Lines changed: 36 additions & 0 deletions b/‎yolov3-spp/README.md‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎yolov3-spp/Utils.h‎
Lines changed: 94 additions & 0 deletions b/‎yolov3-spp/Utils.h‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎yolov3-spp/YoloConfigs.h‎
Lines changed: 55 additions & 0 deletions b/‎yolov3-spp/YoloConfigs.h‎
Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Build script for the yolov3-spp sample: builds the CUDA shared library
+# `yololayer` and the `yolov3-spp` executable that links against it,
+# TensorRT (nvinfer, nvinfer_plugin), the CUDA runtime and OpenCV.
+cmake_minimum_required(VERSION 2.6)
+
+project(yolov3-spp)
+
+add_definitions(-std=c++11)
+
+# option() expects (<variable> "<help text>" [value]); without the help string
+# the literal OFF was being consumed as the description.
+option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_BUILD_TYPE Debug)
+
+find_package(CUDA REQUIRED)
+
+# NOTE(review): CUDA_NVCC_PLAGS looks like a typo for CUDA_NVCC_FLAGS, so this
+# line currently has no effect on nvcc. It is left untouched on purpose:
+# correcting the name would switch on -G (device debug) and sm_30-only code
+# generation. Confirm the intended target architecture before renaming it.
+set(CUDA_NVCC_PLAGS ${CUDA_NVCC_PLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)
+
+include_directories(${PROJECT_SOURCE_DIR}/include)
+# Hard-coded CUDA locations for an aarch64 Linux install.
+include_directories(/usr/local/cuda/targets/aarch64-linux/include)
+link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
+
+#cuda_add_library(leaky ${PROJECT_SOURCE_DIR}/leaky.cu)
+cuda_add_library(yololayer SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu)
+
+find_package(OpenCV)
+# Expand the variable; the bare name would add a literal directory called
+# "OpenCV_INCLUDE_DIRS" to the include path instead of the OpenCV headers.
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+add_executable(yolov3-spp ${PROJECT_SOURCE_DIR}/plugin_factory.cpp ${PROJECT_SOURCE_DIR}/yolov3-spp.cpp)
+target_link_libraries(yolov3-spp nvinfer nvinfer_plugin)
+target_link_libraries(yolov3-spp cudart)
+target_link_libraries(yolov3-spp yololayer)
+target_link_libraries(yolov3-spp ${OpenCV_LIBS})
+
+add_definitions(-O2 -pthread)
+
@@ -0,0 +1,36 @@
+# yolov3-spp
+
+For the PyTorch implementation, you can refer to [ultralytics/yolov3](https://github.com/ultralytics/yolov3).
+
+The following tricks are used in this yolov3-spp:
+
+- The yololayer plugin is different from the plugin used in [yolov3](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov3). In this version, the calculation of the three yololayer plugins is reimplemented as a single plugin to improve speed. The yololayer detection output is limited to a maximum of 1000 detections, and the first number of the output is the number of targets in the current image.
+- Batchnorm layer, implemented by scale layer.
+
+
+
+## Execute:
+
+```
+// 1. generate yolov3-spp_ultralytics68.wts from pytorch implementation with yolov3-spp.cfg and ultralytics68.pt
+
+// 2. put yolov3-spp_ultralytics68.wts into yolov3-spp
+
+// 3. build and run
+
+cd yolov3-spp
+
+mkdir build
+
+cd build
+
+cmake ..
+
+make
+
+sudo ./yolov3-spp -s ../samples // serialize model to plan file i.e. 'yolov3-spp.engine'
+
+sudo ./yolov3-spp -d  ../samples // deserialize plan file and run inference 
+
+// 4. check that the output is the same as the PyTorch implementation, and see the detection results in build
+```
@@ -0,0 +1,94 @@
+#ifndef __TRT_UTILS_H_
+#define __TRT_UTILS_H_
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <vector>
+#include <cudnn.h>
+
+#ifndef CUDA_CHECK
+
+// Evaluates the CUDA call `callstr` exactly once. If the result is not
+// cudaSuccess, prints the numeric error code together with the file and line
+// of the call site to std::cerr and terminates the process.
+//
+// std::abort() is used instead of assert(0) so a failing call is not silently
+// ignored when NDEBUG is defined. The expansion is deliberately kept as a
+// plain brace block so call sites written with or without a trailing
+// semicolon keep compiling.
+#define CUDA_CHECK(callstr)                                                                    \
+    {                                                                                          \
+        cudaError_t error_code = (callstr);                                                    \
+        if (error_code != cudaSuccess) {                                                       \
+            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__  \
+                      << std::endl;                                                            \
+            std::abort();                                                                      \
+        }                                                                                      \
+    }
+
+#endif
+
+namespace Tn
+{
+    // Collects per-layer timings reported through the nvinfer1::IProfiler
+    // interface and prints them as per-iteration averages.
+    class Profiler : public nvinfer1::IProfiler
+    {
+    public:
+        // Prints one line per recorded layer with its accumulated time divided
+        // by itrationsTimes, then the total over all layers divided the same way.
+        // itrationsTimes: number of runs the accumulated times are averaged over.
+        void printLayerTimes(int itrationsTimes) const
+        {
+            float totalTime = 0;
+            for (const Record& record : mProfile)
+            {
+                printf("%-40.40s %4.3fms\n", record.first.c_str(), record.second / itrationsTimes);
+                totalTime += record.second;
+            }
+            printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
+        }
+    private:
+        // (layer name, accumulated milliseconds), kept in first-reported order.
+        using Record = std::pair<std::string, float>;
+        std::vector<Record> mProfile;
+
+        // IProfiler callback (presumably invoked by TensorRT for every executed
+        // layer - confirm against the TensorRT version in use). Adds ms to the
+        // entry named layerName, creating the entry on first sight.
+        void reportLayerTime(const char* layerName, float ms) override
+        {
+            auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
+            if (record == mProfile.end())
+                mProfile.push_back(std::make_pair(layerName, ms));
+            else
+                record->second += ms;
+        }
+    };
+
+    // nvinfer1::ILogger implementation that writes severity-tagged messages to
+    // std::cerr and drops anything above a configurable severity threshold.
+    class Logger : public nvinfer1::ILogger
+    {
+    public:
+
+        // Defaults the threshold to Severity::kWARNING.
+        Logger(): Logger(Severity::kWARNING) {}
+
+        // severity: highest severity enum value that will still be printed.
+        Logger(Severity severity): reportableSeverity(severity) {}
+
+        // Prints "<TAG>: <msg>" followed by a newline (flushed) to std::cerr,
+        // unless severity compares greater than reportableSeverity.
+        void log(Severity severity, const char* msg) override
+        {
+            if (!(severity > reportableSeverity))
+            {
+                // Any severity without an explicit tag falls back to UNKNOWN.
+                const char* tag = "UNKNOWN: ";
+                if (severity == Severity::kINTERNAL_ERROR)
+                    tag = "INTERNAL_ERROR: ";
+                else if (severity == Severity::kERROR)
+                    tag = "ERROR: ";
+                else if (severity == Severity::kWARNING)
+                    tag = "WARNING: ";
+                else if (severity == Severity::kINFO)
+                    tag = "INFO: ";
+
+                std::cerr << tag;
+                std::cerr << msg << std::endl;
+            }
+        }
+
+        // Threshold consulted by log(); public so callers can adjust it directly.
+        Severity reportableSeverity{Severity::kWARNING};
+    };
+
+    // Serializes val into the byte buffer at its current position and advances
+    // buffer by sizeof(T).
+    // The bytes are copied with std::memcpy rather than stored through a
+    // reinterpret_cast<T*>, because buffer carries no alignment guarantee for T.
+    // T is expected to be trivially copyable; the caller must ensure at least
+    // sizeof(T) writable bytes remain.
+    template<typename T>
+    void write(char*& buffer, const T& val)
+    {
+        std::memcpy(buffer, &val, sizeof(T));
+        buffer += sizeof(T);
+    }
+
+    // Deserializes a T from the byte buffer at its current position into val
+    // and advances buffer by sizeof(T).
+    // The bytes are copied with std::memcpy rather than loaded through a
+    // reinterpret_cast<const T*>, because buffer carries no alignment guarantee
+    // for T. T is expected to be trivially copyable; the caller must ensure at
+    // least sizeof(T) readable bytes remain.
+    template<typename T>
+    void read(const char*& buffer, T& val)
+    {
+        std::memcpy(&val, buffer, sizeof(T));
+        buffer += sizeof(T);
+    }
+}
+
+#endif
@@ -0,0 +1,55 @@
+#ifndef _YOLO_CONFIGS_H_
+#define _YOLO_CONFIGS_H_
+
+
+// Compile-time configuration shared by the YOLO layer code.
+namespace Yolo
+{
+    // Number of anchor boxes per kernel; YoloKernel::anchors holds this many
+    // (width, height) pairs.
+    static constexpr int CHECK_COUNT = 3;
+    // NOTE(review): presumably the score threshold below which detections are
+    // dropped - confirm against the layer implementation.
+    static constexpr float IGNORE_THRESH = 0.5f;
+    // NOTE(review): presumably the number of object classes - confirm.
+    static constexpr int CLASS_NUM = 80;
+    // Network input size in pixels.
+    static constexpr int INPUT_H = 256;
+    static constexpr int INPUT_W = 416;
+
+    // Grid dimensions and anchor sizes of one YOLO output scale.
+    struct YoloKernel
+    {
+        int width;
+        int height;
+        float anchors[CHECK_COUNT*2];
+    };
+
+    // Kernels for the 416x256 input configured above: each grid is
+    // INPUT_W x INPUT_H divided by 32, 16 and 8 respectively.
+    // Declared static constexpr so that including this header from several
+    // translation units does not produce multiple definitions of the objects.
+    static constexpr YoloKernel yolo1 = {
+        13,
+        8,
+        {116,90,  156,198,  373,326}
+    };
+    static constexpr YoloKernel yolo2 = {
+        26,
+        16,
+        {30,61,  62,45,  59,119}
+    };
+    static constexpr YoloKernel yolo3 = {
+        52,
+        32,
+        {10,13,  16,30,  33,23}
+    };
+
+    // Alternative kernels with square 13/26/52 grids (labelled "YOLO 416" by
+    // the original author).
+    // YoloKernel yolo1 = {
+    //     13,
+    //     13,
+    //     {116,90,  156,198,  373,326}
+    // };
+    // YoloKernel yolo2 = {
+    //     26,
+    //     26,
+    //     {30,61,  62,45,  59,119}
+    // };
+    // YoloKernel yolo3 = {
+    //     52,
+    //     52,
+    //     {10,13,  16,30,  33,23}
+    // };
+}
+
+#endif