yjh0410 committed 4af069ab87 2 years ago
6 changed files with 199 additions and 42 deletions
  1. README.md (+14 −2)
  2. README_CN.md (+15 −2)
  3. models/yolov1/build.py (+12 −19)
  4. models/yolov2/build.py (+12 −19)
  5. tools/export_onnx.py (+138 −0)
  6. utils/misc.py (+8 −0)

+ 14 - 2
README.md

@@ -40,6 +40,7 @@ At least, please make sure your torch is version 1.x.
 | Optimizer               | SGD                        |
 | Multi Scale Train       | True (320 ~ 640)           |
 
+*Due to my limited computing resources, I cannot use a larger multi-scale range, such as 320~960.*
 
 ## Experiments
 ### VOC
@@ -105,11 +106,22 @@ python train.py --cuda -d coco --root path/to/COCO -v yolov1 -bs 16 --max_epoch
 | YOLOv3        | DarkNet-53         |  640  |  250  |       |        42.9            |       63.5        |   167.4           |   54.9             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov3_coco.pth) |
 | YOLOv4        | CSPDarkNet-53      |  640  |  250  |       |        46.6            |       65.8        |   162.7           |   61.5             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) |
 | YOLOX-L       | CSPDarkNet-L       |  640  |  300  |       |        46.6            |       66.1        |   155.4           |   54.2             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_coco.pth) |
-| YOLOv7-Tiny   | ELANNet-Tiny       |  640  |  300  |       |                        |                   |   22.9            |   8.1              |  |
-| YOLOv7-Large  | ELANNet-Large      |  640  |  300  |       |                        |                   |   144.6           |   44.0             |  |
+| YOLOv7-T      | ELANNet-Tiny       |  640  |  300  |       |                        |                   |   22.9            |   8.1              |  |
+| YOLOv7-L      | ELANNet-Large      |  640  |  300  |       |                        |                   |   144.6           |   44.0             |  |
+
+* Reproduced YOLOv5:
+
+| Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOv5-N      | CSPDarkNet-N       |  640  |  250  |       |                        |                   |   7.7             |   2.4              |  |
+| YOLOv5-S      | CSPDarkNet-S       |  640  |  250  |       |                        |                   |   27.1            |   9.0              |  |
+| YOLOv5-M      | CSPDarkNet-M       |  640  |  250  |       |                        |                   |   74.3            |   25.4             |  |
+| YOLOv5-L      | CSPDarkNet-L       |  640  |  250  |       |                        |                   |   155.6           |   54.2             |  |
 
 - *All models are trained with ImageNet pretrained weights (IP). All FLOPs are measured with a 640x640 input size on COCO val2017. FPS is measured with batch size 1 on a 3090 GPU, covering everything from model inference to the NMS operation.*
 
+- *The reproduced YOLOv5 uses a **Decoupled Head**, which is why its FLOPs and Params are higher than those of the official YOLOv5 (see the sketch below).*
+
 - *Due to my limited computing resources, I had to abandon training on other YOLO detectors, including YOLOv7-Huge and YOLOv8-Nano~Large. If you are interested in these models and have trained them using the code from this project, I would greatly appreciate it if you could share the trained weight files with me.*
 
 ## Train
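The cost difference noted above comes from the head structure. The sketch below is a minimal, hypothetical illustration (layer widths and depths are illustrative, not this repo's actual configuration): a coupled head emits obj/cls/reg from one shared convolution, while a decoupled head runs separate convolution stacks for classification and regression, roughly doubling the head's FLOPs and parameters.

```python
import torch.nn as nn

class CoupledHead(nn.Module):
    """One shared conv predicts obj + cls + box together."""
    def __init__(self, in_dim=256, num_classes=80, num_anchors=3):
        super().__init__()
        self.pred = nn.Conv2d(in_dim, num_anchors * (1 + num_classes + 4), 1)

    def forward(self, x):
        return self.pred(x)

class DecoupledHead(nn.Module):
    """Separate cls/reg branches, as in YOLOX-style heads."""
    def __init__(self, in_dim=256, num_classes=80):
        super().__init__()
        self.cls_feat = nn.Sequential(
            nn.Conv2d(in_dim, in_dim, 3, padding=1), nn.SiLU(),
            nn.Conv2d(in_dim, in_dim, 3, padding=1), nn.SiLU())
        self.reg_feat = nn.Sequential(
            nn.Conv2d(in_dim, in_dim, 3, padding=1), nn.SiLU(),
            nn.Conv2d(in_dim, in_dim, 3, padding=1), nn.SiLU())
        self.obj_pred = nn.Conv2d(in_dim, 1, 1)
        self.cls_pred = nn.Conv2d(in_dim, num_classes, 1)
        self.reg_pred = nn.Conv2d(in_dim, 4, 1)

    def forward(self, x):
        cls_feat = self.cls_feat(x)
        reg_feat = self.reg_feat(x)
        # two full conv stacks run per scale, hence the extra cost
        return self.obj_pred(reg_feat), self.cls_pred(cls_feat), self.reg_pred(reg_feat)
```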

+ 15 - 2
README_CN.md

@@ -39,6 +39,8 @@ pip install -r requirements.txt
 | Optimizer               | SGD               |
 | Multi Scale Train       | True (320 ~ 640)  |
 
+*Due to my limited computing resources, I cannot use a larger multi-scale range, such as 320~960.*
+
 ## Experiments
 ### VOC
 - Download VOC.
@@ -107,11 +109,22 @@ python train.py --cuda -d coco --root path/to/COCO -v yolov1 -bs 16 --max_epoch
 | YOLOv3        | DarkNet-53         |  640  |  250  |       |        42.9            |       63.5        |   167.4           |   54.9             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov3_coco.pth) |
 | YOLOv4        | CSPDarkNet-53      |  640  |  250  |       |        46.6            |       65.8        |   162.7           |   61.5             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) |
 | YOLOX-L       | CSPDarkNet-L       |  640  |  300  |       |        46.6            |       66.1        |   155.4           |   54.2             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_coco.pth) |
-| YOLOv7-Tiny   | ELANNet-Tiny       |  640  |  300  |       |                        |                   |   22.9            |   8.1              |  |
-| YOLOv7-Large  | ELANNet-Large      |  640  |  300  |       |                        |                   |   144.6           |   44.0             |  |
+| YOLOv7-T      | ELANNet-Tiny       |  640  |  300  |       |                        |                   |   22.9            |   8.1              |  |
+| YOLOv7-L      | ELANNet-Large      |  640  |  300  |       |                        |                   |   144.6           |   44.0             |  |
+
+* Reproduced YOLOv5:
+
+| Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOv5-N      | CSPDarkNet-N       |  640  |  250  |       |                        |                   |   7.7             |   2.4              |  |
+| YOLOv5-S      | CSPDarkNet-S       |  640  |  250  |       |                        |                   |   27.1            |   9.0              |  |
+| YOLOv5-M      | CSPDarkNet-M       |  640  |  250  |       |                        |                   |   74.3            |   25.4             |  |
+| YOLOv5-L      | CSPDarkNet-L       |  640  |  250  |       |                        |                   |   155.6           |   54.2             |  |
 
 - *All models are trained with ImageNet pretrained weights (IP). All FLOPs are measured on COCO val with a 640x640 or 1280x1280 input size. FPS is measured on a single 3090 GPU with batch size 1; note that the timing covers model forward inference, post-processing, and the NMS operation.*
 
+- *Our reproduced YOLOv5 uses the **Decoupled Head**, so its FLOPs and Params are higher than the official YOLOv5's.*
+
 - *Due to my limited computing resources, I abandoned training the remaining YOLO detectors, including YOLOv7-Huge and YOLOv8-Small~Large. If you are interested in them and have trained them with the code from this project, I would sincerely appreciate it if you could share the trained weight files.*
 
 

+ 12 - 19
models/yolov1/build.py

@@ -28,31 +28,24 @@ def build_yolov1(args, cfg, device, num_classes=80, trainable=False):
         )
 
     # -------------- Initialize YOLO --------------
-    for m in model.modules():
-        if isinstance(m, nn.BatchNorm2d):
-            m.eps = 1e-3
-            m.momentum = 0.03    
     # Init bias
     init_prob = 0.01
     bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
     # obj pred
-    for obj_pred in model.obj_preds:
-        b = obj_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+    b = model.obj_pred.bias.view(1, -1)
+    b.data.fill_(bias_value.item())
+    model.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
     # cls pred
-    for cls_pred in model.cls_preds:
-        b = cls_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+    b = model.cls_pred.bias.view(1, -1)
+    b.data.fill_(bias_value.item())
+    model.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
     # reg pred
-    for reg_pred in model.reg_preds:
-        b = reg_pred.bias.view(-1, )
-        b.data.fill_(1.0)
-        reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        w = reg_pred.weight
-        w.data.fill_(0.)
-        reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+    b = model.reg_pred.bias.view(-1, )
+    b.data.fill_(1.0)
+    model.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+    w = model.reg_pred.weight
+    w.data.fill_(0.)
+    model.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
 
 
     # -------------- Build criterion --------------
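The bias initialization kept in this hunk follows the prior-probability trick from RetinaNet's focal loss: choosing the objectness/class bias b = -log((1 - π)/π) with π = 0.01 makes the initial sigmoid output about 0.01, so early training is not swamped by the overwhelming number of easy negatives. A quick numeric check:

```python
import torch

init_prob = 0.01
# b = -log((1 - pi) / pi)  =>  sigmoid(b) == pi
bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
print(bias_value.item())                 # ~= -4.595
print(torch.sigmoid(bias_value).item())  # ~= 0.0100
```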

+ 12 - 19
models/yolov2/build.py

@@ -28,31 +28,24 @@ def build_yolov2(args, cfg, device, num_classes=80, trainable=False):
         )
 
     # -------------- Initialize YOLO --------------
-    for m in model.modules():
-        if isinstance(m, nn.BatchNorm2d):
-            m.eps = 1e-3
-            m.momentum = 0.03    
     # Init bias
     init_prob = 0.01
     bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
     # obj pred
-    for obj_pred in model.obj_preds:
-        b = obj_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+    b = model.obj_pred.bias.view(1, -1)
+    b.data.fill_(bias_value.item())
+    model.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
     # cls pred
-    for cls_pred in model.cls_preds:
-        b = cls_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+    b = model.cls_pred.bias.view(1, -1)
+    b.data.fill_(bias_value.item())
+    model.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
     # reg pred
-    for reg_pred in model.reg_preds:
-        b = reg_pred.bias.view(-1, )
-        b.data.fill_(1.0)
-        reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        w = reg_pred.weight
-        w.data.fill_(0.)
-        reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+    b = model.reg_pred.bias.view(-1, )
+    b.data.fill_(1.0)
+    model.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+    w = model.reg_pred.weight
+    w.data.fill_(0.)
+    model.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
 
 
     # -------------- Build criterion --------------

+ 138 - 0
tools/export_onnx.py

@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+# Thanks to YOLOX: https://github.com/Megvii-BaseDetection/YOLOX/blob/main/tools/export_onnx.py
+
+import argparse
+import os
+from loguru import logger
+import sys
+sys.path.append('..')
+
+import torch
+from torch import nn
+
+from utils.misc import SiLU
+from utils.misc import load_weight, replace_module
+from config import build_config
+from models import build_model
+
+
+def make_parser():
+    parser = argparse.ArgumentParser("YOLO ONNXRuntime")
+    # basic
+    parser.add_argument("--output-name", type=str, default="yolo_free_large.onnx",
+                        help="output name of models")
+    parser.add_argument('-size', '--img_size', default=640, type=int,
+                        help='the max size of input image')
+    parser.add_argument("--input", default="images", type=str,
+                        help="input node name of onnx model")
+    parser.add_argument("--output", default="output", type=str,
+                        help="output node name of onnx model")
+    parser.add_argument("-o", "--opset", default=11, type=int,
+                        help="onnx opset version")
+    parser.add_argument("--batch-size", type=int, default=1,
+                        help="batch size")
+    parser.add_argument("--dynamic", action="store_true", default=False,
+                        help="whether the input shape should be dynamic or not")
+    parser.add_argument("--no-onnxsim", action="store_true", default=False,
+                        help="use onnxsim or not")
+    parser.add_argument("-f", "--exp_file", default=None, type=str,
+                        help="experiment description file")
+    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
+    parser.add_argument("opts", default=None, nargs=argparse.REMAINDER,
+                        help="Modify config options using the command-line")
+    parser.add_argument("--decode_in_inference", action="store_true", default=False,
+                        help="decode in inference or not")
+    parser.add_argument('--save_dir', default='../weights/onnx/', type=str,
+                        help='Dir to save onnx file')
+
+    # model
+    parser.add_argument('-v', '--version', default='yolo_free_large', type=str,
+                        help='build yolo')
+    parser.add_argument('--weight', default=None,
+                        type=str, help='Trained state_dict file path to open')
+    parser.add_argument('-ct', '--conf_thresh', default=0.1, type=float,
+                        help='confidence threshold')
+    parser.add_argument('-nt', '--nms_thresh', default=0.5, type=float,
+                        help='NMS threshold')
+    parser.add_argument('--topk', default=100, type=int,
+                        help='topk candidates for testing')
+    parser.add_argument('-nc', '--num_classes', default=80, type=int,
+                        help='number of classes')
+    parser.add_argument('--fuse_conv_bn', action='store_true', default=False,
+                        help='fuse Conv & BN')
+
+    return parser
+
+
+@logger.catch
+def main():
+    args = make_parser().parse_args()
+    logger.info("args value: {}".format(args))
+    device = torch.device('cpu')
+
+    # config
+    cfg = build_config(args)
+
+    # build model
+    model = build_model(
+        args=args, 
+        cfg=cfg,
+        device=device, 
+        num_classes=args.num_classes,
+        trainable=False
+        )
+
+    # replace nn.SiLU with SiLU
+    model = replace_module(model, nn.SiLU, SiLU)
+
+    # load trained weight
+    model = load_weight(model, args.weight, args.fuse_conv_bn)
+    model = model.to(device).eval()
+
+    logger.info("loading checkpoint done.")
+    dummy_input = torch.randn(args.batch_size, 3, args.img_size, args.img_size)
+
+    # save onnx file
+    save_path = os.path.join(args.save_dir, str(args.opset))
+    os.makedirs(save_path, exist_ok=True)
+    output_name = os.path.join(save_path, args.output_name)
+
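+    # Export via tracing. When --dynamic is set, dim 0 of the input and
+    # output tensors is declared as a variable-size 'batch' axis.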
+    torch.onnx.export(
+        model,
+        dummy_input,
+        output_name,
+        input_names=[args.input],
+        output_names=[args.output],
+        dynamic_axes={args.input: {0: 'batch'},
+                      args.output: {0: 'batch'}} if args.dynamic else None,
+        opset_version=args.opset,
+    )
+
+    logger.info("generated onnx model named {}".format(output_name))
+
+    if not args.no_onnxsim:
+        import onnx
+
+        from onnxsim import simplify
+
+        input_shapes = {args.input: list(dummy_input.shape)} if args.dynamic else None
+
+        # use onnx-simplifier to strip redundant nodes from the exported model.
+        onnx_model = onnx.load(output_name)
+        model_simp, check = simplify(onnx_model,
+                                     dynamic_input_shape=args.dynamic,
+                                     input_shapes=input_shapes)
+        assert check, "Simplified ONNX model could not be validated"
+
+        # save onnxsim file
+        save_path = os.path.join(save_path, 'onnxsim')
+        os.makedirs(save_path, exist_ok=True)
+        output_name = os.path.join(save_path, args.output_name)
+        onnx.save(model_simp, output_name)
+        logger.info("generated simplified onnx model named {}".format(output_name))
+
+
+if __name__ == "__main__":
+    main()
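To sanity-check the exported file, it can be loaded back with ONNX Runtime and fed a dummy input of the same shape. This is a hedged sketch, not part of the commit; it assumes `onnxruntime` is installed and uses the script's default save path, opset, and node names:

```python
import numpy as np
import onnxruntime as ort

# Example path: default --save_dir + opset 11 + default --output-name.
sess = ort.InferenceSession("../weights/onnx/11/yolo_free_large.onnx",
                            providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name            # 'images' by default
dummy = np.random.randn(1, 3, 640, 640).astype(np.float32)
outputs = sess.run(None, {input_name: dummy})     # run all output nodes
print([o.shape for o in outputs])
```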

+ 8 - 0
utils/misc.py

@@ -302,6 +302,14 @@ class ModelEMA(object):
         # Update EMA attributes
         self.copy_attr(self.ema, model, include, exclude)
 
+## SiLU
+class SiLU(nn.Module):
+    """export-friendly version of nn.SiLU()"""
+
+    @staticmethod
+    def forward(x):
+        return x * torch.sigmoid(x)
+
 
 # ---------------------------- NMS ----------------------------
 ## basic NMS
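tools/export_onnx.py also imports `replace_module` from utils/misc.py, whose body is not part of this diff. A minimal sketch of what such a helper typically looks like, assuming it recursively swaps module instances (the repo's actual implementation may differ):

```python
import torch.nn as nn

def replace_module(module, replaced_module_type, new_module_type):
    # Recursively replace every instance of replaced_module_type
    # (e.g. nn.SiLU) with new_module_type (e.g. the export-friendly SiLU).
    if isinstance(module, replaced_module_type):
        return new_module_type()
    for name, child in module.named_children():
        setattr(module, name,
                replace_module(child, replaced_module_type, new_module_type))
    return module
```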