yjh0410 2 years ago
parent
commit
7cf531e7da
92 files changed with 1455 additions and 35 deletions
  1. 40 0
      README.md
  2. 41 0
      README_CN.md
  3. 16 5
      config/__init__.py
  4. 59 2
      config/transform_config.py
  5. 1 1
      config/yolov3_config.py
  6. 1 1
      config/yolov4_config.py
  7. 5 5
      config/yolov5_config.py
  8. 3 3
      config/yolov7_config.py
  9. 5 5
      config/yolov8_config.py
  10. 1 1
      eval.py
  11. 0 0
      models/detectors/__init__.py
  12. 0 0
      models/detectors/yolov1/build.py
  13. 0 0
      models/detectors/yolov1/loss.py
  14. 0 0
      models/detectors/yolov1/matcher.py
  15. 0 0
      models/detectors/yolov1/yolov1.py
  16. 0 0
      models/detectors/yolov1/yolov1_backbone.py
  17. 0 0
      models/detectors/yolov1/yolov1_basic.py
  18. 0 0
      models/detectors/yolov1/yolov1_head.py
  19. 0 0
      models/detectors/yolov1/yolov1_neck.py
  20. 0 0
      models/detectors/yolov2/build.py
  21. 0 0
      models/detectors/yolov2/loss.py
  22. 0 0
      models/detectors/yolov2/matcher.py
  23. 0 0
      models/detectors/yolov2/yolov2.py
  24. 0 0
      models/detectors/yolov2/yolov2_backbone.py
  25. 0 0
      models/detectors/yolov2/yolov2_basic.py
  26. 0 0
      models/detectors/yolov2/yolov2_head.py
  27. 0 0
      models/detectors/yolov2/yolov2_neck.py
  28. 0 0
      models/detectors/yolov3/build.py
  29. 0 0
      models/detectors/yolov3/loss.py
  30. 0 0
      models/detectors/yolov3/matcher.py
  31. 0 0
      models/detectors/yolov3/yolov3.py
  32. 0 0
      models/detectors/yolov3/yolov3_backbone.py
  33. 0 0
      models/detectors/yolov3/yolov3_basic.py
  34. 0 0
      models/detectors/yolov3/yolov3_fpn.py
  35. 0 0
      models/detectors/yolov3/yolov3_head.py
  36. 0 0
      models/detectors/yolov3/yolov3_neck.py
  37. 0 0
      models/detectors/yolov4/build.py
  38. 0 0
      models/detectors/yolov4/loss.py
  39. 0 0
      models/detectors/yolov4/matcher.py
  40. 0 0
      models/detectors/yolov4/yolov4.py
  41. 0 0
      models/detectors/yolov4/yolov4_backbone.py
  42. 0 0
      models/detectors/yolov4/yolov4_basic.py
  43. 0 0
      models/detectors/yolov4/yolov4_fpn.py
  44. 0 0
      models/detectors/yolov4/yolov4_head.py
  45. 0 0
      models/detectors/yolov4/yolov4_neck.py
  46. 0 0
      models/detectors/yolov5/build.py
  47. 0 0
      models/detectors/yolov5/loss.py
  48. 0 0
      models/detectors/yolov5/matcher.py
  49. 0 0
      models/detectors/yolov5/yolov5.py
  50. 0 0
      models/detectors/yolov5/yolov5_backbone.py
  51. 0 0
      models/detectors/yolov5/yolov5_basic.py
  52. 0 0
      models/detectors/yolov5/yolov5_head.py
  53. 0 0
      models/detectors/yolov5/yolov5_neck.py
  54. 0 0
      models/detectors/yolov5/yolov5_pafpn.py
  55. 0 0
      models/detectors/yolov7/build.py
  56. 0 0
      models/detectors/yolov7/loss.py
  57. 0 0
      models/detectors/yolov7/matcher.py
  58. 0 0
      models/detectors/yolov7/yolov7.py
  59. 0 0
      models/detectors/yolov7/yolov7_backbone.py
  60. 0 0
      models/detectors/yolov7/yolov7_basic.py
  61. 0 0
      models/detectors/yolov7/yolov7_fpn.py
  62. 0 0
      models/detectors/yolov7/yolov7_head.py
  63. 0 0
      models/detectors/yolov7/yolov7_neck.py
  64. 0 0
      models/detectors/yolov8/build.py
  65. 0 0
      models/detectors/yolov8/loss.py
  66. 0 0
      models/detectors/yolov8/matcher.py
  67. 0 0
      models/detectors/yolov8/yolov8.py
  68. 0 0
      models/detectors/yolov8/yolov8_backbone.py
  69. 0 0
      models/detectors/yolov8/yolov8_basic.py
  70. 0 0
      models/detectors/yolov8/yolov8_head.py
  71. 0 0
      models/detectors/yolov8/yolov8_neck.py
  72. 0 0
      models/detectors/yolov8/yolov8_pafpn.py
  73. 0 0
      models/detectors/yolox/build.py
  74. 0 0
      models/detectors/yolox/loss.py
  75. 0 0
      models/detectors/yolox/matcher.py
  76. 0 0
      models/detectors/yolox/yolox.py
  77. 0 0
      models/detectors/yolox/yolox_backbone.py
  78. 0 0
      models/detectors/yolox/yolox_basic.py
  79. 0 0
      models/detectors/yolox/yolox_fpn.py
  80. 0 0
      models/detectors/yolox/yolox_head.py
  81. 0 0
      models/detectors/yolox/yolox_neck.py
  82. 9 0
      models/tracker/__init__.py
  83. 52 0
      models/tracker/byte_tracker/basetrack.py
  84. 14 0
      models/tracker/byte_tracker/build.py
  85. 332 0
      models/tracker/byte_tracker/byte_tracker.py
  86. 278 0
      models/tracker/byte_tracker/kalman_filter.py
  87. 194 0
      models/tracker/byte_tracker/matching.py
  88. 1 1
      test.py
  89. 1 1
      tools/export_onnx.py
  90. 356 0
      track.py
  91. 1 1
      train.py
  92. 45 9
      utils/vis_tools.py

+ 40 - 0
README.md

@@ -230,3 +230,43 @@ python demo.py --mode camera \
                --cuda \
                --weight path/to/weight
 ```
+
+## Tracking
+Our project also supports **multi-object tracking**. We use this project's YOLO models as the detector in a "tracking-by-detection" pipeline, with the simple and efficient **ByteTrack** as the tracker.
+
+* image tracking
+```Shell
+python track.py --mode image \
+                --path_to_img path/to/images/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* video tracking
+
+```Shell
+python track.py --mode video \
+                --path_to_vid path/to/video/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* camera tracking
+
+```Shell
+python track.py --mode camera \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
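For orientation, below is a minimal, hedged sketch of the per-frame "detect, then associate" loop that the new `track.py` implements; it is simplified (no box rescaling or visualization) and the variable names are only illustrative.

```python
# Sketch only: the real loop lives in track.py and additionally rescales the
# boxes back to the original frame size and draws them with plot_tracking().
import cv2

def run_tracking(detector, tracker, transform, device, video_path):
    cap = cv2.VideoCapture(video_path)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        x, _, deltas = transform(frame)               # resize / pad the frame
        x = x.unsqueeze(0).to(device) / 255.
        bboxes, scores, labels = detector(x)          # YOLO detections (xyxy boxes, scores, classes)
        online_targets = tracker.update(scores, bboxes, labels)  # ByteTrack association
        for t in online_targets:
            print(t.track_id, t.xywh, t.score)        # each object keeps a persistent ID
    cap.release()
```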

+ 41 - 0
README_CN.md

@@ -243,3 +243,44 @@ python demo.py --mode camera \
                --cuda \
                --weight path/to/weight
 ```
+
+
+## Tracking
+This project also supports **multi-object tracking**. We use this project's YOLO models as the detector in a "tracking-by-detection" pipeline, with the simple and efficient **ByteTrack** as the tracker.
+
+* image tracking
+```Shell
+python track.py --mode image \
+                --path_to_img path/to/images/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* video tracking
+
+```Shell
+python track.py --mode video \
+                --path_to_vid path/to/video/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* camera tracking
+
+```Shell
+python track.py --mode camera \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```

+ 16 - 5
config/__init__.py

@@ -42,7 +42,12 @@ def build_model_config(args):
 
 # ------------------ Transform Config ----------------------
 from .transform_config import (
-    yolov5_strong_trans_config, yolov5_weak_trans_config, yolov5_nano_trans_config,
+    yolov5_nano_trans_config,
+    yolov5_tiny_trans_config,
+    yolov5_small_trans_config,
+    yolov5_medium_trans_config,
+    yolov5_large_trans_config,
+    yolov5_huge_trans_config,
     ssd_trans_config
 )
 
@@ -54,11 +59,17 @@ def build_trans_config(trans_config='ssd'):
         cfg = ssd_trans_config
 
     # YOLOv5-style transform 
-    elif trans_config == 'yolov5_strong':
-        cfg = yolov5_strong_trans_config
-    elif trans_config == 'yolov5_weak':
-        cfg = yolov5_weak_trans_config
     elif trans_config == 'yolov5_nano':
         cfg = yolov5_nano_trans_config
+    elif trans_config == 'yolov5_tiny':
+        cfg = yolov5_tiny_trans_config
+    elif trans_config == 'yolov5_small':
+        cfg = yolov5_small_trans_config
+    elif trans_config == 'yolov5_medium':
+        cfg = yolov5_medium_trans_config
+    elif trans_config == 'yolov5_large':
+        cfg = yolov5_large_trans_config
+    elif trans_config == 'yolov5_huge':
+        cfg = yolov5_huge_trans_config
         
     return cfg
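As a quick sanity check of the renamed transform configs, here is a hedged usage sketch; only `build_trans_config` and the config keys visible in this commit are assumed.

```python
# 'yolov5_large' takes over the role of the former 'yolov5_strong' setting
# used by the yolov3 / yolov4 / yolov7_l configs in this commit.
from config import build_trans_config

trans_cfg = build_trans_config('yolov5_large')
print(trans_cfg['aug_type'], trans_cfg['mixup_scale'])   # 'yolov5', [0.5, 1.5]
```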

+ 59 - 2
config/transform_config.py

@@ -1,7 +1,26 @@
 # transform config
 
 # ----------------------- YOLOv5-Style -----------------------
-yolov5_strong_trans_config = {
+yolov5_huge_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': 0.9,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.2,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+yolov5_large_trans_config = {
     'aug_type': 'yolov5',
     # Basic Augment
     'degrees': 0.0,
@@ -20,7 +39,45 @@ yolov5_strong_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
 }
 
-yolov5_weak_trans_config = {
+yolov5_medium_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': 0.9,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.10,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+yolov5_small_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': 0.9,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.05,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+yolov5_tiny_trans_config = {
     'aug_type': 'yolov5',
     # Basic Augment
     'degrees': 0.0,

+ 1 - 1
config/yolov3_config.py

@@ -2,7 +2,7 @@
 
 yolov3_cfg = {
     # input
-    'trans_type': 'yolov5_strong',
+    'trans_type': 'yolov5_large',
     'multi_scale': [0.5, 1.0],
     # model
     'backbone': 'darknet53',

+ 1 - 1
config/yolov4_config.py

@@ -2,7 +2,7 @@
 
 yolov4_cfg = {
     # input
-    'trans_type': 'yolov5_strong',
+    'trans_type': 'yolov5_large',
     'multi_scale': [0.5, 1.0],
     # model
     'backbone': 'cspdarknet53',

+ 5 - 5
config/yolov5_config.py

@@ -33,7 +33,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_tiny',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -93,7 +93,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_small',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -153,7 +153,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_medium',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -213,7 +213,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_large',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -273,7 +273,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_huge',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,

+ 3 - 3
config/yolov7_config.py

@@ -3,7 +3,7 @@
 yolov7_cfg = {
     'yolov7_t':{
         # input
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_tiny',
         'multi_scale': [0.5, 1.5], # 320 -> 640
         # model
         'backbone': 'elannet_tiny',
@@ -61,7 +61,7 @@ yolov7_cfg = {
 
     'yolov7_l':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_large',
         'multi_scale': [0.5, 1.25], # 320 -> 640
         # model
         'backbone': 'elannet_large',
@@ -119,7 +119,7 @@ yolov7_cfg = {
 
     'yolov7_x':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_huge',
         'multi_scale': [0.5, 1.25], # 320 -> 640
         # model
         'backbone': 'elannet_huge',

+ 5 - 5
config/yolov8_config.py

@@ -3,7 +3,7 @@
 yolov8_cfg = {
     'yolov8_n':{
         # input
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_tiny',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -64,7 +64,7 @@ yolov8_cfg = {
 
     'yolov8_s':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_small',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -125,7 +125,7 @@ yolov8_cfg = {
 
     'yolov8_m':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_medium',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -186,7 +186,7 @@ yolov8_cfg = {
 
     'yolov8_l':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_large',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -247,7 +247,7 @@ yolov8_cfg = {
 
     'yolov8_x':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_huge',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',

+ 1 - 1
eval.py

@@ -16,7 +16,7 @@ from dataset.data_augment import build_transform
 from utils.misc import load_weight
 from utils.misc import compute_flops
 
-from models import build_model
+from models.detectors import build_model
 from config import build_model_config, build_trans_config
 
 

+ 0 - 0
models/__init__.py → models/detectors/__init__.py


+ 0 - 0
models/yolov1/build.py → models/detectors/yolov1/build.py


+ 0 - 0
models/yolov1/loss.py → models/detectors/yolov1/loss.py


+ 0 - 0
models/yolov1/matcher.py → models/detectors/yolov1/matcher.py


+ 0 - 0
models/yolov1/yolov1.py → models/detectors/yolov1/yolov1.py


+ 0 - 0
models/yolov1/yolov1_backbone.py → models/detectors/yolov1/yolov1_backbone.py


+ 0 - 0
models/yolov1/yolov1_basic.py → models/detectors/yolov1/yolov1_basic.py


+ 0 - 0
models/yolov1/yolov1_head.py → models/detectors/yolov1/yolov1_head.py


+ 0 - 0
models/yolov1/yolov1_neck.py → models/detectors/yolov1/yolov1_neck.py


+ 0 - 0
models/yolov2/build.py → models/detectors/yolov2/build.py


+ 0 - 0
models/yolov2/loss.py → models/detectors/yolov2/loss.py


+ 0 - 0
models/yolov2/matcher.py → models/detectors/yolov2/matcher.py


+ 0 - 0
models/yolov2/yolov2.py → models/detectors/yolov2/yolov2.py


+ 0 - 0
models/yolov2/yolov2_backbone.py → models/detectors/yolov2/yolov2_backbone.py


+ 0 - 0
models/yolov2/yolov2_basic.py → models/detectors/yolov2/yolov2_basic.py


+ 0 - 0
models/yolov2/yolov2_head.py → models/detectors/yolov2/yolov2_head.py


+ 0 - 0
models/yolov2/yolov2_neck.py → models/detectors/yolov2/yolov2_neck.py


+ 0 - 0
models/yolov3/build.py → models/detectors/yolov3/build.py


+ 0 - 0
models/yolov3/loss.py → models/detectors/yolov3/loss.py


+ 0 - 0
models/yolov3/matcher.py → models/detectors/yolov3/matcher.py


+ 0 - 0
models/yolov3/yolov3.py → models/detectors/yolov3/yolov3.py


+ 0 - 0
models/yolov3/yolov3_backbone.py → models/detectors/yolov3/yolov3_backbone.py


+ 0 - 0
models/yolov3/yolov3_basic.py → models/detectors/yolov3/yolov3_basic.py


+ 0 - 0
models/yolov3/yolov3_fpn.py → models/detectors/yolov3/yolov3_fpn.py


+ 0 - 0
models/yolov3/yolov3_head.py → models/detectors/yolov3/yolov3_head.py


+ 0 - 0
models/yolov3/yolov3_neck.py → models/detectors/yolov3/yolov3_neck.py


+ 0 - 0
models/yolov4/build.py → models/detectors/yolov4/build.py


+ 0 - 0
models/yolov4/loss.py → models/detectors/yolov4/loss.py


+ 0 - 0
models/yolov4/matcher.py → models/detectors/yolov4/matcher.py


+ 0 - 0
models/yolov4/yolov4.py → models/detectors/yolov4/yolov4.py


+ 0 - 0
models/yolov4/yolov4_backbone.py → models/detectors/yolov4/yolov4_backbone.py


+ 0 - 0
models/yolov4/yolov4_basic.py → models/detectors/yolov4/yolov4_basic.py


+ 0 - 0
models/yolov4/yolov4_fpn.py → models/detectors/yolov4/yolov4_fpn.py


+ 0 - 0
models/yolov4/yolov4_head.py → models/detectors/yolov4/yolov4_head.py


+ 0 - 0
models/yolov4/yolov4_neck.py → models/detectors/yolov4/yolov4_neck.py


+ 0 - 0
models/yolov5/build.py → models/detectors/yolov5/build.py


+ 0 - 0
models/yolov5/loss.py → models/detectors/yolov5/loss.py


+ 0 - 0
models/yolov5/matcher.py → models/detectors/yolov5/matcher.py


+ 0 - 0
models/yolov5/yolov5.py → models/detectors/yolov5/yolov5.py


+ 0 - 0
models/yolov5/yolov5_backbone.py → models/detectors/yolov5/yolov5_backbone.py


+ 0 - 0
models/yolov5/yolov5_basic.py → models/detectors/yolov5/yolov5_basic.py


+ 0 - 0
models/yolov5/yolov5_head.py → models/detectors/yolov5/yolov5_head.py


+ 0 - 0
models/yolov5/yolov5_neck.py → models/detectors/yolov5/yolov5_neck.py


+ 0 - 0
models/yolov5/yolov5_pafpn.py → models/detectors/yolov5/yolov5_pafpn.py


+ 0 - 0
models/yolov7/build.py → models/detectors/yolov7/build.py


+ 0 - 0
models/yolov7/loss.py → models/detectors/yolov7/loss.py


+ 0 - 0
models/yolov7/matcher.py → models/detectors/yolov7/matcher.py


+ 0 - 0
models/yolov7/yolov7.py → models/detectors/yolov7/yolov7.py


+ 0 - 0
models/yolov7/yolov7_backbone.py → models/detectors/yolov7/yolov7_backbone.py


+ 0 - 0
models/yolov7/yolov7_basic.py → models/detectors/yolov7/yolov7_basic.py


+ 0 - 0
models/yolov7/yolov7_fpn.py → models/detectors/yolov7/yolov7_fpn.py


+ 0 - 0
models/yolov7/yolov7_head.py → models/detectors/yolov7/yolov7_head.py


+ 0 - 0
models/yolov7/yolov7_neck.py → models/detectors/yolov7/yolov7_neck.py


+ 0 - 0
models/yolov8/build.py → models/detectors/yolov8/build.py


+ 0 - 0
models/yolov8/loss.py → models/detectors/yolov8/loss.py


+ 0 - 0
models/yolov8/matcher.py → models/detectors/yolov8/matcher.py


+ 0 - 0
models/yolov8/yolov8.py → models/detectors/yolov8/yolov8.py


+ 0 - 0
models/yolov8/yolov8_backbone.py → models/detectors/yolov8/yolov8_backbone.py


+ 0 - 0
models/yolov8/yolov8_basic.py → models/detectors/yolov8/yolov8_basic.py


+ 0 - 0
models/yolov8/yolov8_head.py → models/detectors/yolov8/yolov8_head.py


+ 0 - 0
models/yolov8/yolov8_neck.py → models/detectors/yolov8/yolov8_neck.py


+ 0 - 0
models/yolov8/yolov8_pafpn.py → models/detectors/yolov8/yolov8_pafpn.py


+ 0 - 0
models/yolox/build.py → models/detectors/yolox/build.py


+ 0 - 0
models/yolox/loss.py → models/detectors/yolox/loss.py


+ 0 - 0
models/yolox/matcher.py → models/detectors/yolox/matcher.py


+ 0 - 0
models/yolox/yolox.py → models/detectors/yolox/yolox.py


+ 0 - 0
models/yolox/yolox_backbone.py → models/detectors/yolox/yolox_backbone.py


+ 0 - 0
models/yolox/yolox_basic.py → models/detectors/yolox/yolox_basic.py


+ 0 - 0
models/yolox/yolox_fpn.py → models/detectors/yolox/yolox_fpn.py


+ 0 - 0
models/yolox/yolox_head.py → models/detectors/yolox/yolox_head.py


+ 0 - 0
models/yolox/yolox_neck.py → models/detectors/yolox/yolox_neck.py


+ 9 - 0
models/tracker/__init__.py

@@ -0,0 +1,9 @@
+from .byte_tracker.build import build_byte_tracker
+
+
+
+def build_tracker(args):
+    if args.tracker == 'byte_tracker':
+        return build_byte_tracker(args)
+    else:
+        raise NotImplementedError
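A small sketch of how this factory is intended to be called; the attribute names mirror the argparse flags added in `track.py` below.

```python
import argparse
from models.tracker import build_tracker

# The same fields that track.py's parser provides.
args = argparse.Namespace(
    tracker='byte_tracker',   # -tk / --tracker
    track_thresh=0.5,
    track_buffer=30,
    match_thresh=0.8,
    fps=30,
    mot20=False,
)
tracker = build_tracker(args)   # returns a ByteTracker instance
```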

+ 52 - 0
models/tracker/byte_tracker/basetrack.py

@@ -0,0 +1,52 @@
+import numpy as np
+from collections import OrderedDict
+
+
+class TrackState(object):
+    New = 0
+    Tracked = 1
+    Lost = 2
+    Removed = 3
+
+
+class BaseTrack(object):
+    _count = 0
+
+    track_id = 0
+    is_activated = False
+    state = TrackState.New
+
+    history = OrderedDict()
+    features = []
+    curr_feature = None
+    score = 0
+    start_frame = 0
+    frame_id = 0
+    time_since_update = 0
+
+    # multi-camera
+    location = (np.inf, np.inf)
+
+    @property
+    def end_frame(self):
+        return self.frame_id
+
+    @staticmethod
+    def next_id():
+        BaseTrack._count += 1
+        return BaseTrack._count
+
+    def activate(self, *args):
+        raise NotImplementedError
+
+    def predict(self):
+        raise NotImplementedError
+
+    def update(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def mark_lost(self):
+        self.state = TrackState.Lost
+
+    def mark_removed(self):
+        self.state = TrackState.Removed

+ 14 - 0
models/tracker/byte_tracker/build.py

@@ -0,0 +1,14 @@
+from .byte_tracker import ByteTracker
+
+
+def build_byte_tracker(args):
+    tracker = ByteTracker(
+        track_thresh=args.track_thresh,
+        track_buffer=args.track_buffer,
+        frame_rate=args.fps,
+        match_thresh=args.match_thresh,
+        mot20=args.mot20
+    )
+
+    return tracker
+    

+ 332 - 0
models/tracker/byte_tracker/byte_tracker.py

@@ -0,0 +1,332 @@
+import numpy as np
+import os
+import os.path as osp
+
+from .kalman_filter import KalmanFilter
+from .matching import iou_distance, fuse_score, linear_assignment
+from .basetrack import BaseTrack, TrackState
+
+
+class STrack(BaseTrack):
+    shared_kalman = KalmanFilter()
+    def __init__(self, xywh, score):
+
+        # waiting to be activated
+        self._xywh = np.asarray(xywh, dtype=float)  # note: the np.float alias was removed in NumPy 1.24
+        self.kalman_filter = None
+        self.mean, self.covariance = None, None
+        self.is_activated = False
+
+        self.score = score
+        self.tracklet_len = 0
+
+    def predict(self):
+        mean_state = self.mean.copy()
+        if self.state != TrackState.Tracked:
+            mean_state[7] = 0
+        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
+
+    @staticmethod
+    def multi_predict(stracks):
+        if len(stracks) > 0:
+            multi_mean = np.asarray([st.mean.copy() for st in stracks])
+            multi_covariance = np.asarray([st.covariance for st in stracks])
+            for i, st in enumerate(stracks):
+                if st.state != TrackState.Tracked:
+                    multi_mean[i][7] = 0
+            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
+            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
+                stracks[i].mean = mean
+                stracks[i].covariance = cov
+
+    def activate(self, kalman_filter, frame_id):
+        """Start a new tracklet"""
+        self.kalman_filter = kalman_filter
+        self.track_id = self.next_id()
+        self.mean, self.covariance = self.kalman_filter.initiate(self.xywh_to_cxcyah(self._xywh))
+
+        self.tracklet_len = 0
+        self.state = TrackState.Tracked
+        if frame_id == 1:
+            self.is_activated = True
+        # self.is_activated = True
+        self.frame_id = frame_id
+        self.start_frame = frame_id
+
+    def re_activate(self, new_track, frame_id, new_id=False):
+        self.mean, self.covariance = self.kalman_filter.update(
+            self.mean, self.covariance, self.xywh_to_cxcyah(new_track.xywh)
+        )
+        self.tracklet_len = 0
+        self.state = TrackState.Tracked
+        self.is_activated = True
+        self.frame_id = frame_id
+        if new_id:
+            self.track_id = self.next_id()
+        self.score = new_track.score
+
+    def update(self, new_track, frame_id):
+        """
+        Update a matched track
+        :type new_track: STrack
+        :type frame_id: int
+        :return:
+        """
+        self.frame_id = frame_id
+        self.tracklet_len += 1
+
+        new_xywh = new_track.xywh
+        self.mean, self.covariance = self.kalman_filter.update(
+            self.mean, self.covariance, self.xywh_to_cxcyah(new_xywh))
+        self.state = TrackState.Tracked
+        self.is_activated = True
+
+        self.score = new_track.score
+
+    @property
+    # @jit(nopython=True)
+    def xywh(self):
+        """Get current position in bounding box format `(top left x, top left y,
+                width, height)`.
+        """
+        if self.mean is None:
+            return self._xywh.copy()
+        ret = self.mean[:4].copy()
+        ret[2] *= ret[3]
+        ret[:2] -= ret[2:] / 2
+        return ret
+
+
+    @property
+    # @jit(nopython=True)
+    def xyxy(self):
+        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
+        `(top left, bottom right)`.
+        """
+        ret = self.xywh.copy()
+        ret[2:] += ret[:2]
+        return ret
+
+
+    @staticmethod
+    # @jit(nopython=True)
+    def xywh_to_cxcyah(xywh):
+        """[x1, y1, w, h] -> [cx, cy, aspect ratio, h], 
+        where the aspect ratio is `width / height`.
+        """
+        ret = np.asarray(xywh).copy()
+        ret[:2] += ret[2:] / 2
+        ret[2] /= ret[3]
+        return ret
+
+
+    @staticmethod
+    # @jit(nopython=True)
+    def xyxy_to_xywh(xyxy):
+        """ [x1, y1, x2, y2] -> [x1, y1, w, h]"""
+        ret = np.asarray(xyxy).copy()
+        ret[2:] -= ret[:2]
+        return ret
+
+
+    @staticmethod
+    # @jit(nopython=True)
+    def xywh_to_xyxy(xywh):
+        ret = np.asarray(xywh).copy()
+        ret[2:] += ret[:2]
+        return ret
+
+
+    def to_cxcyah(self):
+        return self.xywh_to_cxcyah(self.xywh)
+
+
+    def __repr__(self):
+        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
+
+
+class ByteTracker(object):
+    def __init__(self, track_thresh=0.6, track_buffer=30, frame_rate=30, match_thresh=0.9, mot20=False):
+        self.tracked_stracks = []  # type: list[STrack]
+        self.lost_stracks = []  # type: list[STrack]
+        self.removed_stracks = []  # type: list[STrack]
+
+        self.frame_id = 0
+        self.track_thresh = track_thresh
+        self.track_buffer = track_buffer
+        self.det_thresh = track_thresh + 0.1
+        self.match_thresh = match_thresh
+
+        self.buffer_size = int(frame_rate / 30.0 * track_buffer)
+        self.max_time_lost = self.buffer_size
+        self.kalman_filter = KalmanFilter()
+
+        self.mot20 = mot20
+
+
+    def update(self, scores, bboxes, labels):
+        self.frame_id += 1
+        activated_starcks = []
+        refind_stracks = []
+        lost_stracks = []
+        removed_stracks = []
+
+        # process outputs
+        remain_inds = scores > self.track_thresh
+        inds_low = scores > 0.1
+        inds_high = scores < self.track_thresh
+        inds_second = np.logical_and(inds_low, inds_high)
+
+        # high score detections
+        dets = bboxes[remain_inds]
+        scores_keep = scores[remain_inds]
+
+        # second detections
+        dets_second = bboxes[inds_second]
+        scores_second = scores[inds_second]
+
+        if len(dets) > 0:
+            '''Detections'''
+            detections = [STrack(STrack.xyxy_to_xywh(xyxy), s) for
+                          (xyxy, s) in zip(dets, scores_keep)]
+        else:
+            detections = []
+
+        ''' Add newly detected tracklets to tracked_stracks'''
+        unconfirmed = []
+        tracked_stracks = []  # type: list[STrack]
+        for track in self.tracked_stracks:
+            if not track.is_activated:
+                unconfirmed.append(track)
+            else:
+                tracked_stracks.append(track)
+
+        ''' Step 2: First association, with high score detection boxes'''
+        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
+        # Predict the current location with KF
+        STrack.multi_predict(strack_pool)
+        dists = iou_distance(strack_pool, detections)
+        if not self.mot20:
+            dists = fuse_score(dists, detections)
+        matches, u_track, u_detection = linear_assignment(dists, thresh=self.match_thresh)
+
+        for itracked, idet in matches:
+            track = strack_pool[itracked]
+            det = detections[idet]
+            if track.state == TrackState.Tracked:
+                track.update(detections[idet], self.frame_id)
+                activated_starcks.append(track)
+            else:
+                track.re_activate(det, self.frame_id, new_id=False)
+                refind_stracks.append(track)
+
+        ''' Step 3: Second association, with low score detection boxes'''
+        # associate the remaining unmatched tracks with the low score detections
+        if len(dets_second) > 0:
+            '''Detections'''
+            detections_second = [STrack(STrack.xyxy_to_xywh(xyxy), s) for
+                          (xyxy, s) in zip(dets_second, scores_second)]
+        else:
+            detections_second = []
+        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
+        dists = iou_distance(r_tracked_stracks, detections_second)
+        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
+        for itracked, idet in matches:
+            track = r_tracked_stracks[itracked]
+            det = detections_second[idet]
+            if track.state == TrackState.Tracked:
+                track.update(det, self.frame_id)
+                activated_starcks.append(track)
+            else:
+                track.re_activate(det, self.frame_id, new_id=False)
+                refind_stracks.append(track)
+
+        for it in u_track:
+            track = r_tracked_stracks[it]
+            if not track.state == TrackState.Lost:
+                track.mark_lost()
+                lost_stracks.append(track)
+
+        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
+        detections = [detections[i] for i in u_detection]
+        dists = iou_distance(unconfirmed, detections)
+        if not self.mot20:
+            dists = fuse_score(dists, detections)
+        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
+        for itracked, idet in matches:
+            unconfirmed[itracked].update(detections[idet], self.frame_id)
+            activated_starcks.append(unconfirmed[itracked])
+        for it in u_unconfirmed:
+            track = unconfirmed[it]
+            track.mark_removed()
+            removed_stracks.append(track)
+
+        """ Step 4: Init new stracks"""
+        for inew in u_detection:
+            track = detections[inew]
+            if track.score < self.det_thresh:
+                continue
+            track.activate(self.kalman_filter, self.frame_id)
+            activated_starcks.append(track)
+
+        """ Step 5: Update state"""
+        for track in self.lost_stracks:
+            if self.frame_id - track.end_frame > self.max_time_lost:
+                track.mark_removed()
+                removed_stracks.append(track)
+
+        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
+        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
+        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
+        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
+        self.lost_stracks.extend(lost_stracks)
+        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
+        self.removed_stracks.extend(removed_stracks)
+        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
+            self.tracked_stracks, self.lost_stracks)
+        # get scores of lost tracks
+        output_stracks = [track for track in self.tracked_stracks if track.is_activated]
+
+        return output_stracks
+
+
+def joint_stracks(tlista, tlistb):
+    exists = {}
+    res = []
+    for t in tlista:
+        exists[t.track_id] = 1
+        res.append(t)
+    for t in tlistb:
+        tid = t.track_id
+        if not exists.get(tid, 0):
+            exists[tid] = 1
+            res.append(t)
+    return res
+
+
+def sub_stracks(tlista, tlistb):
+    stracks = {}
+    for t in tlista:
+        stracks[t.track_id] = t
+    for t in tlistb:
+        tid = t.track_id
+        if stracks.get(tid, 0):
+            del stracks[tid]
+    return list(stracks.values())
+
+
+def remove_duplicate_stracks(stracksa, stracksb):
+    pdist = iou_distance(stracksa, stracksb)
+    pairs = np.where(pdist < 0.15)
+    dupa, dupb = list(), list()
+    for p, q in zip(*pairs):
+        timep = stracksa[p].frame_id - stracksa[p].start_frame
+        timeq = stracksb[q].frame_id - stracksb[q].start_frame
+        if timep > timeq:
+            dupb.append(q)
+        else:
+            dupa.append(p)
+    resa = [t for i, t in enumerate(stracksa) if not i in dupa]
+    resb = [t for i, t in enumerate(stracksb) if not i in dupb]
+    return resa, resb
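To make the expected input format concrete, a hedged stand-alone example of feeding `ByteTracker.update` one frame of NumPy detections (xyxy boxes plus scores, as `track.py` passes them; the box values are made up):

```python
import numpy as np
from models.tracker.byte_tracker.byte_tracker import ByteTracker

tracker = ByteTracker(track_thresh=0.5, track_buffer=30, frame_rate=30, match_thresh=0.8)

# One frame of fake detections: rows are [x1, y1, x2, y2] in pixels.
bboxes = np.array([[100., 120., 180., 260.],
                   [400.,  80., 470., 230.]])
scores = np.array([0.92, 0.35])    # one high-score box, one low-score box
labels = np.array([0, 0])          # accepted for interface parity, not used by update()

online_targets = tracker.update(scores, bboxes, labels)
for t in online_targets:
    print(t.track_id, t.xywh, t.score)   # the 0.92 box is activated on frame 1
```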

+ 278 - 0
models/tracker/byte_tracker/kalman_filter.py

@@ -0,0 +1,278 @@
+# vim: expandtab:ts=4:sw=4
+import numpy as np
+import scipy.linalg
+
+
+"""
+Table for the 0.95 quantile of the chi-square distribution with N degrees of
+freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
+function and used as Mahalanobis gating threshold.
+"""
+chi2inv95 = {
+    1: 3.8415,
+    2: 5.9915,
+    3: 7.8147,
+    4: 9.4877,
+    5: 11.070,
+    6: 12.592,
+    7: 14.067,
+    8: 15.507,
+    9: 16.919}
+
+
+class KalmanFilter(object):
+    """
+    A simple Kalman filter for tracking bounding boxes in image space.
+
+    The 8-dimensional state space
+
+        x, y, a, h, vx, vy, va, vh
+
+    contains the bounding box center position (x, y), aspect ratio a, height h,
+    and their respective velocities.
+
+    Object motion follows a constant velocity model. The bounding box location
+    (x, y, a, h) is taken as direct observation of the state space (linear
+    observation model).
+
+    """
+
+    def __init__(self):
+        ndim, dt = 4, 1.
+
+        # Create Kalman filter model matrices.
+        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
+        for i in range(ndim):
+            self._motion_mat[i, ndim + i] = dt
+        self._update_mat = np.eye(ndim, 2 * ndim)
+
+        # Motion and observation uncertainty are chosen relative to the current
+        # state estimate. These weights control the amount of uncertainty in
+        # the model. This is a bit hacky.
+        self._std_weight_position = 1. / 20
+        self._std_weight_velocity = 1. / 160
+
+
+    def initiate(self, measurement):
+        """Create track from unassociated measurement.
+
+        Parameters
+        ----------
+        measurement : ndarray
+            Bounding box coordinates (x, y, a, h) with center position (x, y),
+            aspect ratio a, and height h.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector (8 dimensional) and covariance matrix (8x8
+            dimensional) of the new track. Unobserved velocities are initialized
+            to 0 mean.
+
+        """
+        mean_pos = measurement
+        mean_vel = np.zeros_like(mean_pos)
+        mean = np.r_[mean_pos, mean_vel]
+
+        std = [
+            2 * self._std_weight_position * measurement[3],
+            2 * self._std_weight_position * measurement[3],
+            1e-2,
+            2 * self._std_weight_position * measurement[3],
+            10 * self._std_weight_velocity * measurement[3],
+            10 * self._std_weight_velocity * measurement[3],
+            1e-5,
+            10 * self._std_weight_velocity * measurement[3]]
+        covariance = np.diag(np.square(std))
+
+        return mean, covariance
+
+
+    def predict(self, mean, covariance):
+        """Run Kalman filter prediction step.
+
+        Parameters
+        ----------
+        mean : ndarray
+            The 8 dimensional mean vector of the object state at the previous
+            time step.
+        covariance : ndarray
+            The 8x8 dimensional covariance matrix of the object state at the
+            previous time step.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector and covariance matrix of the predicted
+            state. Unobserved velocities are initialized to 0 mean.
+
+        """
+        std_pos = [
+            self._std_weight_position * mean[3],
+            self._std_weight_position * mean[3],
+            1e-2,
+            self._std_weight_position * mean[3]]
+        std_vel = [
+            self._std_weight_velocity * mean[3],
+            self._std_weight_velocity * mean[3],
+            1e-5,
+            self._std_weight_velocity * mean[3]]
+        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
+
+        #mean = np.dot(self._motion_mat, mean)
+        mean = np.dot(mean, self._motion_mat.T)
+        covariance = np.linalg.multi_dot((
+            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
+
+        return mean, covariance
+
+
+    def project(self, mean, covariance):
+        """Project state distribution to measurement space.
+
+        Parameters
+        ----------
+        mean : ndarray
+            The state's mean vector (8 dimensional array).
+        covariance : ndarray
+            The state's covariance matrix (8x8 dimensional).
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the projected mean and covariance matrix of the given state
+            estimate.
+
+        """
+        std = [
+            self._std_weight_position * mean[3],
+            self._std_weight_position * mean[3],
+            1e-1,
+            self._std_weight_position * mean[3]]
+        innovation_cov = np.diag(np.square(std))
+
+        mean = np.dot(self._update_mat, mean)
+        covariance = np.linalg.multi_dot((
+            self._update_mat, covariance, self._update_mat.T))
+        return mean, covariance + innovation_cov
+
+
+    def multi_predict(self, mean, covariance):
+        """Run Kalman filter prediction step (Vectorized version).
+        Parameters
+        ----------
+        mean : ndarray
+            The Nx8 dimensional mean matrix of the object states at the previous
+            time step.
+        covariance : ndarray
+            The Nx8x8 dimensional covariance matrices of the object states at the
+            previous time step.
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector and covariance matrix of the predicted
+            state. Unobserved velocities are initialized to 0 mean.
+        """
+        std_pos = [
+            self._std_weight_position * mean[:, 3],
+            self._std_weight_position * mean[:, 3],
+            1e-2 * np.ones_like(mean[:, 3]),
+            self._std_weight_position * mean[:, 3]]
+        std_vel = [
+            self._std_weight_velocity * mean[:, 3],
+            self._std_weight_velocity * mean[:, 3],
+            1e-5 * np.ones_like(mean[:, 3]),
+            self._std_weight_velocity * mean[:, 3]]
+        sqr = np.square(np.r_[std_pos, std_vel]).T
+
+        motion_cov = []
+        for i in range(len(mean)):
+            motion_cov.append(np.diag(sqr[i]))
+        motion_cov = np.asarray(motion_cov)
+
+        mean = np.dot(mean, self._motion_mat.T)
+        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
+        covariance = np.dot(left, self._motion_mat.T) + motion_cov
+
+        return mean, covariance
+
+
+    def update(self, mean, covariance, measurement):
+        """Run Kalman filter correction step.
+
+        Parameters
+        ----------
+        mean : ndarray
+            The predicted state's mean vector (8 dimensional).
+        covariance : ndarray
+            The state's covariance matrix (8x8 dimensional).
+        measurement : ndarray
+            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
+            is the center position, a the aspect ratio, and h the height of the
+            bounding box.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the measurement-corrected state distribution.
+
+        """
+        projected_mean, projected_cov = self.project(mean, covariance)
+
+        chol_factor, lower = scipy.linalg.cho_factor(
+            projected_cov, lower=True, check_finite=False)
+        kalman_gain = scipy.linalg.cho_solve(
+            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
+            check_finite=False).T
+        innovation = measurement - projected_mean
+
+        new_mean = mean + np.dot(innovation, kalman_gain.T)
+        new_covariance = covariance - np.linalg.multi_dot((
+            kalman_gain, projected_cov, kalman_gain.T))
+        return new_mean, new_covariance
+
+
+    def gating_distance(self, mean, covariance, measurements,
+                        only_position=False, metric='maha'):
+        """Compute gating distance between state distribution and measurements.
+        A suitable distance threshold can be obtained from `chi2inv95`. If
+        `only_position` is False, the chi-square distribution has 4 degrees of
+        freedom, otherwise 2.
+        Parameters
+        ----------
+        mean : ndarray
+            Mean vector over the state distribution (8 dimensional).
+        covariance : ndarray
+            Covariance of the state distribution (8x8 dimensional).
+        measurements : ndarray
+            An Nx4 dimensional matrix of N measurements, each in
+            format (x, y, a, h) where (x, y) is the bounding box center
+            position, a the aspect ratio, and h the height.
+        only_position : Optional[bool]
+            If True, distance computation is done with respect to the bounding
+            box center position only.
+        Returns
+        -------
+        ndarray
+            Returns an array of length N, where the i-th element contains the
+            squared Mahalanobis distance between (mean, covariance) and
+            `measurements[i]`.
+        """
+        mean, covariance = self.project(mean, covariance)
+        if only_position:
+            mean, covariance = mean[:2], covariance[:2, :2]
+            measurements = measurements[:, :2]
+
+        d = measurements - mean
+        if metric == 'gaussian':
+            return np.sum(d * d, axis=1)
+        elif metric == 'maha':
+            cholesky_factor = np.linalg.cholesky(covariance)
+            z = scipy.linalg.solve_triangular(
+                cholesky_factor, d.T, lower=True, check_finite=False,
+                overwrite_b=True)
+            squared_maha = np.sum(z * z, axis=0)
+            return squared_maha
+        else:
+            raise ValueError('invalid distance metric')
+            
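A tiny hedged example of the filter's API on a single `(cx, cy, a, h)` measurement, following the docstrings above (the numbers are arbitrary):

```python
import numpy as np
from models.tracker.byte_tracker.kalman_filter import KalmanFilter

kf = KalmanFilter()
z0 = np.array([140., 190., 0.57, 140.])   # center x, center y, aspect ratio w/h, height
mean, cov = kf.initiate(z0)               # 8-dim state; velocities start at zero
mean, cov = kf.predict(mean, cov)         # constant-velocity prediction step
z1 = np.array([142., 193., 0.57, 141.])   # next observation of the same box
mean, cov = kf.update(mean, cov, z1)      # measurement-corrected state
print(mean[:4])                           # filtered (cx, cy, a, h)
```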

+ 194 - 0
models/tracker/byte_tracker/matching.py

@@ -0,0 +1,194 @@
+import cv2
+import numpy as np
+import scipy
+import lap
+from scipy.spatial.distance import cdist
+
+from .kalman_filter import chi2inv95
+
+import time
+
+def merge_matches(m1, m2, shape):
+    O,P,Q = shape
+    m1 = np.asarray(m1)
+    m2 = np.asarray(m2)
+
+    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
+    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
+
+    mask = M1*M2
+    match = mask.nonzero()
+    match = list(zip(match[0], match[1]))
+    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
+    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
+
+    return match, unmatched_O, unmatched_Q
+
+
+def _indices_to_matches(cost_matrix, indices, thresh):
+    matched_cost = cost_matrix[tuple(zip(*indices))]
+    matched_mask = (matched_cost <= thresh)
+
+    matches = indices[matched_mask]
+    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
+    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
+
+    return matches, unmatched_a, unmatched_b
+
+
+def linear_assignment(cost_matrix, thresh):
+    if cost_matrix.size == 0:
+        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
+    matches, unmatched_a, unmatched_b = [], [], []
+    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
+    for ix, mx in enumerate(x):
+        if mx >= 0:
+            matches.append([ix, mx])
+    unmatched_a = np.where(x < 0)[0]
+    unmatched_b = np.where(y < 0)[0]
+    matches = np.asarray(matches)
+    return matches, unmatched_a, unmatched_b
+
+
+def ious(axyxys, bxyxys):
+    """
+    Compute IoU between two sets of boxes.
+    :type axyxys: list[xyxy] | np.ndarray
+    :type bxyxys: list[xyxy] | np.ndarray
+
+    :rtype ious np.ndarray
+    """
+    ious = np.zeros((len(axyxys), len(bxyxys)), dtype=float)  # np.float alias removed in NumPy 1.24
+    if ious.size == 0:
+        return ious
+
+    axyxys = np.ascontiguousarray(axyxys, dtype=float)
+    bxyxys = np.ascontiguousarray(bxyxys, dtype=float)
+    
+    area1 = (axyxys[:, 2] - axyxys[:, 0]) * (axyxys[:, 3] - axyxys[:, 1])
+    area2 = (bxyxys[:, 2] - bxyxys[:, 0]) * (bxyxys[:, 3] - bxyxys[:, 1])
+
+    lt = np.maximum(axyxys[:, None, :2], bxyxys[:, :2])  # [N,M,2]
+    rb = np.minimum(axyxys[:, None, 2:], bxyxys[:, 2:])  # [N,M,2]
+
+    wh = np.clip(rb - lt, a_min=0, a_max=1e4)     # [N,M,2]
+    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
+
+    union = area1[:, None] + area2 - inter
+
+    iou = inter / union
+
+    return iou
+
+
+def iou_distance(atracks, btracks):
+    """
+    Compute cost based on IoU
+    :type atracks: list[STrack]
+    :type btracks: list[STrack]
+
+    :rtype cost_matrix np.ndarray
+    """
+
+    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
+        axyxys = atracks
+        bxyxys = btracks
+    else:
+        axyxys = [track.xyxy for track in atracks]
+        bxyxys = [track.xyxy for track in btracks]
+    _ious = ious(axyxys, bxyxys)
+    cost_matrix = 1 - _ious
+
+    return cost_matrix
+
+
+def v_iou_distance(atracks, btracks):
+    """
+    Compute cost based on IoU
+    :type atracks: list[STrack]
+    :type btracks: list[STrack]
+
+    :rtype cost_matrix np.ndarray
+    """
+
+    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
+        axyxys = atracks
+        bxyxys = btracks
+    else:
+        axyxys = [track.xywh_to_xyxy(track.pred_bbox) for track in atracks]
+        bxyxys = [track.xywh_to_xyxy(track.pred_bbox) for track in btracks]
+    _ious = ious(axyxys, bxyxys)
+    cost_matrix = 1 - _ious
+
+    return cost_matrix
+
+
+def embedding_distance(tracks, detections, metric='cosine'):
+    """
+    :param tracks: list[STrack]
+    :param detections: list[BaseTrack]
+    :param metric:
+    :return: cost_matrix np.ndarray
+    """
+
+    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)  # np.float alias removed in NumPy 1.24
+    if cost_matrix.size == 0:
+        return cost_matrix
+    det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
+    #for i, track in enumerate(tracks):
+        #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
+    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
+    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # normalized features
+    return cost_matrix
+
+
+def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    gating_dim = 2 if only_position else 4
+    gating_threshold = chi2inv95[gating_dim]
+    measurements = np.asarray([det.to_xyah() for det in detections])
+    for row, track in enumerate(tracks):
+        gating_distance = kf.gating_distance(
+            track.mean, track.covariance, measurements, only_position)
+        cost_matrix[row, gating_distance > gating_threshold] = np.inf
+    return cost_matrix
+
+
+def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    gating_dim = 2 if only_position else 4
+    gating_threshold = chi2inv95[gating_dim]
+    measurements = np.asarray([det.to_xyah() for det in detections])
+    for row, track in enumerate(tracks):
+        gating_distance = kf.gating_distance(
+            track.mean, track.covariance, measurements, only_position, metric='maha')
+        cost_matrix[row, gating_distance > gating_threshold] = np.inf
+        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
+    return cost_matrix
+
+
+def fuse_iou(cost_matrix, tracks, detections):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    reid_sim = 1 - cost_matrix
+    iou_dist = iou_distance(tracks, detections)
+    iou_sim = 1 - iou_dist
+    fuse_sim = reid_sim * (1 + iou_sim) / 2
+    det_scores = np.array([det.score for det in detections])
+    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
+    #fuse_sim = fuse_sim * (1 + det_scores) / 2
+    fuse_cost = 1 - fuse_sim
+    return fuse_cost
+
+
+def fuse_score(cost_matrix, detections):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    iou_sim = 1 - cost_matrix
+    det_scores = np.array([det.score for det in detections])
+    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
+    fuse_sim = iou_sim * det_scores
+    fuse_cost = 1 - fuse_sim
+    return fuse_cost
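For reference, a hedged sketch of the association primitives on raw boxes; `iou_distance` also accepts plain ndarray inputs, and `linear_assignment` needs the `lap` package that this module imports.

```python
import numpy as np
from models.tracker.byte_tracker.matching import iou_distance, linear_assignment

# xyxy boxes for two existing tracks and one new detection (values made up).
tracks = [np.array([100., 100., 200., 200.]), np.array([300., 300., 400., 400.])]
dets   = [np.array([110., 105., 205., 195.])]

cost = iou_distance(tracks, dets)                    # cost = 1 - IoU, shape (2, 1)
matches, u_track, u_det = linear_assignment(cost, thresh=0.8)
print(matches)      # [[0 0]]: track 0 is matched to detection 0, track 1 stays unmatched
```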

+ 1 - 1
test.py

@@ -14,7 +14,7 @@ from utils.misc import build_dataset, load_weight
 from utils.misc import compute_flops
 from utils.box_ops import rescale_bboxes
 
-from models import build_model
+from models.detectors import build_model
 from config import build_model_config, build_trans_config
 
 

+ 1 - 1
tools/export_onnx.py

@@ -15,7 +15,7 @@ from torch import nn
 from utils.misc import SiLU
 from utils.misc import load_weight, replace_module
 from config import build_config
-from models import build_model
+from models.detectors import build_model
 
 
 def make_parser():

+ 356 - 0
track.py

@@ -0,0 +1,356 @@
+import os
+import cv2
+import time
+import argparse
+import numpy as np
+import torch
+
+from dataset.data_augment import build_transform
+from utils.vis_tools import plot_tracking
+from utils.misc import load_weight
+from utils.box_ops import rescale_bboxes
+
+from config import build_model_config, build_trans_config
+
+from models.detectors import build_model
+from models.tracker import build_tracker
+
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
+IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Tracking Task')
+
+    # basic
+    parser.add_argument('-size', '--img_size', default=640, type=int,
+                        help='the max size of input image')
+    parser.add_argument('--cuda', action='store_true', default=False, 
+                        help='use cuda.')
+
+    # data
+    parser.add_argument('--mode', type=str, default='image',
+                        help='image, video or camera')
+    parser.add_argument('--path_to_img', type=str, default='dataset/demo/images/',
+                        help='Dir to load images')
+    parser.add_argument('--path_to_vid', type=str, default='dataset/demo/videos/',
+                        help='Dir to load a video')
+    parser.add_argument('--path_to_save', default='det_results/', type=str,
+                        help='Dir to save results')
+    parser.add_argument('--fps', type=int, default=30,
+                        help='frame rate')
+    parser.add_argument('--show', action='store_true', default=False, 
+                        help='show results.')
+    parser.add_argument('--save', action='store_true', default=False, 
+                        help='save results.')
+
+    # tracker
+    parser.add_argument('-tk', '--tracker', default='byte_tracker', type=str,
+                        help='choose the tracker')
+    parser.add_argument("--track_thresh", type=float, default=0.5, 
+                        help="tracking confidence threshold")
+    parser.add_argument("--track_buffer", type=int, default=30, 
+                        help="number of frames to keep lost tracks")
+    parser.add_argument("--match_thresh", type=float, default=0.8, 
+                        help="matching threshold for tracking")
+    parser.add_argument("--aspect_ratio_thresh", type=float, default=1.6,
+                        help="threshold for filtering out boxes whose \
+                              aspect ratio is above the given value.")
+    parser.add_argument('--min_box_area', type=float, default=10,
+                        help='filter out tiny boxes')
+    parser.add_argument("--mot20", default=False, action="store_true",
+                        help="test mot20.")
+
+    # detector
+    parser.add_argument('-dt', '--model', default='yolov1', type=str,
+                        help='build YOLO')
+    parser.add_argument('-ns', '--num_classes', type=int, default=80,
+                        help='number of object classes.')
+    parser.add_argument('--weight', default=None,
+                        type=str, help='Trained state_dict file path to open')
+    parser.add_argument('-ct', '--conf_thresh', default=0.3, type=float,
+                        help='confidence threshold')
+    parser.add_argument('-nt', '--nms_thresh', default=0.5, type=float,
+                        help='NMS threshold')
+    parser.add_argument('--topk', default=100, type=int,
+                        help='topk candidates for testing')
+    parser.add_argument('-fcb', '--fuse_conv_bn', action='store_true', default=False,
+                        help='fuse Conv & BN')
+
+    return parser.parse_args()
+
+
+def get_image_list(path):
+    image_names = []
+    for maindir, subdir, file_name_list in os.walk(path):
+        for filename in file_name_list:
+            apath = os.path.join(maindir, filename)
+            ext = os.path.splitext(apath)[1]
+            if ext in IMAGE_EXT:
+                image_names.append(apath)
+    return image_names
+
+
+def run(args,
+        tracker,
+        detector,
+        device, 
+        transform):
+    save_path = os.path.join(args.path_to_save, args.mode)
+    os.makedirs(save_path, exist_ok=True)
+
+    # ------------------------- Camera ----------------------------
+    if args.mode == 'camera':
+        print('use camera !!!')
+        cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
+        frame_id = 0
+        results = []
+        # start tracking
+        while True:
+            ret, frame = cap.read()
+            if ret:
+                if cv2.waitKey(1) == ord('q'):
+                    break
+                # ------------------------- Detection ---------------------------
+                # preprocess
+                x, _, deltas = transform(frame)
+                x = x.unsqueeze(0).to(device) / 255.
+                orig_h, orig_w, _ = frame.shape
+
+                # detect
+                t0 = time.time()
+                bboxes, scores, labels = detector(x)
+                print("=============== Frame-{} ================".format(frame_id))
+                print("detect time: {:.1f} ms".format((time.time() - t0)*1000))
+
+                # rescale bboxes
+                origin_img_size = [orig_h, orig_w]
+                cur_img_size = [*x.shape[-2:]]
+                bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+                # track
+                t2 = time.time()
+                if len(bboxes) > 0:
+                    online_targets = tracker.update(scores, bboxes, labels)
+                    online_xywhs = []
+                    online_ids = []
+                    online_scores = []
+                    for t in online_targets:
+                        xywh = t.xywh
+                        tid = t.track_id
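+                        # keep a track only if its box is large enough and not overly wide (w/h <= aspect_ratio_thresh)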
+                        vertical = xywh[2] / xywh[3] > args.aspect_ratio_thresh
+                        if xywh[2] * xywh[3] > args.min_box_area and not vertical:
+                            online_xywhs.append(xywh)
+                            online_ids.append(tid)
+                            online_scores.append(t.score)
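+                            # append one MOTChallenge-style result line: frame,id,x,y,w,h,score,-1,-1,-1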
+                            results.append(
+                                f"{frame_id},{tid},{xywh[0]:.2f},{xywh[1]:.2f},{xywh[2]:.2f},{xywh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
+                                )
+                    print("tracking time: {:.1f} ms".format((time.time() - t2)*1000))
+                    
+                    # plot tracking results
+                    online_im = plot_tracking(
+                        frame, online_xywhs, online_ids, frame_id=frame_id + 1, fps=1. / (time.time() - t0)
+                    )
+                else:
+                    online_im = frame
+
+                # show results
+                if args.show:
+                    cv2.imshow('tracking', online_im)
+                    ch = cv2.waitKey(1)
+                    if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                        break
+
+            else:
+                break
+            frame_id += 1
+
+        cap.release()
+        cv2.destroyAllWindows()
+
+    # ------------------------- Image ----------------------------
+    elif args.mode == 'image':
+        files = get_image_list(args.path_to_img)
+        files.sort()
+        # start tracking
+        results = []
+        for frame_id, img_path in enumerate(files, 1):
+            image = cv2.imread(img_path)
+            # preprocess
+            x, _, deltas = transform(image)
+            x = x.unsqueeze(0).to(device) / 255.
+            orig_h, orig_w, _ = image.shape
+
+            # detect
+            t0 = time.time()
+            bboxes, scores, labels = detector(x)
+            print("=============== Frame-{} ================".format(frame_id))
+            print("detect time: {:.1f} ms".format((time.time() - t0)*1000))
+
+            # rescale bboxes
+            origin_img_size = [orig_h, orig_w]
+            cur_img_size = [*x.shape[-2:]]
+            bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+            # track
+            t2 = time.time()
+            if len(bboxes) > 0:
+                online_targets = tracker.update(scores, bboxes, labels)
+                online_xywhs = []
+                online_ids = []
+                online_scores = []
+                for t in online_targets:
+                    xywh = t.xywh
+                    tid = t.track_id
+                    vertical = xywh[2] / xywh[3] > args.aspect_ratio_thresh
+                    if xywh[2] * xywh[3] > args.min_box_area and not vertical:
+                        online_xywhs.append(xywh)
+                        online_ids.append(tid)
+                        online_scores.append(t.score)
+                        results.append(
+                            f"{frame_id},{tid},{xywh[0]:.2f},{xywh[1]:.2f},{xywh[2]:.2f},{xywh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
+                            )
+                print("tracking time: {:.1f} ms".format((time.time() - t2)*1000))
+                
+                # plot tracking results
+                online_im = plot_tracking(
+                    image, online_xywhs, online_ids, frame_id=frame_id, fps=1. / (time.time() - t0)
+                )
+            else:
+                online_im = image
+
+            # save results
+            if args.save:
+                cv2.imwrite(os.path.join(save_path, os.path.basename(img_path)), online_im)
+            # show results
+            if args.show:
+                cv2.imshow('tracking', online_im)
+                ch = cv2.waitKey(1)
+                if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                    break
+
+        cv2.destroyAllWindows()
+            
+    # ------------------------- Video ---------------------------
+    elif args.mode == 'video':
+        # read a video
+        video = cv2.VideoCapture(args.path_to_vid)
+        width = video.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
+        height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
+        fps = video.get(cv2.CAP_PROP_FPS)
+        
+        # path to save
+        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
+        save_path = os.path.join(save_path, timestamp, args.path_to_vid.split("/")[-1])
+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        vid_writer = cv2.VideoWriter(
+            save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
+        )
+        print("Save path: {}".format(save_path))
+
+        # start tracking
+        frame_id = 0
+        results = []
+        while True:
+            ret, frame = video.read()
+            
+            if ret:
+                # ------------------------- Detection ---------------------------
+                # preprocess
+                x, _, deltas = transform(frame)
+                x = x.unsqueeze(0).to(device) / 255.
+                orig_h, orig_w, _ = frame.shape
+
+                # detect
+                t0 = time.time()
+                bboxes, scores, labels = detector(x)
+                print("=============== Frame-{} ================".format(frame_id))
+                print("detect time: {:.1f} ms".format((time.time() - t0)*1000))
+
+                # rescale bboxes
+                origin_img_size = [orig_h, orig_w]
+                cur_img_size = [*x.shape[-2:]]
+                bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+                # track
+                t2 = time.time()
+                if len(bboxes) > 0:
+                    online_targets = tracker.update(scores, bboxes, labels)
+                    online_xywhs = []
+                    online_ids = []
+                    online_scores = []
+                    for t in online_targets:
+                        xywh = t.xywh
+                        tid = t.track_id
+                        vertical = xywh[2] / xywh[3] > args.aspect_ratio_thresh
+                        if xywh[2] * xywh[3] > args.min_box_area and not vertical:
+                            online_xywhs.append(xywh)
+                            online_ids.append(tid)
+                            online_scores.append(t.score)
+                            results.append(
+                                f"{frame_id},{tid},{xywh[0]:.2f},{xywh[1]:.2f},{xywh[2]:.2f},{xywh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
+                                )
+                    print("tracking time: {:.1f} ms".format((time.time() - t2)*1000))
+                    
+                    # plot tracking results
+                    online_im = plot_tracking(
+                        frame, online_xywhs, online_ids, frame_id=frame_id + 1, fps=1. / (time.time() - t0)
+                    )
+                else:
+                    online_im = frame
+
+                # save results
+                if args.save:
+                    vid_writer.write(online_im)
+                # show results
+                if args.show:
+                    cv2.imshow('tracking', online_im)
+                    ch = cv2.waitKey(1)
+                    if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                        break
+            else:
+                break
+            frame_id += 1
+
+        video.release()
+        vid_writer.release()
+        cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    # cuda
+    if args.cuda:
+        print('use cuda')
+        device = torch.device("cuda")
+    else:
+        device = torch.device("cpu")
+
+    np.random.seed(0)
+
+    # config
+    model_cfg = build_model_config(args)
+    trans_cfg = build_trans_config(model_cfg['trans_type'])
+
+    # transform
+    transform = build_transform(args.img_size, trans_cfg, is_train=False)
+
+    # ---------------------- General Object Detector ----------------------
+    detector = build_model(args, model_cfg, device, args.num_classes, False)
+
+    ## load trained weight
+    detector = load_weight(detector, args.weight, args.fuse_conv_bn)
+    detector.to(device).eval()
+    
+    # ---------------------- General Object Tracker ----------------------
+    tracker = build_tracker(args)
+
+    # run
+    run(args=args,
+        tracker=tracker,
+        detector=detector, 
+        device=device,
+        transform=transform)

+ 1 - 1
train.py

@@ -17,7 +17,7 @@ from utils.solver.lr_scheduler import build_lr_scheduler
 from engine import train_one_epoch, val_one_epoch
 
 from config import build_model_config, build_trans_config
-from models import build_model
+from models.detectors import build_model
 
 
 def parse_args():

+ 45 - 9
utils/vis_tools.py

@@ -5,7 +5,8 @@ import matplotlib.pyplot as plt
 from dataset.coco import coco_class_index, coco_class_labels
 
 
-# draw bbox & label on the image
+# -------------------------- For Detection Task --------------------------
+## draw bbox & label on the image
 def plot_bbox_labels(img, bbox, label, cls_color, test_scale=0.4):
     x1, y1, x2, y2 = bbox
     x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
@@ -19,8 +20,7 @@ def plot_bbox_labels(img, bbox, label, cls_color, test_scale=0.4):
 
     return img
 
-
-# visualize the detection results
+## visualize the detection results
 def visualize(img, bboxes, scores, labels, class_colors, vis_thresh=0.3):
     ts = 0.4
     for i, bbox in enumerate(bboxes):
@@ -32,8 +32,7 @@ def visualize(img, bboxes, scores, labels, class_colors, vis_thresh=0.3):
 
     return img
 
-
-# visualize the input data during the training stage
+## visualize the input data during the training stage
 def vis_data(images, targets):
     """
         images: (tensor) [B, 3, H, W]
@@ -66,8 +65,7 @@ def vis_data(images, targets):
         cv2.imshow('train target', image)
         cv2.waitKey(0)
 
-
-# convert feature to he heatmap
+## convert a feature map to a heatmap
 def convert_feature_heatmap(feature):
     """
         feature: (ndarray) [H, W, C]
@@ -76,8 +74,7 @@ def convert_feature_heatmap(feature):
 
     return heatmap
 
-
-# draw feature on the image
+## draw feature on the image
 def draw_feature(img, features, save=None):
     """
         img: (ndarray & cv2.Mat) [H, W, C], where the C is 3 for RGB or 1 for Gray.
@@ -107,3 +104,42 @@ def draw_feature(img, features, save=None):
             save_dir = 'feature_heatmap'
             os.makedirs(save_dir, exist_ok=True)
             cv2.imwrite(os.path.join(save_dir, 'feature_{}.png'.format(i) ), superimposed_img)    
+
+
+# -------------------------- For Tracking Task --------------------------
+def get_color(idx):
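+    # derive a deterministic pseudo-random BGR color from the track id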
+    idx = idx * 3
+    color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
+
+    return color
+
+
+def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None):
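+    # tlwhs: boxes in (top-left x, top-left y, width, height) format; obj_ids: the matching track ids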
+    im = np.ascontiguousarray(np.copy(image))
+    im_h, im_w = im.shape[:2]
+
+    #text_scale = max(1, image.shape[1] / 1600.)
+    #text_thickness = 2
+    #line_thickness = max(1, int(image.shape[1] / 500.))
+    text_scale = 2
+    text_thickness = 2
+    line_thickness = 3
+
+    radius = max(5, int(im_w/140.))
+    cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
+                (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), thickness=2)
+
+    for i, tlwh in enumerate(tlwhs):
+        x1, y1, w, h = tlwh
+        intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
+        obj_id = int(obj_ids[i])
+        id_text = '{}'.format(int(obj_id))
+        if ids2 is not None:
+            id_text = id_text + ', {}'.format(int(ids2[i]))
+        color = get_color(abs(obj_id))
+        cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
+        cv2.putText(im, id_text, (intbox[0], intbox[1]), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255),
+                    thickness=text_thickness)
+    return im