yjh0410 2 years ago
parent
commit
7cf531e7da
92 files changed with 1455 additions and 35 deletions
  1. 40 0
      README.md
  2. 41 0
      README_CN.md
  3. 16 5
      config/__init__.py
  4. 59 2
      config/transform_config.py
  5. 1 1
      config/yolov3_config.py
  6. 1 1
      config/yolov4_config.py
  7. 5 5
      config/yolov5_config.py
  8. 3 3
      config/yolov7_config.py
  9. 5 5
      config/yolov8_config.py
  10. 1 1
      eval.py
  11. 0 0
      models/detectors/__init__.py
  12. 0 0
      models/detectors/yolov1/build.py
  13. 0 0
      models/detectors/yolov1/loss.py
  14. 0 0
      models/detectors/yolov1/matcher.py
  15. 0 0
      models/detectors/yolov1/yolov1.py
  16. 0 0
      models/detectors/yolov1/yolov1_backbone.py
  17. 0 0
      models/detectors/yolov1/yolov1_basic.py
  18. 0 0
      models/detectors/yolov1/yolov1_head.py
  19. 0 0
      models/detectors/yolov1/yolov1_neck.py
  20. 0 0
      models/detectors/yolov2/build.py
  21. 0 0
      models/detectors/yolov2/loss.py
  22. 0 0
      models/detectors/yolov2/matcher.py
  23. 0 0
      models/detectors/yolov2/yolov2.py
  24. 0 0
      models/detectors/yolov2/yolov2_backbone.py
  25. 0 0
      models/detectors/yolov2/yolov2_basic.py
  26. 0 0
      models/detectors/yolov2/yolov2_head.py
  27. 0 0
      models/detectors/yolov2/yolov2_neck.py
  28. 0 0
      models/detectors/yolov3/build.py
  29. 0 0
      models/detectors/yolov3/loss.py
  30. 0 0
      models/detectors/yolov3/matcher.py
  31. 0 0
      models/detectors/yolov3/yolov3.py
  32. 0 0
      models/detectors/yolov3/yolov3_backbone.py
  33. 0 0
      models/detectors/yolov3/yolov3_basic.py
  34. 0 0
      models/detectors/yolov3/yolov3_fpn.py
  35. 0 0
      models/detectors/yolov3/yolov3_head.py
  36. 0 0
      models/detectors/yolov3/yolov3_neck.py
  37. 0 0
      models/detectors/yolov4/build.py
  38. 0 0
      models/detectors/yolov4/loss.py
  39. 0 0
      models/detectors/yolov4/matcher.py
  40. 0 0
      models/detectors/yolov4/yolov4.py
  41. 0 0
      models/detectors/yolov4/yolov4_backbone.py
  42. 0 0
      models/detectors/yolov4/yolov4_basic.py
  43. 0 0
      models/detectors/yolov4/yolov4_fpn.py
  44. 0 0
      models/detectors/yolov4/yolov4_head.py
  45. 0 0
      models/detectors/yolov4/yolov4_neck.py
  46. 0 0
      models/detectors/yolov5/build.py
  47. 0 0
      models/detectors/yolov5/loss.py
  48. 0 0
      models/detectors/yolov5/matcher.py
  49. 0 0
      models/detectors/yolov5/yolov5.py
  50. 0 0
      models/detectors/yolov5/yolov5_backbone.py
  51. 0 0
      models/detectors/yolov5/yolov5_basic.py
  52. 0 0
      models/detectors/yolov5/yolov5_head.py
  53. 0 0
      models/detectors/yolov5/yolov5_neck.py
  54. 0 0
      models/detectors/yolov5/yolov5_pafpn.py
  55. 0 0
      models/detectors/yolov7/build.py
  56. 0 0
      models/detectors/yolov7/loss.py
  57. 0 0
      models/detectors/yolov7/matcher.py
  58. 0 0
      models/detectors/yolov7/yolov7.py
  59. 0 0
      models/detectors/yolov7/yolov7_backbone.py
  60. 0 0
      models/detectors/yolov7/yolov7_basic.py
  61. 0 0
      models/detectors/yolov7/yolov7_fpn.py
  62. 0 0
      models/detectors/yolov7/yolov7_head.py
  63. 0 0
      models/detectors/yolov7/yolov7_neck.py
  64. 0 0
      models/detectors/yolov8/build.py
  65. 0 0
      models/detectors/yolov8/loss.py
  66. 0 0
      models/detectors/yolov8/matcher.py
  67. 0 0
      models/detectors/yolov8/yolov8.py
  68. 0 0
      models/detectors/yolov8/yolov8_backbone.py
  69. 0 0
      models/detectors/yolov8/yolov8_basic.py
  70. 0 0
      models/detectors/yolov8/yolov8_head.py
  71. 0 0
      models/detectors/yolov8/yolov8_neck.py
  72. 0 0
      models/detectors/yolov8/yolov8_pafpn.py
  73. 0 0
      models/detectors/yolox/build.py
  74. 0 0
      models/detectors/yolox/loss.py
  75. 0 0
      models/detectors/yolox/matcher.py
  76. 0 0
      models/detectors/yolox/yolox.py
  77. 0 0
      models/detectors/yolox/yolox_backbone.py
  78. 0 0
      models/detectors/yolox/yolox_basic.py
  79. 0 0
      models/detectors/yolox/yolox_fpn.py
  80. 0 0
      models/detectors/yolox/yolox_head.py
  81. 0 0
      models/detectors/yolox/yolox_neck.py
  82. 9 0
      models/tracker/__init__.py
  83. 52 0
      models/tracker/byte_tracker/basetrack.py
  84. 14 0
      models/tracker/byte_tracker/build.py
  85. 332 0
      models/tracker/byte_tracker/byte_tracker.py
  86. 278 0
      models/tracker/byte_tracker/kalman_filter.py
  87. 194 0
      models/tracker/byte_tracker/matching.py
  88. 1 1
      test.py
  89. 1 1
      tools/export_onnx.py
  90. 356 0
      track.py
  91. 1 1
      train.py
  92. 45 9
      utils/vis_tools.py

+ 40 - 0
README.md

@@ -230,3 +230,43 @@ python demo.py --mode camera \
                --cuda \
                --weight path/to/weight
 ```
+
+## Tracking
+Our project also supports **multi-object tracking**. We use this project's YOLO models as the detector in a "tracking-by-detection" pipeline, with the simple and efficient **ByteTrack** as the tracker.
+
+* image tracking
+```Shell
+python track.py --mode image \
+                --path_to_img path/to/images/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* video tracking
+
+```Shell
+python track.py --mode video \
+                --path_to_vid path/to/video/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* camera tracking
+
+```Shell
+python track.py --mode camera \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
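For orientation, below is a minimal, hedged sketch of the per-frame "detect, then associate" loop that the new `track.py` implements; it is simplified (no box rescaling or visualization) and the variable names are only illustrative.

```python
# Sketch only: the real loop lives in track.py and additionally rescales the
# boxes back to the original frame size and draws them with plot_tracking().
import cv2

def run_tracking(detector, tracker, transform, device, video_path):
    cap = cv2.VideoCapture(video_path)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        x, _, deltas = transform(frame)               # resize / pad the frame
        x = x.unsqueeze(0).to(device) / 255.
        bboxes, scores, labels = detector(x)          # YOLO detections (xyxy boxes, scores, classes)
        online_targets = tracker.update(scores, bboxes, labels)  # ByteTrack association
        for t in online_targets:
            print(t.track_id, t.xywh, t.score)        # each object keeps a persistent ID
    cap.release()
```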

+ 41 - 0
README_CN.md

@@ -243,3 +243,44 @@ python demo.py --mode camera \
                --cuda \
                --weight path/to/weight
 ```
+
+
+## Tracking
+This project also supports **multi-object tracking**. We use this project's YOLO models as the detector in a "tracking-by-detection" pipeline, with the simple and efficient **ByteTrack** as the tracker.
+
+* image tracking
+```Shell
+python track.py --mode image \
+                --path_to_img path/to/images/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* video tracking
+
+```Shell
+python track.py --mode video \
+                --path_to_vid path/to/video/ \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```
+
+* camera tracking
+
+```Shell
+python track.py --mode camera \
+                -dt yolov2 \
+                -tk byte_tracker \
+                --weight path/to/coco_pretrained/ \
+                -size 640 \
+                --cuda \
+                --show
+```

+ 16 - 5
config/__init__.py

@@ -42,7 +42,12 @@ def build_model_config(args):
 
 # ------------------ Transform Config ----------------------
 from .transform_config import (
-    yolov5_strong_trans_config, yolov5_weak_trans_config, yolov5_nano_trans_config,
+    yolov5_nano_trans_config,
+    yolov5_tiny_trans_config,
+    yolov5_small_trans_config,
+    yolov5_medium_trans_config,
+    yolov5_large_trans_config,
+    yolov5_huge_trans_config,
     ssd_trans_config
 )
 
@@ -54,11 +59,17 @@ def build_trans_config(trans_config='ssd'):
         cfg = ssd_trans_config
 
     # YOLOv5-style transform 
-    elif trans_config == 'yolov5_strong':
-        cfg = yolov5_strong_trans_config
-    elif trans_config == 'yolov5_weak':
-        cfg = yolov5_weak_trans_config
     elif trans_config == 'yolov5_nano':
         cfg = yolov5_nano_trans_config
+    elif trans_config == 'yolov5_tiny':
+        cfg = yolov5_tiny_trans_config
+    elif trans_config == 'yolov5_small':
+        cfg = yolov5_small_trans_config
+    elif trans_config == 'yolov5_medium':
+        cfg = yolov5_medium_trans_config
+    elif trans_config == 'yolov5_large':
+        cfg = yolov5_large_trans_config
+    elif trans_config == 'yolov5_huge':
+        cfg = yolov5_huge_trans_config
         
     return cfg
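As a quick sanity check of the renamed transform configs, here is a hedged usage sketch; only `build_trans_config` and the config keys visible in this commit are assumed.

```python
# 'yolov5_large' takes over the role of the former 'yolov5_strong' setting
# used by the yolov3 / yolov4 / yolov7_l configs in this commit.
from config import build_trans_config

trans_cfg = build_trans_config('yolov5_large')
print(trans_cfg['aug_type'], trans_cfg['mixup_scale'])   # 'yolov5', [0.5, 1.5]
```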

+ 59 - 2
config/transform_config.py

@@ -1,7 +1,26 @@
 # transform config
 
 # ----------------------- YOLOv5-Style -----------------------
-yolov5_strong_trans_config = {
+yolov5_huge_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': 0.9,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.2,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+yolov5_large_trans_config = {
     'aug_type': 'yolov5',
     # Basic Augment
     'degrees': 0.0,
@@ -20,7 +39,45 @@ yolov5_strong_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
 }
 
-yolov5_weak_trans_config = {
+yolov5_medium_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': 0.9,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.10,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+yolov5_small_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': 0.9,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.05,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+yolov5_tiny_trans_config = {
     'aug_type': 'yolov5',
     # Basic Augment
     'degrees': 0.0,

+ 1 - 1
config/yolov3_config.py

@@ -2,7 +2,7 @@
 
 yolov3_cfg = {
     # input
-    'trans_type': 'yolov5_strong',
+    'trans_type': 'yolov5_large',
     'multi_scale': [0.5, 1.0],
     # model
     'backbone': 'darknet53',

+ 1 - 1
config/yolov4_config.py

@@ -2,7 +2,7 @@
 
 yolov4_cfg = {
     # input
-    'trans_type': 'yolov5_strong',
+    'trans_type': 'yolov5_large',
     'multi_scale': [0.5, 1.0],
     # model
     'backbone': 'cspdarknet53',

+ 5 - 5
config/yolov5_config.py

@@ -33,7 +33,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_tiny',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -93,7 +93,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_small',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -153,7 +153,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_medium',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -213,7 +213,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_large',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -273,7 +273,7 @@ yolov5_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.0],   # 320 -> 640
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_huge',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,

+ 3 - 3
config/yolov7_config.py

@@ -3,7 +3,7 @@
 yolov7_cfg = {
     'yolov7_t':{
         # input
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_tiny',
         'multi_scale': [0.5, 1.5], # 320 -> 640
         # model
         'backbone': 'elannet_tiny',
@@ -61,7 +61,7 @@ yolov7_cfg = {
 
     'yolov7_l':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_large',
         'multi_scale': [0.5, 1.25], # 320 -> 640
         # model
         'backbone': 'elannet_large',
@@ -119,7 +119,7 @@ yolov7_cfg = {
 
     'yolov7_x':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_huge',
         'multi_scale': [0.5, 1.25], # 320 -> 640
         # model
         'backbone': 'elannet_huge',

+ 5 - 5
config/yolov8_config.py

@@ -3,7 +3,7 @@
 yolov8_cfg = {
     'yolov8_n':{
         # input
-        'trans_type': 'yolov5_weak',
+        'trans_type': 'yolov5_tiny',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -64,7 +64,7 @@ yolov8_cfg = {
 
     'yolov8_s':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_small',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -125,7 +125,7 @@ yolov8_cfg = {
 
     'yolov8_m':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_medium',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -186,7 +186,7 @@ yolov8_cfg = {
 
     'yolov8_l':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_large',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',
@@ -247,7 +247,7 @@ yolov8_cfg = {
 
     'yolov8_x':{
         # input
-        'trans_type': 'yolov5_strong',
+        'trans_type': 'yolov5_huge',
         'multi_scale': [0.5, 1.5],   # 320 -> 960
         # model
         'backbone': 'elan_cspnet',

+ 1 - 1
eval.py

@@ -16,7 +16,7 @@ from dataset.data_augment import build_transform
 from utils.misc import load_weight
 from utils.misc import compute_flops
 
-from models import build_model
+from models.detectors import build_model
 from config import build_model_config, build_trans_config
 
 

+ 0 - 0
models/__init__.py → models/detectors/__init__.py


+ 0 - 0
models/yolov1/build.py → models/detectors/yolov1/build.py


+ 0 - 0
models/yolov1/loss.py → models/detectors/yolov1/loss.py


+ 0 - 0
models/yolov1/matcher.py → models/detectors/yolov1/matcher.py


+ 0 - 0
models/yolov1/yolov1.py → models/detectors/yolov1/yolov1.py


+ 0 - 0
models/yolov1/yolov1_backbone.py → models/detectors/yolov1/yolov1_backbone.py


+ 0 - 0
models/yolov1/yolov1_basic.py → models/detectors/yolov1/yolov1_basic.py


+ 0 - 0
models/yolov1/yolov1_head.py → models/detectors/yolov1/yolov1_head.py


+ 0 - 0
models/yolov1/yolov1_neck.py → models/detectors/yolov1/yolov1_neck.py


+ 0 - 0
models/yolov2/build.py → models/detectors/yolov2/build.py


+ 0 - 0
models/yolov2/loss.py → models/detectors/yolov2/loss.py


+ 0 - 0
models/yolov2/matcher.py → models/detectors/yolov2/matcher.py


+ 0 - 0
models/yolov2/yolov2.py → models/detectors/yolov2/yolov2.py


+ 0 - 0
models/yolov2/yolov2_backbone.py → models/detectors/yolov2/yolov2_backbone.py


+ 0 - 0
models/yolov2/yolov2_basic.py → models/detectors/yolov2/yolov2_basic.py


+ 0 - 0
models/yolov2/yolov2_head.py → models/detectors/yolov2/yolov2_head.py


+ 0 - 0
models/yolov2/yolov2_neck.py → models/detectors/yolov2/yolov2_neck.py


+ 0 - 0
models/yolov3/build.py → models/detectors/yolov3/build.py


+ 0 - 0
models/yolov3/loss.py → models/detectors/yolov3/loss.py


+ 0 - 0
models/yolov3/matcher.py → models/detectors/yolov3/matcher.py


+ 0 - 0
models/yolov3/yolov3.py → models/detectors/yolov3/yolov3.py


+ 0 - 0
models/yolov3/yolov3_backbone.py → models/detectors/yolov3/yolov3_backbone.py


+ 0 - 0
models/yolov3/yolov3_basic.py → models/detectors/yolov3/yolov3_basic.py


+ 0 - 0
models/yolov3/yolov3_fpn.py → models/detectors/yolov3/yolov3_fpn.py


+ 0 - 0
models/yolov3/yolov3_head.py → models/detectors/yolov3/yolov3_head.py


+ 0 - 0
models/yolov3/yolov3_neck.py → models/detectors/yolov3/yolov3_neck.py


+ 0 - 0
models/yolov4/build.py → models/detectors/yolov4/build.py


+ 0 - 0
models/yolov4/loss.py → models/detectors/yolov4/loss.py


+ 0 - 0
models/yolov4/matcher.py → models/detectors/yolov4/matcher.py


+ 0 - 0
models/yolov4/yolov4.py → models/detectors/yolov4/yolov4.py


+ 0 - 0
models/yolov4/yolov4_backbone.py → models/detectors/yolov4/yolov4_backbone.py


+ 0 - 0
models/yolov4/yolov4_basic.py → models/detectors/yolov4/yolov4_basic.py


+ 0 - 0
models/yolov4/yolov4_fpn.py → models/detectors/yolov4/yolov4_fpn.py


+ 0 - 0
models/yolov4/yolov4_head.py → models/detectors/yolov4/yolov4_head.py


+ 0 - 0
models/yolov4/yolov4_neck.py → models/detectors/yolov4/yolov4_neck.py


+ 0 - 0
models/yolov5/build.py → models/detectors/yolov5/build.py


+ 0 - 0
models/yolov5/loss.py → models/detectors/yolov5/loss.py


+ 0 - 0
models/yolov5/matcher.py → models/detectors/yolov5/matcher.py


+ 0 - 0
models/yolov5/yolov5.py → models/detectors/yolov5/yolov5.py


+ 0 - 0
models/yolov5/yolov5_backbone.py → models/detectors/yolov5/yolov5_backbone.py


+ 0 - 0
models/yolov5/yolov5_basic.py → models/detectors/yolov5/yolov5_basic.py


+ 0 - 0
models/yolov5/yolov5_head.py → models/detectors/yolov5/yolov5_head.py


+ 0 - 0
models/yolov5/yolov5_neck.py → models/detectors/yolov5/yolov5_neck.py


+ 0 - 0
models/yolov5/yolov5_pafpn.py → models/detectors/yolov5/yolov5_pafpn.py


+ 0 - 0
models/yolov7/build.py → models/detectors/yolov7/build.py


+ 0 - 0
models/yolov7/loss.py → models/detectors/yolov7/loss.py


+ 0 - 0
models/yolov7/matcher.py → models/detectors/yolov7/matcher.py


+ 0 - 0
models/yolov7/yolov7.py → models/detectors/yolov7/yolov7.py


+ 0 - 0
models/yolov7/yolov7_backbone.py → models/detectors/yolov7/yolov7_backbone.py


+ 0 - 0
models/yolov7/yolov7_basic.py → models/detectors/yolov7/yolov7_basic.py


+ 0 - 0
models/yolov7/yolov7_fpn.py → models/detectors/yolov7/yolov7_fpn.py


+ 0 - 0
models/yolov7/yolov7_head.py → models/detectors/yolov7/yolov7_head.py


+ 0 - 0
models/yolov7/yolov7_neck.py → models/detectors/yolov7/yolov7_neck.py


+ 0 - 0
models/yolov8/build.py → models/detectors/yolov8/build.py


+ 0 - 0
models/yolov8/loss.py → models/detectors/yolov8/loss.py


+ 0 - 0
models/yolov8/matcher.py → models/detectors/yolov8/matcher.py


+ 0 - 0
models/yolov8/yolov8.py → models/detectors/yolov8/yolov8.py


+ 0 - 0
models/yolov8/yolov8_backbone.py → models/detectors/yolov8/yolov8_backbone.py


+ 0 - 0
models/yolov8/yolov8_basic.py → models/detectors/yolov8/yolov8_basic.py


+ 0 - 0
models/yolov8/yolov8_head.py → models/detectors/yolov8/yolov8_head.py


+ 0 - 0
models/yolov8/yolov8_neck.py → models/detectors/yolov8/yolov8_neck.py


+ 0 - 0
models/yolov8/yolov8_pafpn.py → models/detectors/yolov8/yolov8_pafpn.py


+ 0 - 0
models/yolox/build.py → models/detectors/yolox/build.py


+ 0 - 0
models/yolox/loss.py → models/detectors/yolox/loss.py


+ 0 - 0
models/yolox/matcher.py → models/detectors/yolox/matcher.py


+ 0 - 0
models/yolox/yolox.py → models/detectors/yolox/yolox.py


+ 0 - 0
models/yolox/yolox_backbone.py → models/detectors/yolox/yolox_backbone.py


+ 0 - 0
models/yolox/yolox_basic.py → models/detectors/yolox/yolox_basic.py


+ 0 - 0
models/yolox/yolox_fpn.py → models/detectors/yolox/yolox_fpn.py


+ 0 - 0
models/yolox/yolox_head.py → models/detectors/yolox/yolox_head.py


+ 0 - 0
models/yolox/yolox_neck.py → models/detectors/yolox/yolox_neck.py


+ 9 - 0
models/tracker/__init__.py

@@ -0,0 +1,9 @@
+from .byte_tracker.build import build_byte_tracker
+
+
+
+def build_tracker(args):
+    if args.tracker == 'byte_tracker':
+        return build_byte_tracker(args)
+    else:
+        raise NotImplementedError
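A small sketch of how this factory is intended to be called; the attribute names mirror the argparse flags added in `track.py` below.

```python
import argparse
from models.tracker import build_tracker

# The same fields that track.py's parser provides.
args = argparse.Namespace(
    tracker='byte_tracker',   # -tk / --tracker
    track_thresh=0.5,
    track_buffer=30,
    match_thresh=0.8,
    fps=30,
    mot20=False,
)
tracker = build_tracker(args)   # returns a ByteTracker instance
```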

+ 52 - 0
models/tracker/byte_tracker/basetrack.py

@@ -0,0 +1,52 @@
+import numpy as np
+from collections import OrderedDict
+
+
+class TrackState(object):
+    New = 0
+    Tracked = 1
+    Lost = 2
+    Removed = 3
+
+
+class BaseTrack(object):
+    _count = 0
+
+    track_id = 0
+    is_activated = False
+    state = TrackState.New
+
+    history = OrderedDict()
+    features = []
+    curr_feature = None
+    score = 0
+    start_frame = 0
+    frame_id = 0
+    time_since_update = 0
+
+    # multi-camera
+    location = (np.inf, np.inf)
+
+    @property
+    def end_frame(self):
+        return self.frame_id
+
+    @staticmethod
+    def next_id():
+        BaseTrack._count += 1
+        return BaseTrack._count
+
+    def activate(self, *args):
+        raise NotImplementedError
+
+    def predict(self):
+        raise NotImplementedError
+
+    def update(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def mark_lost(self):
+        self.state = TrackState.Lost
+
+    def mark_removed(self):
+        self.state = TrackState.Removed

+ 14 - 0
models/tracker/byte_tracker/build.py

@@ -0,0 +1,14 @@
+from .byte_tracker import ByteTracker
+
+
+def build_byte_tracker(args):
+    tracker = ByteTracker(
+        track_thresh=args.track_thresh,
+        track_buffer=args.track_buffer,
+        frame_rate=args.fps,
+        match_thresh=args.match_thresh,
+        mot20=args.mot20
+    )
+
+    return tracker
+    

+ 332 - 0
models/tracker/byte_tracker/byte_tracker.py

@@ -0,0 +1,332 @@
+import numpy as np
+import os
+import os.path as osp
+
+from .kalman_filter import KalmanFilter
+from .matching import iou_distance, fuse_score, linear_assignment
+from .basetrack import BaseTrack, TrackState
+
+
+class STrack(BaseTrack):
+    shared_kalman = KalmanFilter()
+    def __init__(self, xywh, score):
+
+        # waiting to be activated
+        self._xywh = np.asarray(xywh, dtype=float)  # note: the np.float alias was removed in NumPy 1.24
+        self.kalman_filter = None
+        self.mean, self.covariance = None, None
+        self.is_activated = False
+
+        self.score = score
+        self.tracklet_len = 0
+
+    def predict(self):
+        mean_state = self.mean.copy()
+        if self.state != TrackState.Tracked:
+            mean_state[7] = 0
+        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
+
+    @staticmethod
+    def multi_predict(stracks):
+        if len(stracks) > 0:
+            multi_mean = np.asarray([st.mean.copy() for st in stracks])
+            multi_covariance = np.asarray([st.covariance for st in stracks])
+            for i, st in enumerate(stracks):
+                if st.state != TrackState.Tracked:
+                    multi_mean[i][7] = 0
+            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
+            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
+                stracks[i].mean = mean
+                stracks[i].covariance = cov
+
+    def activate(self, kalman_filter, frame_id):
+        """Start a new tracklet"""
+        self.kalman_filter = kalman_filter
+        self.track_id = self.next_id()
+        self.mean, self.covariance = self.kalman_filter.initiate(self.xywh_to_cxcyah(self._xywh))
+
+        self.tracklet_len = 0
+        self.state = TrackState.Tracked
+        if frame_id == 1:
+            self.is_activated = True
+        # self.is_activated = True
+        self.frame_id = frame_id
+        self.start_frame = frame_id
+
+    def re_activate(self, new_track, frame_id, new_id=False):
+        self.mean, self.covariance = self.kalman_filter.update(
+            self.mean, self.covariance, self.xywh_to_cxcyah(new_track.xywh)
+        )
+        self.tracklet_len = 0
+        self.state = TrackState.Tracked
+        self.is_activated = True
+        self.frame_id = frame_id
+        if new_id:
+            self.track_id = self.next_id()
+        self.score = new_track.score
+
+    def update(self, new_track, frame_id):
+        """
+        Update a matched track
+        :type new_track: STrack
+        :type frame_id: int
+        :return:
+        """
+        self.frame_id = frame_id
+        self.tracklet_len += 1
+
+        new_xywh = new_track.xywh
+        self.mean, self.covariance = self.kalman_filter.update(
+            self.mean, self.covariance, self.xywh_to_cxcyah(new_xywh))
+        self.state = TrackState.Tracked
+        self.is_activated = True
+
+        self.score = new_track.score
+
+    @property
+    # @jit(nopython=True)
+    def xywh(self):
+        """Get current position in bounding box format `(top left x, top left y,
+                width, height)`.
+        """
+        if self.mean is None:
+            return self._xywh.copy()
+        ret = self.mean[:4].copy()
+        ret[2] *= ret[3]
+        ret[:2] -= ret[2:] / 2
+        return ret
+
+
+    @property
+    # @jit(nopython=True)
+    def xyxy(self):
+        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
+        `(top left, bottom right)`.
+        """
+        ret = self.xywh.copy()
+        ret[2:] += ret[:2]
+        return ret
+
+
+    @staticmethod
+    # @jit(nopython=True)
+    def xywh_to_cxcyah(xywh):
+        """[x1, y1, w, h] -> [cx, cy, aspect ratio, h], 
+        where the aspect ratio is `width / height`.
+        """
+        ret = np.asarray(xywh).copy()
+        ret[:2] += ret[2:] / 2
+        ret[2] /= ret[3]
+        return ret
+
+
+    @staticmethod
+    # @jit(nopython=True)
+    def xyxy_to_xywh(xyxy):
+        """ [x1, y1, x2, y2] -> [x1, y1, w, h]"""
+        ret = np.asarray(xyxy).copy()
+        ret[2:] -= ret[:2]
+        return ret
+
+
+    @staticmethod
+    # @jit(nopython=True)
+    def xywh_to_xyxy(xywh):
+        ret = np.asarray(xywh).copy()
+        ret[2:] += ret[:2]
+        return ret
+
+
+    def to_cxcyah(self):
+        return self.xywh_to_cxcyah(self.xywh)
+
+
+    def __repr__(self):
+        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
+
+
+class ByteTracker(object):
+    def __init__(self, track_thresh=0.6, track_buffer=30, frame_rate=30, match_thresh=0.9, mot20=False):
+        self.tracked_stracks = []  # type: list[STrack]
+        self.lost_stracks = []  # type: list[STrack]
+        self.removed_stracks = []  # type: list[STrack]
+
+        self.frame_id = 0
+        self.track_thresh = track_thresh
+        self.track_buffer = track_buffer
+        self.det_thresh = track_thresh + 0.1
+        self.match_thresh = match_thresh
+
+        self.buffer_size = int(frame_rate / 30.0 * track_buffer)
+        self.max_time_lost = self.buffer_size
+        self.kalman_filter = KalmanFilter()
+
+        self.mot20 = mot20
+
+
+    def update(self, scores, bboxes, labels):
+        self.frame_id += 1
+        activated_starcks = []
+        refind_stracks = []
+        lost_stracks = []
+        removed_stracks = []
+
+        # process outputs
+        remain_inds = scores > self.track_thresh
+        inds_low = scores > 0.1
+        inds_high = scores < self.track_thresh
+        inds_second = np.logical_and(inds_low, inds_high)
+
+        # high score detections
+        dets = bboxes[remain_inds]
+        scores_keep = scores[remain_inds]
+
+        # second detections
+        dets_second = bboxes[inds_second]
+        scores_second = scores[inds_second]
+
+        if len(dets) > 0:
+            '''Detections'''
+            detections = [STrack(STrack.xyxy_to_xywh(xyxy), s) for
+                          (xyxy, s) in zip(dets, scores_keep)]
+        else:
+            detections = []
+
+        ''' Add newly detected tracklets to tracked_stracks'''
+        unconfirmed = []
+        tracked_stracks = []  # type: list[STrack]
+        for track in self.tracked_stracks:
+            if not track.is_activated:
+                unconfirmed.append(track)
+            else:
+                tracked_stracks.append(track)
+
+        ''' Step 2: First association, with high score detection boxes'''
+        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
+        # Predict the current location with KF
+        STrack.multi_predict(strack_pool)
+        dists = iou_distance(strack_pool, detections)
+        if not self.mot20:
+            dists = fuse_score(dists, detections)
+        matches, u_track, u_detection = linear_assignment(dists, thresh=self.match_thresh)
+
+        for itracked, idet in matches:
+            track = strack_pool[itracked]
+            det = detections[idet]
+            if track.state == TrackState.Tracked:
+                track.update(detections[idet], self.frame_id)
+                activated_starcks.append(track)
+            else:
+                track.re_activate(det, self.frame_id, new_id=False)
+                refind_stracks.append(track)
+
+        ''' Step 3: Second association, with low score detection boxes'''
+        # associate the remaining unmatched tracks with the low score detections
+        if len(dets_second) > 0:
+            '''Detections'''
+            detections_second = [STrack(STrack.xyxy_to_xywh(xyxy), s) for
+                          (xyxy, s) in zip(dets_second, scores_second)]
+        else:
+            detections_second = []
+        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
+        dists = iou_distance(r_tracked_stracks, detections_second)
+        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
+        for itracked, idet in matches:
+            track = r_tracked_stracks[itracked]
+            det = detections_second[idet]
+            if track.state == TrackState.Tracked:
+                track.update(det, self.frame_id)
+                activated_starcks.append(track)
+            else:
+                track.re_activate(det, self.frame_id, new_id=False)
+                refind_stracks.append(track)
+
+        for it in u_track:
+            track = r_tracked_stracks[it]
+            if not track.state == TrackState.Lost:
+                track.mark_lost()
+                lost_stracks.append(track)
+
+        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
+        detections = [detections[i] for i in u_detection]
+        dists = iou_distance(unconfirmed, detections)
+        if not self.mot20:
+            dists = fuse_score(dists, detections)
+        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
+        for itracked, idet in matches:
+            unconfirmed[itracked].update(detections[idet], self.frame_id)
+            activated_starcks.append(unconfirmed[itracked])
+        for it in u_unconfirmed:
+            track = unconfirmed[it]
+            track.mark_removed()
+            removed_stracks.append(track)
+
+        """ Step 4: Init new stracks"""
+        for inew in u_detection:
+            track = detections[inew]
+            if track.score < self.det_thresh:
+                continue
+            track.activate(self.kalman_filter, self.frame_id)
+            activated_starcks.append(track)
+
+        """ Step 5: Update state"""
+        for track in self.lost_stracks:
+            if self.frame_id - track.end_frame > self.max_time_lost:
+                track.mark_removed()
+                removed_stracks.append(track)
+
+        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
+        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
+        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
+        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
+        self.lost_stracks.extend(lost_stracks)
+        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
+        self.removed_stracks.extend(removed_stracks)
+        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
+            self.tracked_stracks, self.lost_stracks)
+        # get scores of lost tracks
+        output_stracks = [track for track in self.tracked_stracks if track.is_activated]
+
+        return output_stracks
+
+
+def joint_stracks(tlista, tlistb):
+    exists = {}
+    res = []
+    for t in tlista:
+        exists[t.track_id] = 1
+        res.append(t)
+    for t in tlistb:
+        tid = t.track_id
+        if not exists.get(tid, 0):
+            exists[tid] = 1
+            res.append(t)
+    return res
+
+
+def sub_stracks(tlista, tlistb):
+    stracks = {}
+    for t in tlista:
+        stracks[t.track_id] = t
+    for t in tlistb:
+        tid = t.track_id
+        if stracks.get(tid, 0):
+            del stracks[tid]
+    return list(stracks.values())
+
+
+def remove_duplicate_stracks(stracksa, stracksb):
+    pdist = iou_distance(stracksa, stracksb)
+    pairs = np.where(pdist < 0.15)
+    dupa, dupb = list(), list()
+    for p, q in zip(*pairs):
+        timep = stracksa[p].frame_id - stracksa[p].start_frame
+        timeq = stracksb[q].frame_id - stracksb[q].start_frame
+        if timep > timeq:
+            dupb.append(q)
+        else:
+            dupa.append(p)
+    resa = [t for i, t in enumerate(stracksa) if not i in dupa]
+    resb = [t for i, t in enumerate(stracksb) if not i in dupb]
+    return resa, resb
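To make the expected input format concrete, a hedged stand-alone example of feeding `ByteTracker.update` one frame of NumPy detections (xyxy boxes plus scores, as `track.py` passes them; the box values are made up):

```python
import numpy as np
from models.tracker.byte_tracker.byte_tracker import ByteTracker

tracker = ByteTracker(track_thresh=0.5, track_buffer=30, frame_rate=30, match_thresh=0.8)

# One frame of fake detections: rows are [x1, y1, x2, y2] in pixels.
bboxes = np.array([[100., 120., 180., 260.],
                   [400.,  80., 470., 230.]])
scores = np.array([0.92, 0.35])    # one high-score box, one low-score box
labels = np.array([0, 0])          # accepted for interface parity, not used by update()

online_targets = tracker.update(scores, bboxes, labels)
for t in online_targets:
    print(t.track_id, t.xywh, t.score)   # the 0.92 box is activated on frame 1
```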

+ 278 - 0
models/tracker/byte_tracker/kalman_filter.py

@@ -0,0 +1,278 @@
+# vim: expandtab:ts=4:sw=4
+import numpy as np
+import scipy.linalg
+
+
+"""
+Table for the 0.95 quantile of the chi-square distribution with N degrees of
+freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
+function and used as Mahalanobis gating threshold.
+"""
+chi2inv95 = {
+    1: 3.8415,
+    2: 5.9915,
+    3: 7.8147,
+    4: 9.4877,
+    5: 11.070,
+    6: 12.592,
+    7: 14.067,
+    8: 15.507,
+    9: 16.919}
+
+
+class KalmanFilter(object):
+    """
+    A simple Kalman filter for tracking bounding boxes in image space.
+
+    The 8-dimensional state space
+
+        x, y, a, h, vx, vy, va, vh
+
+    contains the bounding box center position (x, y), aspect ratio a, height h,
+    and their respective velocities.
+
+    Object motion follows a constant velocity model. The bounding box location
+    (x, y, a, h) is taken as direct observation of the state space (linear
+    observation model).
+
+    """
+
+    def __init__(self):
+        ndim, dt = 4, 1.
+
+        # Create Kalman filter model matrices.
+        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
+        for i in range(ndim):
+            self._motion_mat[i, ndim + i] = dt
+        self._update_mat = np.eye(ndim, 2 * ndim)
+
+        # Motion and observation uncertainty are chosen relative to the current
+        # state estimate. These weights control the amount of uncertainty in
+        # the model. This is a bit hacky.
+        self._std_weight_position = 1. / 20
+        self._std_weight_velocity = 1. / 160
+
+
+    def initiate(self, measurement):
+        """Create track from unassociated measurement.
+
+        Parameters
+        ----------
+        measurement : ndarray
+            Bounding box coordinates (x, y, a, h) with center position (x, y),
+            aspect ratio a, and height h.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector (8 dimensional) and covariance matrix (8x8
+            dimensional) of the new track. Unobserved velocities are initialized
+            to 0 mean.
+
+        """
+        mean_pos = measurement
+        mean_vel = np.zeros_like(mean_pos)
+        mean = np.r_[mean_pos, mean_vel]
+
+        std = [
+            2 * self._std_weight_position * measurement[3],
+            2 * self._std_weight_position * measurement[3],
+            1e-2,
+            2 * self._std_weight_position * measurement[3],
+            10 * self._std_weight_velocity * measurement[3],
+            10 * self._std_weight_velocity * measurement[3],
+            1e-5,
+            10 * self._std_weight_velocity * measurement[3]]
+        covariance = np.diag(np.square(std))
+
+        return mean, covariance
+
+
+    def predict(self, mean, covariance):
+        """Run Kalman filter prediction step.
+
+        Parameters
+        ----------
+        mean : ndarray
+            The 8 dimensional mean vector of the object state at the previous
+            time step.
+        covariance : ndarray
+            The 8x8 dimensional covariance matrix of the object state at the
+            previous time step.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector and covariance matrix of the predicted
+            state. Unobserved velocities are initialized to 0 mean.
+
+        """
+        std_pos = [
+            self._std_weight_position * mean[3],
+            self._std_weight_position * mean[3],
+            1e-2,
+            self._std_weight_position * mean[3]]
+        std_vel = [
+            self._std_weight_velocity * mean[3],
+            self._std_weight_velocity * mean[3],
+            1e-5,
+            self._std_weight_velocity * mean[3]]
+        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
+
+        #mean = np.dot(self._motion_mat, mean)
+        mean = np.dot(mean, self._motion_mat.T)
+        covariance = np.linalg.multi_dot((
+            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
+
+        return mean, covariance
+
+
+    def project(self, mean, covariance):
+        """Project state distribution to measurement space.
+
+        Parameters
+        ----------
+        mean : ndarray
+            The state's mean vector (8 dimensional array).
+        covariance : ndarray
+            The state's covariance matrix (8x8 dimensional).
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the projected mean and covariance matrix of the given state
+            estimate.
+
+        """
+        std = [
+            self._std_weight_position * mean[3],
+            self._std_weight_position * mean[3],
+            1e-1,
+            self._std_weight_position * mean[3]]
+        innovation_cov = np.diag(np.square(std))
+
+        mean = np.dot(self._update_mat, mean)
+        covariance = np.linalg.multi_dot((
+            self._update_mat, covariance, self._update_mat.T))
+        return mean, covariance + innovation_cov
+
+
+    def multi_predict(self, mean, covariance):
+        """Run Kalman filter prediction step (Vectorized version).
+        Parameters
+        ----------
+        mean : ndarray
+            The Nx8 dimensional mean matrix of the object states at the previous
+            time step.
+        covariance : ndarray
+            The Nx8x8 dimensional covariance matrices of the object states at the
+            previous time step.
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector and covariance matrix of the predicted
+            state. Unobserved velocities are initialized to 0 mean.
+        """
+        std_pos = [
+            self._std_weight_position * mean[:, 3],
+            self._std_weight_position * mean[:, 3],
+            1e-2 * np.ones_like(mean[:, 3]),
+            self._std_weight_position * mean[:, 3]]
+        std_vel = [
+            self._std_weight_velocity * mean[:, 3],
+            self._std_weight_velocity * mean[:, 3],
+            1e-5 * np.ones_like(mean[:, 3]),
+            self._std_weight_velocity * mean[:, 3]]
+        sqr = np.square(np.r_[std_pos, std_vel]).T
+
+        motion_cov = []
+        for i in range(len(mean)):
+            motion_cov.append(np.diag(sqr[i]))
+        motion_cov = np.asarray(motion_cov)
+
+        mean = np.dot(mean, self._motion_mat.T)
+        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
+        covariance = np.dot(left, self._motion_mat.T) + motion_cov
+
+        return mean, covariance
+
+
+    def update(self, mean, covariance, measurement):
+        """Run Kalman filter correction step.
+
+        Parameters
+        ----------
+        mean : ndarray
+            The predicted state's mean vector (8 dimensional).
+        covariance : ndarray
+            The state's covariance matrix (8x8 dimensional).
+        measurement : ndarray
+            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
+            is the center position, a the aspect ratio, and h the height of the
+            bounding box.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the measurement-corrected state distribution.
+
+        """
+        projected_mean, projected_cov = self.project(mean, covariance)
+
+        chol_factor, lower = scipy.linalg.cho_factor(
+            projected_cov, lower=True, check_finite=False)
+        kalman_gain = scipy.linalg.cho_solve(
+            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
+            check_finite=False).T
+        innovation = measurement - projected_mean
+
+        new_mean = mean + np.dot(innovation, kalman_gain.T)
+        new_covariance = covariance - np.linalg.multi_dot((
+            kalman_gain, projected_cov, kalman_gain.T))
+        return new_mean, new_covariance
+
+
+    def gating_distance(self, mean, covariance, measurements,
+                        only_position=False, metric='maha'):
+        """Compute gating distance between state distribution and measurements.
+        A suitable distance threshold can be obtained from `chi2inv95`. If
+        `only_position` is False, the chi-square distribution has 4 degrees of
+        freedom, otherwise 2.
+        Parameters
+        ----------
+        mean : ndarray
+            Mean vector over the state distribution (8 dimensional).
+        covariance : ndarray
+            Covariance of the state distribution (8x8 dimensional).
+        measurements : ndarray
+            An Nx4 dimensional matrix of N measurements, each in
+            format (x, y, a, h) where (x, y) is the bounding box center
+            position, a the aspect ratio, and h the height.
+        only_position : Optional[bool]
+            If True, distance computation is done with respect to the bounding
+            box center position only.
+        Returns
+        -------
+        ndarray
+            Returns an array of length N, where the i-th element contains the
+            squared Mahalanobis distance between (mean, covariance) and
+            `measurements[i]`.
+        """
+        mean, covariance = self.project(mean, covariance)
+        if only_position:
+            mean, covariance = mean[:2], covariance[:2, :2]
+            measurements = measurements[:, :2]
+
+        d = measurements - mean
+        if metric == 'gaussian':
+            return np.sum(d * d, axis=1)
+        elif metric == 'maha':
+            cholesky_factor = np.linalg.cholesky(covariance)
+            z = scipy.linalg.solve_triangular(
+                cholesky_factor, d.T, lower=True, check_finite=False,
+                overwrite_b=True)
+            squared_maha = np.sum(z * z, axis=0)
+            return squared_maha
+        else:
+            raise ValueError('invalid distance metric')
+            
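A tiny hedged example of the filter's API on a single `(cx, cy, a, h)` measurement, following the docstrings above (the numbers are arbitrary):

```python
import numpy as np
from models.tracker.byte_tracker.kalman_filter import KalmanFilter

kf = KalmanFilter()
z0 = np.array([140., 190., 0.57, 140.])   # center x, center y, aspect ratio w/h, height
mean, cov = kf.initiate(z0)               # 8-dim state; velocities start at zero
mean, cov = kf.predict(mean, cov)         # constant-velocity prediction step
z1 = np.array([142., 193., 0.57, 141.])   # next observation of the same box
mean, cov = kf.update(mean, cov, z1)      # measurement-corrected state
print(mean[:4])                           # filtered (cx, cy, a, h)
```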

+ 194 - 0
models/tracker/byte_tracker/matching.py

@@ -0,0 +1,194 @@
+import cv2
+import numpy as np
+import scipy
+import lap
+from scipy.spatial.distance import cdist
+
+from .kalman_filter import chi2inv95
+
+import time
+
+def merge_matches(m1, m2, shape):
+    O,P,Q = shape
+    m1 = np.asarray(m1)
+    m2 = np.asarray(m2)
+
+    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
+    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
+
+    mask = M1*M2
+    match = mask.nonzero()
+    match = list(zip(match[0], match[1]))
+    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
+    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
+
+    return match, unmatched_O, unmatched_Q
+
+
+def _indices_to_matches(cost_matrix, indices, thresh):
+    matched_cost = cost_matrix[tuple(zip(*indices))]
+    matched_mask = (matched_cost <= thresh)
+
+    matches = indices[matched_mask]
+    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
+    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
+
+    return matches, unmatched_a, unmatched_b
+
+
+def linear_assignment(cost_matrix, thresh):
+    if cost_matrix.size == 0:
+        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
+    matches, unmatched_a, unmatched_b = [], [], []
+    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
+    for ix, mx in enumerate(x):
+        if mx >= 0:
+            matches.append([ix, mx])
+    unmatched_a = np.where(x < 0)[0]
+    unmatched_b = np.where(y < 0)[0]
+    matches = np.asarray(matches)
+    return matches, unmatched_a, unmatched_b
+
+
+def ious(axyxys, bxyxys):
+    """
+    Compute IoU between two sets of boxes.
+    :type axyxys: list[xyxy] | np.ndarray
+    :type bxyxys: list[xyxy] | np.ndarray
+
+    :rtype ious np.ndarray
+    """
+    ious = np.zeros((len(axyxys), len(bxyxys)), dtype=float)  # np.float alias removed in NumPy 1.24
+    if ious.size == 0:
+        return ious
+
+    axyxys = np.ascontiguousarray(axyxys, dtype=float)
+    bxyxys = np.ascontiguousarray(bxyxys, dtype=float)
+    
+    area1 = (axyxys[:, 2] - axyxys[:, 0]) * (axyxys[:, 3] - axyxys[:, 1])
+    area2 = (bxyxys[:, 2] - bxyxys[:, 0]) * (bxyxys[:, 3] - bxyxys[:, 1])
+
+    lt = np.maximum(axyxys[:, None, :2], bxyxys[:, :2])  # [N,M,2]
+    rb = np.minimum(axyxys[:, None, 2:], bxyxys[:, 2:])  # [N,M,2]
+
+    wh = np.clip(rb - lt, a_min=0, a_max=1e4)     # [N,M,2]
+    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
+
+    union = area1[:, None] + area2 - inter
+
+    iou = inter / union
+
+    return iou
+
+
+def iou_distance(atracks, btracks):
+    """
+    Compute cost based on IoU
+    :type atracks: list[STrack]
+    :type btracks: list[STrack]
+
+    :rtype cost_matrix np.ndarray
+    """
+
+    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
+        axyxys = atracks
+        bxyxys = btracks
+    else:
+        axyxys = [track.xyxy for track in atracks]
+        bxyxys = [track.xyxy for track in btracks]
+    _ious = ious(axyxys, bxyxys)
+    cost_matrix = 1 - _ious
+
+    return cost_matrix
+
+
+def v_iou_distance(atracks, btracks):
+    """
+    Compute cost based on IoU
+    :type atracks: list[STrack]
+    :type btracks: list[STrack]
+
+    :rtype cost_matrix np.ndarray
+    """
+
+    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
+        axyxys = atracks
+        bxyxys = btracks
+    else:
+        axyxys = [track.xywh_to_xyxy(track.pred_bbox) for track in atracks]
+        bxyxys = [track.xywh_to_xyxy(track.pred_bbox) for track in btracks]
+    _ious = ious(axyxys, bxyxys)
+    cost_matrix = 1 - _ious
+
+    return cost_matrix
+
+
+def embedding_distance(tracks, detections, metric='cosine'):
+    """
+    :param tracks: list[STrack]
+    :param detections: list[BaseTrack]
+    :param metric:
+    :return: cost_matrix np.ndarray
+    """
+
+    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)  # np.float alias removed in NumPy 1.24
+    if cost_matrix.size == 0:
+        return cost_matrix
+    det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
+    #for i, track in enumerate(tracks):
+        #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
+    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
+    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # normalized features
+    return cost_matrix
+
+
+def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    gating_dim = 2 if only_position else 4
+    gating_threshold = chi2inv95[gating_dim]
+    measurements = np.asarray([det.to_xyah() for det in detections])
+    for row, track in enumerate(tracks):
+        gating_distance = kf.gating_distance(
+            track.mean, track.covariance, measurements, only_position)
+        cost_matrix[row, gating_distance > gating_threshold] = np.inf
+    return cost_matrix
+
+
+def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    gating_dim = 2 if only_position else 4
+    gating_threshold = chi2inv95[gating_dim]
+    measurements = np.asarray([det.to_xyah() for det in detections])
+    for row, track in enumerate(tracks):
+        gating_distance = kf.gating_distance(
+            track.mean, track.covariance, measurements, only_position, metric='maha')
+        cost_matrix[row, gating_distance > gating_threshold] = np.inf
+        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
+    return cost_matrix
+
+
+def fuse_iou(cost_matrix, tracks, detections):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    reid_sim = 1 - cost_matrix
+    iou_dist = iou_distance(tracks, detections)
+    iou_sim = 1 - iou_dist
+    fuse_sim = reid_sim * (1 + iou_sim) / 2
+    det_scores = np.array([det.score for det in detections])
+    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
+    #fuse_sim = fuse_sim * (1 + det_scores) / 2
+    fuse_cost = 1 - fuse_sim
+    return fuse_cost
+
+
+def fuse_score(cost_matrix, detections):
+    if cost_matrix.size == 0:
+        return cost_matrix
+    iou_sim = 1 - cost_matrix
+    det_scores = np.array([det.score for det in detections])
+    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
+    fuse_sim = iou_sim * det_scores
+    fuse_cost = 1 - fuse_sim
+    return fuse_cost
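For reference, a hedged sketch of the association primitives on raw boxes; `iou_distance` also accepts plain ndarray inputs, and `linear_assignment` needs the `lap` package that this module imports.

```python
import numpy as np
from models.tracker.byte_tracker.matching import iou_distance, linear_assignment

# xyxy boxes for two existing tracks and one new detection (values made up).
tracks = [np.array([100., 100., 200., 200.]), np.array([300., 300., 400., 400.])]
dets   = [np.array([110., 105., 205., 195.])]

cost = iou_distance(tracks, dets)                    # cost = 1 - IoU, shape (2, 1)
matches, u_track, u_det = linear_assignment(cost, thresh=0.8)
print(matches)      # [[0 0]]: track 0 is matched to detection 0, track 1 stays unmatched
```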

+ 1 - 1
test.py

@@ -14,7 +14,7 @@ from utils.misc import build_dataset, load_weight
 from utils.misc import compute_flops
 from utils.box_ops import rescale_bboxes
 
-from models import build_model
+from models.detectors import build_model
 from config import build_model_config, build_trans_config
 
 

+ 1 - 1
tools/export_onnx.py

@@ -15,7 +15,7 @@ from torch import nn
 from utils.misc import SiLU
 from utils.misc import load_weight, replace_module
 from config import build_config
-from models import build_model
+from models.detectors import build_model
 
 
 def make_parser():

+ 356 - 0
track.py

@@ -0,0 +1,356 @@
+import os
+import cv2
+import time
+import argparse
+import numpy as np
+import torch
+
+from dataset.data_augment import build_transform
+from utils.vis_tools import plot_tracking
+from utils.misc import load_weight
+from utils.box_ops import rescale_bboxes
+
+from config import build_model_config, build_trans_config
+
+from models.detectors import build_model
+from models.tracker import build_tracker
+
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
+IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Tracking Task')
+
+    # basic
+    parser.add_argument('-size', '--img_size', default=640, type=int,
+                        help='the max size of input image')
+    parser.add_argument('--cuda', action='store_true', default=False, 
+                        help='use cuda.')
+
+    # data
+    parser.add_argument('--mode', type=str, default='image',
+                        help='image, video or camera')
+    parser.add_argument('--path_to_img', type=str, default='dataset/demo/images/',
+                        help='Dir to load images')
+    parser.add_argument('--path_to_vid', type=str, default='dataset/demo/videos/',
+                        help='Dir to load a video')
+    parser.add_argument('--path_to_save', default='det_results/', type=str,
+                        help='Dir to save results')
+    parser.add_argument('--fps', type=int, default=30,
+                        help='frame rate')
+    parser.add_argument('--show', action='store_true', default=False, 
+                        help='show results.')
+    parser.add_argument('--save', action='store_true', default=False, 
+                        help='save results.')
+
+    # tracker
+    parser.add_argument('-tk', '--tracker', default='byte_tracker', type=str,
+                        help='choose the tracker')
+    parser.add_argument("--track_thresh", type=float, default=0.5, 
+                        help="tracking confidence threshold")
+    parser.add_argument("--track_buffer", type=int, default=30, 
+                        help="number of frames to keep lost tracks")
+    parser.add_argument("--match_thresh", type=float, default=0.8, 
+                        help="matching threshold for tracking")
+    parser.add_argument("--aspect_ratio_thresh", type=float, default=1.6,
+                        help="threshold for filtering out boxes whose \
+                              aspect ratio is above the given value.")
+    parser.add_argument('--min_box_area', type=float, default=10,
+                        help='filter out tiny boxes')
+    parser.add_argument("--mot20", default=False, action="store_true",
+                        help="test mot20.")
+
+    # detector
+    parser.add_argument('-dt', '--model', default='yolov1', type=str,
+                        help='build YOLO')
+    parser.add_argument('-ns', '--num_classes', type=int, default=80,
+                        help='number of object classes.')
+    parser.add_argument('--weight', default=None,
+                        type=str, help='Trained state_dict file path to open')
+    parser.add_argument('-ct', '--conf_thresh', default=0.3, type=float,
+                        help='confidence threshold')
+    parser.add_argument('-nt', '--nms_thresh', default=0.5, type=float,
+                        help='NMS threshold')
+    parser.add_argument('--topk', default=100, type=int,
+                        help='topk candidates for testing')
+    parser.add_argument('-fcb', '--fuse_conv_bn', action='store_true', default=False,
+                        help='fuse Conv & BN')
+
+    return parser.parse_args()
+
+
+def get_image_list(path):
+    image_names = []
+    for maindir, subdir, file_name_list in os.walk(path):
+        for filename in file_name_list:
+            apath = os.path.join(maindir, filename)
+            ext = os.path.splitext(apath)[1]
+            if ext in IMAGE_EXT:
+                image_names.append(apath)
+    return image_names
+
+
+def run(args,
+        tracker,
+        detector,
+        device, 
+        transform):
+    save_path = os.path.join(args.path_to_save, args.mode)
+    os.makedirs(save_path, exist_ok=True)
+
+    # ------------------------- Camera ----------------------------
+    if args.mode == 'camera':
+        print('use camera !!!')
+        cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
+        frame_id = 0
+        results = []
+        # start tracking
+        while True:
+            ret, frame = cap.read()
+            if ret:
+                if cv2.waitKey(1) == ord('q'):
+                    break
+                # ------------------------- Detection ---------------------------
+                # preprocess
+                x, _, deltas = transform(frame)
+                x = x.unsqueeze(0).to(device) / 255.
+                orig_h, orig_w, _ = frame.shape
+
+                # detect
+                t0 = time.time()
+                bboxes, scores, labels = detector(x)
+                print("=============== Frame-{} ================".format(frame_id))
+                print("detect time: {:.1f} ms".format((time.time() - t0)*1000))
+
+                # rescale bboxes
+                origin_img_size = [orig_h, orig_w]
+                cur_img_size = [*x.shape[-2:]]
+                bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+                # track
+                t2 = time.time()
+                if len(bboxes) > 0:
+                    online_targets = tracker.update(scores, bboxes, labels)
+                    online_xywhs = []
+                    online_ids = []
+                    online_scores = []
+                    for t in online_targets:
+                        xywh = t.xywh
+                        tid = t.track_id
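+                        # keep a track only if its box is large enough and not overly wide (w/h <= aspect_ratio_thresh)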
+                        vertical = xywh[2] / xywh[3] > args.aspect_ratio_thresh
+                        if xywh[2] * xywh[3] > args.min_box_area and not vertical:
+                            online_xywhs.append(xywh)
+                            online_ids.append(tid)
+                            online_scores.append(t.score)
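+                            # append one MOTChallenge-style result line: frame,id,x,y,w,h,score,-1,-1,-1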
+                            results.append(
+                                f"{frame_id},{tid},{xywh[0]:.2f},{xywh[1]:.2f},{xywh[2]:.2f},{xywh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
+                                )
+                    print("tracking time: {:.1f} ms".format((time.time() - t2)*1000))
+                    
+                    # plot tracking results
+                    online_im = plot_tracking(
+                        frame, online_xywhs, online_ids, frame_id=frame_id + 1, fps=1. / (time.time() - t0)
+                    )
+                else:
+                    online_im = frame
+
+                # show results
+                if args.show:
+                    cv2.imshow('tracking', online_im)
+                    ch = cv2.waitKey(1)
+                    if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                        break
+
+            else:
+                break
+            frame_id += 1
+
+        cap.release()
+        cv2.destroyAllWindows()
+
+    # ------------------------- Image ----------------------------
+    elif args.mode == 'image':
+        files = get_image_list(args.path_to_img)
+        files.sort()
+        # start tracking
+        results = []
+        for frame_id, img_path in enumerate(files, 1):
+            image = cv2.imread(img_path)
+            # preprocess
+            x, _, deltas = transform(image)
+            x = x.unsqueeze(0).to(device) / 255.
+            orig_h, orig_w, _ = image.shape
+
+            # detect
+            t0 = time.time()
+            bboxes, scores, labels = detector(x)
+            print("=============== Frame-{} ================".format(frame_id))
+            print("detect time: {:.1f} ms".format((time.time() - t0)*1000))
+
+            # rescale bboxes
+            origin_img_size = [orig_h, orig_w]
+            cur_img_size = [*x.shape[-2:]]
+            bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+            # track
+            t2 = time.time()
+            if len(bboxes) > 0:
+                online_targets = tracker.update(scores, bboxes, labels)
+                online_xywhs = []
+                online_ids = []
+                online_scores = []
+                for t in online_targets:
+                    xywh = t.xywh
+                    tid = t.track_id
+                    vertical = xywh[2] / xywh[3] > args.aspect_ratio_thresh
+                    if xywh[2] * xywh[3] > args.min_box_area and not vertical:
+                        online_xywhs.append(xywh)
+                        online_ids.append(tid)
+                        online_scores.append(t.score)
+                        results.append(
+                            f"{frame_id},{tid},{xywh[0]:.2f},{xywh[1]:.2f},{xywh[2]:.2f},{xywh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
+                            )
+                print("tracking time: {:.1f} ms".format((time.time() - t2)*1000))
+                
+                # plot tracking results
+                online_im = plot_tracking(
+                    image, online_xywhs, online_ids, frame_id=frame_id, fps=1. / (time.time() - t0)
+                )
+            else:
+                online_im = image
+
+            # save results
+            if args.save:
+                cv2.imwrite(os.path.join(save_path, os.path.basename(img_path)), online_im)
+            # show results
+            if args.show:
+                cv2.imshow('tracking', online_im)
+                ch = cv2.waitKey(1)
+                if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                    break
+
+        cv2.destroyAllWindows()
+            
+    # ------------------------- Video ---------------------------
+    elif args.mode == 'video':
+        # read a video
+        video = cv2.VideoCapture(args.path_to_vid)
+        width = video.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
+        height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
+        fps = video.get(cv2.CAP_PROP_FPS)
+        
+        # path to save
+        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
+        save_path = os.path.join(save_path, timestamp, args.path_to_vid.split("/")[-1])
+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        vid_writer = cv2.VideoWriter(
+            save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
+        )
+        print("Save path: {}".format(save_path))
+
+        # start tracking
+        frame_id = 0
+        results = []
+        while True:
+            ret, frame = video.read()
+            
+            if ret:
+                # ------------------------- Detection ---------------------------
+                # preprocess
+                x, _, deltas = transform(frame)
+                x = x.unsqueeze(0).to(device) / 255.
+                orig_h, orig_w, _ = frame.shape
+
+                # detect
+                t0 = time.time()
+                bboxes, scores, labels = detector(x)
+                print("=============== Frame-{} ================".format(frame_id))
+                print("detect time: {:.1f} ms".format((time.time() - t0)*1000))
+
+                # rescale bboxes
+                origin_img_size = [orig_h, orig_w]
+                cur_img_size = [*x.shape[-2:]]
+                bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+                # track
+                t2 = time.time()
+                if len(bboxes) > 0:
+                    online_targets = tracker.update(scores, bboxes, labels)
+                    online_xywhs = []
+                    online_ids = []
+                    online_scores = []
+                    for t in online_targets:
+                        xywh = t.xywh
+                        tid = t.track_id
+                        vertical = xywh[2] / xywh[3] > args.aspect_ratio_thresh
+                        if xywh[2] * xywh[3] > args.min_box_area and not vertical:
+                            online_xywhs.append(xywh)
+                            online_ids.append(tid)
+                            online_scores.append(t.score)
+                            results.append(
+                                f"{frame_id},{tid},{xywh[0]:.2f},{xywh[1]:.2f},{xywh[2]:.2f},{xywh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
+                                )
+                    print("tracking time: {:.1f} ms".format((time.time() - t2)*1000))
+                    
+                    # plot tracking results
+                    online_im = plot_tracking(
+                        frame, online_xywhs, online_ids, frame_id=frame_id + 1, fps=1. / (time.time() - t0)
+                    )
+                else:
+                    online_im = frame
+
+                # save results
+                if args.save:
+                    vid_writer.write(online_im)
+                # show results
+                if args.show:
+                    cv2.imshow('tracking', online_im)
+                    ch = cv2.waitKey(1)
+                    if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                        break
+            else:
+                break
+            frame_id += 1
+
+        video.release()
+        vid_writer.release()
+        cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    # cuda
+    if args.cuda:
+        print('use cuda')
+        device = torch.device("cuda")
+    else:
+        device = torch.device("cpu")
+
+    np.random.seed(0)
+
+    # config
+    model_cfg = build_model_config(args)
+    trans_cfg = build_trans_config(model_cfg['trans_type'])
+
+    # transform
+    transform = build_transform(args.img_size, trans_cfg, is_train=False)
+
+    # ---------------------- General Object Detector ----------------------
+    detector = build_model(args, model_cfg, device, args.num_classes, False)
+
+    ## load trained weight
+    detector = load_weight(detector, args.weight, args.fuse_conv_bn)
+    detector.to(device).eval()
+    
+    # ---------------------- General Object Tracker ----------------------
+    tracker = build_tracker(args)
+
+    # run
+    run(args=args,
+        tracker=tracker,
+        detector=detector, 
+        device=device,
+        transform=transform)

+ 1 - 1
train.py

@@ -17,7 +17,7 @@ from utils.solver.lr_scheduler import build_lr_scheduler
 from engine import train_one_epoch, val_one_epoch
 
 from config import build_model_config, build_trans_config
-from models import build_model
+from models.detectors import build_model
 
 
 def parse_args():

+ 45 - 9
utils/vis_tools.py

@@ -5,7 +5,8 @@ import matplotlib.pyplot as plt
 from dataset.coco import coco_class_index, coco_class_labels
 
 
-# draw bbox & label on the image
+# -------------------------- For Detection Task --------------------------
+## draw bbox & label on the image
 def plot_bbox_labels(img, bbox, label, cls_color, test_scale=0.4):
     x1, y1, x2, y2 = bbox
     x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
@@ -19,8 +20,7 @@ def plot_bbox_labels(img, bbox, label, cls_color, test_scale=0.4):
 
     return img
 
-
-# visualize the detection results
+## visualize the detection results
 def visualize(img, bboxes, scores, labels, class_colors, vis_thresh=0.3):
     ts = 0.4
     for i, bbox in enumerate(bboxes):
@@ -32,8 +32,7 @@ def visualize(img, bboxes, scores, labels, class_colors, vis_thresh=0.3):
 
     return img
 
-
-# visualize the input data during the training stage
+## visualize the input data during the training stage
 def vis_data(images, targets):
     """
         images: (tensor) [B, 3, H, W]
@@ -66,8 +65,7 @@ def vis_data(images, targets):
         cv2.imshow('train target', image)
         cv2.waitKey(0)
 
-
-# convert feature to he heatmap
+## convert a feature map to a heatmap
 def convert_feature_heatmap(feature):
     """
         feature: (ndarray) [H, W, C]
@@ -76,8 +74,7 @@ def convert_feature_heatmap(feature):
 
     return heatmap
 
-
-# draw feature on the image
+## draw feature on the image
 def draw_feature(img, features, save=None):
     """
         img: (ndarray & cv2.Mat) [H, W, C], where the C is 3 for RGB or 1 for Gray.
@@ -107,3 +104,42 @@ def draw_feature(img, features, save=None):
             save_dir = 'feature_heatmap'
             os.makedirs(save_dir, exist_ok=True)
             cv2.imwrite(os.path.join(save_dir, 'feature_{}.png'.format(i) ), superimposed_img)    
+
+
+# -------------------------- For Tracking Task --------------------------
+def get_color(idx):
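+    # derive a deterministic pseudo-random BGR color from the track id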
+    idx = idx * 3
+    color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
+
+    return color
+
+
+def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None):
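+    # tlwhs: boxes in (top-left x, top-left y, width, height) format; obj_ids: the matching track ids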
+    im = np.ascontiguousarray(np.copy(image))
+    im_h, im_w = im.shape[:2]
+
+    #text_scale = max(1, image.shape[1] / 1600.)
+    #text_thickness = 2
+    #line_thickness = max(1, int(image.shape[1] / 500.))
+    text_scale = 2
+    text_thickness = 2
+    line_thickness = 3
+
+    radius = max(5, int(im_w/140.))
+    cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
+                (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), thickness=2)
+
+    for i, tlwh in enumerate(tlwhs):
+        x1, y1, w, h = tlwh
+        intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
+        obj_id = int(obj_ids[i])
+        id_text = '{}'.format(int(obj_id))
+        if ids2 is not None:
+            id_text = id_text + ', {}'.format(int(ids2[i]))
+        color = get_color(abs(obj_id))
+        cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
+        cv2.putText(im, id_text, (intbox[0], intbox[1]), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255),
+                    thickness=text_thickness)
+    return im