[lint] 1. fix lint

InternLM · lvhan028 · May 8, 2025 · Apr 1, 2025 · Apr 2, 2025 · Apr 2, 2025
commit a7bb7c440aac0d01c1258e1d4392e5e6994bc69e
diff --git a/benchmark/profile_generation.py b/benchmark/profile_generation.py
@@ -178,7 +178,7 @@ async def _gather_tasks(tasks):
 
     out_token_throughput = np.round(token_latency_stats.size / elapsed_time, 2)
     total_token_throughput = np.round(concurrency * test_round * (input_seqlen + output_seqlen) / elapsed_time, 2)
-    print(f'\n{"-" * 50}\ntotal time: {elapsed_time:.2f}s\n'
+    print(f'\n{"-" * 50}\ntotal time: {elapsed_time:.2f}s\n'
           f'concurrency: {concurrency}, test_round: {test_round}\n'
           f'input_tokens: {input_seqlen}, output_tokens: {output_seqlen}\n'
           f'first_token latency(min, max, ave): '
@@ -188,7 +188,7 @@ async def _gather_tasks(tasks):
           f'{token_latency_ave}s\n'
           f'token_latency percentiles(50%,75%,95%,99%)(s): {percentiles}\n'
           f'throughput(output): {out_token_throughput} token/s\n'
-          f'throughput(total): {total_token_throughput} token/s\n{"-" * 50}')
+          f'throughput(total): {total_token_throughput} token/s\n{"-" * 50}')
     return model_path, \
         [first_token_latency_min, first_token_latency_max,
          first_token_latency_ave], \

diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py
@@ -1,6 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from lmdeploy.disagg.config import MigrationBackend
-from lmdeploy.disagg.config import EngineRole, MigrationProtocol
+from lmdeploy.disagg.config import EngineRole, MigrationBackend
 from lmdeploy.utils import get_max_batch_size
 
 from .cli import CLI
@@ -131,18 +130,14 @@ def add_parser_api_server():
                             default='Hybrid',
                             choices=['Hybrid', 'Prefill', 'Decode'],
                             help='Hybrid for Non-Disaggregated Engine;'
-                                 'Prefill for Disaggregated Prefill Engine;'
-                                 'Decode fro Disaggregated Decode Engine;')
+                            'Prefill for Disaggregated Prefill Engine;'
+                            'Decode for Disaggregated Decode Engine;')
         parser.add_argument('--migration-backend',
                             type=str,
                             default='DLSlime',
                             choices=['DLSlime', 'Mooncake', 'InfiniStore'],
                             help='kvcache migration management backend when PD disaggregation')
-        parser.add_argument('--available-nics',
-                            type=str,
-                            nargs="+",
-                            default=None,
-                            help='available-nics')
+        parser.add_argument('--available-nics', type=str, nargs='+', default=None, help='available-nics')
         # common args
         ArgumentHelper.backend(parser)
         ArgumentHelper.log_level(parser)
@@ -260,9 +255,7 @@ def add_parser_proxy():
                             choices=['Ethernet', 'IB'],
                             default='Ethernet',
                             help='RDMA Link Type')
-        parser.add_argument('--disable-gdr',
-                            action="store_true",
-                            help='with GPU Direct Memory Access')
+        parser.add_argument('--disable-gdr', action='store_true', help='with GPU Direct Memory Access')
 
         ArgumentHelper.api_keys(parser)
         ArgumentHelper.ssl(parser)

diff --git a/lmdeploy/disagg/README.md b/lmdeploy/disagg/README.md
@@ -1,10 +1,12 @@
 # LMDeploy-DistServe
 
 ## Key Components
+
 1. **Router Service**: Coordinates between prefill/decode engines
-4. **Migration Manager**: Facilitates high-performance memory sharing
+2. **Migration Manager**: Facilitates high-performance memory sharing
 
 ## Installation
+
 ```
 # Inference Engine
 pip install lmdeploy[all] >= 0.7.0
@@ -14,10 +16,12 @@ pip install dlslime==0.0.1.post2
 ```
 
 ## Quick Start
+
 ### 1. Configure Endpoints
+
 First deploy your prefill and decode engines.
 
-``` shell
+```shell
 # Prefill Engine
 CUDA_VISIBLE_DEVICES=0,1 lmdeploy serve api_server internlm/internlm2_5-7b-chat --server-port 23333 --role Prefill --tp 2 --cache-block-seq 32
 # Decode Engine
@@ -26,7 +30,7 @@ CUDA_VISIBLE_DEVICES=2,3 lmdeploy serve api_server internlm/internlm2_5-7b-chat
 
 ### 2. Launch Router Service
 
-``` shell
+```shell
 lmdeploy serve proxy
     --server-name 10.130.8.139
     --server-port 5000
@@ -50,22 +54,27 @@ curl -X POST "http://localhost:5000/v1/completions" \
 
 ### RDMA Connection Failed:
 
-``` bash
+```bash
 ibstatus      # Verify IB device status
 ibv_devinfo   # Check device capabilities
 ```
 
 ### Check NVSHMEM configuration:
+
 Make sure to verify NVSHMEM installation.
 
 ## Fault tolerance
+
 ### CacheFree Issue
+
 When the Decode Engine completes migration, it sends a FreeCache request to the Prefill Engine. However, if the connection fails or the Decode Engine encounters an exception, Cache Free may fail, leading to memory leaks. Future improvements may include:
 
 - Exception monitoring in the Proxy to automatically release unreferenced memory.
 - Adding a timeout mechanism to force cache release if a response is delayed.
-
+  
+
 ### ConnectionPool Issue
+
 Currently, if the Proxy disconnects, the connection pool must be warmed up again. A future enhancement could involve:
 
 A dedicated connection pool management server (e.g., using Raft-based tools like ETCD, as mentioned in Mooncake) to improve connection discovery and avoid repeated warmups.
diff --git a/lmdeploy/disagg/backend/__init__.py b/lmdeploy/disagg/backend/__init__.py
@@ -1,24 +1,24 @@
-from typing import Dict
+# Copyright (c) OpenMMLab. All rights reserved.
 from lmdeploy.logger import get_logger
 
-logger = get_logger("lmdeploy")
-
+logger = get_logger('lmdeploy')
 
 try:
-    logger.debug("Registering DLSlime Backend")
+    logger.debug('Registering DLSlime Backend')
     from .dlslime import DLSlimeBackend
-except ImportError as e:
-    logger.debug("Disable DLSlime Backend")
+except ImportError:
+    logger.warning('Disable DLSlime Backend')
 
 try:
-    logger.debug("Registering Mooncake Backend")
+    logger.debug('Registering Mooncake Backend')
     from .mooncake import MooncakeBackend
-except ImportError as e:
-    logger.debug("Disable Mooncake Backend")
-
+except ImportError:
+    logger.warning('Disable Mooncake Backend')
 
 try:
-    logger.debug("Registering InfiniStoreBackend Backend")
+    logger.debug('Registering InfiniStoreBackend Backend')
     from .infinistore import InfiniStoreBackend
-except ImportError as e:
-    logger.debug("Disable InfiniStoreBackend Backend")
+except ImportError:
+    logger.warning('Disable InfiniStoreBackend Backend')
+
+__all__ = ['DLSlimeBackend', 'MooncakeBackend', 'InfiniStoreBackend']
diff --git a/lmdeploy/disagg/backend/backend.py b/lmdeploy/disagg/backend/backend.py
@@ -1,12 +1,4 @@
-from lmdeploy.disagg.config import MigrationBackend
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.registry import Registry
 
-
-MIGRATION_BACKENDS = {}
-
-
-def register_migration_backend(backend_name: MigrationBackend):
-    def register(cls):
-        MIGRATION_BACKENDS[backend_name] = cls
-        return cls
-
-    return register
+MIGRATION_BACKENDS = Registry('migration_backend', locations=['lmdeploy.disagg.backend.backend'])
diff --git a/lmdeploy/disagg/backend/base.py b/lmdeploy/disagg/backend/base.py
@@ -1,21 +1,19 @@
+# Copyright (c) OpenMMLab. All rights reserved.
 from abc import abstractmethod
 
-from lmdeploy.disagg.request import DistServeConnectionRequest
-from lmdeploy.disagg.messages import (
-    DistServeRegisterMRMessage,
-    MigrationAssignment
-)
 from lmdeploy.disagg.config import MigrationProtocol
-from lmdeploy.disagg.request import DistServeInitRequest
+from lmdeploy.disagg.messages import DistServeRegisterMRMessage, MigrationAssignment
+from lmdeploy.disagg.request import DistServeConnectionRequest, DistServeInitRequest
 
 
 class MigrationBackendImpl:
+
     @abstractmethod
     def p2p_initialize(self, init_request: DistServeInitRequest):
         raise NotImplementedError
 
     @abstractmethod
-    def register_memory_region(self, register_mr_request:DistServeRegisterMRMessage):
+    def register_memory_region(self, register_mr_request: DistServeRegisterMRMessage):
         raise NotImplementedError
 
     @abstractmethod
@@ -37,4 +35,3 @@ async def store(self, assignment: MigrationAssignment):
     @abstractmethod
     async def load(self, assignment: MigrationAssignment):
         raise NotImplementedError
-
diff --git a/lmdeploy/disagg/backend/dlslime.py b/lmdeploy/disagg/backend/dlslime.py
@@ -1,32 +1,21 @@
-from typing import Dict
-
+# Copyright (c) OpenMMLab. All rights reserved.
 import asyncio
-
-from lmdeploy.logger import get_logger
-
-from lmdeploy.disagg.request import DistServeConnectionRequest
-from lmdeploy.disagg.messages import (
-    DistServeRegisterMRMessage,
-    MigrationAssignment
-)
-
-from lmdeploy.disagg.backend.base import MigrationBackendImpl
-from lmdeploy.disagg.backend.backend import register_migration_backend
-
-from lmdeploy.disagg.config import (
-    DistServeEngineConfig,
-    MigrationBackend,
-    MigrationProtocol
-)
-from lmdeploy.disagg.request import DistServeInitRequest
+from typing import Dict
 
 from dlslime import RDMAEndpoint, available_nic
 
+from lmdeploy.disagg.backend.backend import MIGRATION_BACKENDS
+from lmdeploy.disagg.backend.base import MigrationBackendImpl
+from lmdeploy.disagg.config import DistServeEngineConfig, MigrationBackend, MigrationProtocol
+from lmdeploy.disagg.messages import DistServeRegisterMRMessage, MigrationAssignment
+from lmdeploy.disagg.request import DistServeConnectionRequest, DistServeInitRequest
+from lmdeploy.logger import get_logger
 
-logger = get_logger("lmdeploy")
+logger = get_logger('lmdeploy')
 
 
 class DLSlimeMigrationManagement:
+
     def __init__(self, init_request: DistServeInitRequest):
         self.rank = init_request.rank
         self.local_engine_config: DistServeEngineConfig = init_request.local_engine_config
@@ -39,47 +28,43 @@ def __init__(self, init_request: DistServeInitRequest):
         if init_request.rdma_config:
             nics = self.local_engine_config.available_nics or available_nic()
             device_name = nics[self.rank % len(nics)]
-            logger.info(f"use device {device_name} for kv migration")
-            self.endpoint[MigrationProtocol.RDMA] = RDMAEndpoint(
-                device_name=device_name,
-                ib_port=1,
-                link_type=init_request.rdma_config.link_type.name
-            )
+            logger.info(f'use device {device_name} for kv migration')
+            self.endpoint[MigrationProtocol.RDMA] = RDMAEndpoint(device_name=device_name,
+                                                                 ib_port=1,
+                                                                 link_type=init_request.rdma_config.link_type.name)
         if init_request.nvlink_init_request:
             raise NotImplementedError
         if init_request.tcp_init_request:
             raise NotImplementedError
 
     def register_memory_region(self, register_mr_request: DistServeRegisterMRMessage):
-        self.endpoint[register_mr_request.protocol].register_memory_region(
-            register_mr_request.mr_key,
-            register_mr_request.addr,
-            register_mr_request.length
-        )
+        self.endpoint[register_mr_request.protocol].register_memory_region(register_mr_request.mr_key,
+                                                                           register_mr_request.addr,
+                                                                           register_mr_request.length)
 
     def connect_to(self, connect_request: DistServeConnectionRequest):
         self.endpoint[connect_request.protocol].connect_to(connect_request.remote_endpoint_info)
 
     async def p2p_migrate(self, assignment: MigrationAssignment):
         max_batch = 4096 + 2048
         for i in range(0, len(assignment.target_offset), max_batch):
-            await asyncio.wait_for(self.endpoint[assignment.protocol].read_batch_async(
-                    assignment.mr_key,
-                    assignment.target_offset[i: i+max_batch],
-                    assignment.source_offset[i: i+max_batch],
-                    assignment.length
-                ), 15)
+            await asyncio.wait_for(
+                self.endpoint[assignment.protocol].read_batch_async(assignment.mr_key,
+                                                                    assignment.target_offset[i:i + max_batch],
+                                                                    assignment.source_offset[i:i + max_batch],
+                                                                    assignment.length), 15)
 
 
-@register_migration_backend(MigrationBackend.DLSlime)
+@MIGRATION_BACKENDS.register_module(MigrationBackend.DLSlime.name)
 class DLSlimeBackend(MigrationBackendImpl):
+
     def __init__(self):
         self.links: Dict[int, DLSlimeMigrationManagement] = {}
 
     def p2p_initialize(self, init_request: DistServeInitRequest):
         self.links[init_request.remote_engine_id] = DLSlimeMigrationManagement(init_request)
 
-    def register_memory_region(self, register_mr_request:DistServeRegisterMRMessage):
+    def register_memory_region(self, register_mr_request: DistServeRegisterMRMessage):
         self.links[register_mr_request.remote_engine_id].register_memory_region(register_mr_request)
 
     def endpoint_info(self, remote_engine_id: int, protocol: MigrationProtocol):

diff --git a/lmdeploy/disagg/backend/infinistore.py b/lmdeploy/disagg/backend/infinistore.py
@@ -1,24 +1,18 @@
-from lmdeploy.disagg.messages import (
-    DistServeRegisterMRMessage,
-    MigrationAssignment
-)
-
-from lmdeploy.disagg.backend.backend import register_migration_backend
+# Copyright (c) OpenMMLab. All rights reserved.
+from lmdeploy.disagg.backend.backend import MIGRATION_BACKENDS
 from lmdeploy.disagg.backend.base import MigrationBackendImpl
-from lmdeploy.disagg.config import MigrationProtocol
-from lmdeploy.disagg.request import (
-    DistServeInitRequest,
-    DistServeConnectionRequest
-)
-from lmdeploy.disagg.config import MigrationBackend
+from lmdeploy.disagg.config import MigrationBackend, MigrationProtocol
+from lmdeploy.disagg.messages import DistServeRegisterMRMessage, MigrationAssignment
+from lmdeploy.disagg.request import DistServeConnectionRequest, DistServeInitRequest
 
 
-@register_migration_backend(MigrationBackend.InfiniStore)
+@MIGRATION_BACKENDS.register_module(MigrationBackend.InfiniStore.name)
 class InfiniStoreBackend(MigrationBackendImpl):
+
     def p2p_initialize(self, init_request: DistServeInitRequest):
         raise NotImplementedError
 
-    def register_memory_region(self, register_mr_request:DistServeRegisterMRMessage):
+    def register_memory_region(self, register_mr_request: DistServeRegisterMRMessage):
         raise NotImplementedError
 
     def endpoint_info(self, remote_engine_id: int, protocol: MigrationProtocol):

diff --git a/lmdeploy/disagg/backend/mooncake.py b/lmdeploy/disagg/backend/mooncake.py
@@ -1,20 +1,18 @@
-from lmdeploy.disagg.backend.backend import register_migration_backend
+# Copyright (c) OpenMMLab. All rights reserved.
+from lmdeploy.disagg.backend.backend import MIGRATION_BACKENDS
 from lmdeploy.disagg.backend.base import MigrationBackendImpl
-from lmdeploy.disagg.config import MigrationProtocol
+from lmdeploy.disagg.config import MigrationBackend, MigrationProtocol
 from lmdeploy.disagg.messages import DistServeRegisterMRMessage, MigrationAssignment
-from lmdeploy.disagg.request import (
-    DistServeInitRequest,
-    DistServeConnectionRequest
-)
-from lmdeploy.disagg.config import MigrationBackend
+from lmdeploy.disagg.request import DistServeConnectionRequest, DistServeInitRequest
 
 
-@register_migration_backend(MigrationBackend.Mooncake)
+@MIGRATION_BACKENDS.register_module(MigrationBackend.Mooncake.name)
 class MooncakeBackend(MigrationBackendImpl):
+
     def p2p_initialize(self, init_request: DistServeInitRequest):
         raise NotImplementedError
 
-    def register_memory_region(self, register_mr_request:DistServeRegisterMRMessage):
+    def register_memory_region(self, register_mr_request: DistServeRegisterMRMessage):
         raise NotImplementedError
 
     def endpoint_info(self, remote_engine_id: int, protocol: MigrationProtocol):