mirror of
https://github.com/deepmodeling/Uni-Lab-OS
synced 2026-03-27 13:13:05 +00:00
new registry sys
exp. support with add device
This commit is contained in:
@@ -4,6 +4,7 @@ import os
|
||||
import platform
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -25,6 +26,84 @@ from unilabos.config.config import load_config, BasicConfig, HTTPConfig
|
||||
_restart_requested: bool = False
|
||||
_restart_reason: str = ""
|
||||
|
||||
RESTART_EXIT_CODE = 42
|
||||
|
||||
|
||||
def _build_child_argv():
    """Return a copy of ``sys.argv`` without the supervisor-only options.

    Dropped tokens:
      * ``--restart_mode`` / ``--restart-mode`` (bare flag),
      * ``--auto_restart_count`` / ``--auto-restart-count`` together with the
        token that follows it (its value),
      * the inline ``--auto_restart_count=N`` / ``--auto-restart-count=N`` forms.

    Every other token is kept in its original order.
    """
    bare_flags = ("--restart_mode", "--restart-mode")
    valued_flags = ("--auto_restart_count", "--auto-restart-count")
    inline_prefixes = tuple(flag + "=" for flag in valued_flags)

    kept = []
    tokens = iter(sys.argv)
    for token in tokens:
        if token in bare_flags:
            continue
        if token in valued_flags:
            # The value is a separate token: consume it too (if there is one).
            next(tokens, None)
            continue
        if token.startswith(inline_prefixes):
            continue
        kept.append(token)
    return kept
|
||||
|
||||
|
||||
def _run_as_supervisor(max_restarts: int):
    """
    Supervisor process that spawns and monitors child processes.

    Similar to Uvicorn's --reload: the supervisor itself does no heavy work,
    it only launches the real process as a child and restarts it when the child
    exits with RESTART_EXIT_CODE.

    Args:
        max_restarts: Number of restarts allowed before the supervisor gives up.

    This function never returns normally; it always terminates the process
    through ``sys.exit``:
      * with the child's exit code when the child exits with anything other
        than RESTART_EXIT_CODE,
      * with 1 when the restart budget is exhausted or the supervisor is
        interrupted with Ctrl+C.
    """
    child_argv = [sys.executable] + _build_child_argv()
    restart_count = 0

    print_status(
        f"[Supervisor] Restart mode enabled (max restarts: {max_restarts}), "
        f"child command: {' '.join(child_argv)}",
        "info",
    )

    while True:
        print_status(
            f"[Supervisor] Launching process (restart {restart_count}/{max_restarts})...",
            "info",
        )

        # Reset per launch so the interrupt handler never touches an unbound
        # name (first launch) or the previous, already-reaped child.
        process = None
        try:
            process = subprocess.Popen(child_argv)
            exit_code = process.wait()
        except KeyboardInterrupt:
            print_status("[Supervisor] Interrupted, terminating child process...", "info")
            if process is not None:
                process.terminate()
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    # The child ignored the terminate request; force it down.
                    process.kill()
                    process.wait()
            sys.exit(1)

        if exit_code != RESTART_EXIT_CODE:
            if exit_code != 0:
                print_status(f"[Supervisor] Child exited with code {exit_code}", "warning")
            else:
                print_status("[Supervisor] Child exited normally", "info")
            sys.exit(exit_code)

        restart_count += 1
        if restart_count > max_restarts:
            print_status(
                f"[Supervisor] Maximum restart count ({max_restarts}) reached, exiting",
                "warning",
            )
            sys.exit(1)
        print_status(
            f"[Supervisor] Child requested restart ({restart_count}/{max_restarts}), restarting in 2s...",
            "info",
        )
        time.sleep(2)
|
||||
|
||||
|
||||
def load_config_from_file(config_path):
|
||||
if config_path is None:
|
||||
@@ -66,6 +145,13 @@ def parse_args():
|
||||
action="append",
|
||||
help="Path to the registry directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--devices",
|
||||
type=str,
|
||||
default=None,
|
||||
action="append",
|
||||
help="Path to Python code directory for AST-based device/resource scanning",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--working_dir",
|
||||
type=str,
|
||||
@@ -155,12 +241,6 @@ def parse_args():
|
||||
action="store_true",
|
||||
help="Skip environment dependency check on startup",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--complete_registry",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Complete registry information",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--check_mode",
|
||||
action="store_true",
|
||||
@@ -178,6 +258,24 @@ def parse_args():
|
||||
default=False,
|
||||
help="Test mode: all actions simulate execution and return mock results without running real hardware",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--extra_resource",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Load extra lab_ prefixed labware resources (529 auto-generated definitions from lab_resources.py)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--restart_mode",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Enable supervisor mode: automatically restart the process when triggered via WebSocket",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--auto_restart_count",
|
||||
type=int,
|
||||
default=500,
|
||||
help="Maximum number of automatic restarts in restart mode (default: 500)",
|
||||
)
|
||||
# workflow upload subcommand
|
||||
workflow_parser = subparsers.add_parser(
|
||||
"workflow_upload",
|
||||
@@ -228,6 +326,11 @@ def main():
|
||||
args = parser.parse_args()
|
||||
args_dict = vars(args)
|
||||
|
||||
# Supervisor mode: spawn child processes and monitor for restart
|
||||
if args_dict.get("restart_mode", False):
|
||||
_run_as_supervisor(args_dict.get("auto_restart_count", 5))
|
||||
return
|
||||
|
||||
# 环境检查 - 检查并自动安装必需的包 (可选)
|
||||
skip_env_check = args_dict.get("skip_env_check", False)
|
||||
check_mode = args_dict.get("check_mode", False)
|
||||
@@ -358,6 +461,9 @@ def main():
|
||||
BasicConfig.test_mode = args_dict.get("test_mode", False)
|
||||
if BasicConfig.test_mode:
|
||||
print_status("启用测试模式:所有动作将模拟执行,不调用真实硬件", "warning")
|
||||
BasicConfig.extra_resource = args_dict.get("extra_resource", False)
|
||||
if BasicConfig.extra_resource:
|
||||
print_status("启用额外资源加载:将加载lab_开头的labware资源定义", "info")
|
||||
BasicConfig.communication_protocol = "websocket"
|
||||
machine_name = platform.node()
|
||||
machine_name = "".join([c if c.isalnum() or c == "_" else "_" for c in machine_name])
|
||||
@@ -382,22 +488,30 @@ def main():
|
||||
# 显示启动横幅
|
||||
print_unilab_banner(args_dict)
|
||||
|
||||
# 注册表 - check_mode 时强制启用 complete_registry
|
||||
complete_registry = args_dict.get("complete_registry", False) or check_mode
|
||||
lab_registry = build_registry(args_dict["registry_path"], complete_registry, BasicConfig.upload_registry)
|
||||
# Step 0: AST 分析优先 + YAML 注册表加载
|
||||
# check_mode 和 upload_registry 都会执行实际 import 验证
|
||||
devices_dirs = args_dict.get("devices", None)
|
||||
lab_registry = build_registry(
|
||||
registry_paths=args_dict["registry_path"],
|
||||
devices_dirs=devices_dirs,
|
||||
upload_registry=BasicConfig.upload_registry,
|
||||
check_mode=check_mode,
|
||||
)
|
||||
|
||||
# Check mode: complete_registry 完成后直接退出,git diff 检测由 CI workflow 执行
|
||||
# Check mode: 注册表验证完成后直接退出
|
||||
if check_mode:
|
||||
print_status("Check mode: complete_registry 完成,退出", "info")
|
||||
device_count = len(lab_registry.device_type_registry)
|
||||
resource_count = len(lab_registry.resource_type_registry)
|
||||
print_status(f"Check mode: 注册表验证完成 ({device_count} 设备, {resource_count} 资源),退出", "info")
|
||||
os._exit(0)
|
||||
|
||||
# Step 1: 上传全部注册表到服务端,同步保存到 unilabos_data
|
||||
if BasicConfig.upload_registry:
|
||||
# 设备注册到服务端 - 需要 ak 和 sk
|
||||
if BasicConfig.ak and BasicConfig.sk:
|
||||
print_status("开始注册设备到服务端...", "info")
|
||||
# print_status("开始注册设备到服务端...", "info")
|
||||
try:
|
||||
register_devices_and_resources(lab_registry)
|
||||
print_status("设备注册完成", "info")
|
||||
# print_status("设备注册完成", "info")
|
||||
except Exception as e:
|
||||
print_status(f"设备注册失败: {e}", "error")
|
||||
else:
|
||||
@@ -482,7 +596,7 @@ def main():
|
||||
continue
|
||||
|
||||
# 如果从远端获取了物料信息,则与本地物料进行同步
|
||||
if request_startup_json and "nodes" in request_startup_json:
|
||||
if file_path is not None and request_startup_json and "nodes" in request_startup_json:
|
||||
print_status("开始同步远端物料到本地...", "info")
|
||||
remote_tree_set = ResourceTreeSet.from_raw_dict_list(request_startup_json["nodes"])
|
||||
resource_tree_set.merge_remote_resources(remote_tree_set)
|
||||
@@ -579,6 +693,10 @@ def main():
|
||||
open_browser=not args_dict["disable_browser"],
|
||||
port=BasicConfig.port,
|
||||
)
|
||||
if restart_requested:
|
||||
print_status("[Main] Restart requested, cleaning up...", "info")
|
||||
cleanup_for_restart()
|
||||
os._exit(RESTART_EXIT_CODE)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,60 +1,83 @@
|
||||
import json
|
||||
import time
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
from unilabos.utils.log import logger
|
||||
from unilabos.utils.type_check import TypeEncoder
|
||||
|
||||
try:
    import orjson
except ImportError:

    def _normalize_device(info: dict) -> dict:
        """Round-trip ``info`` through the stdlib ``json`` module using ``TypeEncoder``."""
        encoded = json.dumps(info, ensure_ascii=False, cls=TypeEncoder)
        return json.loads(encoded)

else:

    def _normalize_device(info: dict) -> dict:
        """Round-trip ``info`` through orjson; values orjson cannot encode go through ``str``."""
        encoded = orjson.dumps(info, default=str)
        return orjson.loads(encoded)
|
||||
|
||||
|
||||
def _registry_upload_skipped(response) -> bool:
    """Return True when a 200 response body reports ``data.skipped`` as truthy.

    A body that is not JSON, not an object, or has no object under ``data`` is
    treated as "not skipped" instead of raising, so a successful upload is
    never reported as an exception just because of the body's shape.
    """
    if response.status_code != 200:
        return False
    try:
        payload = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        return False
    if not isinstance(payload, dict):
        return False
    data = payload.get("data")
    return isinstance(data, dict) and bool(data.get("skipped", False))


def _upload_registry(http_client, entries: Dict[str, Any], tag: str, label: str) -> None:
    """Upload one group of registry entries and log the outcome.

    Args:
        http_client: Client exposing ``resource_registry(payload, tag=...)``.
        entries: Mapping of id -> registry info; nothing is sent when empty.
        tag: Tag forwarded to ``resource_registry``.
        label: Noun inserted into the log messages for this group.
    """
    if not entries:
        return
    try:
        start_time = time.time()
        response = http_client.resource_registry(
            {"resources": list(entries.values())},
            tag=tag,
        )
        cost_time = time.time() - start_time
        if _registry_upload_skipped(response):
            logger.info(
                f"[UniLab Register] {label}注册跳过(内容未变化)"
                f" {len(entries)} 个 {cost_time:.3f}s"
            )
        elif response.status_code in [200, 201]:
            logger.info(f"[UniLab Register] 成功注册 {len(entries)} 个{label} {cost_time:.3f}s")
        else:
            logger.error(
                f"[UniLab Register] {label}注册失败: {response.status_code}, {response.text} {cost_time:.3f}s"
            )
    except Exception as e:
        # Best effort: a failed upload is logged and must not abort the caller.
        logger.error(f"[UniLab Register] {label}注册异常: {e}")


def register_devices_and_resources(lab_registry, gather_only=False) -> Optional[Tuple[Dict[str, Any], Dict[str, Any]]]:
    """
    Register devices and resources with the server (HTTP only).

    Args:
        lab_registry: Registry object providing ``obtain_registry_device_info()``
            and ``obtain_registry_resource_info()``; each yielded item must
            carry an ``"id"`` key.
        gather_only: When True, only collect the registry entries and return
            them without contacting the server.

    Returns:
        ``(devices, resources)`` dictionaries keyed by id when ``gather_only``
        is True, otherwise ``None``.
    """
    # Imported here, as in the original code, rather than at module level.
    from unilabos.app.web.client import http_client

    logger.info("[UniLab Register] 开始注册设备和资源...")

    # Collect device info; normalisation strips values that are not JSON-serialisable.
    devices_to_register = {}
    for device_info in lab_registry.obtain_registry_device_info():
        devices_to_register[device_info["id"]] = _normalize_device(device_info)
        logger.trace(f"[UniLab Register] 收集设备: {device_info['id']}")

    # Collect resource info as-is.
    resources_to_register = {}
    for resource_info in lab_registry.obtain_registry_resource_info():
        resources_to_register[resource_info["id"]] = resource_info
        logger.trace(f"[UniLab Register] 收集资源: {resource_info['id']}")

    if gather_only:
        return devices_to_register, resources_to_register

    # Devices first, then resources; each upload handles its own failures.
    _upload_registry(http_client, devices_to_register, "device_registry", "设备")
    _upload_registry(http_client, resources_to_register, "resource_registry", "资源")

    logger.info("[UniLab Register] 设备和资源注册完成.")
|
||||
|
||||
@@ -1052,7 +1052,7 @@ async def handle_file_import(websocket: WebSocket, request_data: dict):
|
||||
"result": {},
|
||||
"schema": lab_registry._generate_unilab_json_command_schema(v["args"], k),
|
||||
"goal_default": {i["name"]: i["default"] for i in v["args"]},
|
||||
"handles": [],
|
||||
"handles": {},
|
||||
}
|
||||
# 不生成已配置action的动作
|
||||
for k, v in enhanced_info["action_methods"].items()
|
||||
|
||||
@@ -8,6 +8,25 @@ import json
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
try:
    import orjson as _json_fast
except ImportError:
    _json_fast = None  # type: ignore[assignment]

    def _fast_dumps(obj, **kwargs) -> bytes:
        """Serialize ``obj`` to compact UTF-8 JSON bytes with the stdlib encoder."""
        text = json.dumps(obj, ensure_ascii=False, default=str)
        return text.encode("utf-8")

    def _fast_dumps_pretty(obj, **kwargs) -> bytes:
        """Serialize ``obj`` to 2-space indented UTF-8 JSON bytes with the stdlib encoder."""
        text = json.dumps(obj, indent=2, ensure_ascii=False, default=str)
        return text.encode("utf-8")

else:

    def _fast_dumps(obj, **kwargs) -> bytes:
        """Serialize ``obj`` to JSON bytes with orjson; extra keyword arguments are ignored."""
        return _json_fast.dumps(obj, default=str, option=_json_fast.OPT_NON_STR_KEYS)

    def _fast_dumps_pretty(obj, **kwargs) -> bytes:
        """Serialize ``obj`` to 2-space indented JSON bytes with orjson; extra keyword arguments are ignored."""
        pretty_options = _json_fast.OPT_NON_STR_KEYS | _json_fast.OPT_INDENT_2
        return _json_fast.dumps(obj, default=str, option=pretty_options)
|
||||
|
||||
import requests
|
||||
from unilabos.resources.resource_tracker import ResourceTreeSet
|
||||
from unilabos.utils.log import info
|
||||
@@ -280,29 +299,54 @@ class HTTPClient:
|
||||
)
|
||||
return response
|
||||
|
||||
def resource_registry(self, registry_data: Dict[str, Any] | List[Dict[str, Any]]) -> requests.Response:
|
||||
def resource_registry(
|
||||
self, registry_data: Dict[str, Any] | List[Dict[str, Any]], tag: str = "registry",
|
||||
) -> requests.Response:
|
||||
"""
|
||||
注册资源到服务器
|
||||
注册资源到服务器,同步保存请求/响应到 unilabos_data
|
||||
|
||||
Args:
|
||||
registry_data: 注册表数据,格式为 {resource_id: resource_info} / [{resource_info}]
|
||||
tag: 保存文件的标签后缀 (如 "device_registry" / "resource_registry")
|
||||
|
||||
Returns:
|
||||
Response: API响应对象
|
||||
"""
|
||||
compressed_body = gzip.compress(
|
||||
json.dumps(registry_data, ensure_ascii=False, default=str).encode("utf-8")
|
||||
)
|
||||
# 序列化一次,同时用于保存和发送
|
||||
json_bytes = _fast_dumps(registry_data)
|
||||
|
||||
# 保存请求数据到 unilabos_data
|
||||
req_path = os.path.join(BasicConfig.working_dir, f"req_{tag}_upload.json")
|
||||
try:
|
||||
os.makedirs(BasicConfig.working_dir, exist_ok=True)
|
||||
with open(req_path, "wb") as f:
|
||||
f.write(_fast_dumps_pretty(registry_data))
|
||||
logger.trace(f"注册表请求数据已保存: {req_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"保存注册表请求数据失败: {e}")
|
||||
|
||||
compressed_body = gzip.compress(json_bytes)
|
||||
headers = {
|
||||
"Authorization": f"Lab {self.auth}",
|
||||
"Content-Type": "application/json",
|
||||
"Content-Encoding": "gzip",
|
||||
}
|
||||
response = requests.post(
|
||||
f"{self.remote_addr}/lab/resource",
|
||||
data=compressed_body,
|
||||
headers={
|
||||
"Authorization": f"Lab {self.auth}",
|
||||
"Content-Type": "application/json",
|
||||
"Content-Encoding": "gzip",
|
||||
},
|
||||
headers=headers,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
# 保存响应数据到 unilabos_data
|
||||
res_path = os.path.join(BasicConfig.working_dir, f"res_{tag}_upload.json")
|
||||
try:
|
||||
with open(res_path, "w", encoding="utf-8") as f:
|
||||
f.write(f"{response.status_code}\n{response.text}")
|
||||
logger.trace(f"注册表响应数据已保存: {res_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"保存注册表响应数据失败: {e}")
|
||||
|
||||
if response.status_code not in [200, 201]:
|
||||
logger.error(f"注册资源失败: {response.status_code}, {response.text}")
|
||||
if response.status_code == 200:
|
||||
|
||||
@@ -86,7 +86,7 @@ def setup_server() -> FastAPI:
|
||||
# 设置页面路由
|
||||
try:
|
||||
setup_web_pages(pages)
|
||||
info("[Web] 已加载Web UI模块")
|
||||
# info("[Web] 已加载Web UI模块")
|
||||
except ImportError as e:
|
||||
info(f"[Web] 未找到Web页面模块: {str(e)}")
|
||||
except Exception as e:
|
||||
@@ -138,7 +138,7 @@ def start_server(host: str = "0.0.0.0", port: int = 8002, open_browser: bool = T
|
||||
server_thread = threading.Thread(target=server.run, daemon=True, name="uvicorn_server")
|
||||
server_thread.start()
|
||||
|
||||
info("[Web] Server started, monitoring for restart requests...")
|
||||
# info("[Web] Server started, monitoring for restart requests...")
|
||||
|
||||
# 监控重启标志
|
||||
import unilabos.app.main as main_module
|
||||
|
||||
@@ -26,6 +26,7 @@ from enum import Enum
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from unilabos.app.model import JobAddReq
|
||||
from unilabos.resources.resource_tracker import ResourceDictType
|
||||
from unilabos.ros.nodes.presets.host_node import HostNode
|
||||
from unilabos.utils.type_check import serialize_result_info
|
||||
from unilabos.app.communication import BaseCommunicationClient
|
||||
@@ -408,6 +409,7 @@ class MessageProcessor:
|
||||
# 线程控制
|
||||
self.is_running = False
|
||||
self.thread = None
|
||||
self._loop = None # asyncio event loop引用,用于外部关闭websocket
|
||||
self.reconnect_count = 0
|
||||
|
||||
logger.info(f"[MessageProcessor] Initialized for URL: {websocket_url}")
|
||||
@@ -434,22 +436,31 @@ class MessageProcessor:
|
||||
def stop(self) -> None:
    """Stop the message-processing thread.

    Clears ``is_running``, schedules a close of the current websocket on the
    processor's event loop so the receive loop is interrupted promptly, then
    waits up to two seconds for the worker thread to finish.
    """
    self.is_running = False
    # Snapshot both references before use; the worker thread clears ``_loop``
    # when it exits.
    ws = self.websocket
    loop = self._loop
    if ws and loop and loop.is_running():
        try:
            asyncio.run_coroutine_threadsafe(ws.close(), loop)
        except Exception as e:
            # Best effort: shutdown continues either way, but leave a trace
            # instead of silently discarding the failure.
            logger.debug(f"[MessageProcessor] Failed to schedule websocket close: {e}")
    if self.thread and self.thread.is_alive():
        self.thread.join(timeout=2)
    logger.info("[MessageProcessor] Stopped")
|
||||
|
||||
def _run(self):
    """Thread entry point: run the connection handler on a dedicated event loop.

    The loop is stored on ``self._loop`` while it is in use and reset to
    ``None`` once it has been closed. Exceptions raised by the handler are
    logged with their traceback and not re-raised.
    """
    loop = asyncio.new_event_loop()
    # Published on the instance so stop() can schedule a websocket close on it.
    self._loop = loop
    try:
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._connection_handler())
    except Exception as e:
        logger.error(f"[MessageProcessor] Thread error: {str(e)}")
        logger.error(traceback.format_exc())
    finally:
        loop.close()
        self._loop = None
|
||||
|
||||
async def _connection_handler(self):
|
||||
"""处理WebSocket连接和重连逻辑"""
|
||||
@@ -648,6 +659,10 @@ class MessageProcessor:
|
||||
# elif message_type == "session_id":
|
||||
# self.session_id = message_data.get("session_id")
|
||||
# logger.info(f"[MessageProcessor] Session ID: {self.session_id}")
|
||||
elif message_type == "add_device":
|
||||
await self._handle_device_manage(message_data, "add")
|
||||
elif message_type == "remove_device":
|
||||
await self._handle_device_manage(message_data, "remove")
|
||||
elif message_type == "request_restart":
|
||||
await self._handle_request_restart(message_data)
|
||||
else:
|
||||
@@ -984,6 +999,37 @@ class MessageProcessor:
|
||||
)
|
||||
thread.start()
|
||||
|
||||
async def _handle_device_manage(self, device_list: list[ResourceDictType], action: str):
    """Handle add_device / remove_device from LabGo server.

    For every entry in ``device_list`` a daemon thread is started that asks
    the HostNode to apply ``action`` on the entry's ``target_node_id``
    (``"host_node"`` when the entry does not name one). Nothing happens for
    an empty or missing list.
    """
    if not device_list:
        return

    def _notify(target_id: str, act: str, cfg: ResourceDictType):
        # Runs on a worker thread; waits up to 5s for the HostNode instance.
        try:
            host_node = HostNode.get_instance(timeout=5)
            if host_node:
                if host_node.notify_device_manage(target_id, act, cfg):
                    logger.info(f"[DeviceManage] {act}_device completed on {target_id}")
                else:
                    logger.warning(f"[DeviceManage] {act}_device failed on {target_id}")
            else:
                logger.error(f"[DeviceManage] HostNode not available for {act}_device")
        except Exception as e:
            logger.error(f"[DeviceManage] Error in {act}_device: {e}")
            logger.error(traceback.format_exc())

    for item in device_list:
        worker = threading.Thread(
            target=_notify,
            args=(item.get("target_node_id", "host_node"), action, item),
            daemon=True,
            name=f"DeviceManage-{action}-{item.get('id', '')}",
        )
        worker.start()
|
||||
|
||||
async def _handle_request_restart(self, data: Dict[str, Any]):
|
||||
"""
|
||||
处理重启请求
|
||||
@@ -995,10 +1041,9 @@ class MessageProcessor:
|
||||
logger.info(f"[MessageProcessor] Received restart request, reason: {reason}, delay: {delay}s")
|
||||
|
||||
# 发送确认消息
|
||||
if self.websocket_client:
|
||||
await self.websocket_client.send_message(
|
||||
{"action": "restart_acknowledged", "data": {"reason": reason, "delay": delay}}
|
||||
)
|
||||
self.send_message(
|
||||
{"action": "restart_acknowledged", "data": {"reason": reason, "delay": delay}}
|
||||
)
|
||||
|
||||
# 设置全局重启标志
|
||||
import unilabos.app.main as main_module
|
||||
@@ -1100,6 +1145,7 @@ class QueueProcessor:
|
||||
def stop(self) -> None:
|
||||
"""停止队列处理线程"""
|
||||
self.is_running = False
|
||||
self.queue_update_event.set() # 立即唤醒等待中的线程
|
||||
if self.thread and self.thread.is_alive():
|
||||
self.thread.join(timeout=2)
|
||||
logger.info("[QueueProcessor] Stopped")
|
||||
@@ -1353,8 +1399,8 @@ class WebSocketClient(BaseCommunicationClient):
|
||||
message = {"action": "normal_exit", "data": {"session_id": session_id}}
|
||||
self.message_processor.send_message(message)
|
||||
logger.info(f"[WebSocketClient] Sent normal_exit message with session_id: {session_id}")
|
||||
# 给一点时间让消息发送出去
|
||||
time.sleep(1)
|
||||
# send_handler 每100ms检查一次队列,等300ms足以让消息发出
|
||||
time.sleep(0.3)
|
||||
except Exception as e:
|
||||
logger.warning(f"[WebSocketClient] Failed to send normal_exit message: {str(e)}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user