mirror of
https://github.com/deepmodeling/Uni-Lab-OS
synced 2026-03-24 09:39:17 +00:00
v0.10.19
fast registry load minor fix on skill & registry stripe ros2 schema desc add create-device-skill new registry system backwards to yaml remove not exist resource new registry sys exp. support with add device add ai conventions correct raise create resource error ret info fix revert ret info fix fix prcxi check add create_resource schema re signal host ready event add websocket connection timeout and improve reconnection logic add open_timeout parameter to websocket connection add TimeoutError and InvalidStatus exception handling implement exponential backoff for reconnection attempts simplify reconnection logic flow add gzip change pose extra to any add isFlapY
This commit is contained in:
@@ -12,6 +12,7 @@ from geometry_msgs.msg import Point
|
||||
from rclpy.action import ActionClient, get_action_server_names_and_types_by_node
|
||||
from rclpy.service import Service
|
||||
from typing_extensions import TypedDict
|
||||
from unilabos_msgs.action import EmptyIn, StrSingleInput, ResourceCreateFromOuterEasy, ResourceCreateFromOuter
|
||||
from unilabos_msgs.msg import Resource # type: ignore
|
||||
from unilabos_msgs.srv import (
|
||||
ResourceAdd,
|
||||
@@ -23,6 +24,7 @@ from unilabos_msgs.srv import (
|
||||
from unilabos_msgs.srv._serial_command import SerialCommand_Request, SerialCommand_Response
|
||||
from unique_identifier_msgs.msg import UUID
|
||||
|
||||
from unilabos.registry.decorators import device
|
||||
from unilabos.registry.placeholder_type import ResourceSlot, DeviceSlot
|
||||
from unilabos.registry.registry import lab_registry
|
||||
from unilabos.resources.container import RegularContainer
|
||||
@@ -30,6 +32,7 @@ from unilabos.resources.graphio import initialize_resource
|
||||
from unilabos.resources.registry import add_schema
|
||||
from unilabos.resources.resource_tracker import (
|
||||
ResourceDict,
|
||||
ResourceDictType,
|
||||
ResourceDictInstance,
|
||||
ResourceTreeSet,
|
||||
ResourceTreeInstance,
|
||||
@@ -65,7 +68,13 @@ class DeviceActionStatus:
|
||||
class TestResourceReturn(TypedDict):
|
||||
resources: List[List[ResourceDict]]
|
||||
devices: List[Dict[str, Any]]
|
||||
unilabos_samples: List[LabSample]
|
||||
# unilabos_samples: List[LabSample]
|
||||
|
||||
|
||||
class CreateResourceReturn(TypedDict):
    """Typed shape of the dictionary returned by resource creation.

    Used as the return annotation of the host's resource-creation method.
    """

    # Nested lists of ResourceDict entries describing what was created.
    # NOTE(review): presumably one inner list per resource tree - confirm.
    created_resource_tree: List[List[ResourceDict]]

    # Plain dictionaries for the liquid-input side of the request.
    # NOTE(review): exact schema is not visible here - confirm against the producer.
    liquid_input_resource_tree: List[Dict[str, Any]]

    # Disabled field, kept for reference:
    # unilabos_samples: List[LabSample]
|
||||
|
||||
|
||||
class TestLatencyReturn(TypedDict):
|
||||
@@ -80,6 +89,7 @@ class TestLatencyReturn(TypedDict):
|
||||
status: str
|
||||
|
||||
|
||||
@device(id="host_node", category=[], description="Host Node", icon="icon_device.webp")
|
||||
class HostNode(BaseROS2DeviceNode):
|
||||
"""
|
||||
主机节点类,负责管理设备、资源和控制器
|
||||
@@ -268,44 +278,42 @@ class HostNode(BaseROS2DeviceNode):
|
||||
self._action_clients: Dict[str, ActionClient] = { # 为了方便了解实际的数据类型,host的默认写好
|
||||
"/devices/host_node/create_resource": ActionClient(
|
||||
self,
|
||||
lab_registry.ResourceCreateFromOuterEasy,
|
||||
ResourceCreateFromOuterEasy,
|
||||
"/devices/host_node/create_resource",
|
||||
callback_group=self.callback_group,
|
||||
),
|
||||
"/devices/host_node/create_resource_detailed": ActionClient(
|
||||
self,
|
||||
lab_registry.ResourceCreateFromOuter,
|
||||
ResourceCreateFromOuter,
|
||||
"/devices/host_node/create_resource_detailed",
|
||||
callback_group=self.callback_group,
|
||||
),
|
||||
"/devices/host_node/test_latency": ActionClient(
|
||||
self,
|
||||
lab_registry.EmptyIn,
|
||||
EmptyIn,
|
||||
"/devices/host_node/test_latency",
|
||||
callback_group=self.callback_group,
|
||||
),
|
||||
"/devices/host_node/test_resource": ActionClient(
|
||||
self,
|
||||
lab_registry.EmptyIn,
|
||||
EmptyIn,
|
||||
"/devices/host_node/test_resource",
|
||||
callback_group=self.callback_group,
|
||||
),
|
||||
"/devices/host_node/_execute_driver_command": ActionClient(
|
||||
self,
|
||||
lab_registry.StrSingleInput,
|
||||
StrSingleInput,
|
||||
"/devices/host_node/_execute_driver_command",
|
||||
callback_group=self.callback_group,
|
||||
),
|
||||
"/devices/host_node/_execute_driver_command_async": ActionClient(
|
||||
self,
|
||||
lab_registry.StrSingleInput,
|
||||
StrSingleInput,
|
||||
"/devices/host_node/_execute_driver_command_async",
|
||||
callback_group=self.callback_group,
|
||||
),
|
||||
} # 用来存储多个ActionClient实例
|
||||
self._action_value_mappings: Dict[str, Dict] = (
|
||||
{}
|
||||
) # device_id -> action_value_mappings(本地+远程设备统一存储)
|
||||
self._action_value_mappings: Dict[str, Dict] = {} # device_id -> action_value_mappings(本地+远程设备统一存储)
|
||||
self._slave_registry_configs: Dict[str, Dict] = {} # registry_name -> registry_config(含action_value_mappings)
|
||||
self._goals: Dict[str, Any] = {} # 用来存储多个目标的状态
|
||||
self._online_devices: Set[str] = {f"{self.namespace}/{device_id}"} # 用于跟踪在线设备
|
||||
@@ -323,10 +331,18 @@ class HostNode(BaseROS2DeviceNode):
|
||||
self._discover_devices()
|
||||
|
||||
# 初始化所有本机设备节点,多一次过滤,防止重复初始化
|
||||
local_machine = BasicConfig.machine_name
|
||||
for device_config in devices_config.root_nodes:
|
||||
device_id = device_config.res_content.id
|
||||
if device_config.res_content.type != "device":
|
||||
continue
|
||||
dev_machine = device_config.res_content.machine_name
|
||||
if dev_machine and local_machine and dev_machine != local_machine:
|
||||
self.lab_logger().info(
|
||||
f"[Host Node] Device {device_id} belongs to machine '{dev_machine}', "
|
||||
f"local is '{local_machine}', skipping initialization."
|
||||
)
|
||||
continue
|
||||
if device_id not in self.devices_names:
|
||||
self.initialize_device(device_id, device_config)
|
||||
else:
|
||||
@@ -556,7 +572,7 @@ class HostNode(BaseROS2DeviceNode):
|
||||
liquid_type: list[str] = [],
|
||||
liquid_volume: list[int] = [],
|
||||
slot_on_deck: str = "",
|
||||
):
|
||||
) -> CreateResourceReturn:
|
||||
# 暂不支持多对同名父子同时存在
|
||||
res_creation_input = {
|
||||
"id": res_id.split("/")[-1],
|
||||
@@ -609,6 +625,8 @@ class HostNode(BaseROS2DeviceNode):
|
||||
assert len(response) == 1, "Create Resource应当只返回一个结果"
|
||||
for i in response:
|
||||
res = json.loads(i)
|
||||
if "suc" in res:
|
||||
raise ValueError(res.get("error"))
|
||||
return res
|
||||
except Exception as ex:
|
||||
pass
|
||||
@@ -650,7 +668,12 @@ class HostNode(BaseROS2DeviceNode):
|
||||
action_id = f"/devices/{device_id}/{action_name}"
|
||||
if action_id not in self._action_clients:
|
||||
action_type = action_value_mapping["type"]
|
||||
self._action_clients[action_id] = ActionClient(self, action_type, action_id)
|
||||
try:
|
||||
self._action_clients[action_id] = ActionClient(self, action_type, action_id)
|
||||
except Exception as e:
|
||||
self.lab_logger().error(
|
||||
f"创建ActionClient失败,Device: {device_id}, Action Name: {action_name}, Action Type: {action_type}, Error: {e}")
|
||||
continue
|
||||
self.lab_logger().trace(
|
||||
f"[Host Node] Created ActionClient (Local): {action_id}"
|
||||
) # 子设备再创建用的是Discover发现的
|
||||
@@ -1250,9 +1273,9 @@ class HostNode(BaseROS2DeviceNode):
|
||||
|
||||
# 用 registry_name 索引已存储的 registry_config,获取 action_value_mappings
|
||||
if registry_name and registry_name in self._slave_registry_configs:
|
||||
action_mappings = self._slave_registry_configs[registry_name].get(
|
||||
"class", {}
|
||||
).get("action_value_mappings", {})
|
||||
action_mappings = (
|
||||
self._slave_registry_configs[registry_name].get("class", {}).get("action_value_mappings", {})
|
||||
)
|
||||
if action_mappings:
|
||||
self._action_value_mappings[edge_device_id] = action_mappings
|
||||
self.lab_logger().info(
|
||||
@@ -1272,14 +1295,19 @@ class HostNode(BaseROS2DeviceNode):
|
||||
|
||||
# 解析 devices_config,建立 device_id -> action_value_mappings 映射
|
||||
if devices_config:
|
||||
machine_name = info["machine_name"]
|
||||
# Stamp machine_name on each device dict before parsing
|
||||
for device_tree in devices_config:
|
||||
for device_dict in device_tree:
|
||||
device_dict["machine_name"] = machine_name
|
||||
device_id = device_dict.get("id", "")
|
||||
class_name = device_dict.get("class", "")
|
||||
if device_id and class_name and class_name in self._slave_registry_configs:
|
||||
action_mappings = self._slave_registry_configs[class_name].get(
|
||||
"class", {}
|
||||
).get("action_value_mappings", {})
|
||||
action_mappings = (
|
||||
self._slave_registry_configs[class_name]
|
||||
.get("class", {})
|
||||
.get("action_value_mappings", {})
|
||||
)
|
||||
if action_mappings:
|
||||
self._action_value_mappings[device_id] = action_mappings
|
||||
self.lab_logger().info(
|
||||
@@ -1287,6 +1315,18 @@ class HostNode(BaseROS2DeviceNode):
|
||||
f"for remote device {device_id} (class: {class_name})"
|
||||
)
|
||||
|
||||
# Merge slave devices_config into self.devices_config tree
|
||||
try:
|
||||
slave_tree_set = ResourceTreeSet.load(devices_config) # slave一定是根节点的tree
|
||||
for tree in slave_tree_set.trees:
|
||||
self.devices_config.trees.append(tree)
|
||||
self.lab_logger().info(
|
||||
f"[Host Node] Merged {len(slave_tree_set.trees)} slave device trees "
|
||||
f"(machine: {machine_name}) into devices_config"
|
||||
)
|
||||
except Exception as e:
|
||||
self.lab_logger().error(f"[Host Node] Failed to merge slave devices_config: {e}")
|
||||
|
||||
self.lab_logger().debug(f"[Host Node] Node info update: {info}")
|
||||
response.response = "OK"
|
||||
except Exception as e:
|
||||
@@ -1695,3 +1735,177 @@ class HostNode(BaseROS2DeviceNode):
|
||||
self.lab_logger().error(f"[Host Node-Resource] Error notifying resource tree update: {str(e)}")
|
||||
self.lab_logger().error(traceback.format_exc())
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Device lifecycle (add / remove) — pure forwarder
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def notify_device_manage(self, target_node_id: str, action: str, config: ResourceDictType) -> bool:
    """Forward an add/remove device command to the node that owns the device.

    When ``target_node_id`` equals the HostNode's own ``device_id`` the command
    is handled locally through ``create_device`` / ``destroy_device``.
    Otherwise the request is serialized to JSON and sent to the target's
    ``/srv{namespace}/s2c_device_manage`` SerialCommand service; the HostNode
    does not interpret the command in that case.

    Args:
        target_node_id: Device id of the node that should execute the command.
        action: ``"add"`` or ``"remove"``. Remote targets receive the value
            verbatim; for the host itself any other value is rejected.
        config: Device configuration. Must contain an ``"id"`` key.

    Returns:
        True when the local operation reports success, or when the remote
        service call completes. False on any failure: missing ``"id"``,
        unsupported local action, unknown or offline target, unavailable
        service, timeout, or any raised exception (which is logged).
    """
    try:
        device_id = config["id"]

        # Commands aimed at the host itself never go through ROS2.
        if target_node_id == self.device_id:
            if action == "add":
                return self.create_device(device_id, config).get("success", False)
            if action == "remove":
                return self.destroy_device(device_id).get("success", False)
            # Do not fall through: forwarding an unknown action to our own
            # service would only make the host wait on itself.
            self.lab_logger().error(
                f"[Host Node-DeviceMgr] Unsupported action '{action}' for host device {target_node_id}"
            )
            return False

        if target_node_id not in self.devices_names:
            self.lab_logger().error(
                f"[Host Node-DeviceMgr] Target {target_node_id} not found in devices_names"
            )
            return False

        namespace = self.devices_names[target_node_id]
        device_key = f"{namespace}/{target_node_id}"
        if device_key not in self._online_devices:
            self.lab_logger().error(f"[Host Node-DeviceMgr] Target {device_key} is offline")
            return False

        srv_address = f"/srv{namespace}/s2c_device_manage"
        self.lab_logger().info(
            f"[Host Node-DeviceMgr] Forwarding {action}_device to {target_node_id} ({srv_address})"
        )

        sclient = self.create_client(SerialCommand, srv_address)
        try:
            if not sclient.wait_for_service(timeout_sec=5.0):
                self.lab_logger().error(f"[Host Node-DeviceMgr] Service {srv_address} not available")
                return False

            request = SerialCommand.Request()
            request.command = json.dumps({"action": action, "data": config}, ensure_ascii=False)

            future = sclient.call_async(request)
            # Monotonic clock: wall-clock adjustments must not stretch or
            # shorten the 30 second budget.
            deadline = time.monotonic() + 30.0
            while not future.done():
                if time.monotonic() > deadline:
                    future.cancel()
                    self.lab_logger().error(
                        f"[Host Node-DeviceMgr] Timeout waiting for {action}_device on {target_node_id}"
                    )
                    return False
                time.sleep(0.05)

            # result() re-raises any exception from the call. The response
            # payload itself is not inspected.
            # NOTE(review): consider checking the remote result once its
            # schema is confirmed.
            future.result()
            self.lab_logger().info(
                f"[Host Node-DeviceMgr] {action}_device on {target_node_id} completed"
            )
            return True
        finally:
            # A client is created per call; release it so repeated commands
            # do not accumulate service clients on the node.
            self.destroy_client(sclient)

    except Exception as e:
        self.lab_logger().error(f"[Host Node-DeviceMgr] Error: {e}")
        self.lab_logger().error(traceback.format_exc())
        return False
|
||||
|
||||
def create_device(self, device_id: str, config: ResourceDictType) -> dict:
    """Dynamically create a root-level device on the host.

    The device is initialized through ``initialize_device``, appended to
    ``self.devices_config.trees`` and, on a best-effort basis, registered with
    the resource tracker.

    Args:
        device_id: Identifier of the device to create. Must be non-empty and
            not already present in ``self.devices_names``.
        config: Device configuration. Missing ``id``, ``type`` and
            ``machine_name`` keys are filled with defaults on an internal
            copy; the caller's dictionary is left untouched.

    Returns:
        ``{"success": True, "device_id": device_id}`` on success, otherwise
        ``{"success": False, "error": <message>}``. Exceptions are logged and
        reported through the ``error`` field instead of being raised.
    """
    if not device_id:
        return {"success": False, "error": "device_id required"}

    if device_id in self.devices_names:
        return {"success": False, "error": f"Device {device_id} already exists"}

    try:
        # Shallow copy so the defaults below never leak into the caller's dict.
        device_config = dict(config)
        device_config.setdefault("id", device_id)
        device_config.setdefault("type", "device")
        device_config.setdefault("machine_name", BasicConfig.machine_name or "本地")
        res_dict = ResourceDictInstance.get_resource_instance_from_dict(device_config)

        self.initialize_device(device_id, res_dict)

        # The device must be listed in devices_names after initialization;
        # otherwise treat the attempt as failed.
        if device_id not in self.devices_names:
            return {"success": False, "error": f"initialize_device failed for {device_id}"}

        # Add to config tree (devices_config)
        tree = ResourceTreeInstance(res_dict)
        self.devices_config.trees.append(tree)

        # Add to resource tracker so s2c_resource_tree can find it.
        # Best effort: a failure here is logged and does not fail the creation.
        try:
            for plr_resource in ResourceTreeSet([tree]).to_plr_resources():
                self._resource_tracker.add_resource(plr_resource)
        except Exception as ex:
            self.lab_logger().warning(f"[Host Node-DeviceMgr] PLR resource registration skipped: {ex}")

        self.lab_logger().info(f"[Host Node-DeviceMgr] Device {device_id} created successfully")
        return {"success": True, "device_id": device_id}

    except Exception as e:
        self.lab_logger().error(f"[Host Node-DeviceMgr] Failed to create {device_id}: {e}")
        self.lab_logger().error(traceback.format_exc())
        return {"success": False, "error": str(e)}
|
||||
|
||||
def destroy_device(self, device_id: str) -> dict:
    """Remove a root-level device from the host.

    Tears down, in order: the device's action clients (keys starting with
    ``/devices/{device_id}/``), its tree in ``self.devices_config``, matching
    entries in the resource tracker, the host's bookkeeping for the device,
    and finally the device's ROS2 node if the instance exposes ``_ros_node``.

    Args:
        device_id: Identifier of the device to remove. Must be non-empty,
            present in ``self.devices_names`` and different from the host's
            own ``device_id``.

    Returns:
        ``{"success": True, "device_id": device_id}`` on success, otherwise
        ``{"success": False, "error": <message>}``. Exceptions are logged and
        reported through the ``error`` field instead of being raised.
    """
    if not device_id:
        return {"success": False, "error": "device_id required"}

    if device_id not in self.devices_names:
        return {"success": False, "error": f"Device {device_id} not found"}

    if device_id == self.device_id:
        return {"success": False, "error": "Cannot destroy host_node itself"}

    try:
        namespace = self.devices_names[device_id]
        device_key = f"{namespace}/{device_id}"

        # Remove action clients. Keys are collected first so the dict is not
        # mutated while it is being iterated.
        action_prefix = f"/devices/{device_id}/"
        stale_action_ids = [k for k in self._action_clients if k.startswith(action_prefix)]
        for action_id in stale_action_ids:
            client = self._action_clients.pop(action_id)
            try:
                client.destroy()
            except Exception as ex:
                # Best effort: keep tearing down, but leave a trace in the log.
                self.lab_logger().warning(
                    f"[Host Node-DeviceMgr] Error destroying ActionClient {action_id}: {ex}"
                )

        # Remove from config tree (devices_config)
        self.devices_config.trees = [
            t for t in self.devices_config.trees
            if t.root_node.res_content.id != device_id
        ]

        # Remove from resource tracker. Iterate over a snapshot because
        # remove_resource may modify the underlying mapping.
        try:
            for res in list(self._resource_tracker.uuid_to_resources.values()):
                res_id = res.get("id") if isinstance(res, dict) else getattr(res, "name", None)
                if res_id == device_id:
                    self._resource_tracker.remove_resource(res)
        except Exception as ex:
            self.lab_logger().warning(f"[Host Node-DeviceMgr] Resource tracker cleanup: {ex}")

        # Clean internal state
        self._online_devices.discard(device_key)
        self.devices_names.pop(device_id, None)
        self.device_machine_names.pop(device_id, None)
        self._action_value_mappings.pop(device_id, None)

        # Destroy the ROS2 node of the device
        instance = self.devices_instances.pop(device_id, None)
        if instance is not None:
            try:
                # noinspection PyProtectedMember
                ros_node = getattr(instance, "_ros_node", None)
                if ros_node is not None:
                    ros_node.destroy_node()
            except Exception as e:
                self.lab_logger().warning(f"[Host Node-DeviceMgr] Error destroying ROS node for {device_id}: {e}")

        self.lab_logger().info(f"[Host Node-DeviceMgr] Device {device_id} destroyed")
        return {"success": True, "device_id": device_id}

    except Exception as e:
        self.lab_logger().error(f"[Host Node-DeviceMgr] Failed to destroy {device_id}: {e}")
        self.lab_logger().error(traceback.format_exc())
        return {"success": False, "error": str(e)}
|
||||
|
||||
Reference in New Issue
Block a user