new registry system backwards to yaml

2026-03-24 11:49:14 +00:00 · 2026-03-22 02:19:54 +08:00
parent 4c2adea55a
commit 59c26265e9
62 changed files with 5034 additions and 6271 deletions
--- a/unilabos/utils/import_manager.py
+++ b/unilabos/utils/import_manager.py
@@ -21,15 +21,11 @@ __all__ = [
    "get_class",
    "get_module",
    "init_from_list",
-    "get_class_info_static",
-    "get_registry_class_info",
+    "get_enhanced_class_info",
 ]

-from ast import Constant
-
 from unilabos.resources.resource_tracker import PARAM_SAMPLE_UUIDS
 from unilabos.utils import logger
-from unilabos.registry.decorators import is_not_action, is_always_free


 class ImportManager:
@@ -184,168 +180,87 @@ class ImportManager:

        return None

-    def get_enhanced_class_info(self, module_path: str, use_dynamic: bool = True) -> Dict[str, Any]:
-        """
-        获取增强的类信息，支持动态导入和静态分析
+    def get_enhanced_class_info(self, module_path: str, **_kwargs) -> Dict[str, Any]:
+        """通过 AST 分析获取类的增强信息。
+
+        复用 ``ast_registry_scanner`` 的 ``_collect_imports`` / ``_extract_class_body``，
+        与 AST 扫描注册表完全一致。

        Args:
-            module_path: 模块路径，格式为 "module.path" 或 "module.path:ClassName"
-            use_dynamic: 是否优先使用动态导入
+            module_path: 格式 ``"module.path:ClassName"``

        Returns:
-            包含详细类信息的字典
+            ``{"module_path", "ast_analysis_success", "import_map",
+              "init_params", "status_methods", "action_methods"}``
        """
-        result = {
+        from unilabos.registry.ast_registry_scanner import (
+            _collect_imports,
+            _extract_class_body,
+            _filepath_to_module,
+        )
+
+        result: Dict[str, Any] = {
            "module_path": module_path,
-            "dynamic_import_success": False,
-            "static_analysis_success": False,
-            "init_params": {},
-            "status_methods": {},  # get_ 开头和 @property 方法
-            "action_methods": {},  # set_ 开头和其他非_开头方法
-        }
-
-        # 尝试动态导入
-        dynamic_info = None
-        static_info = None
-        if use_dynamic:
-            try:
-                dynamic_info = self._get_dynamic_class_info(module_path)
-                result["dynamic_import_success"] = True
-                logger.debug(f"[ImportManager] 动态导入类 {module_path} 成功")
-            except Exception as e:
-                logger.warning(
-                    f"[UniLab Registry] 在补充注册表时，动态导入类 "
-                    f"{module_path} 失败（将使用静态分析，"
-                    f"建议修复导入错误，以实现更好的注册表识别效果！）: {e}"
-                )
-                use_dynamic = False
-        if not use_dynamic:
-            # 尝试静态分析
-            try:
-                static_info = self._get_static_class_info(module_path)
-                result["static_analysis_success"] = True
-                logger.debug(f"[ImportManager] 静态分析类 {module_path} 成功")
-            except Exception as e:
-                logger.warning(f"[ImportManager] 静态分析类 {module_path} 失败: {e}")
-
-        # 合并信息（优先使用动态导入的信息）
-        if dynamic_info:
-            result.update(dynamic_info)
-        elif static_info:
-            result.update(static_info)
-
-        return result
-
-    def _get_dynamic_class_info(self, class_path: str) -> Dict[str, Any]:
-        """使用inspect模块动态获取类信息"""
-        cls = get_class(class_path)
-        class_name = cls.__name__
-
-        result = {
-            "class_name": class_name,
-            "init_params": self._analyze_method_signature(cls.__init__)["args"],
+            "ast_analysis_success": False,
+            "import_map": {},
+            "init_params": [],
            "status_methods": {},
            "action_methods": {},
        }
-        # 分析类的所有成员
-        for name, method in cls.__dict__.items():
-            if name.startswith("_"):
-                continue

-            # 检查是否是property
-            if isinstance(method, property):
-                # @property 装饰的方法
-                # noinspection PyTypeChecker
-                return_type = self._get_return_type_from_method(method.fget) if method.fget else "Any"
-                prop_info = {
-                    "name": name,
-                    "return_type": return_type,
-                }
-                result["status_methods"][name] = prop_info
-
-                # 检查是否有对应的setter
-                if method.fset:
-                    setter_info = self._analyze_method_signature(method.fset)
-                    result["action_methods"][name] = setter_info
-
-            elif inspect.ismethod(method) or inspect.isfunction(method):
-                if name.startswith("get_"):
-                    actual_name = name[4:]  # 去掉get_前缀
-                    if actual_name in result["status_methods"]:
-                        continue
-                    # get_ 开头的方法归类为status
-                    method_info = self._analyze_method_signature(method)
-                    result["status_methods"][actual_name] = method_info
-                elif not name.startswith("_"):
-                    # 检查是否被 @not_action 装饰器标记
-                    if is_not_action(method):
-                        continue
-                    # 其他非_开头的方法归类为action
-                    method_info = self._analyze_method_signature(method)
-                    # 检查是否被 @always_free 装饰器标记
-                    if is_always_free(method):
-                        method_info["always_free"] = True
-                    result["action_methods"][name] = method_info
-
-        return result
-
-    def _get_static_class_info(self, module_path: str) -> Dict[str, Any]:
-        """使用AST静态分析获取类信息"""
        module_name, class_name = module_path.rsplit(":", 1)
-        # 将模块路径转换为文件路径
        file_path = self._module_path_to_file_path(module_name)
        if not file_path or not os.path.exists(file_path):
-            raise FileNotFoundError(f"找不到模块文件: {module_name} -> {file_path}")
+            logger.warning(f"[ImportManager] 找不到模块文件: {module_name} -> {file_path}")
+            return result

-        with open(file_path, "r", encoding="utf-8") as f:
-            source_code = f.read()
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                tree = ast.parse(f.read(), filename=file_path)
+        except Exception as e:
+            logger.warning(f"[ImportManager] 解析文件 {file_path} 失败: {e}")
+            return result

-        tree = ast.parse(source_code)
+        # 推导 module dotted path → 构建 import_map
+        python_path = Path(file_path)
+        for sp in sorted(sys.path, key=len, reverse=True):
+            try:
+                Path(file_path).relative_to(sp)
+                python_path = Path(sp)
+                break
+            except ValueError:
+                continue
+        module_dotted = _filepath_to_module(Path(file_path), python_path)
+        import_map = _collect_imports(tree, module_dotted)
+        result["import_map"] = import_map

-        # 查找目标类
+        # 定位目标类 AST 节点
        target_class = None
        for node in ast.walk(tree):
-            if isinstance(node, ast.ClassDef):
-                if node.name == class_name:
-                    target_class = node
-                    break
+            if isinstance(node, ast.ClassDef) and node.name == class_name:
+                target_class = node
+                break

        if target_class is None:
-            raise AttributeError(f"在文件 {file_path} 中找不到类 {class_name}")
+            logger.warning(f"[ImportManager] 在文件 {file_path} 中找不到类 {class_name}")
+            return result

-        result = {
-            "class_name": class_name,
-            "init_params": {},
-            "status_methods": {},
-            "action_methods": {},
+        body = _extract_class_body(target_class, import_map)
+
+        # 映射到统一字段名（与 registry.py complete_registry 消费端一致）
+        result["init_params"] = body.get("init_params", [])
+        result["status_methods"] = body.get("status_properties", {})
+        result["action_methods"] = {
+            k: {
+                "args": v.get("params", []),
+                "return_type": v.get("return_type", ""),
+                "is_async": v.get("is_async", False),
+                "always_free": v.get("always_free", False),
+                "docstring": v.get("docstring"),
+            }
+            for k, v in body.get("auto_methods", {}).items()
        }
-
-        # 分析类的方法
-        for node in target_class.body:
-            if isinstance(node, ast.FunctionDef):
-                method_info = self._analyze_method_node(node)
-                method_name = node.name
-                if method_name == "__init__":
-                    result["init_params"] = method_info["args"]
-                elif method_name.startswith("_"):
-                    continue
-                elif self._is_property_method(node):
-                    # @property 装饰的方法
-                    result["status_methods"][method_name] = method_info
-                elif method_name.startswith("get_"):
-                    # get_ 开头的方法归类为status
-                    actual_name = method_name[4:]  # 去掉get_前缀
-                    if actual_name not in result["status_methods"]:
-                        result["status_methods"][actual_name] = method_info
-                else:
-                    # 检查是否被 @not_action 装饰器标记
-                    if self._is_not_action_method(node):
-                        continue
-                    # 其他非_开头的方法归类为action
-                    # 检查是否被 @always_free 装饰器标记
-                    if self._is_always_free_method(node):
-                        method_info["always_free"] = True
-                    result["action_methods"][method_name] = method_info
+        result["ast_analysis_success"] = True
        return result

    def _analyze_method_signature(self, method, skip_unilabos_params: bool = True) -> Dict[str, Any]:
@@ -401,23 +316,25 @@ class ImportManager:
            "name": method.__name__,
            "args": args,
            "return_type": self._get_type_string(signature.return_annotation),
-            "return_annotation": signature.return_annotation,  # 保留原始类型注解，用于TypedDict等特殊处理
            "is_async": inspect.iscoroutinefunction(method),
        }

-    def _get_return_type_from_method(self, method) -> str:
+    def _get_return_type_from_method(self, method) -> Union[str, Tuple[str, Any]]:
        """从方法中获取返回类型"""
        signature = inspect.signature(method)
        return self._get_type_string(signature.return_annotation)

    def _get_type_string(self, annotation) -> Union[str, Tuple[str, Any]]:
-        """将类型注解转换为Class Library中可搜索的类名"""
+        """将类型注解转换为短类名（与 AST _get_annotation_str 对齐）。
+
+        自定义类只返回短名（如 ``"SetLiquidReturn"``），完整路径由
+        ``import_map`` 负责解析，保持与 AST 分析一致。
+        """
        if annotation == inspect.Parameter.empty:
-            return "Any"  # 如果没有注解，返回Any
+            return "Any"
        if annotation is None:
-            return "None"  # 明确的None类型
+            return "None"
        if hasattr(annotation, "__origin__"):
-            # 处理typing模块的类型
            origin = annotation.__origin__
            if origin in (list, set, tuple):
                if hasattr(annotation, "__args__") and annotation.__args__:
@@ -432,132 +349,23 @@ class ImportManager:
                return "dict"
            elif origin is Optional:
                return "Unknown"
-            return f"Unknown"
+            return "Unknown"
        annotation_str = str(annotation)
-        # 处理typing模块的复杂类型
        if "typing." in annotation_str:
-            # 简化typing类型显示
            return (
                annotation_str.replace("typing.", "")
                if getattr(annotation, "_name", None) is None
                else annotation._name.lower()
            )
-        # 如果是类型对象
        if hasattr(annotation, "__name__"):
-            # 如果是内置类型
-            if annotation.__module__ == "builtins":
-                return annotation.__name__
-            else:
-                # 如果是自定义类，返回完整路径
-                return f"{annotation.__module__}:{annotation.__name__}"
-        # 如果是typing模块的类型
+            return annotation.__name__
        elif hasattr(annotation, "_name"):
            return annotation._name
-        # 如果是字符串形式的类型注解
        elif isinstance(annotation, str):
            return annotation
        else:
            return annotation_str

-    def _is_property_method(self, node: ast.FunctionDef) -> bool:
-        """检查是否是@property装饰的方法"""
-        for decorator in node.decorator_list:
-            if isinstance(decorator, ast.Name) and decorator.id == "property":
-                return True
-        return False
-
-    def _is_setter_method(self, node: ast.FunctionDef) -> bool:
-        """检查是否是@xxx.setter装饰的方法"""
-        for decorator in node.decorator_list:
-            if isinstance(decorator, ast.Attribute) and decorator.attr == "setter":
-                return True
-        return False
-
-    def _is_not_action_method(self, node: ast.FunctionDef) -> bool:
-        """检查是否是@not_action装饰的方法"""
-        for decorator in node.decorator_list:
-            if isinstance(decorator, ast.Name) and decorator.id == "not_action":
-                return True
-        return False
-
-    def _is_always_free_method(self, node: ast.FunctionDef) -> bool:
-        """检查是否是@always_free装饰的方法，或 @action(always_free=True) 装饰的方法"""
-        for decorator in node.decorator_list:
-            # 检查 @action(always_free=True)
-            if isinstance(decorator, ast.Call):
-                func = decorator.func
-                if isinstance(func, ast.Name) and func.id == "action":
-                    for keyword in decorator.keywords:
-                        if keyword.arg == "always_free":
-                            if isinstance(keyword.value, Constant) and keyword.value.value is True:
-                                return True
-        return False
-
-    def _get_property_name_from_setter(self, node: ast.FunctionDef) -> str:
-        """从setter装饰器中获取属性名"""
-        for decorator in node.decorator_list:
-            if isinstance(decorator, ast.Attribute) and decorator.attr == "setter":
-                if isinstance(decorator.value, ast.Name):
-                    return decorator.value.id
-        return node.name
-
-    def get_class_info_static(self, module_class_path: str) -> Dict[str, Any]:
-        """
-        静态分析获取类的方法信息，不需要实际导入模块
-
-        Args:
-            module_class_path: 格式为 "module.path:ClassName" 的字符串
-
-        Returns:
-            包含类方法信息的字典
-        """
-        try:
-            if ":" not in module_class_path:
-                raise ValueError("module_class_path必须是 'module.path:ClassName' 格式")
-
-            module_path, class_name = module_class_path.rsplit(":", 1)
-
-            # 将模块路径转换为文件路径
-            file_path = self._module_path_to_file_path(module_path)
-            if not file_path or not os.path.exists(file_path):
-                logger.warning(f"找不到模块文件: {module_path} -> {file_path}")
-                return {}
-
-            # 解析源码
-            with open(file_path, "r", encoding="utf-8") as f:
-                source_code = f.read()
-
-            tree = ast.parse(source_code)
-
-            # 查找目标类
-            class_node = None
-            for node in ast.walk(tree):
-                if isinstance(node, ast.ClassDef) and node.name == class_name:
-                    class_node = node
-                    break
-
-            if not class_node:
-                logger.warning(f"在模块 {module_path} 中找不到类 {class_name}")
-                return {}
-
-            # 分析类的方法
-            methods_info = {}
-            for node in class_node.body:
-                if isinstance(node, ast.FunctionDef):
-                    method_info = self._analyze_method_node(node)
-                    methods_info[node.name] = method_info
-
-            return {
-                "class_name": class_name,
-                "module_path": module_path,
-                "file_path": file_path,
-                "methods": methods_info,
-            }
-
-        except Exception as e:
-            logger.error(f"静态分析类 {module_class_path} 时出错: {str(e)}")
-            return {}
-
    def _module_path_to_file_path(self, module_path: str) -> Optional[str]:
        for path in sys.path:
            potential_path = Path(path) / module_path.replace(".", "/")
@@ -572,222 +380,6 @@ class ImportManager:

        return None

-    def _analyze_method_node(self, node: ast.FunctionDef) -> Dict[str, Any]:
-        """分析方法节点，提取参数和返回类型信息"""
-        method_info = {
-            "name": node.name,
-            "args": [],
-            "return_type": None,
-            "is_async": isinstance(node, ast.AsyncFunctionDef),
-        }
-        # 获取默认值列表
-        defaults = node.args.defaults
-        num_defaults = len(defaults)
-
-        # 计算必需参数数量
-        total_args = len(node.args.args)
-        num_required = total_args - num_defaults
-
-        # 提取参数信息
-        for i, arg in enumerate(node.args.args):
-            if arg.arg == "self":
-                continue
-            # 跳过 sample_uuids 参数（由系统自动注入）
-            if arg.arg == PARAM_SAMPLE_UUIDS:
-                continue
-            arg_info = {
-                "name": arg.arg,
-                "type": None,
-                "default": None,
-                "required": i < num_required,
-            }
-
-            # 提取类型注解
-            if arg.annotation:
-                arg_info["type"] = ast.unparse(arg.annotation) if hasattr(ast, "unparse") else str(arg.annotation)
-
-            # 提取默认值并推断类型
-            if i >= num_required:
-                default_index = i - num_required
-                if default_index < len(defaults):
-                    default_value: Constant = defaults[default_index]  # type: ignore
-                    assert isinstance(default_value, Constant), "暂不支持对非常量类型进行推断，可反馈开源仓库"
-                    arg_info["default"] = default_value.value
-                    # 如果没有类型注解，尝试从默认值推断类型
-                    if not arg_info["type"]:
-                        arg_info["type"] = self._get_type_string(type(arg_info["default"]))
-            method_info["args"].append(arg_info)
-
-        # 提取返回类型
-        if node.returns:
-            method_info["return_type"] = ast.unparse(node.returns) if hasattr(ast, "unparse") else str(node.returns)
-
-        return method_info
-
-    def _infer_type_from_default(self, node: ast.AST) -> Optional[str]:
-        """从默认值推断参数类型"""
-        if isinstance(node, ast.Constant):
-            value = node.value
-            if isinstance(value, bool):
-                return "bool"
-            elif isinstance(value, int):
-                return "int"
-            elif isinstance(value, float):
-                return "float"
-            elif isinstance(value, str):
-                return "str"
-            elif value is None:
-                return "Optional[Any]"
-        elif isinstance(node, ast.List):
-            return "List"
-        elif isinstance(node, ast.Dict):
-            return "Dict"
-        elif isinstance(node, ast.Tuple):
-            return "Tuple"
-        elif isinstance(node, ast.Set):
-            return "Set"
-        elif isinstance(node, ast.Name):
-            # 常见的默认值模式
-            if node.id in ["None"]:
-                return "Optional[Any]"
-            elif node.id in ["True", "False"]:
-                return "bool"
-
-        return None
-
-    def _infer_types_from_docstring(self, method_info: Dict[str, Any]) -> None:
-        """从docstring中推断参数类型"""
-        docstring = method_info.get("docstring", "")
-        if not docstring:
-            return
-
-        lines = docstring.split("\n")
-        in_args_section = False
-
-        for line in lines:
-            line = line.strip()
-
-            # 检测Args或Arguments段落
-            if line.lower().startswith(("args:", "arguments:")):
-                in_args_section = True
-                continue
-            elif line.startswith(("returns:", "return:", "yields:", "raises:")):
-                in_args_section = False
-                continue
-            elif not line or not in_args_section:
-                continue
-
-            # 解析参数行，格式通常是: param_name (type): description 或 param_name: description
-            if ":" in line:
-                parts = line.split(":", 1)
-                param_part = parts[0].strip()
-
-                # 提取参数名和类型
-                param_name = None
-                param_type = None
-
-                if "(" in param_part and ")" in param_part:
-                    # 格式: param_name (type)
-                    param_name = param_part.split("(")[0].strip()
-                    type_part = param_part.split("(")[1].split(")")[0].strip()
-                    param_type = type_part
-                else:
-                    # 格式: param_name
-                    param_name = param_part
-
-                # 更新对应参数的类型信息
-                if param_name:
-                    for arg_info in method_info["args"]:
-                        if arg_info["name"] == param_name and not arg_info["type"]:
-                            if param_type:
-                                arg_info["inferred_type"] = param_type
-                            elif not arg_info["inferred_type"]:
-                                # 从描述中推断类型
-                                description = parts[1].strip().lower()
-                                if any(word in description for word in ["path", "file", "directory", "filename"]):
-                                    arg_info["inferred_type"] = "str"
-                                elif any(
-                                    word in description for word in ["port", "number", "count", "size", "length"]
-                                ):
-                                    arg_info["inferred_type"] = "int"
-                                elif any(
-                                    word in description for word in ["rate", "ratio", "percentage", "temperature"]
-                                ):
-                                    arg_info["inferred_type"] = "float"
-                                elif any(word in description for word in ["flag", "enable", "disable", "option"]):
-                                    arg_info["inferred_type"] = "bool"
-
-    def get_registry_class_info(self, module_class_path: str) -> Dict[str, Any]:
-        """
-        获取适用于注册表的类信息，包含完整的类型推断
-
-        Args:
-            module_class_path: 格式为 "module.path:ClassName" 的字符串
-
-        Returns:
-            适用于注册表的类信息字典
-        """
-        class_info = self.get_class_info_static(module_class_path)
-        if not class_info:
-            return {}
-
-        registry_info = {
-            "class_name": class_info["class_name"],
-            "module_path": class_info["module_path"],
-            "file_path": class_info["file_path"],
-            "methods": {},
-            "properties": [],
-            "init_params": {},
-            "action_methods": {},
-        }
-
-        for method_name, method_info in class_info["methods"].items():
-            # 分类处理不同类型的方法
-            if method_info["is_property"]:
-                registry_info["properties"].append(
-                    {
-                        "name": method_name,
-                        "return_type": method_info.get("return_type"),
-                        "docstring": method_info.get("docstring"),
-                    }
-                )
-            elif method_name == "__init__":
-                # 处理初始化参数
-                init_params = {}
-                for arg in method_info["args"]:
-                    if arg["name"] != "self":
-                        param_info = {
-                            "name": arg["name"],
-                            "type": arg.get("type") or arg.get("inferred_type"),
-                            "required": arg.get("is_required", True),
-                            "default": arg.get("default"),
-                        }
-                        init_params[arg["name"]] = param_info
-                registry_info["init_params"] = init_params
-            elif not method_name.startswith("_"):
-                # 处理公共方法（可能的action方法）
-                action_info = {
-                    "name": method_name,
-                    "params": {},
-                    "return_type": method_info.get("return_type"),
-                    "docstring": method_info.get("docstring"),
-                    "num_required": method_info.get("num_required", 0) - 1,  # 减去self
-                    "num_defaults": method_info.get("num_defaults", 0),
-                }
-
-                for arg in method_info["args"]:
-                    if arg["name"] != "self":
-                        param_info = {
-                            "name": arg["name"],
-                            "type": arg.get("type") or arg.get("inferred_type"),
-                            "required": arg.get("is_required", True),
-                            "default": arg.get("default"),
-                        }
-                        action_info["params"][arg["name"]] = param_info
-
-                registry_info["action_methods"][method_name] = action_info
-
-        return registry_info


 # 全局实例，便于直接使用
@@ -815,16 +407,6 @@ def init_from_list(module_list: List[str]) -> None:
    default_manager = ImportManager(module_list)


-def get_class_info_static(module_class_path: str) -> Dict[str, Any]:
-    """静态分析获取类信息的便捷函数"""
-    return default_manager.get_class_info_static(module_class_path)
-
-
-def get_registry_class_info(module_class_path: str) -> Dict[str, Any]:
-    """获取适用于注册表的类信息的便捷函数"""
-    return default_manager.get_registry_class_info(module_class_path)
-
-
-def get_enhanced_class_info(module_path: str, use_dynamic: bool = True) -> Dict[str, Any]:
+def get_enhanced_class_info(module_path: str, **kwargs) -> Dict[str, Any]:
    """获取增强的类信息的便捷函数"""
-    return default_manager.get_enhanced_class_info(module_path, use_dynamic)
+    return default_manager.get_enhanced_class_info(module_path, **kwargs)