Files
Uni-Lab-OS/.cursor/skills/batch-submit-experiment/scripts/gen_notebook_params.py
2026-03-25 00:42:28 +08:00

395 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
从 workflow 模板详情 + 本地设备注册表生成 notebook 提交用的 node_params 模板。
用法:
python gen_notebook_params.py --auth <token> --base <url> --workflow-uuid <uuid> [选项]
选项:
--auth <token> Lab token(base64(ak:sk) 的结果,不含 "Lab " 前缀)
--base <url> API 基础 URL,如 https://uni-lab.test.bohrium.com
--workflow-uuid <uuid> 目标 workflow 的 UUID
--registry <path> 本地注册表文件路径(默认自动搜索)
--rounds <n> 实验轮次数(默认 1)
--output <path> 输出模板文件路径(默认 notebook_template.json)
--dump-response 打印 workflow detail API 的原始响应(调试用)
示例:
python gen_notebook_params.py \\
--auth YTFmZDlkNGUtxxxx \\
--base https://uni-lab.test.bohrium.com \\
--workflow-uuid abc-123-def \\
--rounds 2
"""
import copy
import json
import os
import sys
from datetime import datetime
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError
# File name of the local device-registry JSON file that find_registry() looks for.
REGISTRY_FILENAME = "req_device_registry_upload.json"
def find_registry(explicit_path=None):
    """Locate the local device-registry file.

    The original note says the lookup mirrors ``extract_device_actions.py``
    (that script is not visible from this file).

    Search order when ``explicit_path`` is not given:

    1. ``unilabos_data/<REGISTRY_FILENAME>`` and ``<REGISTRY_FILENAME>``
       relative to the current working directory;
    2. the same two candidates under the directories three and four levels
       above this script;
    3. the same two candidates in up to five parent directories of the
       current working directory.

    Args:
        explicit_path: Optional path to the registry file itself, or to a
            directory that contains it. When given, no automatic search is
            performed.

    Returns:
        The path of the first registry file found, or ``None`` when nothing
        matches (a warning is printed if ``explicit_path`` was given).
    """
    if explicit_path:
        if os.path.isfile(explicit_path):
            return explicit_path
        if os.path.isdir(explicit_path):
            inside_dir = os.path.join(explicit_path, REGISTRY_FILENAME)
            if os.path.isfile(inside_dir):
                return inside_dir
        print(f"警告: 指定的注册表路径不存在: {explicit_path}")
        return None

    candidates = [
        os.path.join("unilabos_data", REGISTRY_FILENAME),
        REGISTRY_FILENAME,
    ]

    def first_existing(base_dir=None):
        """Return the first candidate that exists under base_dir (or cwd)."""
        for relative in candidates:
            path = relative if base_dir is None else os.path.join(base_dir, relative)
            if os.path.isfile(path):
                return path
        return None

    # 1. Relative to the current working directory.
    found = first_existing()
    if found:
        return found

    # 2. Relative to the workspace that contains this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    three_up = os.path.normpath(os.path.join(script_dir, "..", "..", ".."))
    # For a script living in <repo>/.cursor/skills/<skill>/scripts/, three
    # levels up is the ".cursor" directory, not the repository root, so the
    # next level up is probed as well. The original location is checked
    # first to keep the previous precedence.
    four_up = os.path.dirname(three_up)
    for root in (three_up, four_up):
        found = first_existing(root)
        if found:
            return found

    # 3. Walk up from the current working directory (at most five levels).
    cwd = os.getcwd()
    for _ in range(5):
        parent = os.path.dirname(cwd)
        if parent == cwd:
            # Reached the filesystem root.
            break
        cwd = parent
        found = first_existing(cwd)
        if found:
            return found
    return None
def load_registry(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed data."""
    with open(path, mode="r", encoding="utf-8") as handle:
        registry = json.load(handle)
    return registry
def build_registry_index(registry_data):
    """Build an index mapping device id to its ``action_value_mappings``.

    Args:
        registry_data: Parsed registry dict; its ``"resources"`` entry is
            read as a list of resource dicts with ``"id"`` and
            ``"class" -> "action_value_mappings"``.

    Returns:
        ``{resource_id: action_value_mappings}`` for every resource that has
        both a non-empty id and non-empty mappings. Entries that are not
        dicts, or whose ``"class"`` is missing, null or not a dict, are
        skipped instead of raising.
    """
    index = {}
    # "resources": null is treated the same as a missing key.
    for res in registry_data.get("resources") or []:
        if not isinstance(res, dict):
            continue
        rid = res.get("id", "")
        cls = res.get("class")
        avm = cls.get("action_value_mappings") if isinstance(cls, dict) else None
        if rid and avm:
            index[rid] = avm
    return index
def flatten_goal_schema(action_data):
    """Extract the goal-level schema from an ``action_value_mappings`` entry.

    Args:
        action_data: One action entry; ``action_data["schema"]`` is read.

    Returns:
        ``schema["properties"]["goal"]`` when it is present and non-empty,
        otherwise the whole ``schema``. A missing or null ``"schema"`` yields
        an empty dict; a null ``"properties"`` is treated as empty.
    """
    schema = action_data.get("schema") or {}
    properties = schema.get("properties") or {}
    goal_schema = properties.get("goal") or {}
    return goal_schema if goal_schema else schema
def build_param_template(goal_schema):
    """Generate a ``param`` template from a goal schema.

    Each property becomes a key in the template. The value is, in order of
    precedence: a copy of the property's non-null ``default``; a
    ``"$TODO (...)"`` marker for strings (annotated required/optional);
    ``0`` for number/integer; ``False`` for boolean; ``[]`` for array;
    ``{}`` for object; a ``"$TODO (<type>)"`` marker for anything else.
    The ``unilabos_device_id`` property is skipped.

    Args:
        goal_schema: Dict with optional ``"properties"`` and ``"required"``.

    Returns:
        A new dict that shares no mutable objects with ``goal_schema``.
    """
    properties = goal_schema.get("properties") or {}
    required = set(goal_schema.get("required") or [])
    template = {}
    for field_name, field_def in properties.items():
        if field_name == "unilabos_device_id":
            continue
        if not isinstance(field_def, dict):
            # e.g. a boolean schema; treat as a field with unknown type.
            field_def = {}
        ftype = field_def.get("type", "any")
        default = field_def.get("default")
        if default is not None:
            # Copy so editing the template never mutates the registry schema.
            template[field_name] = copy.deepcopy(default)
        elif ftype == "string":
            necessity = "required" if field_name in required else "optional"
            template[field_name] = f"$TODO ({ftype}, {necessity})"
        elif ftype in ("number", "integer"):
            template[field_name] = 0
        elif ftype == "boolean":
            template[field_name] = False
        elif ftype == "array":
            template[field_name] = []
        elif ftype == "object":
            template[field_name] = {}
        else:
            template[field_name] = f"$TODO ({ftype})"
    return template
def fetch_workflow_detail(base_url, auth_token, workflow_uuid):
    """Call the workflow template detail API and return the decoded JSON.

    Sends ``GET {base_url}/api/v1/lab/workflow/template/detail/{workflow_uuid}``
    with an ``Authorization: Lab <auth_token>`` header and a 30 second timeout.

    Args:
        base_url: API base URL without a trailing slash.
        auth_token: Token placed after the ``"Lab "`` prefix in the header.
        workflow_uuid: UUID of the workflow template to fetch.

    Returns:
        The parsed JSON response, or ``None`` after printing a message when
        the request fails (HTTP error, network error, timeout) or the body
        is not valid UTF-8 JSON.
    """
    url = f"{base_url}/api/v1/lab/workflow/template/detail/{workflow_uuid}"
    req = Request(url, method="GET")
    req.add_header("Authorization", f"Lab {auth_token}")
    try:
        with urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except HTTPError as e:
        # HTTPError must be handled before URLError (it is a subclass).
        body = e.read().decode("utf-8", errors="replace")
        print(f"API 错误 {e.code}: {body}")
        return None
    except URLError as e:
        print(f"网络错误: {e.reason}")
        return None
    except OSError as e:
        # Read timeouts and connection resets are raised as plain OSError,
        # not wrapped in URLError.
        print(f"网络错误: {e}")
        return None

    try:
        return json.loads(raw.decode("utf-8"))
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError, e.g. an HTML
        # error page returned with status 200.
        print(f"响应解析失败: {e}")
        return None
def _first_truthy(node, keys):
    """Return the first truthy ``node[key]`` for ``key`` in ``keys``, else ``""``."""
    for key in keys:
        value = node.get(key)
        if value:
            return value
    return ""


def extract_nodes_from_response(response):
    """Extract the list of action nodes from a workflow detail response.

    Several response layouts are accepted: the payload is taken from
    ``response["data"]`` (or the response itself when that key is absent);
    the node list is then looked up under ``nodes`` / ``workflow_nodes`` /
    ``node_list`` / ``steps``, falling back to the payload itself when it is
    a list, or to the first non-empty list-of-dicts value in the payload.

    Args:
        response: Decoded JSON response (normally a dict).

    Returns:
        A list of ``(node_uuid, resource_template_name, node_template_name,
        existing_param)`` tuples. Nodes without any id field are dropped.
        An empty list is returned, after printing a warning, when no node
        list can be found, including when the payload is null or a scalar.
    """
    data = response.get("data", response) if isinstance(response, dict) else response

    nodes_raw = None
    if isinstance(data, dict):
        for key in ("nodes", "workflow_nodes", "node_list", "steps"):
            if isinstance(data.get(key), list):
                nodes_raw = data[key]
                break
        if nodes_raw is None:
            # No well-known key: take the first non-empty list of dicts.
            for value in data.values():
                if isinstance(value, list) and value and isinstance(value[0], dict):
                    nodes_raw = value
                    break
    elif isinstance(data, list):
        nodes_raw = data

    if not nodes_raw:
        print("警告: 未能从响应中提取节点列表")
        print("响应顶层 keys:", list(data.keys()) if isinstance(data, dict) else type(data).__name__)
        return []

    result = []
    for node in nodes_raw:
        if not isinstance(node, dict):
            continue
        node_uuid = _first_truthy(node, ("uuid", "node_uuid", "id"))
        if not node_uuid:
            continue
        resource_name = _first_truthy(
            node,
            ("resource_template_name", "device_id", "resource_name", "device_name"),
        )
        template_name = _first_truthy(
            node,
            ("node_template_name", "action_name", "template_name", "action", "name"),
        )
        # "param": null is normalised to an empty dict.
        existing_param = node.get("param", {}) or {}
        result.append((node_uuid, resource_name, template_name, existing_param))
    return result
def generate_template(nodes, registry_index, rounds):
    """Generate the notebook submission template.

    Args:
        nodes: Iterable of ``(node_uuid, resource_name, template_name,
            existing_param)`` tuples.
        registry_index: Mapping of device id to its action mappings.
        rounds: Number of experiment rounds; one ``node_params`` entry is
            produced per round.

    Returns:
        A dict with placeholder ``lab_uuid`` / ``workflow_uuid`` / ``name``
        values, the per-round ``node_params`` list and a reference-only
        schema-info section keyed by node uuid.
    """
    schema_info = {}
    per_round_datas = []

    for node_uuid, resource_name, template_name, existing_param in nodes:
        has_schema = (
            bool(resource_name and template_name)
            and resource_name in registry_index
            and template_name in registry_index[resource_name]
        )

        if has_schema:
            action = registry_index[resource_name][template_name]
            goal_schema = flatten_goal_schema(action)
            param_template = build_param_template(goal_schema)
            # Non-null goal defaults override the generated placeholders,
            # but only for fields the schema actually declares.
            for key, value in (action.get("goal_default", {}) or {}).items():
                if key in param_template and value is not None:
                    param_template[key] = value
            schema_info[node_uuid] = {
                "device_id": resource_name,
                "action_name": template_name,
                "action_type": action.get("type", ""),
                "schema_properties": list(goal_schema.get("properties", {}).keys()),
                "required": goal_schema.get("required", []),
            }
        elif existing_param:
            # No schema match: reuse whatever param the workflow already has.
            param_template = existing_param
        else:
            param_template = {}
            schema_info[node_uuid] = {
                "device_id": resource_name,
                "action_name": template_name,
                "warning": "未在本地注册表中找到匹配的 action schema",
            }

        sample_placeholder = {
            "container_uuid": "$TODO_CONTAINER_UUID",
            "sample_value": {
                "liquid_names": "$TODO_LIQUID_NAME",
                "volumes": 0,
            },
        }
        per_round_datas.append({
            "node_uuid": node_uuid,
            "param": param_template,
            "sample_params": [sample_placeholder],
        })

    # Every round gets its own deep copy so rounds can be edited independently.
    node_params = [
        {
            "sample_uuids": f"$TODO_SAMPLE_UUID_ROUND_{round_no}",
            "datas": copy.deepcopy(per_round_datas),
        }
        for round_no in range(1, rounds + 1)
    ]

    return {
        "lab_uuid": "$TODO_LAB_UUID",
        "workflow_uuid": "$TODO_WORKFLOW_UUID",
        "name": "$TODO_EXPERIMENT_NAME",
        "node_params": node_params,
        "_schema_info仅参考提交时删除": schema_info,
    }
def parse_args(argv):
    """Parse the command-line arguments with a minimal hand-rolled parser.

    Args:
        argv: Argument list without the program name (``sys.argv[1:]``).

    Returns:
        A dict with the keys ``auth``, ``base`` (trailing slashes removed),
        ``workflow_uuid``, ``registry``, ``rounds`` (int, default 1),
        ``output`` (default ``notebook_template.json``) and
        ``dump_response`` (bool).

    Raises:
        SystemExit: When ``--rounds`` is not a positive integer.

    Unknown arguments, and value flags given without a value, are reported
    on stdout and skipped.
    """
    opts = {
        "auth": None,
        "base": None,
        "workflow_uuid": None,
        "registry": None,
        "rounds": 1,
        "output": "notebook_template.json",
        "dump_response": False,
    }
    # Flags that consume the following argument, mapped to their opts key.
    value_flags = {
        "--auth": "auth",
        "--base": "base",
        "--workflow-uuid": "workflow_uuid",
        "--registry": "registry",
        "--rounds": "rounds",
        "--output": "output",
    }

    i = 0
    while i < len(argv):
        arg = argv[i]
        if arg == "--dump-response":
            opts["dump_response"] = True
            i += 1
            continue

        key = value_flags.get(arg)
        if key is None:
            print(f"未知参数: {arg}")
            i += 1
            continue
        if i + 1 >= len(argv):
            # A known flag at the very end with no value to consume.
            print(f"参数缺少取值: {arg}")
            i += 1
            continue

        raw = argv[i + 1]
        if key == "base":
            opts[key] = raw.rstrip("/")
        elif key == "rounds":
            try:
                rounds = int(raw)
            except ValueError:
                rounds = 0
            if rounds < 1:
                print(f"错误: --rounds 必须是正整数: {raw}")
                sys.exit(1)
            opts[key] = rounds
        else:
            opts[key] = raw
        i += 2
    return opts
def main():
    """Command-line entry point.

    Steps: parse the arguments, load the local registry (optional), fetch
    the workflow detail from the API, extract its action nodes, then write
    the notebook parameter template as JSON to the output path.

    Exits with status 1 when required arguments are missing, the workflow
    detail cannot be fetched, or no action node is found.
    """
    opts = parse_args(sys.argv[1:])
    if not opts["auth"] or not opts["base"] or not opts["workflow_uuid"]:
        print("用法:")
        print(" python gen_notebook_params.py --auth <token> --base <url> --workflow-uuid <uuid> [选项]")
        print()
        print("必需参数:")
        print(" --auth <token> Lab token(base64(ak:sk))")
        print(" --base <url> API 基础 URL")
        print(" --workflow-uuid <uuid> 目标 workflow UUID")
        print()
        print("可选参数:")
        print(" --registry <path> 注册表文件路径(默认自动搜索)")
        print(" --rounds <n> 实验轮次数(默认 1)")
        print(" --output <path> 输出文件路径(默认 notebook_template.json)")
        print(" --dump-response 打印 API 原始响应")
        sys.exit(1)

    # 1. Locate and load the local registry (optional).
    registry_path = find_registry(opts["registry"])
    registry_index = {}
    if registry_path:
        mtime = os.path.getmtime(registry_path)
        gen_time = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
        print(f"注册表: {registry_path} (生成时间: {gen_time})")
        registry_data = load_registry(registry_path)
        registry_index = build_registry_index(registry_data)
        print(f"已索引 {len(registry_index)} 个设备的 action schemas")
    else:
        print("警告: 未找到本地注册表,将跳过 param 模板生成")
        print(" 提交时需要手动填写各节点的 param 字段")

    # 2. Fetch the workflow detail.
    print(f"\n正在获取 workflow 详情: {opts['workflow_uuid']}")
    response = fetch_workflow_detail(opts["base"], opts["auth"], opts["workflow_uuid"])
    if not response:
        print("错误: 无法获取 workflow 详情")
        sys.exit(1)
    if opts["dump_response"]:
        print("\n=== API 原始响应 ===")
        print(json.dumps(response, indent=2, ensure_ascii=False)[:5000])
        print("=== 响应结束(截断至 5000 字符) ===\n")

    # 3. Extract the action nodes.
    nodes = extract_nodes_from_response(response)
    if not nodes:
        print("错误: 未能从 workflow 中提取任何 action 节点")
        print("请使用 --dump-response 查看原始响应结构")
        sys.exit(1)
    print(f"\n找到 {len(nodes)} 个 action 节点:")
    print(f" {'节点 UUID':<40} {'设备 ID':<30} {'动作名':<25} {'Schema'}")
    print(" " + "-" * 110)
    for node_uuid, resource_name, template_name, _ in nodes:
        has_schema = (
            resource_name in registry_index
            and template_name in registry_index.get(resource_name, {})
        )
        # Both branches used to be the empty string, which left the Schema
        # column blank for every node; use distinct markers instead.
        matched = "✓" if has_schema else "✗"
        print(f" {node_uuid:<40} {resource_name:<30} {template_name:<25} {matched}")

    # 4. Generate the template and write it out.
    template = generate_template(nodes, registry_index, opts["rounds"])
    template["workflow_uuid"] = opts["workflow_uuid"]
    output_path = opts["output"]
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(template, f, indent=2, ensure_ascii=False)
    print(f"\n模板已写入: {output_path}")
    print(f" 轮次数: {opts['rounds']}")
    print(f" 节点数/轮: {len(nodes)}")
    print()
    print("下一步:")
    print(" 1. 打开模板文件,将 $TODO 占位符替换为实际值")
    print(" 2. 删除 _schema_info 字段(仅供参考)")
    print(" 3. 使用 POST /api/v1/lab/notebook 提交")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()