389 lines
13 KiB
Python
389 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
InsightFlow Phase 8 Task 4 测试脚本
|
|
测试 AI 能力增强功能
|
|
"""
|
|
|
|
import asyncio
import os
import sys

# The directory containing this script has to be on sys.path *before*
# ai_manager is imported; otherwise the path tweak cannot help the import
# below when the script is launched from another working directory or
# loaded as a module.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from ai_manager import ModelType, PredictionType, get_ai_manager  # noqa: E402
|
|
|
|
def test_custom_model():
    """Exercise the custom-model workflow: create, add samples, read back, list.

    Creates a custom NER model for ``tenant_001``, attaches three hand-labelled
    training sentences to it, fetches the stored samples and finally lists the
    tenant's custom models, printing progress after every step.

    Returns:
        The ``id`` of the model that was created, so later steps can train it.
    """
    print("\n=== 测试自定义模型 ===")

    manager = get_ai_manager()

    # 1. Create the custom model.
    print("1. 创建自定义模型...")
    model = manager.create_custom_model(
        tenant_id="tenant_001",
        name="领域实体识别模型",
        description="用于识别医疗领域实体的自定义模型",
        model_type=ModelType.CUSTOM_NER,
        training_data={
            "entity_types": ["DISEASE", "SYMPTOM", "DRUG", "TREATMENT"],
            "domain": "medical",
        },
        hyperparameters={
            "epochs": 15,
            "learning_rate": 0.001,
            "batch_size": 32,
        },
        created_by="user_001",
    )
    print(f" 创建成功: {model.id}, 状态: {model.status.value}")

    # 2. Add training samples.
    # Each entity span is a pair of character offsets into "text" with an
    # exclusive end, i.e. text[start:end] equals the entity's own "text".
    print("2. 添加训练样本...")
    samples = [
        {
            "text": "患者张三患有高血压,正在服用降压药治疗。",
            "entities": [
                {"start": 2, "end": 4, "label": "PERSON", "text": "张三"},
                {"start": 6, "end": 9, "label": "DISEASE", "text": "高血压"},
                {"start": 14, "end": 17, "label": "DRUG", "text": "降压药"},
            ],
        },
        {
            "text": "李四因感冒发烧到医院就诊,医生开具了退烧药。",
            "entities": [
                {"start": 0, "end": 2, "label": "PERSON", "text": "李四"},
                {"start": 3, "end": 5, "label": "SYMPTOM", "text": "感冒"},
                {"start": 5, "end": 7, "label": "SYMPTOM", "text": "发烧"},
                # Fixed: the span used to be 21-24, which only covers the
                # trailing full stop; the drug name sits at offsets 18-21.
                {"start": 18, "end": 21, "label": "DRUG", "text": "退烧药"},
            ],
        },
        {
            "text": "王五接受了心脏搭桥手术,术后恢复良好。",
            "entities": [
                {"start": 0, "end": 2, "label": "PERSON", "text": "王五"},
                {"start": 5, "end": 11, "label": "TREATMENT", "text": "心脏搭桥手术"},
            ],
        },
    ]

    for sample_data in samples:
        sample = manager.add_training_sample(
            model_id=model.id,
            text=sample_data["text"],
            entities=sample_data["entities"],
            metadata={"source": "manual"},
        )
        print(f" 添加样本: {sample.id}")

    # 3. Read the training samples back.
    print("3. 获取训练样本...")
    all_samples = manager.get_training_samples(model.id)
    print(f" 共有 {len(all_samples)} 个训练样本")

    # 4. List the tenant's custom models.
    print("4. 列出自定义模型...")
    models = manager.list_custom_models(tenant_id="tenant_001")
    print(f" 找到 {len(models)} 个模型")
    for m in models:
        print(f" - {m.name} ({m.model_type.value}): {m.status.value}")

    return model.id
|
|
|
|
async def test_train_and_predict(model_id: str):
    """Train the given custom model, then run a single prediction with it.

    Args:
        model_id: Identifier of the custom model to train and query.

    A training failure is printed and ends this step early; a prediction
    failure is printed and otherwise ignored.
    """
    print("\n=== 测试模型训练和预测 ===")

    ai = get_ai_manager()

    # Step 1: training. If it fails, the prediction step below is skipped.
    print("1. 训练模型...")
    try:
        outcome = await ai.train_custom_model(model_id)
        print(f" 训练完成: {outcome.status.value}")
        print(f" 指标: {outcome.metrics}")
    except Exception as exc:
        print(f" 训练失败: {exc}")
        return

    # Step 2: prediction on a sentence that is not part of the training set.
    print("2. 使用模型预测...")
    sentence = "赵六患有糖尿病,正在使用胰岛素治疗。"
    try:
        predicted = await ai.predict_with_custom_model(model_id, sentence)
        print(f" 输入: {sentence}")
        print(f" 预测实体: {predicted}")
    except Exception as exc:
        print(f" 预测失败: {exc}")
|
|
|
|
def test_prediction_models():
    """Create a trend model and an anomaly model, then list the tenant's models.

    Returns:
        A ``(trend_model_id, anomaly_model_id)`` tuple with the ids of the two
        prediction models created here.
    """
    print("\n=== 测试预测模型 ===")

    ai = get_ai_manager()

    # Both models are created for the same tenant and project.
    scope = {"tenant_id": "tenant_001", "project_id": "project_001"}

    # 1. Trend prediction model.
    print("1. 创建趋势预测模型...")
    trend_settings = {
        "algorithm": "linear_regression",
        "window_size": 7,
    }
    trend = ai.create_prediction_model(
        **scope,
        name="实体数量趋势预测",
        prediction_type=PredictionType.TREND,
        target_entity_type="PERSON",
        features=["entity_count", "time_period", "document_count"],
        model_config=trend_settings,
    )
    print(f" 创建成功: {trend.id}")

    # 2. Anomaly detection model (no target entity type is given).
    print("2. 创建异常检测模型...")
    anomaly_settings = {
        "threshold": 2.5,
        "sensitivity": "medium",
    }
    anomaly = ai.create_prediction_model(
        **scope,
        name="实体增长异常检测",
        prediction_type=PredictionType.ANOMALY,
        target_entity_type=None,
        features=["daily_growth", "weekly_growth"],
        model_config=anomaly_settings,
    )
    print(f" 创建成功: {anomaly.id}")

    # 3. List every prediction model of the tenant.
    print("3. 列出预测模型...")
    listed = ai.list_prediction_models(tenant_id="tenant_001")
    print(f" 找到 {len(listed)} 个预测模型")
    for entry in listed:
        print(f" - {entry.name} ({entry.prediction_type.value})")

    return trend.id, anomaly.id
|
|
|
|
async def test_predictions(trend_model_id: str, anomaly_model_id: str):
    """Train the trend model, then request a trend forecast and an anomaly check.

    Args:
        trend_model_id: Id of the trend prediction model to train and query.
        anomaly_model_id: Id of the anomaly detection model to query.
    """
    print("\n=== 测试预测功能 ===")

    ai = get_ai_manager()

    # One observation per day for 2024-01-01 .. 2024-01-07.
    daily_counts = [10, 12, 15, 14, 18, 20, 22]

    # 1. Train the trend model on the dated series.
    print("1. 训练趋势预测模型...")
    history = [
        {"date": f"2024-01-{day:02d}", "value": count}
        for day, count in enumerate(daily_counts, start=1)
    ]
    fitted = await ai.train_prediction_model(trend_model_id, history)
    print(f" 训练完成,准确率: {fitted.accuracy}")

    # 2. Ask for a trend forecast from the same values.
    print("2. 趋势预测...")
    trend_payload = {"historical_values": list(daily_counts)}
    forecast = await ai.predict(trend_model_id, trend_payload)
    print(f" 预测结果: {forecast.prediction_data}")

    # 3. Check a value of 50 against a short baseline of values between 10 and 14.
    print("3. 异常检测...")
    anomaly_payload = {
        "value": 50,
        "historical_values": [10, 12, 11, 13, 12, 14, 13],
    }
    verdict = await ai.predict(anomaly_model_id, anomaly_payload)
    print(f" 检测结果: {verdict.prediction_data}")
|
|
|
|
def test_kg_rag():
    """Create a knowledge-graph RAG configuration and list the tenant's configs.

    Returns:
        The ``id`` of the RAG configuration that was created.
    """
    print("\n=== 测试知识图谱 RAG ===")

    ai = get_ai_manager()

    # The three configuration sections are spelled out first so the
    # creation call itself stays short.
    graph_settings = {
        "entity_types": ["PERSON", "ORG", "PROJECT", "TECH"],
        "relation_types": ["works_with", "belongs_to", "depends_on"],
    }
    retrieval_settings = {
        "top_k": 5,
        "similarity_threshold": 0.7,
        "expand_relations": True,
    }
    generation_settings = {
        "temperature": 0.3,
        "max_tokens": 1000,
        "include_sources": True,
    }

    # Create the RAG configuration.
    print("1. 创建知识图谱 RAG 配置...")
    created = ai.create_kg_rag(
        tenant_id="tenant_001",
        project_id="project_001",
        name="项目知识问答",
        description="基于项目知识图谱的智能问答",
        kg_config=graph_settings,
        retrieval_config=retrieval_settings,
        generation_config=generation_settings,
    )
    print(f" 创建成功: {created.id}")

    # List the tenant's RAG configurations.
    print("2. 列出 RAG 配置...")
    configs = ai.list_kg_rags(tenant_id="tenant_001")
    print(f" 找到 {len(configs)} 个配置")

    return created.id
|
|
|
|
async def test_kg_rag_query(rag_id: str):
    """Run one question through a RAG configuration over a mock project graph.

    Args:
        rag_id: Id of the knowledge-graph RAG configuration to query.

    A query failure is printed and otherwise ignored.
    """
    print("\n=== 测试知识图谱 RAG 查询 ===")

    ai = get_ai_manager()

    # Mock project entities as (id, name, type, definition) rows.
    entity_rows = [
        ("e1", "张三", "PERSON", "项目经理"),
        ("e2", "李四", "PERSON", "技术负责人"),
        ("e3", "Project Alpha", "PROJECT", "核心产品项目"),
        ("e4", "Kubernetes", "TECH", "容器编排平台"),
        ("e5", "TechCorp", "ORG", "科技公司"),
    ]
    project_entities = [
        {"id": ent_id, "name": ent_name, "type": ent_type, "definition": ent_def}
        for ent_id, ent_name, ent_type, ent_def in entity_rows
    ]

    # Mock relations as (source id, target id, relation type, evidence) rows;
    # the endpoint names are looked up from the entity table above.
    name_of = {ent["id"]: ent["name"] for ent in project_entities}
    relation_rows = [
        ("e1", "e3", "works_with", "张三负责 Project Alpha 的管理工作"),
        ("e2", "e3", "works_with", "李四负责 Project Alpha 的技术架构"),
        ("e3", "e4", "depends_on", "项目使用 Kubernetes 进行部署"),
        ("e1", "e5", "belongs_to", "张三是 TechCorp 的员工"),
    ]
    project_relations = [
        {
            "source_entity_id": src,
            "target_entity_id": dst,
            "source_name": name_of[src],
            "target_name": name_of[dst],
            "relation_type": rel_type,
            "evidence": evidence,
        }
        for src, dst, rel_type, evidence in relation_rows
    ]

    # Execute the query.
    print("1. 执行 RAG 查询...")
    question = "Project Alpha 项目有哪些人参与?使用了什么技术?"

    try:
        reply = await ai.query_kg_rag(
            rag_id=rag_id,
            query=question,
            project_entities=project_entities,
            project_relations=project_relations,
        )

        print(f" 查询: {reply.query}")
        print(f" 回答: {reply.answer[:200]}...")
        print(f" 置信度: {reply.confidence}")
        print(f" 来源: {len(reply.sources)} 个实体")
        print(f" 延迟: {reply.latency_ms}ms")
    except Exception as exc:
        print(f" 查询失败: {exc}")
|
|
|
|
async def test_smart_summary():
    """Request extractive, abstractive and key-point summaries of a transcript.

    The same mock meeting transcript and entity list are submitted once per
    summary type. A failure for one type is printed and the loop continues
    with the next type.
    """
    print("\n=== 测试智能摘要 ===")

    ai = get_ai_manager()

    # Mock meeting transcript.
    transcript_text = """
今天的会议主要讨论了 Project Alpha 的进展情况。张三作为项目经理,
汇报了当前的项目进度,表示已经完成了 80% 的开发工作。李四提出了
一些关于 Kubernetes 部署的问题,建议我们采用新的部署策略。
会议还讨论了下一步的工作计划,包括测试、文档编写和上线准备。
大家一致认为项目进展顺利,预计可以按时交付。
"""

    # Entities mentioned in the transcript, as (name, type) pairs.
    mentioned = [
        ("张三", "PERSON"),
        ("李四", "PERSON"),
        ("Project Alpha", "PROJECT"),
        ("Kubernetes", "TECH"),
    ]
    content_data = {
        "text": transcript_text,
        "entities": [{"name": label, "type": kind} for label, kind in mentioned],
    }

    # Generate one summary per supported type.
    for summary_type in ("extractive", "abstractive", "key_points"):
        print(f"1. 生成 {summary_type} 类型摘要...")
        try:
            digest = await ai.generate_smart_summary(
                tenant_id="tenant_001",
                project_id="project_001",
                source_type="transcript",
                source_id="transcript_001",
                summary_type=summary_type,
                content_data=content_data,
            )

            print(f" 摘要类型: {digest.summary_type}")
            print(f" 内容: {digest.content[:150]}...")
            print(f" 关键要点: {digest.key_points[:3]}")
            print(f" 置信度: {digest.confidence}")
        except Exception as exc:
            print(f" 生成失败: {exc}")
|
|
|
|
async def main():
    """Run every test step in order, feeding created ids into later steps.

    Any exception that escapes a step is printed together with its traceback
    and is not re-raised.
    """
    rule = "=" * 60
    print(rule)
    print("InsightFlow Phase 8 Task 4 - AI 能力增强测试")
    print(rule)

    try:
        # Custom model: create it, then train and predict with it.
        custom_model_id = test_custom_model()
        await test_train_and_predict(custom_model_id)

        # Prediction models: create them, then train and query them.
        trend_id, anomaly_id = test_prediction_models()
        await test_predictions(trend_id, anomaly_id)

        # Knowledge-graph RAG: create a configuration, then query it.
        rag_config_id = test_kg_rag()
        await test_kg_rag_query(rag_config_id)

        # Smart summaries.
        await test_smart_summary()

        print("\n" + rule)
        print("所有测试完成!")
        print(rule)

    except Exception as exc:
        print(f"\n测试失败: {exc}")
        import traceback

        traceback.print_exc()
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async test sequence to completion.
    asyncio.run(main())
|