#!/usr/bin/env python3
"""
InsightFlow Phase 8 Task 4 test script.

Exercises the AI capability enhancement features exposed by ``ai_manager``:
custom models, prediction models, knowledge-graph RAG and smart summaries.
Results are printed to stdout; the process exit status is 0 when the whole
run completes and 1 when an uncaught exception aborts it.
"""
import asyncio
import os
import sys
import traceback

# Add backend directory to path. This must run BEFORE the project import
# below, otherwise it has no effect on resolving ``ai_manager``.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from ai_manager import (  # noqa: E402  (import intentionally follows sys.path setup)
    get_ai_manager,
    ModelType,
    PredictionType,
)


def test_custom_model():
    """Create a custom NER model, attach training samples and list models.

    Returns:
        The ``id`` attribute of the model returned by
        ``manager.create_custom_model``.
    """
    print("\n=== 测试自定义模型 ===")

    manager = get_ai_manager()

    # 1. Create the custom model
    print("1. 创建自定义模型...")
    model = manager.create_custom_model(
        tenant_id="tenant_001",
        name="领域实体识别模型",
        description="用于识别医疗领域实体的自定义模型",
        model_type=ModelType.CUSTOM_NER,
        training_data={
            "entity_types": ["DISEASE", "SYMPTOM", "DRUG", "TREATMENT"],
            "domain": "medical",
        },
        hyperparameters={
            "epochs": 15,
            "learning_rate": 0.001,
            "batch_size": 32,
        },
        created_by="user_001",
    )
    print(f" 创建成功: {model.id}, 状态: {model.status.value}")

    # 2. Add training samples.
    # Entity offsets are character indices into "text" with an exclusive
    # "end", i.e. text[start:end] == entity["text"].
    print("2. 添加训练样本...")
    samples = [
        {
            "text": "患者张三患有高血压,正在服用降压药治疗。",
            "entities": [
                {"start": 2, "end": 4, "label": "PERSON", "text": "张三"},
                {"start": 6, "end": 9, "label": "DISEASE", "text": "高血压"},
                {"start": 14, "end": 17, "label": "DRUG", "text": "降压药"},
            ],
        },
        {
            "text": "李四因感冒发烧到医院就诊,医生开具了退烧药。",
            "entities": [
                {"start": 0, "end": 2, "label": "PERSON", "text": "李四"},
                {"start": 3, "end": 5, "label": "SYMPTOM", "text": "感冒"},
                {"start": 5, "end": 7, "label": "SYMPTOM", "text": "发烧"},
                # Fixed: was 21-24, which pointed past the end of the text.
                {"start": 18, "end": 21, "label": "DRUG", "text": "退烧药"},
            ],
        },
        {
            "text": "王五接受了心脏搭桥手术,术后恢复良好。",
            "entities": [
                {"start": 0, "end": 2, "label": "PERSON", "text": "王五"},
                {"start": 5, "end": 11, "label": "TREATMENT", "text": "心脏搭桥手术"},
            ],
        },
    ]

    for sample_data in samples:
        sample = manager.add_training_sample(
            model_id=model.id,
            text=sample_data["text"],
            entities=sample_data["entities"],
            metadata={"source": "manual"},
        )
        print(f" 添加样本: {sample.id}")

    # 3. Fetch the training samples back
    print("3. 获取训练样本...")
    all_samples = manager.get_training_samples(model.id)
    print(f" 共有 {len(all_samples)} 个训练样本")

    # 4. List custom models for the tenant
    print("4. 列出自定义模型...")
    models = manager.list_custom_models(tenant_id="tenant_001")
    print(f" 找到 {len(models)} 个模型")
    for m in models:
        print(f" - {m.name} ({m.model_type.value}): {m.status.value}")

    return model.id


async def test_train_and_predict(model_id: str):
    """Train the given custom model, then run one prediction with it.

    Training or prediction failures are reported on stdout and do not
    propagate; a training failure skips the prediction step.

    Args:
        model_id: Identifier of the custom model to train and query.
    """
    print("\n=== 测试模型训练和预测 ===")

    manager = get_ai_manager()

    # 1. Train the model
    print("1. 训练模型...")
    try:
        trained_model = await manager.train_custom_model(model_id)
        print(f" 训练完成: {trained_model.status.value}")
        print(f" 指标: {trained_model.metrics}")
    except Exception as e:
        print(f" 训练失败: {e}")
        return

    # 2. Predict with the trained model
    print("2. 使用模型预测...")
    test_text = "赵六患有糖尿病,正在使用胰岛素治疗。"
    try:
        entities = await manager.predict_with_custom_model(model_id, test_text)
        print(f" 输入: {test_text}")
        print(f" 预测实体: {entities}")
    except Exception as e:
        print(f" 预测失败: {e}")


def test_prediction_models():
    """Create a trend model and an anomaly model, then list prediction models.

    Returns:
        A ``(trend_model_id, anomaly_model_id)`` tuple taken from the ``id``
        attributes of the two created models.
    """
    print("\n=== 测试预测模型 ===")

    manager = get_ai_manager()

    # 1. Create the trend prediction model
    print("1. 创建趋势预测模型...")
    trend_model = manager.create_prediction_model(
        tenant_id="tenant_001",
        project_id="project_001",
        name="实体数量趋势预测",
        prediction_type=PredictionType.TREND,
        target_entity_type="PERSON",
        features=["entity_count", "time_period", "document_count"],
        model_config={
            "algorithm": "linear_regression",
            "window_size": 7,
        },
    )
    print(f" 创建成功: {trend_model.id}")

    # 2. Create the anomaly detection model
    print("2. 创建异常检测模型...")
    anomaly_model = manager.create_prediction_model(
        tenant_id="tenant_001",
        project_id="project_001",
        name="实体增长异常检测",
        prediction_type=PredictionType.ANOMALY,
        target_entity_type=None,
        features=["daily_growth", "weekly_growth"],
        model_config={
            "threshold": 2.5,
            "sensitivity": "medium",
        },
    )
    print(f" 创建成功: {anomaly_model.id}")

    # 3. List prediction models for the tenant
    print("3. 列出预测模型...")
    models = manager.list_prediction_models(tenant_id="tenant_001")
    print(f" 找到 {len(models)} 个预测模型")
    for m in models:
        print(f" - {m.name} ({m.prediction_type.value})")

    return trend_model.id, anomaly_model.id


async def test_predictions(trend_model_id: str, anomaly_model_id: str):
    """Train the trend model and run one trend and one anomaly prediction.

    Unlike the other async tests, exceptions raised here are not caught
    locally and propagate to the caller.

    Args:
        trend_model_id: Identifier of the trend prediction model.
        anomaly_model_id: Identifier of the anomaly detection model.
    """
    print("\n=== 测试预测功能 ===")

    manager = get_ai_manager()

    # 1. Train the trend prediction model
    print("1. 训练趋势预测模型...")
    historical_data = [
        {"date": "2024-01-01", "value": 10},
        {"date": "2024-01-02", "value": 12},
        {"date": "2024-01-03", "value": 15},
        {"date": "2024-01-04", "value": 14},
        {"date": "2024-01-05", "value": 18},
        {"date": "2024-01-06", "value": 20},
        {"date": "2024-01-07", "value": 22},
    ]
    trained = await manager.train_prediction_model(trend_model_id, historical_data)
    print(f" 训练完成,准确率: {trained.accuracy}")

    # 2. Trend prediction
    print("2. 趋势预测...")
    trend_result = await manager.predict(
        trend_model_id,
        {"historical_values": [10, 12, 15, 14, 18, 20, 22]},
    )
    print(f" 预测结果: {trend_result.prediction_data}")

    # 3. Anomaly detection
    print("3. 异常检测...")
    anomaly_result = await manager.predict(
        anomaly_model_id,
        {
            "value": 50,
            "historical_values": [10, 12, 11, 13, 12, 14, 13],
        },
    )
    print(f" 检测结果: {anomaly_result.prediction_data}")


def test_kg_rag():
    """Create a knowledge-graph RAG configuration and list configurations.

    Returns:
        The ``id`` attribute of the object returned by
        ``manager.create_kg_rag``.
    """
    print("\n=== 测试知识图谱 RAG ===")

    manager = get_ai_manager()

    # Create the RAG configuration
    print("1. 创建知识图谱 RAG 配置...")
    rag = manager.create_kg_rag(
        tenant_id="tenant_001",
        project_id="project_001",
        name="项目知识问答",
        description="基于项目知识图谱的智能问答",
        kg_config={
            "entity_types": ["PERSON", "ORG", "PROJECT", "TECH"],
            "relation_types": ["works_with", "belongs_to", "depends_on"],
        },
        retrieval_config={
            "top_k": 5,
            "similarity_threshold": 0.7,
            "expand_relations": True,
        },
        generation_config={
            "temperature": 0.3,
            "max_tokens": 1000,
            "include_sources": True,
        },
    )
    print(f" 创建成功: {rag.id}")

    # List the RAG configurations for the tenant
    print("2. 列出 RAG 配置...")
    rags = manager.list_kg_rags(tenant_id="tenant_001")
    print(f" 找到 {len(rags)} 个配置")

    return rag.id


async def test_kg_rag_query(rag_id: str):
    """Run one RAG query against a small in-memory set of entities/relations.

    A query failure is reported on stdout and does not propagate.

    Args:
        rag_id: Identifier of the RAG configuration to query.
    """
    print("\n=== 测试知识图谱 RAG 查询 ===")

    manager = get_ai_manager()

    # Mock project entities and relations
    project_entities = [
        {"id": "e1", "name": "张三", "type": "PERSON", "definition": "项目经理"},
        {"id": "e2", "name": "李四", "type": "PERSON", "definition": "技术负责人"},
        {"id": "e3", "name": "Project Alpha", "type": "PROJECT", "definition": "核心产品项目"},
        {"id": "e4", "name": "Kubernetes", "type": "TECH", "definition": "容器编排平台"},
        {"id": "e5", "name": "TechCorp", "type": "ORG", "definition": "科技公司"},
    ]

    project_relations = [
        {
            "source_entity_id": "e1",
            "target_entity_id": "e3",
            "source_name": "张三",
            "target_name": "Project Alpha",
            "relation_type": "works_with",
            "evidence": "张三负责 Project Alpha 的管理工作",
        },
        {
            "source_entity_id": "e2",
            "target_entity_id": "e3",
            "source_name": "李四",
            "target_name": "Project Alpha",
            "relation_type": "works_with",
            "evidence": "李四负责 Project Alpha 的技术架构",
        },
        {
            "source_entity_id": "e3",
            "target_entity_id": "e4",
            "source_name": "Project Alpha",
            "target_name": "Kubernetes",
            "relation_type": "depends_on",
            "evidence": "项目使用 Kubernetes 进行部署",
        },
        {
            "source_entity_id": "e1",
            "target_entity_id": "e5",
            "source_name": "张三",
            "target_name": "TechCorp",
            "relation_type": "belongs_to",
            "evidence": "张三是 TechCorp 的员工",
        },
    ]

    # Execute the query
    print("1. 执行 RAG 查询...")
    query_text = "Project Alpha 项目有哪些人参与?使用了什么技术?"

    try:
        result = await manager.query_kg_rag(
            rag_id=rag_id,
            query=query_text,
            project_entities=project_entities,
            project_relations=project_relations,
        )

        print(f" 查询: {result.query}")
        print(f" 回答: {result.answer[:200]}...")
        print(f" 置信度: {result.confidence}")
        print(f" 来源: {len(result.sources)} 个实体")
        print(f" 延迟: {result.latency_ms}ms")
    except Exception as e:
        print(f" 查询失败: {e}")


async def test_smart_summary():
    """Request extractive, abstractive and key-point summaries of a transcript.

    Each summary type is attempted independently; a failure for one type is
    reported on stdout and the loop continues with the next type.
    """
    print("\n=== 测试智能摘要 ===")

    manager = get_ai_manager()

    # Mock transcript text (adjacent literals are concatenated into one string)
    transcript_text = (
        " 今天的会议主要讨论了 Project Alpha 的进展情况。张三作为项目经理, "
        "汇报了当前的项目进度,表示已经完成了 80% 的开发工作。李四提出了 "
        "一些关于 Kubernetes 部署的问题,建议我们采用新的部署策略。 "
        "会议还讨论了下一步的工作计划,包括测试、文档编写和上线准备。 "
        "大家一致认为项目进展顺利,预计可以按时交付。 "
    )

    content_data = {
        "text": transcript_text,
        "entities": [
            {"name": "张三", "type": "PERSON"},
            {"name": "李四", "type": "PERSON"},
            {"name": "Project Alpha", "type": "PROJECT"},
            {"name": "Kubernetes", "type": "TECH"},
        ],
    }

    # Generate each kind of summary
    summary_types = ["extractive", "abstractive", "key_points"]

    for summary_type in summary_types:
        print(f"1. 生成 {summary_type} 类型摘要...")
        try:
            summary = await manager.generate_smart_summary(
                tenant_id="tenant_001",
                project_id="project_001",
                source_type="transcript",
                source_id="transcript_001",
                summary_type=summary_type,
                content_data=content_data,
            )

            print(f" 摘要类型: {summary.summary_type}")
            print(f" 内容: {summary.content[:150]}...")
            print(f" 关键要点: {summary.key_points[:3]}")
            print(f" 置信度: {summary.confidence}")
        except Exception as e:
            print(f" 生成失败: {e}")


async def main() -> int:
    """Run every test stage in order and report the overall outcome.

    Returns:
        0 when all stages ran to completion, 1 when an exception escaped a
        stage (the traceback is printed before returning).
    """
    print("=" * 60)
    print("InsightFlow Phase 8 Task 4 - AI 能力增强测试")
    print("=" * 60)

    try:
        # Custom model creation and samples
        model_id = test_custom_model()

        # Custom model training and prediction
        await test_train_and_predict(model_id)

        # Prediction model creation
        trend_model_id, anomaly_model_id = test_prediction_models()

        # Prediction model training and inference
        await test_predictions(trend_model_id, anomaly_model_id)

        # Knowledge-graph RAG configuration
        rag_id = test_kg_rag()

        # Knowledge-graph RAG query
        await test_kg_rag_query(rag_id)

        # Smart summaries
        await test_smart_summary()

        print("\n" + "=" * 60)
        print("所有测试完成!")
        print("=" * 60)

    except Exception as e:
        print(f"\n测试失败: {e}")
        traceback.print_exc()
        # Signal the failure to the caller instead of silently succeeding.
        return 1

    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so CI can detect failures.
    sys.exit(asyncio.run(main()))