# insightflow/backend/test_phase8_task4.py

#!/usr/bin/env python3
"""
InsightFlow Phase 8 Task 4 测试脚本
测试 AI 能力增强功能
"""

import asyncio
import os
import sys

# Put this file's directory (the backend directory) on sys.path *before*
# importing project modules: a path tweak placed after the import cannot
# influence that import.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from ai_manager import ModelType, PredictionType, get_ai_manager  # noqa: E402


def test_custom_model() -> str:
    """Exercise the custom-model workflow and return the created model's id.

    Steps, each announced on stdout: create a custom NER model for tenant
    ``tenant_001``, attach three annotated training samples, read the samples
    back, and list the tenant's custom models.

    Returns:
        ``model.id`` of the model created in step 1. ``main`` hands it to
        ``test_train_and_predict``, which annotates it as ``str``.
    """
    print("\n=== 测试自定义模型 ===")

    manager = get_ai_manager()

    # 1. Create the custom model.
    print("1. 创建自定义模型...")
    model = manager.create_custom_model(
        tenant_id="tenant_001",
        name="领域实体识别模型",
        description="用于识别医疗领域实体的自定义模型",
        model_type=ModelType.CUSTOM_NER,
        training_data={
            "entity_types": ["DISEASE", "SYMPTOM", "DRUG", "TREATMENT"],
            "domain": "medical",
        },
        hyperparameters={"epochs": 15, "learning_rate": 0.001, "batch_size": 32},
        created_by="user_001",
    )
    print(f"   创建成功: {model.id}, 状态: {model.status.value}")

    # 2. Add training samples.
    # Every span is written so that text[start:end] equals the entity's "text".
    # NOTE(review): the samples use a PERSON label that is not listed in the
    # model's entity_types above - confirm that this is intended.
    print("2. 添加训练样本...")
    samples = [
        {
            "text": "患者张三患有高血压，正在服用降压药治疗。",
            "entities": [
                {"start": 2, "end": 4, "label": "PERSON", "text": "张三"},
                {"start": 6, "end": 9, "label": "DISEASE", "text": "高血压"},
                {"start": 14, "end": 17, "label": "DRUG", "text": "降压药"},
            ],
        },
        {
            "text": "李四因感冒发烧到医院就诊，医生开具了退烧药。",
            "entities": [
                {"start": 0, "end": 2, "label": "PERSON", "text": "李四"},
                {"start": 3, "end": 5, "label": "SYMPTOM", "text": "感冒"},
                {"start": 5, "end": 7, "label": "SYMPTOM", "text": "发烧"},
                # "退烧药" occupies characters 18..20 of the sentence; the
                # previous (21, 24) span pointed past it at the final "。".
                {"start": 18, "end": 21, "label": "DRUG", "text": "退烧药"},
            ],
        },
        {
            "text": "王五接受了心脏搭桥手术，术后恢复良好。",
            "entities": [
                {"start": 0, "end": 2, "label": "PERSON", "text": "王五"},
                {"start": 5, "end": 11, "label": "TREATMENT", "text": "心脏搭桥手术"},
            ],
        },
    ]

    for sample_data in samples:
        sample = manager.add_training_sample(
            model_id=model.id,
            text=sample_data["text"],
            entities=sample_data["entities"],
            metadata={"source": "manual"},
        )
        print(f"   添加样本: {sample.id}")

    # 3. Read the training samples back.
    print("3. 获取训练样本...")
    all_samples = manager.get_training_samples(model.id)
    print(f"   共有 {len(all_samples)} 个训练样本")

    # 4. List the tenant's custom models.
    print("4. 列出自定义模型...")
    models = manager.list_custom_models(tenant_id="tenant_001")
    print(f"   找到 {len(models)} 个模型")
    for m in models:
        print(f"   - {m.name} ({m.model_type.value}): {m.status.value}")

    return model.id


async def test_train_and_predict(model_id: str) -> None:
    """Train the given custom model, then run one prediction with it.

    Progress and results are printed. If training raises, the error is
    printed and the prediction step is skipped; a prediction error is
    printed as well and never propagates to the caller.

    Args:
        model_id: Id of the custom model to train and query.
    """
    print("\n=== 测试模型训练和预测 ===")

    ai = get_ai_manager()

    # Step 1: training. Without a trained model there is nothing to predict
    # with, so a failure here ends the scenario early.
    print("1. 训练模型...")
    try:
        fitted = await ai.train_custom_model(model_id)
        print(f"   训练完成: {fitted.status.value}")
        print(f"   指标: {fitted.metrics}")
    except Exception as train_err:
        print(f"   训练失败: {train_err}")
        return

    # Step 2: prediction on a sentence that is not part of the training set.
    print("2. 使用模型预测...")
    sentence = "赵六患有糖尿病，正在使用胰岛素治疗。"
    try:
        predicted = await ai.predict_with_custom_model(model_id, sentence)
        print(f"   输入: {sentence}")
        print(f"   预测实体: {predicted}")
    except Exception as predict_err:
        print(f"   预测失败: {predict_err}")

def test_prediction_models() -> "tuple[str, str]":
    """Create a trend model and an anomaly model, then list the tenant's models.

    Both models are created for tenant ``tenant_001`` / project
    ``project_001``; progress is printed to stdout.

    Returns:
        ``(trend_model.id, anomaly_model.id)``. ``main`` unpacks the pair and
        forwards it to ``test_predictions``, which annotates both as ``str``.
    """
    print("\n=== 测试预测模型 ===")

    manager = get_ai_manager()

    # 1. Create the trend prediction model.
    print("1. 创建趋势预测模型...")
    trend_model = manager.create_prediction_model(
        tenant_id="tenant_001",
        project_id="project_001",
        name="实体数量趋势预测",
        prediction_type=PredictionType.TREND,
        target_entity_type="PERSON",
        features=["entity_count", "time_period", "document_count"],
        model_config={"algorithm": "linear_regression", "window_size": 7},
    )
    print(f"   创建成功: {trend_model.id}")

    # 2. Create the anomaly detection model (no target entity type).
    print("2. 创建异常检测模型...")
    anomaly_model = manager.create_prediction_model(
        tenant_id="tenant_001",
        project_id="project_001",
        name="实体增长异常检测",
        prediction_type=PredictionType.ANOMALY,
        target_entity_type=None,
        features=["daily_growth", "weekly_growth"],
        model_config={"threshold": 2.5, "sensitivity": "medium"},
    )
    print(f"   创建成功: {anomaly_model.id}")

    # 3. List the tenant's prediction models.
    print("3. 列出预测模型...")
    models = manager.list_prediction_models(tenant_id="tenant_001")
    print(f"   找到 {len(models)} 个预测模型")
    for m in models:
        print(f"   - {m.name} ({m.prediction_type.value})")

    return trend_model.id, anomaly_model.id


async def test_predictions(trend_model_id: str, anomaly_model_id: str) -> None:
    """Train the trend model, then request a trend forecast and an anomaly check.

    Results are printed to stdout. Errors are not caught here; they propagate
    to the caller.

    Args:
        trend_model_id: Id of the trend prediction model to train and query.
        anomaly_model_id: Id of the anomaly detection model to query.
    """
    print("\n=== 测试预测功能 ===")

    ai = get_ai_manager()

    # One week of daily values, 2024-01-01 through 2024-01-07. The same
    # numbers feed both the training history and the trend request.
    daily_values = [10, 12, 15, 14, 18, 20, 22]
    history = [
        {"date": f"2024-01-{day:02d}", "value": value}
        for day, value in enumerate(daily_values, start=1)
    ]

    # Step 1: train the trend model on the dated history.
    print("1. 训练趋势预测模型...")
    fitted = await ai.train_prediction_model(trend_model_id, history)
    print(f"   训练完成，准确率: {fitted.accuracy}")

    # Step 2: trend forecast from the bare value series.
    print("2. 趋势预测...")
    trend_request = {"historical_values": daily_values}
    forecast = await ai.predict(trend_model_id, trend_request)
    print(f"   预测结果: {forecast.prediction_data}")

    # Step 3: anomaly check - a value of 50 against a baseline of 10-14.
    print("3. 异常检测...")
    anomaly_request = {"value": 50, "historical_values": [10, 12, 11, 13, 12, 14, 13]}
    verdict = await ai.predict(anomaly_model_id, anomaly_request)
    print(f"   检测结果: {verdict.prediction_data}")


def test_kg_rag() -> str:
    """Create a knowledge-graph RAG configuration and list the tenant's configs.

    The configuration is created for tenant ``tenant_001`` / project
    ``project_001``; progress is printed to stdout.

    Returns:
        ``rag.id`` of the created configuration. ``main`` passes it to
        ``test_kg_rag_query``, which annotates it as ``str``.
    """
    print("\n=== 测试知识图谱 RAG ===")

    manager = get_ai_manager()

    # 1. Create the RAG configuration.
    print("1. 创建知识图谱 RAG 配置...")
    rag = manager.create_kg_rag(
        tenant_id="tenant_001",
        project_id="project_001",
        name="项目知识问答",
        description="基于项目知识图谱的智能问答",
        kg_config={
            "entity_types": ["PERSON", "ORG", "PROJECT", "TECH"],
            "relation_types": ["works_with", "belongs_to", "depends_on"],
        },
        retrieval_config={"top_k": 5, "similarity_threshold": 0.7, "expand_relations": True},
        generation_config={"temperature": 0.3, "max_tokens": 1000, "include_sources": True},
    )
    print(f"   创建成功: {rag.id}")

    # 2. List the tenant's RAG configurations.
    print("2. 列出 RAG 配置...")
    rags = manager.list_kg_rags(tenant_id="tenant_001")
    print(f"   找到 {len(rags)} 个配置")

    return rag.id


async def test_kg_rag_query(rag_id: str) -> None:
    """Run one question through a knowledge-graph RAG configuration.

    A small in-memory graph (five entities, four relations) is passed along
    with the question. The answer's fields are printed; any exception raised
    by the query or while printing is caught and reported on stdout.

    Args:
        rag_id: Id of the RAG configuration to query.
    """
    print("\n=== 测试知识图谱 RAG 查询 ===")

    ai = get_ai_manager()

    # Mock project entities as (id, name, type, definition) rows.
    entity_rows = [
        ("e1", "张三", "PERSON", "项目经理"),
        ("e2", "李四", "PERSON", "技术负责人"),
        ("e3", "Project Alpha", "PROJECT", "核心产品项目"),
        ("e4", "Kubernetes", "TECH", "容器编排平台"),
        ("e5", "TechCorp", "ORG", "科技公司"),
    ]
    project_entities = [
        {"id": entity_id, "name": name, "type": entity_type, "definition": definition}
        for entity_id, name, entity_type, definition in entity_rows
    ]
    name_of = {entity_id: name for entity_id, name, _, _ in entity_rows}

    # Mock relations as (source id, target id, relation type, evidence) rows;
    # the display names are looked up from the entity table above.
    relation_rows = [
        ("e1", "e3", "works_with", "张三负责 Project Alpha 的管理工作"),
        ("e2", "e3", "works_with", "李四负责 Project Alpha 的技术架构"),
        ("e3", "e4", "depends_on", "项目使用 Kubernetes 进行部署"),
        ("e1", "e5", "belongs_to", "张三是 TechCorp 的员工"),
    ]
    project_relations = [
        {
            "source_entity_id": source_id,
            "target_entity_id": target_id,
            "source_name": name_of[source_id],
            "target_name": name_of[target_id],
            "relation_type": relation_type,
            "evidence": evidence,
        }
        for source_id, target_id, relation_type, evidence in relation_rows
    ]

    # Run the query.
    print("1. 执行 RAG 查询...")
    question = "Project Alpha 项目有哪些人参与？使用了什么技术？"

    try:
        answer = await ai.query_kg_rag(
            rag_id=rag_id,
            query=question,
            project_entities=project_entities,
            project_relations=project_relations,
        )

        print(f"   查询: {answer.query}")
        print(f"   回答: {answer.answer[:200]}...")
        print(f"   置信度: {answer.confidence}")
        print(f"   来源: {len(answer.sources)} 个实体")
        print(f"   延迟: {answer.latency_ms}ms")
    except Exception as query_err:
        print(f"   查询失败: {query_err}")


async def test_smart_summary() -> None:
    """Generate one smart summary per summary type for a mock transcript.

    The types ``extractive``, ``abstractive`` and ``key_points`` are tried in
    turn. A failure for one type is printed and does not stop the others.
    """
    print("\n=== 测试智能摘要 ===")

    manager = get_ai_manager()

    # Mock transcript text.
    transcript_text = """
    今天的会议主要讨论了 Project Alpha 的进展情况。张三作为项目经理，
    汇报了当前的项目进度，表示已经完成了 80% 的开发工作。李四提出了
    一些关于 Kubernetes 部署的问题，建议我们采用新的部署策略。
    会议还讨论了下一步的工作计划，包括测试、文档编写和上线准备。
    大家一致认为项目进展顺利，预计可以按时交付。
    """

    content_data = {
        "text": transcript_text,
        "entities": [
            {"name": "张三", "type": "PERSON"},
            {"name": "李四", "type": "PERSON"},
            {"name": "Project Alpha", "type": "PROJECT"},
            {"name": "Kubernetes", "type": "TECH"},
        ],
    }

    # Generate each kind of summary.
    summary_types = ["extractive", "abstractive", "key_points"]

    # Number the steps 1, 2, 3; previously every iteration printed "1.".
    for step, summary_type in enumerate(summary_types, start=1):
        print(f"{step}. 生成 {summary_type} 类型摘要...")
        try:
            summary = await manager.generate_smart_summary(
                tenant_id="tenant_001",
                project_id="project_001",
                source_type="transcript",
                source_id="transcript_001",
                summary_type=summary_type,
                content_data=content_data,
            )

            print(f"   摘要类型: {summary.summary_type}")
            print(f"   内容: {summary.content[:150]}...")
            print(f"   关键要点: {summary.key_points[:3]}")
            print(f"   置信度: {summary.confidence}")
        except Exception as e:
            print(f"   生成失败: {e}")


async def main() -> None:
    """Run every Phase 8 Task 4 scenario in order.

    Ids returned by the synchronous setup scenarios are fed into the async
    scenarios that follow them. Any exception that escapes a scenario is
    caught here: its message and traceback are printed and the remaining
    scenarios are skipped. Nothing is re-raised.
    """
    # 60-character rule. The previous " = " * 60 printed a 180-character
    # " =  =  = ..." line instead of a solid separator.
    banner = "=" * 60

    print(banner)
    print("InsightFlow Phase 8 Task 4 - AI 能力增强测试")
    print(banner)

    try:
        # Custom model: create it and add samples.
        model_id = test_custom_model()

        # Custom model: train and predict.
        await test_train_and_predict(model_id)

        # Prediction models: create.
        trend_model_id, anomaly_model_id = test_prediction_models()

        # Prediction models: train and predict.
        await test_predictions(trend_model_id, anomaly_model_id)

        # Knowledge-graph RAG: create the configuration.
        rag_id = test_kg_rag()

        # Knowledge-graph RAG: query.
        await test_kg_rag_query(rag_id)

        # Smart summaries.
        await test_smart_summary()

        print("\n" + banner)
        print("所有测试完成！")
        print(banner)

    except Exception as e:
        print(f"\n测试失败: {e}")
        # Imported lazily: only needed on the failure path.
        import traceback

        traceback.print_exc()


# Script entry point: run the async scenario sequence only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())