fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
2026-02-27 18:09:24 +08:00
parent 646b64daf7
commit 17bda3dbce
38 changed files with 1993 additions and 1972 deletions
--- a/backend/ai_manager.py
+++ b/backend/ai_manager.py
@@ -8,25 +8,25 @@ AI 能力增强模块
 - 预测性分析（趋势预测、异常检测）
 """

-import os
-import json
-import sqlite3
-import httpx
 import asyncio
+import json
+import os
 import random
+import sqlite3
 import statistics
-from typing import List, Dict, Optional
+import uuid
+from collections import defaultdict
 from dataclasses import dataclass
 from datetime import datetime
-from enum import Enum
-from collections import defaultdict
-import uuid
+from enum import StrEnum
+
+import httpx

 # Database path
 DB_PATH = os.path.join(os.path.dirname(__file__), "insightflow.db")


-class ModelType(str, Enum):
+class ModelType(StrEnum):
    """模型类型"""

    CUSTOM_NER = "custom_ner"  # 自定义实体识别
@@ -35,7 +35,7 @@ class ModelType(str, Enum):
    PREDICTION = "prediction"  # 预测


-class ModelStatus(str, Enum):
+class ModelStatus(StrEnum):
    """模型状态"""

    PENDING = "pending"
@@ -45,7 +45,7 @@ class ModelStatus(str, Enum):
    ARCHIVED = "archived"


-class MultimodalProvider(str, Enum):
+class MultimodalProvider(StrEnum):
    """多模态模型提供商"""

    GPT4V = "gpt-4-vision"
@@ -54,7 +54,7 @@ class MultimodalProvider(str, Enum):
    KIMI_VL = "kimi-vl"


-class PredictionType(str, Enum):
+class PredictionType(StrEnum):
    """预测类型"""

    TREND = "trend"  # 趋势预测
@@ -73,13 +73,13 @@ class CustomModel:
    description: str
    model_type: ModelType
    status: ModelStatus
-    training_data: Dict  # 训练数据配置
-    hyperparameters: Dict  # 超参数
-    metrics: Dict  # 训练指标
-    model_path: Optional[str]  # 模型文件路径
+    training_data: dict  # 训练数据配置
+    hyperparameters: dict  # 超参数
+    metrics: dict  # 训练指标
+    model_path: str | None  # 模型文件路径
    created_at: str
    updated_at: str
-    trained_at: Optional[str]
+    trained_at: str | None
    created_by: str


@@ -90,8 +90,8 @@ class TrainingSample:
    id: str
    model_id: str
    text: str
-    entities: List[Dict]  # [{"start": 0, "end": 5, "label": "PERSON", "text": "张三"}]
-    metadata: Dict
+    entities: list[dict]  # [{"start": 0, "end": 5, "label": "PERSON", "text": "张三"}]
+    metadata: dict
    created_at: str


@@ -104,9 +104,9 @@ class MultimodalAnalysis:
    project_id: str
    provider: MultimodalProvider
    input_type: str  # image, video, audio, mixed
-    input_urls: List[str]
+    input_urls: list[str]
    prompt: str
-    result: Dict  # 分析结果
+    result: dict  # 分析结果
    tokens_used: int
    cost: float
    created_at: str
@@ -121,9 +121,9 @@ class KnowledgeGraphRAG:
    project_id: str
    name: str
    description: str
-    kg_config: Dict  # 知识图谱配置
-    retrieval_config: Dict  # 检索配置
-    generation_config: Dict  # 生成配置
+    kg_config: dict  # 知识图谱配置
+    retrieval_config: dict  # 检索配置
+    generation_config: dict  # 生成配置
    is_active: bool
    created_at: str
    updated_at: str
@@ -136,9 +136,9 @@ class RAGQuery:
    id: str
    rag_id: str
    query: str
-    context: Dict  # 检索到的上下文
+    context: dict  # 检索到的上下文
    answer: str
-    sources: List[Dict]  # 来源信息
+    sources: list[dict]  # 来源信息
    confidence: float
    tokens_used: int
    latency_ms: int
@@ -154,11 +154,11 @@ class PredictionModel:
    project_id: str
    name: str
    prediction_type: PredictionType
-    target_entity_type: Optional[str]  # 目标实体类型
-    features: List[str]  # 特征列表
-    model_config: Dict  # 模型配置
-    accuracy: Optional[float]
-    last_trained_at: Optional[str]
+    target_entity_type: str | None  # 目标实体类型
+    features: list[str]  # 特征列表
+    model_config: dict  # 模型配置
+    accuracy: float | None
+    last_trained_at: str | None
    prediction_count: int
    is_active: bool
    created_at: str
@@ -172,12 +172,12 @@ class PredictionResult:
    id: str
    model_id: str
    prediction_type: PredictionType
-    target_id: Optional[str]  # 预测目标ID
-    prediction_data: Dict  # 预测数据
+    target_id: str | None  # 预测目标ID
+    prediction_data: dict  # 预测数据
    confidence: float
    explanation: str  # 预测解释
-    actual_value: Optional[str]  # 实际值（用于验证）
-    is_correct: Optional[bool]
+    actual_value: str | None  # 实际值（用于验证）
+    is_correct: bool | None
    created_at: str


@@ -192,8 +192,8 @@ class SmartSummary:
    source_id: str
    summary_type: str  # extractive, abstractive, key_points, timeline
    content: str
-    key_points: List[str]
-    entities_mentioned: List[str]
+    key_points: list[str]
+    entities_mentioned: list[str]
    confidence: float
    tokens_used: int
    created_at: str
@@ -223,8 +223,8 @@ class AIManager:
        name: str,
        description: str,
        model_type: ModelType,
-        training_data: Dict,
-        hyperparameters: Dict,
+        training_data: dict,
+        hyperparameters: dict,
        created_by: str,
    ) -> CustomModel:
        """创建自定义模型"""
@@ -277,7 +277,7 @@ class AIManager:

        return model

-    def get_custom_model(self, model_id: str) -> Optional[CustomModel]:
+    def get_custom_model(self, model_id: str) -> CustomModel | None:
        """获取自定义模型"""
        with self._get_db() as conn:
            row = conn.execute("SELECT * FROM custom_models WHERE id = ?", (model_id,)).fetchone()
@@ -288,8 +288,8 @@ class AIManager:
            return self._row_to_custom_model(row)

    def list_custom_models(
-        self, tenant_id: str, model_type: Optional[ModelType] = None, status: Optional[ModelStatus] = None
-    ) -> List[CustomModel]:
+        self, tenant_id: str, model_type: ModelType | None = None, status: ModelStatus | None = None
+    ) -> list[CustomModel]:
        """列出自定义模型"""
        query = "SELECT * FROM custom_models WHERE tenant_id = ?"
        params = [tenant_id]
@@ -308,7 +308,7 @@ class AIManager:
            return [self._row_to_custom_model(row) for row in rows]

    def add_training_sample(
-        self, model_id: str, text: str, entities: List[Dict], metadata: Dict = None
+        self, model_id: str, text: str, entities: list[dict], metadata: dict = None
    ) -> TrainingSample:
        """添加训练样本"""
        sample_id = f"ts_{uuid.uuid4().hex[:16]}"
@@ -338,7 +338,7 @@ class AIManager:

        return sample

-    def get_training_samples(self, model_id: str) -> List[TrainingSample]:
+    def get_training_samples(self, model_id: str) -> list[TrainingSample]:
        """获取训练样本"""
        with self._get_db() as conn:
            rows = conn.execute(
@@ -410,7 +410,7 @@ class AIManager:
                conn.commit()
            raise e

-    async def predict_with_custom_model(self, model_id: str, text: str) -> List[Dict]:
+    async def predict_with_custom_model(self, model_id: str, text: str) -> list[dict]:
        """使用自定义模型进行预测"""
        model = self.get_custom_model(model_id)
        if not model or model.status != ModelStatus.READY:
@@ -461,7 +461,7 @@ class AIManager:
        project_id: str,
        provider: MultimodalProvider,
        input_type: str,
-        input_urls: List[str],
+        input_urls: list[str],
        prompt: str,
    ) -> MultimodalAnalysis:
        """多模态分析"""
@@ -517,7 +517,7 @@ class AIManager:

        return analysis

-    async def _call_gpt4v(self, image_urls: List[str], prompt: str) -> Dict:
+    async def _call_gpt4v(self, image_urls: list[str], prompt: str) -> dict:
        """调用 GPT-4V"""
        headers = {"Authorization": f"Bearer {self.openai_api_key}", "Content-Type": "application/json"}

@@ -544,7 +544,7 @@ class AIManager:
                "cost": result["usage"]["total_tokens"] * 0.00001,  # 估算成本
            }

-    async def _call_claude3(self, image_urls: List[str], prompt: str) -> Dict:
+    async def _call_claude3(self, image_urls: list[str], prompt: str) -> dict:
        """调用 Claude 3"""
        headers = {
            "x-api-key": self.anthropic_api_key,
@@ -576,7 +576,7 @@ class AIManager:
                "cost": (result["usage"]["input_tokens"] + result["usage"]["output_tokens"]) * 0.000015,
            }

-    async def _call_kimi_multimodal(self, image_urls: List[str], prompt: str) -> Dict:
+    async def _call_kimi_multimodal(self, image_urls: list[str], prompt: str) -> dict:
        """调用 Kimi 多模态模型"""
        headers = {"Authorization": f"Bearer {self.kimi_api_key}", "Content-Type": "application/json"}

@@ -600,7 +600,7 @@ class AIManager:
                "cost": result["usage"]["total_tokens"] * 0.000005,
            }

-    def get_multimodal_analyses(self, tenant_id: str, project_id: Optional[str] = None) -> List[MultimodalAnalysis]:
+    def get_multimodal_analyses(self, tenant_id: str, project_id: str | None = None) -> list[MultimodalAnalysis]:
        """获取多模态分析历史"""
        query = "SELECT * FROM multimodal_analyses WHERE tenant_id = ?"
        params = [tenant_id]
@@ -623,9 +623,9 @@ class AIManager:
        project_id: str,
        name: str,
        description: str,
-        kg_config: Dict,
-        retrieval_config: Dict,
-        generation_config: Dict,
+        kg_config: dict,
+        retrieval_config: dict,
+        generation_config: dict,
    ) -> KnowledgeGraphRAG:
        """创建知识图谱 RAG 配置"""
        rag_id = f"kgr_{uuid.uuid4().hex[:16]}"
@@ -671,7 +671,7 @@ class AIManager:

        return rag

-    def get_kg_rag(self, rag_id: str) -> Optional[KnowledgeGraphRAG]:
+    def get_kg_rag(self, rag_id: str) -> KnowledgeGraphRAG | None:
        """获取知识图谱 RAG 配置"""
        with self._get_db() as conn:
            row = conn.execute("SELECT * FROM kg_rag_configs WHERE id = ?", (rag_id,)).fetchone()
@@ -681,7 +681,7 @@ class AIManager:

            return self._row_to_kg_rag(row)

-    def list_kg_rags(self, tenant_id: str, project_id: Optional[str] = None) -> List[KnowledgeGraphRAG]:
+    def list_kg_rags(self, tenant_id: str, project_id: str | None = None) -> list[KnowledgeGraphRAG]:
        """列出知识图谱 RAG 配置"""
        query = "SELECT * FROM kg_rag_configs WHERE tenant_id = ?"
        params = [tenant_id]
@@ -697,7 +697,7 @@ class AIManager:
            return [self._row_to_kg_rag(row) for row in rows]

    async def query_kg_rag(
-        self, rag_id: str, query: str, project_entities: List[Dict], project_relations: List[Dict]
+        self, rag_id: str, query: str, project_entities: list[dict], project_relations: list[dict]
    ) -> RAGQuery:
        """基于知识图谱的 RAG 查询"""
        import time
@@ -832,7 +832,7 @@ class AIManager:

        return rag_query

-    def _build_kg_context(self, entities: List[Dict], relations: List[Dict]) -> str:
+    def _build_kg_context(self, entities: list[dict], relations: list[dict]) -> str:
        """构建知识图谱上下文文本"""
        context = []

@@ -858,7 +858,7 @@ class AIManager:
        return "\n".join(context)

    async def generate_smart_summary(
-        self, tenant_id: str, project_id: str, source_type: str, source_id: str, summary_type: str, content_data: Dict
+        self, tenant_id: str, project_id: str, source_type: str, source_id: str, summary_type: str, content_data: dict
    ) -> SmartSummary:
        """生成智能摘要"""
        summary_id = f"ss_{uuid.uuid4().hex[:16]}"
@@ -999,9 +999,9 @@ class AIManager:
        project_id: str,
        name: str,
        prediction_type: PredictionType,
-        target_entity_type: Optional[str],
-        features: List[str],
-        model_config: Dict,
+        target_entity_type: str | None,
+        features: list[str],
+        model_config: dict,
    ) -> PredictionModel:
        """创建预测模型"""
        model_id = f"pm_{uuid.uuid4().hex[:16]}"
@@ -1053,7 +1053,7 @@ class AIManager:

        return model

-    def get_prediction_model(self, model_id: str) -> Optional[PredictionModel]:
+    def get_prediction_model(self, model_id: str) -> PredictionModel | None:
        """获取预测模型"""
        with self._get_db() as conn:
            row = conn.execute("SELECT * FROM prediction_models WHERE id = ?", (model_id,)).fetchone()
@@ -1063,7 +1063,7 @@ class AIManager:

            return self._row_to_prediction_model(row)

-    def list_prediction_models(self, tenant_id: str, project_id: Optional[str] = None) -> List[PredictionModel]:
+    def list_prediction_models(self, tenant_id: str, project_id: str | None = None) -> list[PredictionModel]:
        """列出预测模型"""
        query = "SELECT * FROM prediction_models WHERE tenant_id = ?"
        params = [tenant_id]
@@ -1078,7 +1078,7 @@ class AIManager:
            rows = conn.execute(query, params).fetchall()
            return [self._row_to_prediction_model(row) for row in rows]

-    async def train_prediction_model(self, model_id: str, historical_data: List[Dict]) -> PredictionModel:
+    async def train_prediction_model(self, model_id: str, historical_data: list[dict]) -> PredictionModel:
        """训练预测模型"""
        model = self.get_prediction_model(model_id)
        if not model:
@@ -1105,7 +1105,7 @@ class AIManager:

        return self.get_prediction_model(model_id)

-    async def predict(self, model_id: str, input_data: Dict) -> PredictionResult:
+    async def predict(self, model_id: str, input_data: dict) -> PredictionResult:
        """进行预测"""
        model = self.get_prediction_model(model_id)
        if not model or not model.is_active:
@@ -1172,7 +1172,7 @@ class AIManager:

        return result

-    def _predict_trend(self, input_data: Dict, model: PredictionModel) -> Dict:
+    def _predict_trend(self, input_data: dict, model: PredictionModel) -> dict:
        """趋势预测"""
        historical_values = input_data.get("historical_values", [])

@@ -1211,7 +1211,7 @@ class AIManager:
            "explanation": f"基于{len(historical_values)}个历史数据点，预测趋势为{trend}",
        }

-    def _detect_anomaly(self, input_data: Dict, model: PredictionModel) -> Dict:
+    def _detect_anomaly(self, input_data: dict, model: PredictionModel) -> dict:
        """异常检测"""
        value = input_data.get("value")
        historical_values = input_data.get("historical_values", [])
@@ -1245,7 +1245,7 @@ class AIManager:
            "explanation": f"当前值偏离均值{z_score:.2f}个标准差，{'检测到异常' if is_anomaly else '处于正常范围'}",
        }

-    def _predict_entity_growth(self, input_data: Dict, model: PredictionModel) -> Dict:
+    def _predict_entity_growth(self, input_data: dict, model: PredictionModel) -> dict:
        """实体增长预测"""
        entity_history = input_data.get("entity_history", [])

@@ -1273,7 +1273,7 @@ class AIManager:
            "explanation": f"基于过去{len(entity_history)}个周期的数据，预测增长率{avg_growth_rate * 100:.1f}%",
        }

-    def _predict_relation_evolution(self, input_data: Dict, model: PredictionModel) -> Dict:
+    def _predict_relation_evolution(self, input_data: dict, model: PredictionModel) -> dict:
        """关系演变预测"""
        relation_history = input_data.get("relation_history", [])

@@ -1299,7 +1299,7 @@ class AIManager:
            "explanation": f"基于{len(relation_history)}个历史快照分析关系演变趋势",
        }

-    def get_prediction_results(self, model_id: str, limit: int = 100) -> List[PredictionResult]:
+    def get_prediction_results(self, model_id: str, limit: int = 100) -> list[PredictionResult]:
        """获取预测结果历史"""
        with self._get_db() as conn:
            rows = conn.execute(