insightflow/backend/db_manager.py

#!/usr/bin/env python3
"""
InsightFlow Database Manager - Phase 5
处理项目、实体、关系的持久化
支持实体属性扩展
"""

import json
import os
import sqlite3
import uuid
from dataclasses import dataclass
from datetime import datetime

DB_PATH = os.getenv("DB_PATH", "/app/data/insightflow.db")

# Constants
UUID_LENGTH = 8  # UUID 截断长度


@dataclass
class Project:
    id: str
    name: str
    description: str = ""
    created_at: str = ""
    updated_at: str = ""


@dataclass
class Entity:
    id: str
    project_id: str
    name: str
    type: str
    definition: str = ""
    canonical_name: str = ""
    aliases: list[str] = None
    embedding: str = ""  # Phase 3: 实体嵌入向量
    attributes: dict = None  # Phase 5: 实体属性
    created_at: str = ""
    updated_at: str = ""

    def __post_init__(self):
        if self.aliases is None:
            self.aliases = []
        if self.attributes is None:
            self.attributes = {}


@dataclass
class AttributeTemplate:
    """属性模板定义"""

    id: str
    project_id: str
    name: str
    type: str  # text, number, date, select, multiselect, boolean
    options: list[str] = None  # 用于 select/multiselect
    default_value: str = ""
    description: str = ""
    is_required: bool = False
    sort_order: int = 0
    created_at: str = ""
    updated_at: str = ""

    def __post_init__(self):
        if self.options is None:
            self.options = []


@dataclass
class EntityAttribute:
    """实体属性值"""

    id: str
    entity_id: str
    template_id: str | None = None
    name: str = ""  # 属性名称
    type: str = "text"  # 属性类型
    value: str = ""
    options: list[str] = None  # 选项列表
    template_name: str = ""  # 关联查询时填充
    template_type: str = ""  # 关联查询时填充
    created_at: str = ""
    updated_at: str = ""

    def __post_init__(self):
        if self.options is None:
            self.options = []


@dataclass
class AttributeHistory:
    """属性变更历史"""

    id: str
    entity_id: str
    attribute_name: str = ""  # 属性名称
    old_value: str = ""
    new_value: str = ""
    changed_by: str = ""
    changed_at: str = ""
    change_reason: str = ""


@dataclass
class EntityMention:
    id: str
    entity_id: str
    transcript_id: str
    start_pos: int
    end_pos: int
    text_snippet: str
    confidence: float = 1.0


class DatabaseManager:
    def __init__(self, db_path: str = DB_PATH):
        self.db_path = db_path
        os.makedirs(os.path.dirname(db_path), exist_ok=True)
        self.init_db()

    def get_conn(self):
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def init_db(self) -> None:
        """初始化数据库表"""
        with open(os.path.join(os.path.dirname(__file__), "schema.sql")) as f:
            schema = f.read()

        conn = self.get_conn()
        conn.executescript(schema)
        conn.commit()
        conn.close()

    # ==================== Project Operations ====================

    def create_project(self, project_id: str, name: str, description: str = "") -> Project:
        conn = self.get_conn()
        now = datetime.now().isoformat()
        conn.execute(
            """INSERT INTO projects (id, name, description, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?)""",
            (project_id, name, description, now, now),
        )
        conn.commit()
        conn.close()
        return Project(
            id=project_id, name=name, description=description, created_at=now, updated_at=now
        )

    def get_project(self, project_id: str) -> Project | None:
        conn = self.get_conn()
        row = conn.execute("SELECT * FROM projects WHERE id = ?", (project_id,)).fetchone()
        conn.close()
        if row:
            return Project(**dict(row))
        return None

    def list_projects(self) -> list[Project]:
        conn = self.get_conn()
        rows = conn.execute("SELECT * FROM projects ORDER BY updated_at DESC").fetchall()
        conn.close()
        return [Project(**dict(r)) for r in rows]

    # ==================== Entity Operations ====================

    def create_entity(self, entity: Entity) -> Entity:
        conn = self.get_conn()
        conn.execute(
            """INSERT INTO entities (id, project_id, name, canonical_name, type,
               definition, aliases, created_at, updated_at)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                entity.id,
                entity.project_id,
                entity.name,
                entity.canonical_name,
                entity.type,
                entity.definition,
                json.dumps(entity.aliases),
                datetime.now().isoformat(),
                datetime.now().isoformat(),
            ),
        )
        conn.commit()
        conn.close()
        return entity

    def get_entity_by_name(self, project_id: str, name: str) -> Entity | None:
        """通过名称查找实体（用于对齐）"""
        conn = self.get_conn()
        row = conn.execute(
            """SELECT * FROM entities WHERE project_id = ?
            AND (name = ? OR canonical_name = ? OR aliases LIKE ?)""",
            (project_id, name, name, f'%"{name}"%'),
        ).fetchone()
        conn.close()
        if row:
            data = dict(row)
            data["aliases"] = json.loads(data["aliases"]) if data["aliases"] else []
            return Entity(**data)
        return None

    def find_similar_entities(
        self, project_id: str, name: str, threshold: float = 0.8
    ) -> list[Entity]:
        """查找相似实体"""
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM entities WHERE project_id = ? AND name LIKE ?", (project_id, f"%{name}%")
        ).fetchall()
        conn.close()

        entities = []
        for row in rows:
            data = dict(row)
            data["aliases"] = json.loads(data["aliases"]) if data["aliases"] else []
            entities.append(Entity(**data))
        return entities

    def merge_entities(self, target_id: str, source_id: str) -> Entity:
        """合并两个实体"""
        conn = self.get_conn()

        target = conn.execute("SELECT * FROM entities WHERE id = ?", (target_id,)).fetchone()
        source = conn.execute("SELECT * FROM entities WHERE id = ?", (source_id,)).fetchone()

        if not target or not source:
            conn.close()
            raise ValueError("Entity not found")

        target_aliases = set(json.loads(target["aliases"]) if target["aliases"] else [])
        target_aliases.add(source["name"])
        target_aliases.update(json.loads(source["aliases"]) if source["aliases"] else [])

        conn.execute(
            "UPDATE entities SET aliases = ?, updated_at = ? WHERE id = ?",
            (json.dumps(list(target_aliases)), datetime.now().isoformat(), target_id),
        )
        conn.execute(
            "UPDATE entity_mentions SET entity_id = ? WHERE entity_id = ?", (target_id, source_id)
        )
        conn.execute(
            "UPDATE entity_relations SET source_entity_id = ? WHERE source_entity_id = ?",
            (target_id, source_id),
        )
        conn.execute(
            "UPDATE entity_relations SET target_entity_id = ? WHERE target_entity_id = ?",
            (target_id, source_id),
        )
        conn.execute("DELETE FROM entities WHERE id = ?", (source_id,))

        conn.commit()
        conn.close()
        return self.get_entity(target_id)

    def get_entity(self, entity_id: str) -> Entity | None:
        conn = self.get_conn()
        row = conn.execute("SELECT * FROM entities WHERE id = ?", (entity_id,)).fetchone()
        conn.close()
        if row:
            data = dict(row)
            data["aliases"] = json.loads(data["aliases"]) if data["aliases"] else []
            return Entity(**data)
        return None

    def list_project_entities(self, project_id: str) -> list[Entity]:
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM entities WHERE project_id = ? ORDER BY updated_at DESC", (project_id,)
        ).fetchall()
        conn.close()

        entities = []
        for row in rows:
            data = dict(row)
            data["aliases"] = json.loads(data["aliases"]) if data["aliases"] else []
            entities.append(Entity(**data))
        return entities

    def update_entity(self, entity_id: str, **kwargs) -> Entity:
        """更新实体信息"""
        conn = self.get_conn()

        allowed_fields = ["name", "type", "definition", "canonical_name"]
        updates = []
        values = []

        for field in allowed_fields:
            if field in kwargs:
                updates.append(f"{field} = ?")
                values.append(kwargs[field])

        if "aliases" in kwargs:
            updates.append("aliases = ?")
            values.append(json.dumps(kwargs["aliases"]))

        if not updates:
            conn.close()
            return self.get_entity(entity_id)

        updates.append("updated_at = ?")
        values.append(datetime.now().isoformat())
        values.append(entity_id)

        query = f"UPDATE entities SET {', '.join(updates)} WHERE id = ?"
        conn.execute(query, values)
        conn.commit()
        conn.close()
        return self.get_entity(entity_id)

    def delete_entity(self, entity_id: str) -> None:
        """删除实体及其关联数据"""
        conn = self.get_conn()
        conn.execute("DELETE FROM entity_mentions WHERE entity_id = ?", (entity_id,))
        conn.execute(
            "DELETE FROM entity_relations WHERE source_entity_id = ? OR target_entity_id = ?",
            (entity_id, entity_id),
        )
        conn.execute("DELETE FROM entity_attributes WHERE entity_id = ?", (entity_id,))
        conn.execute("DELETE FROM entities WHERE id = ?", (entity_id,))
        conn.commit()
        conn.close()

    # ==================== Mention Operations ====================

    def add_mention(self, mention: EntityMention) -> EntityMention:
        conn = self.get_conn()
        conn.execute(
            """INSERT INTO entity_mentions
            (id, entity_id, transcript_id, start_pos, end_pos, text_snippet, confidence)
            VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (
                mention.id,
                mention.entity_id,
                mention.transcript_id,
                mention.start_pos,
                mention.end_pos,
                mention.text_snippet,
                mention.confidence,
            ),
        )
        conn.commit()
        conn.close()
        return mention

    def get_entity_mentions(self, entity_id: str) -> list[EntityMention]:
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM entity_mentions WHERE entity_id = ? ORDER BY transcript_id, start_pos",
            (entity_id,),
        ).fetchall()
        conn.close()
        return [EntityMention(**dict(r)) for r in rows]

    # ==================== Transcript Operations ====================

    def save_transcript(
        self,
        transcript_id: str,
        project_id: str,
        filename: str,
        full_text: str,
        transcript_type: str = "audio",
    ):
        conn = self.get_conn()
        now = datetime.now().isoformat()
        conn.execute(
            """INSERT INTO transcripts
            (id, project_id, filename, full_text, type, created_at)
            VALUES (?, ?, ?, ?, ?, ?)""",
            (transcript_id, project_id, filename, full_text, transcript_type, now),
        )
        conn.commit()
        conn.close()

    def get_transcript(self, transcript_id: str) -> dict | None:
        conn = self.get_conn()
        row = conn.execute("SELECT * FROM transcripts WHERE id = ?", (transcript_id,)).fetchone()
        conn.close()
        return dict(row) if row else None

    def list_project_transcripts(self, project_id: str) -> list[dict]:
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM transcripts WHERE project_id = ? ORDER BY created_at DESC", (project_id,)
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]

    def update_transcript(self, transcript_id: str, full_text: str) -> dict:
        conn = self.get_conn()
        now = datetime.now().isoformat()
        conn.execute(
            "UPDATE transcripts SET full_text = ?, updated_at = ? WHERE id = ?",
            (full_text, now, transcript_id),
        )
        conn.commit()
        row = conn.execute("SELECT * FROM transcripts WHERE id = ?", (transcript_id,)).fetchone()
        conn.close()
        return dict(row) if row else None

    # ==================== Relation Operations ====================

    def create_relation(
        self,
        project_id: str,
        source_entity_id: str,
        target_entity_id: str,
        relation_type: str = "related",
        evidence: str = "",
        transcript_id: str = "",
    ):
        conn = self.get_conn()
        relation_id = str(uuid.uuid4())[:UUID_LENGTH]
        now = datetime.now().isoformat()
        conn.execute(
            """INSERT INTO entity_relations
            (id, project_id, source_entity_id, target_entity_id, relation_type,
            evidence, transcript_id, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                relation_id,
                project_id,
                source_entity_id,
                target_entity_id,
                relation_type,
                evidence,
                transcript_id,
                now,
            ),
        )
        conn.commit()
        conn.close()
        return relation_id

    def get_entity_relations(self, entity_id: str) -> list[dict]:
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT * FROM entity_relations
               WHERE source_entity_id = ? OR target_entity_id = ?
               ORDER BY created_at DESC""",
            (entity_id, entity_id),
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]

    def list_project_relations(self, project_id: str) -> list[dict]:
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM entity_relations WHERE project_id = ? ORDER BY created_at DESC",
            (project_id,),
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]

    def update_relation(self, relation_id: str, **kwargs) -> dict:
        conn = self.get_conn()
        allowed_fields = ["relation_type", "evidence"]
        updates = []
        values = []

        for field in allowed_fields:
            if field in kwargs:
                updates.append(f"{field} = ?")
                values.append(kwargs[field])

        if updates:
            query = f"UPDATE entity_relations SET {', '.join(updates)} WHERE id = ?"
            values.append(relation_id)
            conn.execute(query, values)
            conn.commit()

        row = conn.execute("SELECT * FROM entity_relations WHERE id = ?", (relation_id,)).fetchone()
        conn.close()
        return dict(row) if row else None

    def delete_relation(self, relation_id: str):
        conn = self.get_conn()
        conn.execute("DELETE FROM entity_relations WHERE id = ?", (relation_id,))
        conn.commit()
        conn.close()

    # ==================== Glossary Operations ====================

    def add_glossary_term(self, project_id: str, term: str, pronunciation: str = "") -> str:
        conn = self.get_conn()
        existing = conn.execute(
            "SELECT * FROM glossary WHERE project_id = ? AND term = ?", (project_id, term)
        ).fetchone()

        if existing:
            conn.execute(
                "UPDATE glossary SET frequency = frequency + 1 WHERE id = ?", (existing["id"],)
            )
            conn.commit()
            conn.close()
            return existing["id"]

        term_id = str(uuid.uuid4())[:UUID_LENGTH]
        conn.execute(
            """INSERT INTO glossary
            (id, project_id, term, pronunciation, frequency)
            VALUES (?, ?, ?, ?, ?)""",
            (term_id, project_id, term, pronunciation, 1),
        )
        conn.commit()
        conn.close()
        return term_id

    def list_glossary(self, project_id: str) -> list[dict]:
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM glossary WHERE project_id = ? ORDER BY frequency DESC", (project_id,)
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]

    def delete_glossary_term(self, term_id: str):
        conn = self.get_conn()
        conn.execute("DELETE FROM glossary WHERE id = ?", (term_id,))
        conn.commit()
        conn.close()

    # ==================== Phase 4: Agent & Provenance ====================

    def get_relation_with_details(self, relation_id: str) -> dict | None:
        conn = self.get_conn()
        row = conn.execute(
            """SELECT r.*,
                s.name as source_name, t.name as target_name,
                tr.filename as transcript_filename, tr.full_text as transcript_text
               FROM entity_relations r
               JOIN entities s ON r.source_entity_id = s.id
               JOIN entities t ON r.target_entity_id = t.id
               LEFT JOIN transcripts tr ON r.transcript_id = tr.id
               WHERE r.id = ?""",
            (relation_id,),
        ).fetchone()
        conn.close()
        return dict(row) if row else None

    def get_entity_with_mentions(self, entity_id: str) -> dict | None:
        conn = self.get_conn()
        entity_row = conn.execute("SELECT * FROM entities WHERE id = ?", (entity_id,)).fetchone()
        if not entity_row:
            conn.close()
            return None

        entity = dict(entity_row)
        entity["aliases"] = json.loads(entity["aliases"]) if entity["aliases"] else []

        mentions = conn.execute(
            """SELECT m.*, t.filename, t.created_at as transcript_date
               FROM entity_mentions m
               JOIN transcripts t ON m.transcript_id = t.id
               WHERE m.entity_id = ? ORDER BY t.created_at, m.start_pos""",
            (entity_id,),
        ).fetchall()
        entity["mentions"] = [dict(m) for m in mentions]
        entity["mention_count"] = len(mentions)

        relations = conn.execute(
            """SELECT r.*, s.name as source_name, t.name as target_name
               FROM entity_relations r
               JOIN entities s ON r.source_entity_id = s.id
               JOIN entities t ON r.target_entity_id = t.id
               WHERE r.source_entity_id = ? OR r.target_entity_id = ?
               ORDER BY r.created_at DESC""",
            (entity_id, entity_id),
        ).fetchall()
        entity["relations"] = [dict(r) for r in relations]

        conn.close()
        return entity

    def search_entities(self, project_id: str, query: str) -> list[Entity]:
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT * FROM entities
               WHERE project_id = ? AND
               (name LIKE ? OR definition LIKE ? OR aliases LIKE ?)
               ORDER BY name""",
            (project_id, f"%{query}%", f"%{query}%", f"%{query}%"),
        ).fetchall()
        conn.close()

        entities = []
        for row in rows:
            data = dict(row)
            data["aliases"] = json.loads(data["aliases"]) if data["aliases"] else []
            entities.append(Entity(**data))
        return entities

    def get_project_summary(self, project_id: str) -> dict:
        conn = self.get_conn()
        project = conn.execute("SELECT * FROM projects WHERE id = ?", (project_id,)).fetchone()

        entity_count = conn.execute(
            "SELECT COUNT(*) as count FROM entities WHERE project_id = ?", (project_id,)
        ).fetchone()["count"]

        transcript_count = conn.execute(
            "SELECT COUNT(*) as count FROM transcripts WHERE project_id = ?", (project_id,)
        ).fetchone()["count"]

        relation_count = conn.execute(
            "SELECT COUNT(*) as count FROM entity_relations WHERE project_id = ?", (project_id,)
        ).fetchone()["count"]

        recent_transcripts = conn.execute(
            """SELECT filename, full_text, created_at FROM transcripts
               WHERE project_id = ? ORDER BY created_at DESC LIMIT 5""",
            (project_id,),
        ).fetchall()

        top_entities = conn.execute(
            """SELECT e.name, e.type, e.definition, COUNT(m.id) as mention_count
               FROM entities e
               LEFT JOIN entity_mentions m ON e.id = m.entity_id
               WHERE e.project_id = ?
               GROUP BY e.id ORDER BY mention_count DESC LIMIT 10""",
            (project_id,),
        ).fetchall()

        conn.close()

        return {
            "project": dict(project) if project else {},
            "statistics": {
                "entity_count": entity_count,
                "transcript_count": transcript_count,
                "relation_count": relation_count,
            },
            "recent_transcripts": [dict(t) for t in recent_transcripts],
            "top_entities": [dict(e) for e in top_entities],
        }

    def get_transcript_context(
        self, transcript_id: str, position: int, context_chars: int = 200
    ) -> str:
        conn = self.get_conn()
        row = conn.execute(
            "SELECT full_text FROM transcripts WHERE id = ?", (transcript_id,)
        ).fetchone()
        conn.close()
        if not row:
            return ""
        text = row["full_text"]
        start = max(0, position - context_chars)
        end = min(len(text), position + context_chars)
        return text[start:end]

    # ==================== Phase 5: Timeline Operations ====================

    def get_project_timeline(
        self, project_id: str, entity_id: str = None, start_date: str = None, end_date: str = None
    ) -> list[dict]:
        conn = self.get_conn()

        conditions = ["t.project_id = ?"]
        params = [project_id]

        if entity_id:
            conditions.append("m.entity_id = ?")
            params.append(entity_id)
        if start_date:
            conditions.append("t.created_at >= ?")
            params.append(start_date)
        if end_date:
            conditions.append("t.created_at <= ?")
            params.append(end_date)

        where_clause = " AND ".join(conditions)

        mentions = conn.execute(
            f"""SELECT m.*, e.name as entity_name, e.type as entity_type, e.definition,
                       t.filename, t.created_at as event_date, t.type as source_type
               FROM entity_mentions m
               JOIN entities e ON m.entity_id = e.id
               JOIN transcripts t ON m.transcript_id = t.id
               WHERE {where_clause} ORDER BY t.created_at, m.start_pos""",
            params,
        ).fetchall()

        timeline_events = []
        for m in mentions:
            timeline_events.append(
                {
                    "id": m["id"],
                    "type": "mention",
                    "event_date": m["event_date"],
                    "entity_id": m["entity_id"],
                    "entity_name": m["entity_name"],
                    "entity_type": m["entity_type"],
                    "text_snippet": m["text_snippet"],
                    "confidence": m["confidence"],
                    "source": {
                        "transcript_id": m["transcript_id"],
                        "filename": m["filename"],
                        "type": m["source_type"],
                    },
                }
            )

        conn.close()
        timeline_events.sort(key=lambda x: x["event_date"])
        return timeline_events

    def get_entity_timeline_summary(self, project_id: str) -> dict:
        conn = self.get_conn()

        daily_stats = conn.execute(
            """SELECT DATE(t.created_at) as date, COUNT(*) as count
               FROM entity_mentions m
               JOIN transcripts t ON m.transcript_id = t.id
               WHERE t.project_id = ? GROUP BY DATE(t.created_at) ORDER BY date""",
            (project_id,),
        ).fetchall()

        entity_stats = conn.execute(
            """SELECT e.name, e.type, COUNT(m.id) as mention_count,
                       MIN(t.created_at) as first_mentioned,
                       MAX(t.created_at) as last_mentioned
               FROM entities e
               LEFT JOIN entity_mentions m ON e.id = m.entity_id
               LEFT JOIN transcripts t ON m.transcript_id = t.id
               WHERE e.project_id = ?
               GROUP BY e.id ORDER BY mention_count DESC LIMIT 20""",
            (project_id,),
        ).fetchall()

        conn.close()

        return {
            "daily_activity": [dict(d) for d in daily_stats],
            "top_entities": [dict(e) for e in entity_stats],
        }

    # ==================== Phase 5: Entity Attributes ====================

    def create_attribute_template(self, template: AttributeTemplate) -> AttributeTemplate:
        conn = self.get_conn()
        now = datetime.now().isoformat()
        conn.execute(
            """INSERT INTO attribute_templates
               (id, project_id, name, type, options, default_value, description,
                is_required, sort_order, created_at, updated_at)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                template.id,
                template.project_id,
                template.name,
                template.type,
                json.dumps(template.options) if template.options else None,
                template.default_value,
                template.description,
                template.is_required,
                template.sort_order,
                now,
                now,
            ),
        )
        conn.commit()
        conn.close()
        return template

    def get_attribute_template(self, template_id: str) -> AttributeTemplate | None:
        conn = self.get_conn()
        row = conn.execute(
            "SELECT * FROM attribute_templates WHERE id = ?", (template_id,)
        ).fetchone()
        conn.close()
        if row:
            data = dict(row)
            data["options"] = json.loads(data["options"]) if data["options"] else []
            return AttributeTemplate(**data)
        return None

    def list_attribute_templates(self, project_id: str) -> list[AttributeTemplate]:
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT * FROM attribute_templates WHERE project_id = ?
               ORDER BY sort_order, created_at""",
            (project_id,),
        ).fetchall()
        conn.close()

        templates = []
        for row in rows:
            data = dict(row)
            data["options"] = json.loads(data["options"]) if data["options"] else []
            templates.append(AttributeTemplate(**data))
        return templates

    def update_attribute_template(self, template_id: str, **kwargs) -> AttributeTemplate | None:
        conn = self.get_conn()
        allowed_fields = [
            "name",
            "type",
            "options",
            "default_value",
            "description",
            "is_required",
            "sort_order",
        ]
        updates = []
        values = []

        for field in allowed_fields:
            if field in kwargs:
                updates.append(f"{field} = ?")
                if field == "options":
                    values.append(json.dumps(kwargs[field]) if kwargs[field] else None)
                else:
                    values.append(kwargs[field])

        if updates:
            updates.append("updated_at = ?")
            values.append(datetime.now().isoformat())
            values.append(template_id)
            query = f"UPDATE attribute_templates SET {', '.join(updates)} WHERE id = ?"
            conn.execute(query, values)
            conn.commit()

        conn.close()
        return self.get_attribute_template(template_id)

    def delete_attribute_template(self, template_id: str):
        conn = self.get_conn()
        conn.execute("DELETE FROM attribute_templates WHERE id = ?", (template_id,))
        conn.commit()
        conn.close()

    def set_entity_attribute(
        self, attr: EntityAttribute, changed_by: str = "system", change_reason: str = ""
    ) -> EntityAttribute:
        conn = self.get_conn()
        now = datetime.now().isoformat()

        old_row = conn.execute(
            "SELECT value FROM entity_attributes WHERE entity_id = ? AND template_id = ?",
            (attr.entity_id, attr.template_id),
        ).fetchone()
        old_value = old_row["value"] if old_row else None

        if old_value != attr.value:
            conn.execute(
                """INSERT INTO attribute_history
                (id, entity_id, template_id, old_value, new_value,
                changed_by, changed_at, change_reason)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    str(uuid.uuid4())[:UUID_LENGTH],
                    attr.entity_id,
                    attr.template_id,
                    old_value,
                    attr.value,
                    changed_by,
                    now,
                    change_reason,
                ),
            )

        conn.execute(
            """INSERT OR REPLACE INTO entity_attributes
            (id, entity_id, template_id, value, created_at, updated_at)
            VALUES (
                COALESCE(
                    (SELECT id FROM entity_attributes
                    WHERE entity_id = ? AND template_id = ?), ?
                ),
                ?, ?, ?,
                COALESCE(
                    (SELECT created_at FROM entity_attributes
                    WHERE entity_id = ? AND template_id = ?), ?
                ),
                ?)""",
            (
                attr.entity_id,
                attr.template_id,
                attr.id,
                attr.entity_id,
                attr.template_id,
                attr.value,
                attr.entity_id,
                attr.template_id,
                now,
                now,
            ),
        )

        conn.commit()
        conn.close()
        return attr

    def get_entity_attributes(self, entity_id: str) -> list[EntityAttribute]:
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT ea.*, at.name as template_name, at.type as template_type
               FROM entity_attributes ea
               LEFT JOIN attribute_templates at ON ea.template_id = at.id
               WHERE ea.entity_id = ? ORDER BY ea.created_at""",
            (entity_id,),
        ).fetchall()
        conn.close()
        return [EntityAttribute(**dict(r)) for r in rows]

    def get_entity_with_attributes(self, entity_id: str) -> Entity | None:
        entity = self.get_entity(entity_id)
        if not entity:
            return None
        attrs = self.get_entity_attributes(entity_id)
        entity.attributes = {
            attr.template_name: {
                "value": attr.value,
                "type": attr.template_type,
                "template_id": attr.template_id,
            }
            for attr in attrs
        }
        return entity

    def delete_entity_attribute(
        self, entity_id: str, template_id: str, changed_by: str = "system", change_reason: str = ""
    ):
        conn = self.get_conn()
        old_row = conn.execute(
            """SELECT value FROM entity_attributes
            WHERE entity_id = ? AND template_id = ?""",
            (entity_id, template_id),
        ).fetchone()

        if old_row:
            conn.execute(
                """INSERT INTO attribute_history
                (id, entity_id, template_id, old_value, new_value,
                changed_by, changed_at, change_reason)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    str(uuid.uuid4())[:UUID_LENGTH],
                    entity_id,
                    template_id,
                    old_row["value"],
                    None,
                    changed_by,
                    datetime.now().isoformat(),
                    change_reason or "属性删除",
                ),
            )
            conn.execute(
                "DELETE FROM entity_attributes WHERE entity_id = ? AND template_id = ?",
                (entity_id, template_id),
            )
            conn.commit()
        conn.close()

    def get_attribute_history(
        self, entity_id: str = None, template_id: str = None, limit: int = 50
    ) -> list[AttributeHistory]:
        conn = self.get_conn()
        conditions = []
        params = []

        if entity_id:
            conditions.append("ah.entity_id = ?")
            params.append(entity_id)
        if template_id:
            conditions.append("ah.template_id = ?")
            params.append(template_id)

        where_clause = " AND ".join(conditions) if conditions else "1=1"

        rows = conn.execute(
            f"""SELECT ah.*
                FROM attribute_history ah
                WHERE {where_clause}
                ORDER BY ah.changed_at DESC LIMIT ?""",
            params + [limit],
        ).fetchall()
        conn.close()
        return [AttributeHistory(**dict(r)) for r in rows]

    def search_entities_by_attributes(
        self, project_id: str, attribute_filters: dict[str, str]
    ) -> list[Entity]:
        entities = self.list_project_entities(project_id)
        if not attribute_filters:
            return entities

        entity_ids = [e.id for e in entities]
        if not entity_ids:
            return []

        conn = self.get_conn()
        placeholders = ",".join(["?" for _ in entity_ids])
        rows = conn.execute(
            f"""SELECT ea.*, at.name as template_name
                FROM entity_attributes ea
                JOIN attribute_templates at ON ea.template_id = at.id
                WHERE ea.entity_id IN ({placeholders})""",
            entity_ids,
        ).fetchall()
        conn.close()

        entity_attrs = {}
        for row in rows:
            eid = row["entity_id"]
            if eid not in entity_attrs:
                entity_attrs[eid] = {}
            entity_attrs[eid][row["template_name"]] = row["value"]

        filtered = []
        for entity in entities:
            attrs = entity_attrs.get(entity.id, {})
            match = True
            for attr_name, attr_value in attribute_filters.items():
                if attrs.get(attr_name) != attr_value:
                    match = False
                    break
            if match:
                entity.attributes = attrs
                filtered.append(entity)
        return filtered

    # ==================== Phase 7: Multimodal Support ====================

    def create_video(
        self,
        video_id: str,
        project_id: str,
        filename: str,
        duration: float = 0,
        fps: float = 0,
        resolution: dict = None,
        audio_transcript_id: str = None,
        full_ocr_text: str = "",
        extracted_entities: list[dict] = None,
        extracted_relations: list[dict] = None,
    ) -> str:
        """创建视频记录"""
        conn = self.get_conn()
        now = datetime.now().isoformat()

        conn.execute(
            """INSERT INTO videos
               (id, project_id, filename, duration, fps, resolution,
                audio_transcript_id, full_ocr_text, extracted_entities,
                extracted_relations, status, created_at, updated_at)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                video_id,
                project_id,
                filename,
                duration,
                fps,
                json.dumps(resolution) if resolution else None,
                audio_transcript_id,
                full_ocr_text,
                json.dumps(extracted_entities or []),
                json.dumps(extracted_relations or []),
                "completed",
                now,
                now,
            ),
        )
        conn.commit()
        conn.close()
        return video_id

    def get_video(self, video_id: str) -> dict | None:
        """获取视频信息"""
        conn = self.get_conn()
        row = conn.execute("SELECT * FROM videos WHERE id = ?", (video_id,)).fetchone()
        conn.close()

        if row:
            data = dict(row)
            data["resolution"] = json.loads(data["resolution"]) if data["resolution"] else None
            data["extracted_entities"] = (
                json.loads(data["extracted_entities"]) if data["extracted_entities"] else []
            )
            data["extracted_relations"] = (
                json.loads(data["extracted_relations"]) if data["extracted_relations"] else []
            )
            return data
        return None

    def list_project_videos(self, project_id: str) -> list[dict]:
        """获取项目的所有视频"""
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM videos WHERE project_id = ? ORDER BY created_at DESC", (project_id,)
        ).fetchall()
        conn.close()

        videos = []
        for row in rows:
            data = dict(row)
            data["resolution"] = json.loads(data["resolution"]) if data["resolution"] else None
            data["extracted_entities"] = (
                json.loads(data["extracted_entities"]) if data["extracted_entities"] else []
            )
            data["extracted_relations"] = (
                json.loads(data["extracted_relations"]) if data["extracted_relations"] else []
            )
            videos.append(data)
        return videos

    def create_video_frame(
        self,
        frame_id: str,
        video_id: str,
        frame_number: int,
        timestamp: float,
        image_url: str = None,
        ocr_text: str = None,
        extracted_entities: list[dict] = None,
    ) -> str:
        """创建视频帧记录"""
        conn = self.get_conn()
        now = datetime.now().isoformat()

        conn.execute(
            """INSERT INTO video_frames
            (id, video_id, frame_number, timestamp, image_url, ocr_text,
            extracted_entities, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                frame_id,
                video_id,
                frame_number,
                timestamp,
                image_url,
                ocr_text,
                json.dumps(extracted_entities or []),
                now,
            ),
        )
        conn.commit()
        conn.close()
        return frame_id

    def get_video_frames(self, video_id: str) -> list[dict]:
        """获取视频的所有帧"""
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT * FROM video_frames WHERE video_id = ? ORDER BY timestamp""", (video_id,)
        ).fetchall()
        conn.close()

        frames = []
        for row in rows:
            data = dict(row)
            data["extracted_entities"] = (
                json.loads(data["extracted_entities"]) if data["extracted_entities"] else []
            )
            frames.append(data)
        return frames

    def create_image(
        self,
        image_id: str,
        project_id: str,
        filename: str,
        ocr_text: str = "",
        description: str = "",
        extracted_entities: list[dict] = None,
        extracted_relations: list[dict] = None,
    ) -> str:
        """创建图片记录"""
        conn = self.get_conn()
        now = datetime.now().isoformat()

        conn.execute(
            """INSERT INTO images
               (id, project_id, filename, ocr_text, description,
                extracted_entities, extracted_relations, status, created_at, updated_at)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                image_id,
                project_id,
                filename,
                ocr_text,
                description,
                json.dumps(extracted_entities or []),
                json.dumps(extracted_relations or []),
                "completed",
                now,
                now,
            ),
        )
        conn.commit()
        conn.close()
        return image_id

    def get_image(self, image_id: str) -> dict | None:
        """获取图片信息"""
        conn = self.get_conn()
        row = conn.execute("SELECT * FROM images WHERE id = ?", (image_id,)).fetchone()
        conn.close()

        if row:
            data = dict(row)
            data["extracted_entities"] = (
                json.loads(data["extracted_entities"]) if data["extracted_entities"] else []
            )
            data["extracted_relations"] = (
                json.loads(data["extracted_relations"]) if data["extracted_relations"] else []
            )
            return data
        return None

    def list_project_images(self, project_id: str) -> list[dict]:
        """获取项目的所有图片"""
        conn = self.get_conn()
        rows = conn.execute(
            "SELECT * FROM images WHERE project_id = ? ORDER BY created_at DESC", (project_id,)
        ).fetchall()
        conn.close()

        images = []
        for row in rows:
            data = dict(row)
            data["extracted_entities"] = (
                json.loads(data["extracted_entities"]) if data["extracted_entities"] else []
            )
            data["extracted_relations"] = (
                json.loads(data["extracted_relations"]) if data["extracted_relations"] else []
            )
            images.append(data)
        return images

    def create_multimodal_mention(
        self,
        mention_id: str,
        project_id: str,
        entity_id: str,
        modality: str,
        source_id: str,
        source_type: str,
        text_snippet: str = "",
        confidence: float = 1.0,
    ) -> str:
        """创建多模态实体提及记录"""
        conn = self.get_conn()
        now = datetime.now().isoformat()

        conn.execute(
            """INSERT OR REPLACE INTO multimodal_mentions
               (id, project_id, entity_id, modality, source_id, source_type,
                text_snippet, confidence, created_at)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                mention_id,
                project_id,
                entity_id,
                modality,
                source_id,
                source_type,
                text_snippet,
                confidence,
                now,
            ),
        )
        conn.commit()
        conn.close()
        return mention_id

    def get_entity_multimodal_mentions(self, entity_id: str) -> list[dict]:
        """获取实体的多模态提及"""
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT m.*, e.name as entity_name
               FROM multimodal_mentions m
               JOIN entities e ON m.entity_id = e.id
               WHERE m.entity_id = ? ORDER BY m.created_at DESC""",
            (entity_id,),
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]

    def get_project_multimodal_mentions(self, project_id: str, modality: str = None) -> list[dict]:
        """获取项目的多模态提及"""
        conn = self.get_conn()

        if modality:
            rows = conn.execute(
                """SELECT m.*, e.name as entity_name
                   FROM multimodal_mentions m
                   JOIN entities e ON m.entity_id = e.id
                   WHERE m.project_id = ? AND m.modality = ?
                   ORDER BY m.created_at DESC""",
                (project_id, modality),
            ).fetchall()
        else:
            rows = conn.execute(
                """SELECT m.*, e.name as entity_name
                   FROM multimodal_mentions m
                   JOIN entities e ON m.entity_id = e.id
                   WHERE m.project_id = ? ORDER BY m.created_at DESC""",
                (project_id,),
            ).fetchall()

        conn.close()
        return [dict(r) for r in rows]

    def create_multimodal_entity_link(
        self,
        link_id: str,
        entity_id: str,
        linked_entity_id: str,
        link_type: str,
        confidence: float = 1.0,
        evidence: str = "",
        modalities: list[str] = None,
    ) -> str:
        """创建多模态实体关联"""
        conn = self.get_conn()
        now = datetime.now().isoformat()

        conn.execute(
            """INSERT OR REPLACE INTO multimodal_entity_links
               (id, entity_id, linked_entity_id, link_type, confidence,
                evidence, modalities, created_at)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                link_id,
                entity_id,
                linked_entity_id,
                link_type,
                confidence,
                evidence,
                json.dumps(modalities or []),
                now,
            ),
        )
        conn.commit()
        conn.close()
        return link_id

    def get_entity_multimodal_links(self, entity_id: str) -> list[dict]:
        """获取实体的多模态关联"""
        conn = self.get_conn()
        rows = conn.execute(
            """SELECT l.*, e1.name as entity_name, e2.name as linked_entity_name
               FROM multimodal_entity_links l
               JOIN entities e1 ON l.entity_id = e1.id
               JOIN entities e2 ON l.linked_entity_id = e2.id
               WHERE l.entity_id = ? OR l.linked_entity_id = ?""",
            (entity_id, entity_id),
        ).fetchall()
        conn.close()

        links = []
        for row in rows:
            data = dict(row)
            data["modalities"] = json.loads(data["modalities"]) if data["modalities"] else []
            links.append(data)
        return links

    def get_project_multimodal_stats(self, project_id: str) -> dict:
        """获取项目多模态统计信息"""
        conn = self.get_conn()

        stats = {
            "video_count": 0,
            "image_count": 0,
            "multimodal_entity_count": 0,
            "cross_modal_links": 0,
            "modality_distribution": {},
        }

        # 视频数量
        row = conn.execute(
            "SELECT COUNT(*) as count FROM videos WHERE project_id = ?", (project_id,)
        ).fetchone()
        stats["video_count"] = row["count"]

        # 图片数量
        row = conn.execute(
            "SELECT COUNT(*) as count FROM images WHERE project_id = ?", (project_id,)
        ).fetchone()
        stats["image_count"] = row["count"]

        # 多模态实体数量
        row = conn.execute(
            """SELECT COUNT(DISTINCT entity_id) as count
               FROM multimodal_mentions WHERE project_id = ?""",
            (project_id,),
        ).fetchone()
        stats["multimodal_entity_count"] = row["count"]

        # 跨模态关联数量
        row = conn.execute(
            """SELECT COUNT(*) as count FROM multimodal_entity_links
               WHERE entity_id IN (SELECT id FROM entities WHERE project_id = ?)""",
            (project_id,),
        ).fetchone()
        stats["cross_modal_links"] = row["count"]

        # 模态分布
        for modality in ["audio", "video", "image", "document"]:
            row = conn.execute(
                """SELECT COUNT(*) as count FROM multimodal_mentions
                   WHERE project_id = ? AND modality = ?""",
                (project_id, modality),
            ).fetchone()
            stats["modality_distribution"][modality] = row["count"]

        conn.close()
        return stats


# Singleton instance
_db_manager = None


def get_db_manager() -> DatabaseManager:
    global _db_manager
    if _db_manager is None:
        _db_manager = DatabaseManager()
    return _db_manager