Files
insightflow/backend/schema.sql
2026-02-23 00:15:32 +08:00

225 lines
8.5 KiB
SQL

-- InsightFlow - Database Schema (Phase 3 base; extended by the Phase 5 and Phase 7 sections below)
-- Supports knowledge growth and multi-file fusion
-- Projects table.
-- Root of the schema: the other tables in this file point back here through
-- their project_id column.
CREATE TABLE IF NOT EXISTS projects (
    id          TEXT,
    name        TEXT NOT NULL,
    description TEXT,
    -- Both timestamps are filled by the column default at insert time only;
    -- no trigger in this part of the file refreshes updated_at afterwards.
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT projects_pk PRIMARY KEY (id)
);
-- Files / transcripts table.
-- One row per uploaded file belonging to a project, together with its text.
CREATE TABLE IF NOT EXISTS transcripts (
    id         TEXT,
    project_id TEXT NOT NULL,
    filename   TEXT,
    full_text  TEXT,
    -- 'audio' or 'document'; the schema does not enforce the value set.
    type       TEXT DEFAULT 'audio',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT transcripts_pk PRIMARY KEY (id),
    CONSTRAINT transcripts_project_id_fk
        FOREIGN KEY (project_id) REFERENCES projects (id)
);
-- Global entity table (entities are shared across the files of a project).
CREATE TABLE IF NOT EXISTS entities (
    id             TEXT,
    project_id     TEXT NOT NULL,
    name           TEXT NOT NULL,
    -- Canonical name, used for alignment.
    canonical_name TEXT,
    type           TEXT,
    definition     TEXT,
    -- JSON array of aliases, e.g. ["alias1", "alias2"]. Stored as plain TEXT;
    -- the schema does not validate the JSON.
    aliases        TEXT,
    -- JSON array: embedding of the entity name + definition.
    embedding      TEXT,
    created_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT entities_pk PRIMARY KEY (id),
    CONSTRAINT entities_project_id_fk
        FOREIGN KEY (project_id) REFERENCES projects (id)
);
-- Entity mentions table: the specific places in a file where an entity occurs.
CREATE TABLE IF NOT EXISTS entity_mentions (
    id            TEXT,
    entity_id     TEXT NOT NULL,
    transcript_id TEXT NOT NULL,
    -- Position of the mention inside the transcript.
    -- NOTE(review): presumably offsets into transcripts.full_text; confirm
    -- the unit and whether end_pos is inclusive.
    start_pos     INTEGER,
    end_pos       INTEGER,
    text_snippet  TEXT,
    -- Defaults to 1.0 when the writer supplies no score.
    confidence    REAL DEFAULT 1.0,
    CONSTRAINT entity_mentions_pk PRIMARY KEY (id),
    CONSTRAINT entity_mentions_entity_id_fk
        FOREIGN KEY (entity_id) REFERENCES entities (id),
    CONSTRAINT entity_mentions_transcript_id_fk
        FOREIGN KEY (transcript_id) REFERENCES transcripts (id)
);
-- Entity relations table: a typed link from a source entity to a target
-- entity within a project, optionally tied to the transcript it came from.
--
-- NOTE: because of IF NOT EXISTS, a database created before the transcript_id
-- foreign key was added keeps its old table definition until it is migrated.
CREATE TABLE IF NOT EXISTS entity_relations (
    id               TEXT,
    project_id       TEXT NOT NULL,
    source_entity_id TEXT NOT NULL,
    target_entity_id TEXT NOT NULL,
    -- e.g. "belongs_to", "works_with", "depends_on"; free text, not enforced.
    relation_type    TEXT,
    -- Source text the relation was derived from.
    evidence         TEXT,
    -- Optional provenance pointer; NULL when no transcript is recorded.
    transcript_id    TEXT,
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT entity_relations_pk PRIMARY KEY (id),
    CONSTRAINT entity_relations_project_id_fk
        FOREIGN KEY (project_id) REFERENCES projects (id),
    CONSTRAINT entity_relations_source_entity_id_fk
        FOREIGN KEY (source_entity_id) REFERENCES entities (id),
    CONSTRAINT entity_relations_target_entity_id_fk
        FOREIGN KEY (target_entity_id) REFERENCES entities (id),
    -- transcript_id previously had no referential integrity at all, unlike
    -- entity_mentions.transcript_id. SET NULL keeps transcript deletion
    -- working: the relation survives and only loses its provenance pointer.
    CONSTRAINT entity_relations_transcript_id_fk
        FOREIGN KEY (transcript_id) REFERENCES transcripts (id) ON DELETE SET NULL
);
-- Glossary table: project-level hot words, used for ASR optimisation.
CREATE TABLE IF NOT EXISTS glossary (
    id            TEXT,
    project_id    TEXT NOT NULL,
    term          TEXT NOT NULL,
    -- Pronunciation hint, e.g. "K8s" -> "Kubernetes".
    pronunciation TEXT,
    -- Starts at 1 when the writer supplies no count.
    frequency     INTEGER DEFAULT 1,
    CONSTRAINT glossary_pk PRIMARY KEY (id),
    CONSTRAINT glossary_project_id_fk
        FOREIGN KEY (project_id) REFERENCES projects (id)
);
-- Phase 5: attribute templates table.
-- Per-project definitions of the custom attributes an entity can carry.
CREATE TABLE IF NOT EXISTS attribute_templates (
    id            TEXT,
    project_id    TEXT NOT NULL,
    name          TEXT NOT NULL,
    -- One of: text/number/date/select/multiselect/boolean (not enforced here).
    type          TEXT NOT NULL,
    description   TEXT,
    -- JSON array, used by the select/multiselect types.
    options       TEXT,
    -- Integer flag, defaults to 0.
    -- NOTE(review): presumably 0 = optional, 1 = required; confirm with writers.
    is_required   INTEGER DEFAULT 0,
    default_value TEXT,
    sort_order    INTEGER DEFAULT 0,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT attribute_templates_pk PRIMARY KEY (id),
    CONSTRAINT attribute_templates_project_id_fk
        FOREIGN KEY (project_id) REFERENCES projects (id)
);
-- Phase 5: entity attribute values table.
-- Holds the current value of each named attribute of an entity.
CREATE TABLE IF NOT EXISTS entity_attributes (
    id          TEXT,
    entity_id   TEXT NOT NULL,
    -- Optional link to the template; NULL for a value with no template.
    template_id TEXT,
    name        TEXT NOT NULL,
    -- One of: text/number/date/select/multiselect (not enforced here).
    type        TEXT NOT NULL,
    -- The actual value, always stored as TEXT whatever `type` says.
    value       TEXT,
    -- JSON array, used by select/multiselect.
    options     TEXT,
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT entity_attributes_pk PRIMARY KEY (id),
    -- Attribute values disappear together with their entity ...
    CONSTRAINT entity_attributes_entity_id_fk
        FOREIGN KEY (entity_id) REFERENCES entities (id) ON DELETE CASCADE,
    -- ... but outlive their template, merely losing the link to it.
    CONSTRAINT entity_attributes_template_id_fk
        FOREIGN KEY (template_id) REFERENCES attribute_templates (id) ON DELETE SET NULL,
    -- At most one value per attribute name on a given entity.
    CONSTRAINT entity_attributes_entity_id_name_uq UNIQUE (entity_id, name)
);
-- Phase 5: attribute change history table.
-- Append-style audit trail: one row per change of an entity attribute,
-- recording the old and new value, who changed it and why.
--
-- NOTE: because of IF NOT EXISTS, a database created with the earlier
-- definition keeps its old foreign-key actions until it is migrated.
CREATE TABLE IF NOT EXISTS attribute_history (
    id             TEXT,
    entity_id      TEXT NOT NULL,
    -- Optional link to the template the attribute was based on.
    template_id    TEXT,
    attribute_name TEXT NOT NULL,
    old_value      TEXT,
    new_value      TEXT,
    -- User ID, or a marker for the system.
    changed_by     TEXT,
    changed_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    change_reason  TEXT,
    CONSTRAINT attribute_history_pk PRIMARY KEY (id),
    -- History is removed together with the entity it describes.
    CONSTRAINT attribute_history_entity_id_fk
        FOREIGN KEY (entity_id) REFERENCES entities (id) ON DELETE CASCADE,
    -- Deleting a template must not erase the audit trail: entity_attributes
    -- keeps its rows on template deletion (ON DELETE SET NULL), so the history
    -- of those surviving values is kept the same way instead of cascading.
    CONSTRAINT attribute_history_template_id_fk
        FOREIGN KEY (template_id) REFERENCES attribute_templates (id) ON DELETE SET NULL
);
-- Indexes that speed up lookups on the core tables.
-- Every foreign-key child column below is indexed so that per-project /
-- per-entity / per-transcript lookups, and the child-table checks done when a
-- parent row is deleted, do not need a full table scan.
CREATE INDEX IF NOT EXISTS idx_entities_project ON entities(project_id);
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
CREATE INDEX IF NOT EXISTS idx_transcripts_project ON transcripts(project_id);
CREATE INDEX IF NOT EXISTS idx_mentions_entity ON entity_mentions(entity_id);
CREATE INDEX IF NOT EXISTS idx_mentions_transcript ON entity_mentions(transcript_id);
CREATE INDEX IF NOT EXISTS idx_relations_project ON entity_relations(project_id);
-- entity_relations references entities twice; without these two indexes,
-- finding the relations of one entity (or deleting an entity with foreign
-- keys enforced) scans the whole relations table.
CREATE INDEX IF NOT EXISTS idx_relations_source ON entity_relations(source_entity_id);
CREATE INDEX IF NOT EXISTS idx_relations_target ON entity_relations(target_entity_id);
CREATE INDEX IF NOT EXISTS idx_glossary_project ON glossary(project_id);
-- Phase 5: indexes for the attribute tables.
CREATE INDEX IF NOT EXISTS idx_attr_templates_project ON attribute_templates(project_id);
CREATE INDEX IF NOT EXISTS idx_entity_attributes_entity ON entity_attributes(entity_id);
CREATE INDEX IF NOT EXISTS idx_entity_attributes_template ON entity_attributes(template_id);
CREATE INDEX IF NOT EXISTS idx_attr_history_entity ON attribute_history(entity_id);
-- attribute_history.template_id is a foreign-key child column too: deleting a
-- template has to locate the history rows that reference it, which would
-- otherwise scan the whole history table.
CREATE INDEX IF NOT EXISTS idx_attr_history_template ON attribute_history(template_id);
-- Phase 7: workflow tables.
-- Workflow configuration table: one row per automation defined for a project,
-- including its schedule and running counters.
CREATE TABLE IF NOT EXISTS workflows (
    id            TEXT,
    name          TEXT NOT NULL,
    description   TEXT,
    -- auto_analyze, auto_align, auto_relation, scheduled_report, custom
    workflow_type TEXT NOT NULL,
    project_id    TEXT NOT NULL,
    -- active, paused, error, completed
    status        TEXT DEFAULT 'active',
    -- cron expression or interval minutes (interpreted per schedule_type)
    schedule      TEXT,
    -- manual, cron, interval
    schedule_type TEXT DEFAULT 'manual',
    -- JSON: workflow specific configuration
    config        TEXT,
    -- JSON array of webhook config IDs (no foreign key; not enforced)
    webhook_ids   TEXT,
    is_active     BOOLEAN DEFAULT 1,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    last_run_at   TIMESTAMP,
    next_run_at   TIMESTAMP,
    run_count     INTEGER DEFAULT 0,
    success_count INTEGER DEFAULT 0,
    fail_count    INTEGER DEFAULT 0,
    CONSTRAINT workflows_pk PRIMARY KEY (id),
    CONSTRAINT workflows_project_id_fk
        FOREIGN KEY (project_id) REFERENCES projects (id)
);
-- Workflow tasks table: the individual steps that make up a workflow.
CREATE TABLE IF NOT EXISTS workflow_tasks (
    id              TEXT,
    workflow_id     TEXT NOT NULL,
    name            TEXT NOT NULL,
    -- analyze, align, discover_relations, notify, custom
    task_type       TEXT NOT NULL,
    -- JSON: task specific configuration
    config          TEXT,
    task_order      INTEGER DEFAULT 0,
    -- JSON array of task IDs (no foreign key; not enforced)
    depends_on      TEXT,
    timeout_seconds INTEGER DEFAULT 300,
    retry_count     INTEGER DEFAULT 3,
    -- NOTE(review): unit is not stated in the schema; presumably seconds.
    retry_delay     INTEGER DEFAULT 5,
    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT workflow_tasks_pk PRIMARY KEY (id),
    -- Tasks are removed together with their workflow.
    CONSTRAINT workflow_tasks_workflow_id_fk
        FOREIGN KEY (workflow_id) REFERENCES workflows (id) ON DELETE CASCADE
);
-- Webhook configuration table: notification endpoints and their delivery
-- counters. It has no project_id column and no foreign keys.
CREATE TABLE IF NOT EXISTS webhook_configs (
    id            TEXT,
    name          TEXT NOT NULL,
    -- feishu, dingtalk, slack, custom
    webhook_type  TEXT NOT NULL,
    url           TEXT NOT NULL,
    -- for signature verification
    secret        TEXT,
    -- JSON: custom headers
    headers       TEXT,
    -- message template
    template      TEXT,
    is_active     BOOLEAN DEFAULT 1,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    last_used_at  TIMESTAMP,
    success_count INTEGER DEFAULT 0,
    fail_count    INTEGER DEFAULT 0,
    CONSTRAINT webhook_configs_pk PRIMARY KEY (id)
);
-- Workflow execution log table: one row per workflow run or per task run.
CREATE TABLE IF NOT EXISTS workflow_logs (
    id            TEXT,
    workflow_id   TEXT NOT NULL,
    -- NULL if workflow-level log
    task_id       TEXT,
    -- pending, running, success, failed, cancelled
    status        TEXT DEFAULT 'pending',
    start_time    TIMESTAMP,
    end_time      TIMESTAMP,
    duration_ms   INTEGER,
    -- JSON: input parameters
    input_data    TEXT,
    -- JSON: execution results
    output_data   TEXT,
    error_message TEXT,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT workflow_logs_pk PRIMARY KEY (id),
    -- Logs are removed together with their workflow ...
    CONSTRAINT workflow_logs_workflow_id_fk
        FOREIGN KEY (workflow_id) REFERENCES workflows (id) ON DELETE CASCADE,
    -- ... but survive the deletion of a single task, losing only the link.
    CONSTRAINT workflow_logs_task_id_fk
        FOREIGN KEY (task_id) REFERENCES workflow_tasks (id) ON DELETE SET NULL
);
-- Phase 7: indexes for the workflow tables.

-- workflows: lookup by owning project, by status and by type.
CREATE INDEX IF NOT EXISTS idx_workflows_project
    ON workflows (project_id);
CREATE INDEX IF NOT EXISTS idx_workflows_status
    ON workflows (status);
CREATE INDEX IF NOT EXISTS idx_workflows_type
    ON workflows (workflow_type);

-- workflow_tasks: fetch the tasks of one workflow.
CREATE INDEX IF NOT EXISTS idx_workflow_tasks_workflow
    ON workflow_tasks (workflow_id);

-- workflow_logs: lookup by workflow, task, status and creation time.
CREATE INDEX IF NOT EXISTS idx_workflow_logs_workflow
    ON workflow_logs (workflow_id);
CREATE INDEX IF NOT EXISTS idx_workflow_logs_task
    ON workflow_logs (task_id);
CREATE INDEX IF NOT EXISTS idx_workflow_logs_status
    ON workflow_logs (status);
CREATE INDEX IF NOT EXISTS idx_workflow_logs_created
    ON workflow_logs (created_at);