Phase 7 Task 6 & 8: 高级搜索与发现 + 性能优化与扩展
- 新增 search_manager.py 搜索管理模块
  - FullTextSearch: 全文搜索引擎 (FTS5)
  - SemanticSearch: 语义搜索引擎 (sentence-transformers)
  - EntityPathDiscovery: 实体关系路径发现 (BFS/DFS)
  - KnowledgeGapDetector: 知识缺口检测器
- 新增 performance_manager.py 性能管理模块
  - CacheManager: Redis 缓存层 (支持内存回退)
  - DatabaseSharding: 数据库分片管理
  - TaskQueue: 异步任务队列 (Celery + Redis)
  - PerformanceMonitor: 性能监控器
- 更新 schema.sql 添加新表
  - search_indexes, embeddings, fts_transcripts
  - cache_stats, task_queue, performance_metrics, shard_mappings
- 更新 main.py 添加 API 端点
  - 搜索: /search/fulltext, /search/semantic, /entities/{id}/paths
  - 性能: /cache/stats, /performance/metrics, /tasks, /health
- 更新 requirements.txt 添加依赖
  - sentence-transformers==2.5.1
  - redis==5.0.1
  - celery==5.3.6
- 创建测试脚本和文档
  - test_phase7_task6_8.py
  - docs/PHASE7_TASK6_8_SUMMARY.md
Phase 7 全部完成!
This commit is contained in:
@@ -725,3 +725,123 @@ CREATE INDEX IF NOT EXISTS idx_change_history_entity ON change_history(entity_ty
|
||||
-- Lookup indexes: change_history by session, team_members by project and by user.
CREATE INDEX IF NOT EXISTS idx_change_history_session
    ON change_history (session_id);

CREATE INDEX IF NOT EXISTS idx_team_members_project
    ON team_members (project_id);

CREATE INDEX IF NOT EXISTS idx_team_members_user
    ON team_members (user_id);
|
||||
|
||||
-- ============================================
|
||||
-- Phase 7 Task 6: 高级搜索与发现
|
||||
-- ============================================
|
||||
|
||||
-- Search index table: indexed text for a piece of project content,
-- at most one row per (project_id, content_type, content_id).
CREATE TABLE IF NOT EXISTS search_indexes (
    -- NOT NULL is spelled out because SQLite allows NULL in a non-INTEGER
    -- PRIMARY KEY column unless it is explicitly forbidden.
    id           TEXT NOT NULL PRIMARY KEY,
    project_id   TEXT NOT NULL,
    content_type TEXT NOT NULL,  -- transcript, entity, relation
    content_id   TEXT NOT NULL,
    content_text TEXT NOT NULL,
    tokens       TEXT,           -- JSON array of tokens
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- Only defaulted at insert time; this definition does not refresh it on UPDATE.
    updated_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (project_id) REFERENCES projects(id),
    UNIQUE (project_id, content_type, content_id)
);
|
||||
|
||||
-- Text embedding table: the embedded text and its vector for a piece of project
-- content, at most one row per (project_id, content_type, content_id).
CREATE TABLE IF NOT EXISTS embeddings (
    -- NOT NULL is spelled out because SQLite allows NULL in a non-INTEGER
    -- PRIMARY KEY column unless it is explicitly forbidden.
    id           TEXT NOT NULL PRIMARY KEY,
    project_id   TEXT NOT NULL,
    content_type TEXT NOT NULL,  -- transcript, entity
    content_id   TEXT NOT NULL,
    text         TEXT NOT NULL,
    embedding    TEXT NOT NULL,  -- JSON array of floats
    model        TEXT NOT NULL,  -- name of the model used
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (project_id) REFERENCES projects(id),
    -- NOTE(review): model is not part of this key, so a content item can hold
    -- only one embedding regardless of model — confirm that is intended.
    UNIQUE (project_id, content_type, content_id)
);
|
||||
|
||||
-- Full-text search virtual table (FTS5).
-- content_text is the only column added to the full-text index; the columns
-- marked UNINDEXED are stored with each row but are not searchable via MATCH.
CREATE VIRTUAL TABLE IF NOT EXISTS fts_transcripts
USING fts5(
    content_text,
    content_id   UNINDEXED,
    project_id   UNINDEXED,
    content_type UNINDEXED
);
|
||||
|
||||
-- Search-related indexes.
-- No standalone project_id index is created for search_indexes or embeddings:
-- each table's UNIQUE (project_id, content_type, content_id) constraint already
-- provides an automatic index whose leading column is project_id, so a separate
-- single-column index would only add write overhead.
CREATE INDEX IF NOT EXISTS idx_search_indexes_type
    ON search_indexes (content_type);
CREATE INDEX IF NOT EXISTS idx_search_indexes_content
    ON search_indexes (content_id);

CREATE INDEX IF NOT EXISTS idx_embeddings_type
    ON embeddings (content_type);
CREATE INDEX IF NOT EXISTS idx_embeddings_content
    ON embeddings (content_id);
CREATE INDEX IF NOT EXISTS idx_embeddings_model
    ON embeddings (model);
|
||||
|
||||
-- ============================================
|
||||
-- Phase 7 Task 8: 性能优化与扩展
|
||||
-- ============================================
|
||||
|
||||
-- Cache statistics table: counters for a cache backend on a given date,
-- at most one row per (stat_date, cache_type).
CREATE TABLE IF NOT EXISTS cache_stats (
    -- NOT NULL is spelled out because SQLite allows NULL in a non-INTEGER
    -- PRIMARY KEY column unless it is explicitly forbidden.
    id            TEXT NOT NULL PRIMARY KEY,
    stat_date     DATE NOT NULL,
    cache_type    TEXT NOT NULL,      -- redis, memory
    total_keys    INTEGER DEFAULT 0,
    memory_usage  INTEGER DEFAULT 0,  -- bytes
    hit_count     INTEGER DEFAULT 0,
    miss_count    INTEGER DEFAULT 0,
    evicted_count INTEGER DEFAULT 0,
    expired_count INTEGER DEFAULT 0,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- Only defaulted at insert time; this definition does not refresh it on UPDATE.
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (stat_date, cache_type)
);
|
||||
|
||||
-- Task queue table: one row per queued task, tied to a project.
CREATE TABLE IF NOT EXISTS task_queue (
    -- NOT NULL is spelled out because SQLite allows NULL in a non-INTEGER
    -- PRIMARY KEY column unless it is explicitly forbidden.
    id           TEXT NOT NULL PRIMARY KEY,
    task_type    TEXT NOT NULL,      -- audio_analysis, report_generation, entity_extraction
    status       TEXT NOT NULL,      -- pending, running, success, failure, retry, revoked
    project_id   TEXT NOT NULL,
    params       TEXT NOT NULL,      -- JSON
    result       TEXT,               -- JSON
    error        TEXT,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    started_at   TIMESTAMP,
    completed_at TIMESTAMP,
    retry_count  INTEGER DEFAULT 0,
    priority     INTEGER DEFAULT 5,  -- 0=high, 5=normal, 10=low
    worker_id    TEXT,               -- worker executing the task
    FOREIGN KEY (project_id) REFERENCES projects(id)
);
|
||||
|
||||
-- Performance metrics table: one timed measurement per row.
CREATE TABLE IF NOT EXISTS performance_metrics (
    -- NOT NULL is spelled out because SQLite allows NULL in a non-INTEGER
    -- PRIMARY KEY column unless it is explicitly forbidden.
    id          TEXT NOT NULL PRIMARY KEY,
    metric_type TEXT NOT NULL,  -- api_response, db_query, cache_operation
    endpoint    TEXT NOT NULL,
    duration_ms REAL NOT NULL,  -- milliseconds, per the column name
    status_code INTEGER,
    timestamp   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    metadata    TEXT            -- JSON: extra information
);
|
||||
|
||||
-- Database shard mapping table: assigns each project to exactly one shard.
CREATE TABLE IF NOT EXISTS shard_mappings (
    -- NOT NULL is spelled out because SQLite allows NULL in a non-INTEGER
    -- PRIMARY KEY column unless it is explicitly forbidden.
    id          TEXT NOT NULL PRIMARY KEY,
    -- UNIQUE: a project can appear in at most one mapping row.
    project_id  TEXT NOT NULL UNIQUE,
    shard_id    TEXT NOT NULL,
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    migrated_at TIMESTAMP,
    FOREIGN KEY (project_id) REFERENCES projects(id)
);
|
||||
|
||||
-- Performance-related indexes.
-- Two lookups need no explicit index because a UNIQUE constraint already
-- creates an automatic index that leads with the same column:
--   cache_stats(stat_date)      <- UNIQUE (stat_date, cache_type)
--   shard_mappings(project_id)  <- project_id ... UNIQUE
CREATE INDEX IF NOT EXISTS idx_task_queue_project
    ON task_queue (project_id);
CREATE INDEX IF NOT EXISTS idx_task_queue_status
    ON task_queue (status);
CREATE INDEX IF NOT EXISTS idx_task_queue_type
    ON task_queue (task_type);
CREATE INDEX IF NOT EXISTS idx_task_queue_created
    ON task_queue (created_at);

CREATE INDEX IF NOT EXISTS idx_metrics_type
    ON performance_metrics (metric_type);
CREATE INDEX IF NOT EXISTS idx_metrics_endpoint
    ON performance_metrics (endpoint);
CREATE INDEX IF NOT EXISTS idx_metrics_timestamp
    ON performance_metrics (timestamp);

CREATE INDEX IF NOT EXISTS idx_shard_mappings_shard
    ON shard_mappings (shard_id);
|
||||
|
||||
Reference in New Issue
Block a user