From 6318cd0af96b579946e0dad89c01455c4208c5f4 Mon Sep 17 00:00:00 2001 From: OpenClaw Bot Date: Fri, 20 Feb 2026 06:06:23 +0800 Subject: [PATCH] =?UTF-8?q?Phase=205:=20=E5=AE=8C=E6=88=90=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 export_manager.py 导出管理模块 - 知识图谱导出 SVG/PNG - 实体数据导出 Excel/CSV - 关系数据导出 CSV - 项目报告导出 PDF - 转录文本导出 Markdown - 项目完整数据导出 JSON - 前端添加导出面板和功能 - 更新依赖: pandas, openpyxl, reportlab, cairosvg --- STATUS.md | 16 +- backend/export_manager.py | 572 ++++++++++++++++++++++++++++++++++++++ backend/main.py | 412 +++++++++++++++++++++++++++ backend/requirements.txt | 6 + frontend/app.js | 216 ++++++++++++++ frontend/workbench.html | 144 ++++++++++ 6 files changed, 1365 insertions(+), 1 deletion(-) create mode 100644 backend/export_manager.py diff --git a/STATUS.md b/STATUS.md index 308cc28..1a8f9d0 100644 --- a/STATUS.md +++ b/STATUS.md @@ -121,11 +121,13 @@ Phase 5: 高级功能 - **进行中 🚧** - [x] 知识推理与问答增强 ✅ (2026-02-19 完成) - [x] 实体属性扩展 ✅ (2026-02-20 完成) - [x] 时间线视图 ✅ (2026-02-19 完成) -- [ ] 导出功能 +- [x] 导出功能 ✅ (2026-02-20 完成) - 知识图谱导出 PNG/SVG - 项目报告导出 PDF - 实体数据导出 Excel/CSV + - 关系数据导出 CSV - 转录文本导出 Markdown + - 项目完整数据导出 JSON - [ ] 协作功能 - 多用户支持 - 项目权限管理 @@ -147,6 +149,18 @@ Phase 5: 高级功能 - **进行中 🚧** ## 最近更新 +### 2026-02-20 (晚间) +- 完成 Phase 5 导出功能 + - 新增 export_manager.py 导出管理模块 + - 知识图谱导出 SVG/PNG (支持矢量图和图片格式) + - 实体数据导出 Excel/CSV (包含所有自定义属性) + - 关系数据导出 CSV + - 项目报告导出 PDF (包含统计、实体列表、关系列表) + - 转录文本导出 Markdown (带实体标注) + - 项目完整数据导出 JSON (备份/迁移用) + - 前端知识库面板添加导出入口 + - 新增依赖: pandas, openpyxl, reportlab, cairosvg + ### 2026-02-20 - 完成 Phase 5 实体属性扩展功能 - 数据库层: diff --git a/backend/export_manager.py b/backend/export_manager.py new file mode 100644 index 0000000..8dad828 --- /dev/null +++ b/backend/export_manager.py @@ -0,0 +1,572 @@ +""" +InsightFlow Export Module - Phase 5 +支持导出知识图谱、项目报告、实体数据和转录文本 +""" + +import os +import io +import json +import base64 +from datetime import datetime +from typing import List, Dict, Optional, Any +from dataclasses import dataclass + +try: + import pandas as pd + PANDAS_AVAILABLE = True +except ImportError: + PANDAS_AVAILABLE = False + +try: + from reportlab.lib import colors + from reportlab.lib.pagesizes import A4 + from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle + from reportlab.lib.units import inch + from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak + from reportlab.pdfbase import pdfmetrics + from reportlab.pdfbase.ttfonts import TTFont + REPORTLAB_AVAILABLE = True +except ImportError: + REPORTLAB_AVAILABLE = False + + +@dataclass +class ExportEntity: + id: str + name: str + type: str + definition: str + aliases: List[str] + mention_count: int + attributes: Dict[str, Any] + + +@dataclass +class ExportRelation: + id: str + source: str + target: str + relation_type: str + confidence: float + evidence: str + + +@dataclass +class ExportTranscript: + id: str + name: str + type: str # audio/document + content: str + segments: List[Dict] + entity_mentions: List[Dict] + + +class ExportManager: + """导出管理器 - 处理各种导出需求""" + + def __init__(self, db_manager=None): + self.db = db_manager + + def export_knowledge_graph_svg(self, project_id: str, entities: List[ExportEntity], + relations: List[ExportRelation]) -> str: + """ + 导出知识图谱为 SVG 格式 + + Returns: + SVG 字符串 + """ + # 计算布局参数 + width = 1200 + height = 800 + center_x = width / 2 + center_y = height / 2 + radius = 300 + + # 按类型分组实体 + entities_by_type = {} + for e in entities: + if e.type not in entities_by_type: + entities_by_type[e.type] = [] + entities_by_type[e.type].append(e) + + # 颜色映射 + type_colors = { + "PERSON": "#FF6B6B", + "ORGANIZATION": "#4ECDC4", + "LOCATION": "#45B7D1", + "PRODUCT": "#96CEB4", + "TECHNOLOGY": "#FFEAA7", + "EVENT": "#DDA0DD", + "CONCEPT": "#98D8C8", + "default": "#BDC3C7" + } + + # 计算实体位置 + entity_positions = {} + angle_step = 2 * 3.14159 / max(len(entities), 1) + + for i, entity in enumerate(entities): + angle = i * angle_step + x = center_x + radius * 0.8 * (i % 3 - 1) * 150 + (i // 3) * 50 + y = center_y + radius * 0.6 * ((i % 6) - 3) * 80 + entity_positions[entity.id] = (x, y) + + # 生成 SVG + svg_parts = [ + f'', + '', + ' ', + ' ', + ' ', + '', + f'', + f'知识图谱 - {project_id}', + ] + + # 绘制关系连线 + for rel in relations: + if rel.source in entity_positions and rel.target in entity_positions: + x1, y1 = entity_positions[rel.source] + x2, y2 = entity_positions[rel.target] + + # 计算箭头终点(避免覆盖节点) + dx = x2 - x1 + dy = y2 - y1 + dist = (dx**2 + dy**2) ** 0.5 + if dist > 0: + offset = 40 + x2 = x2 - dx * offset / dist + y2 = y2 - dy * offset / dist + + svg_parts.append( + f'' + ) + + # 关系标签 + mid_x = (x1 + x2) / 2 + mid_y = (y1 + y2) / 2 + svg_parts.append( + f'' + ) + svg_parts.append( + f'{rel.relation_type}' + ) + + # 绘制实体节点 + for entity in entities: + if entity.id in entity_positions: + x, y = entity_positions[entity.id] + color = type_colors.get(entity.type, type_colors["default"]) + + # 节点圆圈 + svg_parts.append( + f'' + ) + + # 实体名称 + svg_parts.append( + f'{entity.name[:8]}' + ) + + # 实体类型 + svg_parts.append( + f'{entity.type}' + ) + + # 图例 + legend_x = width - 150 + legend_y = 80 + svg_parts.append(f'') + svg_parts.append(f'实体类型') + + for i, (etype, color) in enumerate(type_colors.items()): + if etype != "default": + y_pos = legend_y + 25 + i * 20 + svg_parts.append(f'') + svg_parts.append(f'{etype}') + + svg_parts.append('') + return '\n'.join(svg_parts) + + def export_knowledge_graph_png(self, project_id: str, entities: List[ExportEntity], + relations: List[ExportRelation]) -> bytes: + """ + 导出知识图谱为 PNG 格式 + + Returns: + PNG 图像字节 + """ + try: + import cairosvg + svg_content = self.export_knowledge_graph_svg(project_id, entities, relations) + png_bytes = cairosvg.svg2png(bytestring=svg_content.encode('utf-8')) + return png_bytes + except ImportError: + # 如果没有 cairosvg,返回 SVG 的 base64 + svg_content = self.export_knowledge_graph_svg(project_id, entities, relations) + return base64.b64encode(svg_content.encode('utf-8')) + + def export_entities_excel(self, entities: List[ExportEntity]) -> bytes: + """ + 导出实体数据为 Excel 格式 + + Returns: + Excel 文件字节 + """ + if not PANDAS_AVAILABLE: + raise ImportError("pandas is required for Excel export") + + # 准备数据 + data = [] + for e in entities: + row = { + 'ID': e.id, + '名称': e.name, + '类型': e.type, + '定义': e.definition, + '别名': ', '.join(e.aliases), + '提及次数': e.mention_count + } + # 添加属性 + for attr_name, attr_value in e.attributes.items(): + row[f'属性:{attr_name}'] = attr_value + data.append(row) + + df = pd.DataFrame(data) + + # 写入 Excel + output = io.BytesIO() + with pd.ExcelWriter(output, engine='openpyxl') as writer: + df.to_excel(writer, sheet_name='实体列表', index=False) + + # 调整列宽 + worksheet = writer.sheets['实体列表'] + for column in worksheet.columns: + max_length = 0 + column_letter = column[0].column_letter + for cell in column: + try: + if len(str(cell.value)) > max_length: + max_length = len(str(cell.value)) + except: + pass + adjusted_width = min(max_length + 2, 50) + worksheet.column_dimensions[column_letter].width = adjusted_width + + return output.getvalue() + + def export_entities_csv(self, entities: List[ExportEntity]) -> str: + """ + 导出实体数据为 CSV 格式 + + Returns: + CSV 字符串 + """ + import csv + + output = io.StringIO() + + # 收集所有可能的属性列 + all_attrs = set() + for e in entities: + all_attrs.update(e.attributes.keys()) + + # 表头 + headers = ['ID', '名称', '类型', '定义', '别名', '提及次数'] + [f'属性:{a}' for a in sorted(all_attrs)] + + writer = csv.writer(output) + writer.writerow(headers) + + # 数据行 + for e in entities: + row = [e.id, e.name, e.type, e.definition, ', '.join(e.aliases), e.mention_count] + for attr in sorted(all_attrs): + row.append(e.attributes.get(attr, '')) + writer.writerow(row) + + return output.getvalue() + + def export_relations_csv(self, relations: List[ExportRelation]) -> str: + """ + 导出关系数据为 CSV 格式 + + Returns: + CSV 字符串 + """ + import csv + + output = io.StringIO() + writer = csv.writer(output) + writer.writerow(['ID', '源实体', '目标实体', '关系类型', '置信度', '证据']) + + for r in relations: + writer.writerow([r.id, r.source, r.target, r.relation_type, r.confidence, r.evidence]) + + return output.getvalue() + + def export_transcript_markdown(self, transcript: ExportTranscript, + entities_map: Dict[str, ExportEntity]) -> str: + """ + 导出转录文本为 Markdown 格式 + + Returns: + Markdown 字符串 + """ + lines = [ + f"# {transcript.name}", + "", + f"**类型**: {transcript.type}", + f"**ID**: {transcript.id}", + "", + "---", + "", + "## 内容", + "", + transcript.content, + "", + "---", + "", + ] + + if transcript.segments: + lines.extend([ + "## 分段详情", + "", + ]) + for seg in transcript.segments: + speaker = seg.get('speaker', 'Unknown') + start = seg.get('start', 0) + end = seg.get('end', 0) + text = seg.get('text', '') + lines.append(f"**[{start:.1f}s - {end:.1f}s] {speaker}**: {text}") + lines.append("") + + if transcript.entity_mentions: + lines.extend([ + "", + "## 实体提及", + "", + "| 实体 | 类型 | 位置 | 上下文 |", + "|------|------|------|--------|", + ]) + for mention in transcript.entity_mentions: + entity_id = mention.get('entity_id', '') + entity = entities_map.get(entity_id) + entity_name = entity.name if entity else mention.get('entity_name', 'Unknown') + entity_type = entity.type if entity else 'Unknown' + position = mention.get('position', '') + context = mention.get('context', '')[:50] + '...' if mention.get('context') else '' + lines.append(f"| {entity_name} | {entity_type} | {position} | {context} |") + + return '\n'.join(lines) + + def export_project_report_pdf(self, project_id: str, project_name: str, + entities: List[ExportEntity], + relations: List[ExportRelation], + transcripts: List[ExportTranscript], + summary: str = "") -> bytes: + """ + 导出项目报告为 PDF 格式 + + Returns: + PDF 文件字节 + """ + if not REPORTLAB_AVAILABLE: + raise ImportError("reportlab is required for PDF export") + + output = io.BytesIO() + doc = SimpleDocTemplate( + output, + pagesize=A4, + rightMargin=72, + leftMargin=72, + topMargin=72, + bottomMargin=18 + ) + + # 样式 + styles = getSampleStyleSheet() + title_style = ParagraphStyle( + 'CustomTitle', + parent=styles['Heading1'], + fontSize=24, + spaceAfter=30, + textColor=colors.HexColor('#2c3e50') + ) + heading_style = ParagraphStyle( + 'CustomHeading', + parent=styles['Heading2'], + fontSize=16, + spaceAfter=12, + textColor=colors.HexColor('#34495e') + ) + + story = [] + + # 标题页 + story.append(Paragraph(f"InsightFlow 项目报告", title_style)) + story.append(Paragraph(f"项目名称: {project_name}", styles['Heading2'])) + story.append(Paragraph(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M')}", styles['Normal'])) + story.append(Spacer(1, 0.3*inch)) + + # 统计概览 + story.append(Paragraph("项目概览", heading_style)) + stats_data = [ + ['指标', '数值'], + ['实体数量', str(len(entities))], + ['关系数量', str(len(relations))], + ['文档数量', str(len(transcripts))], + ] + + # 按类型统计实体 + type_counts = {} + for e in entities: + type_counts[e.type] = type_counts.get(e.type, 0) + 1 + + for etype, count in sorted(type_counts.items()): + stats_data.append([f'{etype} 实体', str(count)]) + + stats_table = Table(stats_data, colWidths=[3*inch, 2*inch]) + stats_table.setStyle(TableStyle([ + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#34495e')), + ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), + ('ALIGN', (0, 0), (-1, -1), 'CENTER'), + ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), + ('FONTSIZE', (0, 0), (-1, 0), 12), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#ecf0f1')), + ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#bdc3c7')) + ])) + story.append(stats_table) + story.append(Spacer(1, 0.3*inch)) + + # 项目总结 + if summary: + story.append(Paragraph("项目总结", heading_style)) + story.append(Paragraph(summary, styles['Normal'])) + story.append(Spacer(1, 0.3*inch)) + + # 实体列表 + if entities: + story.append(PageBreak()) + story.append(Paragraph("实体列表", heading_style)) + + entity_data = [['名称', '类型', '提及次数', '定义']] + for e in sorted(entities, key=lambda x: x.mention_count, reverse=True)[:50]: # 限制前50个 + entity_data.append([ + e.name, + e.type, + str(e.mention_count), + (e.definition[:100] + '...') if len(e.definition) > 100 else e.definition + ]) + + entity_table = Table(entity_data, colWidths=[1.5*inch, 1*inch, 1*inch, 2.5*inch]) + entity_table.setStyle(TableStyle([ + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#34495e')), + ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), + ('ALIGN', (0, 0), (-1, -1), 'LEFT'), + ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), + ('FONTSIZE', (0, 0), (-1, 0), 10), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#ecf0f1')), + ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#bdc3c7')), + ('VALIGN', (0, 0), (-1, -1), 'TOP'), + ])) + story.append(entity_table) + + # 关系列表 + if relations: + story.append(PageBreak()) + story.append(Paragraph("关系列表", heading_style)) + + relation_data = [['源实体', '关系', '目标实体', '置信度']] + for r in relations[:100]: # 限制前100个 + relation_data.append([ + r.source, + r.relation_type, + r.target, + f"{r.confidence:.2f}" + ]) + + relation_table = Table(relation_data, colWidths=[2*inch, 1.5*inch, 2*inch, 1*inch]) + relation_table.setStyle(TableStyle([ + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#34495e')), + ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), + ('ALIGN', (0, 0), (-1, -1), 'LEFT'), + ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), + ('FONTSIZE', (0, 0), (-1, 0), 10), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#ecf0f1')), + ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#bdc3c7')), + ])) + story.append(relation_table) + + doc.build(story) + return output.getvalue() + + def export_project_json(self, project_id: str, project_name: str, + entities: List[ExportEntity], + relations: List[ExportRelation], + transcripts: List[ExportTranscript]) -> str: + """ + 导出完整项目数据为 JSON 格式 + + Returns: + JSON 字符串 + """ + data = { + "project_id": project_id, + "project_name": project_name, + "export_time": datetime.now().isoformat(), + "entities": [ + { + "id": e.id, + "name": e.name, + "type": e.type, + "definition": e.definition, + "aliases": e.aliases, + "mention_count": e.mention_count, + "attributes": e.attributes + } + for e in entities + ], + "relations": [ + { + "id": r.id, + "source": r.source, + "target": r.target, + "relation_type": r.relation_type, + "confidence": r.confidence, + "evidence": r.evidence + } + for r in relations + ], + "transcripts": [ + { + "id": t.id, + "name": t.name, + "type": t.type, + "content": t.content, + "segments": t.segments + } + for t in transcripts + ] + } + + return json.dumps(data, ensure_ascii=False, indent=2) + + +# 全局导出管理器实例 +_export_manager = None + +def get_export_manager(db_manager=None): + """获取导出管理器实例""" + global _export_manager + if _export_manager is None: + _export_manager = ExportManager(db_manager) + return _export_manager \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index a7268e2..e27401d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -11,6 +11,7 @@ import json import httpx import uuid import re +import io from fastapi import FastAPI, File, UploadFile, HTTPException, Form from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -67,6 +68,12 @@ try: except ImportError: REASONER_AVAILABLE = False +try: + from export_manager import get_export_manager, ExportEntity, ExportRelation, ExportTranscript + EXPORT_AVAILABLE = True +except ImportError: + EXPORT_AVAILABLE = False + app = FastAPI(title="InsightFlow", version="0.3.0") app.add_middleware( @@ -1911,6 +1918,411 @@ async def search_entities_by_attributes_endpoint( ] +# ==================== 导出功能 API ==================== + +from fastapi.responses import StreamingResponse, FileResponse + +@app.get("/api/v1/projects/{project_id}/export/graph-svg") +async def export_graph_svg_endpoint(project_id: str): + """导出知识图谱为 SVG""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取项目数据 + entities_data = db.get_project_entities(project_id) + relations_data = db.get_project_relations(project_id) + + # 转换为导出格式 + entities = [] + for e in entities_data: + attrs = db.get_entity_attributes(e.id) + entities.append(ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={a.template_name: a.value for a in attrs} + )) + + relations = [] + for r in relations_data: + relations.append(ExportRelation( + id=r.id, + source=r.source_name, + target=r.target_name, + relation_type=r.relation_type, + confidence=r.confidence, + evidence=r.evidence or "" + )) + + export_mgr = get_export_manager() + svg_content = export_mgr.export_knowledge_graph_svg(project_id, entities, relations) + + return StreamingResponse( + io.BytesIO(svg_content.encode('utf-8')), + media_type="image/svg+xml", + headers={"Content-Disposition": f"attachment; filename=insightflow-graph-{project_id}.svg"} + ) + + +@app.get("/api/v1/projects/{project_id}/export/graph-png") +async def export_graph_png_endpoint(project_id: str): + """导出知识图谱为 PNG""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取项目数据 + entities_data = db.get_project_entities(project_id) + relations_data = db.get_project_relations(project_id) + + # 转换为导出格式 + entities = [] + for e in entities_data: + attrs = db.get_entity_attributes(e.id) + entities.append(ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={a.template_name: a.value for a in attrs} + )) + + relations = [] + for r in relations_data: + relations.append(ExportRelation( + id=r.id, + source=r.source_name, + target=r.target_name, + relation_type=r.relation_type, + confidence=r.confidence, + evidence=r.evidence or "" + )) + + export_mgr = get_export_manager() + png_bytes = export_mgr.export_knowledge_graph_png(project_id, entities, relations) + + return StreamingResponse( + io.BytesIO(png_bytes), + media_type="image/png", + headers={"Content-Disposition": f"attachment; filename=insightflow-graph-{project_id}.png"} + ) + + +@app.get("/api/v1/projects/{project_id}/export/entities-excel") +async def export_entities_excel_endpoint(project_id: str): + """导出实体数据为 Excel""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取实体数据 + entities_data = db.get_project_entities(project_id) + + entities = [] + for e in entities_data: + attrs = db.get_entity_attributes(e.id) + entities.append(ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={a.template_name: a.value for a in attrs} + )) + + export_mgr = get_export_manager() + excel_bytes = export_mgr.export_entities_excel(entities) + + return StreamingResponse( + io.BytesIO(excel_bytes), + media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + headers={"Content-Disposition": f"attachment; filename=insightflow-entities-{project_id}.xlsx"} + ) + + +@app.get("/api/v1/projects/{project_id}/export/entities-csv") +async def export_entities_csv_endpoint(project_id: str): + """导出实体数据为 CSV""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取实体数据 + entities_data = db.get_project_entities(project_id) + + entities = [] + for e in entities_data: + attrs = db.get_entity_attributes(e.id) + entities.append(ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={a.template_name: a.value for a in attrs} + )) + + export_mgr = get_export_manager() + csv_content = export_mgr.export_entities_csv(entities) + + return StreamingResponse( + io.BytesIO(csv_content.encode('utf-8')), + media_type="text/csv", + headers={"Content-Disposition": f"attachment; filename=insightflow-entities-{project_id}.csv"} + ) + + +@app.get("/api/v1/projects/{project_id}/export/relations-csv") +async def export_relations_csv_endpoint(project_id: str): + """导出关系数据为 CSV""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取关系数据 + relations_data = db.get_project_relations(project_id) + + relations = [] + for r in relations_data: + relations.append(ExportRelation( + id=r.id, + source=r.source_name, + target=r.target_name, + relation_type=r.relation_type, + confidence=r.confidence, + evidence=r.evidence or "" + )) + + export_mgr = get_export_manager() + csv_content = export_mgr.export_relations_csv(relations) + + return StreamingResponse( + io.BytesIO(csv_content.encode('utf-8')), + media_type="text/csv", + headers={"Content-Disposition": f"attachment; filename=insightflow-relations-{project_id}.csv"} + ) + + +@app.get("/api/v1/projects/{project_id}/export/report-pdf") +async def export_report_pdf_endpoint(project_id: str): + """导出项目报告为 PDF""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取项目数据 + entities_data = db.get_project_entities(project_id) + relations_data = db.get_project_relations(project_id) + transcripts_data = db.get_project_transcripts(project_id) + + # 转换为导出格式 + entities = [] + for e in entities_data: + attrs = db.get_entity_attributes(e.id) + entities.append(ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={a.template_name: a.value for a in attrs} + )) + + relations = [] + for r in relations_data: + relations.append(ExportRelation( + id=r.id, + source=r.source_name, + target=r.target_name, + relation_type=r.relation_type, + confidence=r.confidence, + evidence=r.evidence or "" + )) + + transcripts = [] + for t in transcripts_data: + segments = json.loads(t.segments) if t.segments else [] + transcripts.append(ExportTranscript( + id=t.id, + name=t.name, + type=t.type, + content=t.full_text or "", + segments=segments, + entity_mentions=[] + )) + + # 获取项目总结 + summary = "" + if REASONER_AVAILABLE: + try: + reasoner = get_knowledge_reasoner() + summary_result = reasoner.generate_project_summary(project_id, db) + summary = summary_result.get("summary", "") + except: + pass + + export_mgr = get_export_manager() + pdf_bytes = export_mgr.export_project_report_pdf( + project_id, project.name, entities, relations, transcripts, summary + ) + + return StreamingResponse( + io.BytesIO(pdf_bytes), + media_type="application/pdf", + headers={"Content-Disposition": f"attachment; filename=insightflow-report-{project_id}.pdf"} + ) + + +@app.get("/api/v1/projects/{project_id}/export/project-json") +async def export_project_json_endpoint(project_id: str): + """导出完整项目数据为 JSON""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取项目数据 + entities_data = db.get_project_entities(project_id) + relations_data = db.get_project_relations(project_id) + transcripts_data = db.get_project_transcripts(project_id) + + # 转换为导出格式 + entities = [] + for e in entities_data: + attrs = db.get_entity_attributes(e.id) + entities.append(ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={a.template_name: a.value for a in attrs} + )) + + relations = [] + for r in relations_data: + relations.append(ExportRelation( + id=r.id, + source=r.source_name, + target=r.target_name, + relation_type=r.relation_type, + confidence=r.confidence, + evidence=r.evidence or "" + )) + + transcripts = [] + for t in transcripts_data: + segments = json.loads(t.segments) if t.segments else [] + transcripts.append(ExportTranscript( + id=t.id, + name=t.name, + type=t.type, + content=t.full_text or "", + segments=segments, + entity_mentions=[] + )) + + export_mgr = get_export_manager() + json_content = export_mgr.export_project_json( + project_id, project.name, entities, relations, transcripts + ) + + return StreamingResponse( + io.BytesIO(json_content.encode('utf-8')), + media_type="application/json", + headers={"Content-Disposition": f"attachment; filename=insightflow-project-{project_id}.json"} + ) + + +@app.get("/api/v1/transcripts/{transcript_id}/export/markdown") +async def export_transcript_markdown_endpoint(transcript_id: str): + """导出转录文本为 Markdown""" + if not DB_AVAILABLE or not EXPORT_AVAILABLE: + raise HTTPException(status_code=500, detail="Export functionality not available") + + db = get_db_manager() + transcript = db.get_transcript(transcript_id) + if not transcript: + raise HTTPException(status_code=404, detail="Transcript not found") + + # 获取实体提及 + mentions = db.get_transcript_entity_mentions(transcript_id) + + # 获取项目实体用于映射 + entities_data = db.get_project_entities(transcript.project_id) + entities_map = {e.id: ExportEntity( + id=e.id, + name=e.name, + type=e.type, + definition=e.definition or "", + aliases=json.loads(e.aliases) if e.aliases else [], + mention_count=e.mention_count, + attributes={} + ) for e in entities_data} + + segments = json.loads(transcript.segments) if transcript.segments else [] + + export_transcript = ExportTranscript( + id=transcript.id, + name=transcript.name, + type=transcript.type, + content=transcript.full_text or "", + segments=segments, + entity_mentions=[{ + "entity_id": m.entity_id, + "entity_name": m.entity_name, + "position": m.position, + "context": m.context + } for m in mentions] + ) + + export_mgr = get_export_manager() + markdown_content = export_mgr.export_transcript_markdown(export_transcript, entities_map) + + return StreamingResponse( + io.BytesIO(markdown_content.encode('utf-8')), + media_type="text/markdown", + headers={"Content-Disposition": f"attachment; filename=insightflow-transcript-{transcript_id}.md"} + ) + + # Serve frontend - MUST be last to not override API routes app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend") diff --git a/backend/requirements.txt b/backend/requirements.txt index ecec951..b6f4251 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -21,3 +21,9 @@ oss2==2.18.5 # Utilities python-dotenv==1.0.0 + +# Export functionality +pandas==2.2.0 +openpyxl==3.1.2 +reportlab==4.0.9 +cairosvg==2.7.1 diff --git a/frontend/app.js b/frontend/app.js index 7be7949..ec51774 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -1809,3 +1809,219 @@ window.searchByAttributes = async function() { alert('搜索失败'); } }; + +// ==================== Export Functions ==================== + +// Show export panel +window.showExportPanel = function() { + const modal = document.getElementById('exportPanelModal'); + if (modal) { + modal.style.display = 'flex'; + + // Show transcript export section if a transcript is selected + const transcriptSection = document.getElementById('transcriptExportSection'); + if (transcriptSection && currentData && currentData.transcript_id !== 'project_view') { + transcriptSection.style.display = 'block'; + } else if (transcriptSection) { + transcriptSection.style.display = 'none'; + } + } +}; + +// Hide export panel +window.hideExportPanel = function() { + const modal = document.getElementById('exportPanelModal'); + if (modal) { + modal.style.display = 'none'; + } +}; + +// Helper function to download file +function downloadFile(url, filename) { + const link = document.createElement('a'); + link.href = url; + link.download = filename; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); +} + +// Export knowledge graph as SVG +window.exportGraph = async function(format) { + if (!currentProject) return; + + try { + const endpoint = format === 'svg' ? 'graph-svg' : 'graph-png'; + const mimeType = format === 'svg' ? 'image/svg+xml' : 'image/png'; + const ext = format === 'svg' ? 'svg' : 'png'; + + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/export/${endpoint}`); + + if (!res.ok) throw new Error(`Export ${format} failed`); + + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + downloadFile(url, `insightflow-graph-${currentProject.id}.${ext}`); + URL.revokeObjectURL(url); + + showNotification(`图谱已导出为 ${format.toUpperCase()}`, 'success'); + } catch (err) { + console.error(`Export ${format} failed:`, err); + alert(`导出失败: ${err.message}`); + } +}; + +// Export entities +window.exportEntities = async function(format) { + if (!currentProject) return; + + try { + const endpoint = format === 'excel' ? 'entities-excel' : 'entities-csv'; + const mimeType = format === 'excel' ? 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' : 'text/csv'; + const ext = format === 'excel' ? 'xlsx' : 'csv'; + + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/export/${endpoint}`); + + if (!res.ok) throw new Error(`Export ${format} failed`); + + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + downloadFile(url, `insightflow-entities-${currentProject.id}.${ext}`); + URL.revokeObjectURL(url); + + showNotification(`实体数据已导出为 ${format.toUpperCase()}`, 'success'); + } catch (err) { + console.error(`Export ${format} failed:`, err); + alert(`导出失败: ${err.message}`); + } +}; + +// Export relations +window.exportRelations = async function(format) { + if (!currentProject) return; + + try { + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/export/relations-csv`); + + if (!res.ok) throw new Error('Export relations failed'); + + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + downloadFile(url, `insightflow-relations-${currentProject.id}.csv`); + URL.revokeObjectURL(url); + + showNotification('关系数据已导出为 CSV', 'success'); + } catch (err) { + console.error('Export relations failed:', err); + alert(`导出失败: ${err.message}`); + } +}; + +// Export project report as PDF +window.exportReport = async function(format) { + if (!currentProject) return; + + try { + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/export/report-pdf`); + + if (!res.ok) throw new Error('Export PDF failed'); + + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + downloadFile(url, `insightflow-report-${currentProject.id}.pdf`); + URL.revokeObjectURL(url); + + showNotification('项目报告已导出为 PDF', 'success'); + } catch (err) { + console.error('Export PDF failed:', err); + alert(`导出失败: ${err.message}`); + } +}; + +// Export project as JSON +window.exportProject = async function(format) { + if (!currentProject) return; + + try { + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/export/project-json`); + + if (!res.ok) throw new Error('Export JSON failed'); + + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + downloadFile(url, `insightflow-project-${currentProject.id}.json`); + URL.revokeObjectURL(url); + + showNotification('项目数据已导出为 JSON', 'success'); + } catch (err) { + console.error('Export JSON failed:', err); + alert(`导出失败: ${err.message}`); + } +}; + +// Export transcript as Markdown +window.exportTranscript = async function(format) { + if (!currentProject || !currentData || currentData.transcript_id === 'project_view') { + alert('请先选择一个转录文件'); + return; + } + + try { + const res = await fetch(`${API_BASE}/transcripts/${currentData.transcript_id}/export/markdown`); + + if (!res.ok) throw new Error('Export Markdown failed'); + + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + downloadFile(url, `insightflow-transcript-${currentData.transcript_id}.md`); + URL.revokeObjectURL(url); + + showNotification('转录文本已导出为 Markdown', 'success'); + } catch (err) { + console.error('Export Markdown failed:', err); + alert(`导出失败: ${err.message}`); + } +}; + +// Show notification +function showNotification(message, type = 'info') { + // Create notification element + const notification = document.createElement('div'); + notification.style.cssText = ` + position: fixed; + top: 20px; + right: 20px; + background: ${type === 'success' ? 'rgba(0, 212, 255, 0.9)' : '#333'}; + color: ${type === 'success' ? '#000' : '#fff'}; + padding: 12px 20px; + border-radius: 8px; + z-index: 10000; + font-size: 0.9rem; + animation: slideIn 0.3s ease; + `; + notification.textContent = message; + + document.body.appendChild(notification); + + // Remove after 3 seconds + setTimeout(() => { + notification.style.animation = 'slideOut 0.3s ease'; + setTimeout(() => { + document.body.removeChild(notification); + }, 300); + }, 3000); +} + +// Add animation styles +const style = document.createElement('style'); +style.textContent = ` + @keyframes slideIn { + from { transform: translateX(100%); opacity: 0; } + to { transform: translateX(0); opacity: 1; } + } + @keyframes slideOut { + from { transform: translateX(0); opacity: 1; } + to { transform: translateX(100%); opacity: 0; } + } +`; +document.head.appendChild(style); diff --git a/frontend/workbench.html b/frontend/workbench.html index 04a25d9..06b614b 100644 --- a/frontend/workbench.html +++ b/frontend/workbench.html @@ -1406,6 +1406,71 @@ font-size: 0.85rem; } + /* Export Panel Styles */ + .export-section { + background: #141414; + border-radius: 8px; + padding: 16px; + } + + .export-options { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); + gap: 12px; + } + + .export-btn { + background: #1a1a1a; + border: 1px solid #333; + border-radius: 8px; + padding: 16px 12px; + cursor: pointer; + transition: all 0.2s; + display: flex; + flex-direction: column; + align-items: center; + text-align: center; + gap: 8px; + } + + .export-btn:hover { + border-color: #00d4ff; + background: #1f1f1f; + transform: translateY(-2px); + } + + .export-icon { + font-size: 1.5rem; + } + + .export-label { + color: #e0e0e0; + font-size: 0.9rem; + font-weight: 500; + } + + .export-desc { + color: #666; + font-size: 0.75rem; + } + + .export-loading { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + color: #00d4ff; + padding: 20px; + } + + .export-success { + background: rgba(0, 212, 255, 0.1); + border: 1px solid #00d4ff; + border-radius: 8px; + padding: 12px; + color: #00d4ff; + text-align: center; + } @@ -1565,6 +1630,10 @@
0
术语
+
+
📥
+
导出
+
@@ -1957,6 +2026,81 @@
+ + +
✏️ 编辑实体