Phase 5: 完成导出功能

- 新增 export_manager.py 导出管理模块
- 知识图谱导出 SVG/PNG
- 实体数据导出 Excel/CSV
- 关系数据导出 CSV
- 项目报告导出 PDF
- 转录文本导出 Markdown
- 项目完整数据导出 JSON
- 前端添加导出面板和功能
- 更新依赖: pandas, openpyxl, reportlab, cairosvg
This commit is contained in:
OpenClaw Bot
2026-02-20 06:06:23 +08:00
parent 2470064f65
commit 6318cd0af9
6 changed files with 1365 additions and 1 deletions

572
backend/export_manager.py Normal file
View File

@@ -0,0 +1,572 @@
"""
InsightFlow Export Module - Phase 5
支持导出知识图谱、项目报告、实体数据和转录文本
"""
import base64
import csv
import io
import json
import math
import os
from dataclasses import dataclass
from datetime import datetime
from typing import List, Dict, Optional, Any
from xml.sax.saxutils import escape

try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False

try:
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont
    from reportlab.pdfbase.ttfonts import TTFont
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False
@dataclass
class ExportEntity:
    """Export-ready record describing a single knowledge-graph entity."""

    # Identifier; the SVG export keys node positions by this value and
    # matches relation endpoints against it.
    id: str
    # Display name (the SVG export shows only its first 8 characters).
    name: str
    # Category label; used to pick the node colour in the SVG export.
    type: str
    # Free-text description of the entity.
    definition: str
    # Alternative names; the tabular exports join them with ", ".
    aliases: List[str]
    # Number of mentions; the PDF report sorts entities by it, descending.
    mention_count: int
    # Extra key/value pairs; each key becomes its own column in CSV/Excel.
    attributes: Dict[str, Any]
@dataclass
class ExportRelation:
    """Export-ready record describing a directed relation between two entities."""

    # Identifier of the relation itself.
    id: str
    # Start of the edge; the SVG export looks this up among entity ids.
    source: str
    # End of the edge (arrow head side); also looked up among entity ids.
    target: str
    # Label drawn on the edge and listed in the CSV/PDF exports.
    relation_type: str
    # Numeric score; the PDF report prints it with two decimals.
    confidence: float
    # Supporting text; written to the CSV and JSON exports.
    evidence: str
@dataclass
class ExportTranscript:
    """Export-ready record describing one transcript or document."""

    # Identifier of the transcript.
    id: str
    # Title; used as the top-level heading of the Markdown export.
    name: str
    type: str  # audio/document
    # Full text body.
    content: str
    # Per-segment dicts; the Markdown export reads the keys
    # 'speaker', 'start', 'end' and 'text'.
    segments: List[Dict]
    # Per-mention dicts; the Markdown export reads the keys
    # 'entity_id', 'entity_name', 'position' and 'context'.
    entity_mentions: List[Dict]
class ExportManager:
    """Export manager: renders project data into downloadable formats.

    Supported outputs are SVG/PNG (knowledge graph), Excel/CSV (entities),
    CSV (relations), Markdown (transcripts), PDF (project report) and JSON
    (full project dump). Every export method works purely on the objects
    passed in; ``self.db`` is stored but not read by any method of this class.
    """

    # Canvas geometry of the SVG knowledge graph.
    _SVG_WIDTH = 1200
    _SVG_HEIGHT = 800
    _SVG_LAYOUT_RADIUS = 300   # radius of the circle the nodes are placed on
    _SVG_NODE_RADIUS = 35
    _SVG_ARROW_OFFSET = 40     # pulls the arrow head back so it is not hidden by the node

    # Node colour per entity type; "default" is the fallback for unknown types.
    _TYPE_COLORS = {
        "PERSON": "#FF6B6B",
        "ORGANIZATION": "#4ECDC4",
        "LOCATION": "#45B7D1",
        "PRODUCT": "#96CEB4",
        "TECHNOLOGY": "#FFEAA7",
        "EVENT": "#DDA0DD",
        "CONCEPT": "#98D8C8",
        "default": "#BDC3C7"
    }

    # Built-in reportlab CID font with Simplified Chinese glyphs. The standard
    # Helvetica faces cannot draw the Chinese labels used in the report.
    _PDF_FONT = "STSong-Light"

    def __init__(self, db_manager=None):
        """Store the optional database manager for later use."""
        self.db = db_manager

    # ------------------------------------------------------------------
    # Knowledge graph
    # ------------------------------------------------------------------
    def export_knowledge_graph_svg(self, project_id: str, entities: List["ExportEntity"],
                                   relations: List["ExportRelation"]) -> str:
        """Export the knowledge graph as an SVG document.

        Entities are placed evenly on a circle around the canvas centre (a
        single entity is placed at the centre). A relation is drawn only when
        both its ``source`` and ``target`` match the ``id`` of a given entity.
        All user-supplied text is XML-escaped so the result stays well-formed.

        Args:
            project_id: Shown in the title line of the graph.
            entities: Nodes to draw.
            relations: Directed edges to draw.

        Returns:
            The SVG markup as a string.
        """
        width = self._SVG_WIDTH
        height = self._SVG_HEIGHT
        center_x = width / 2
        center_y = height / 2
        type_colors = self._TYPE_COLORS
        node_r = self._SVG_NODE_RADIUS

        # Circular layout: keeps every node inside the canvas regardless of
        # how many entities there are.
        layout_radius = self._SVG_LAYOUT_RADIUS if len(entities) > 1 else 0
        angle_step = 2 * math.pi / max(len(entities), 1)
        entity_positions = {}
        for i, entity in enumerate(entities):
            angle = i * angle_step
            entity_positions[entity.id] = (
                round(center_x + layout_radius * math.cos(angle), 2),
                round(center_y + layout_radius * math.sin(angle), 2),
            )

        svg_parts = [
            f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" '
            f'viewBox="0 0 {width} {height}">',
            '<defs>',
            '  <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">',
            '    <polygon points="0 0, 10 3.5, 0 7" fill="#7f8c8d"/>',
            '  </marker>',
            '</defs>',
            f'<rect width="{width}" height="{height}" fill="#f8f9fa"/>',
            f'<text x="{center_x}" y="30" text-anchor="middle" font-size="20" font-weight="bold" '
            f'fill="#2c3e50">知识图谱 - {escape(str(project_id))}</text>',
        ]

        # Edges first, so the nodes are painted on top of them.
        for rel in relations:
            if rel.source not in entity_positions or rel.target not in entity_positions:
                continue
            x1, y1 = entity_positions[rel.source]
            x2, y2 = entity_positions[rel.target]

            # Shorten the line so the arrow head stops at the node border.
            dx = x2 - x1
            dy = y2 - y1
            dist = math.hypot(dx, dy)
            if dist > 0:
                x2 = round(x2 - dx * self._SVG_ARROW_OFFSET / dist, 2)
                y2 = round(y2 - dy * self._SVG_ARROW_OFFSET / dist, 2)

            svg_parts.append(
                f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" '
                f'stroke="#7f8c8d" stroke-width="2" marker-end="url(#arrowhead)" opacity="0.6"/>'
            )

            # Relation label, centred on the (shortened) edge.
            mid_x = round((x1 + x2) / 2, 2)
            mid_y = round((y1 + y2) / 2, 2)
            svg_parts.append(
                f'<rect x="{round(mid_x - 30, 2)}" y="{round(mid_y - 10, 2)}" width="60" height="20" '
                f'fill="white" stroke="#bdc3c7" rx="3"/>'
            )
            svg_parts.append(
                f'<text x="{mid_x}" y="{round(mid_y + 5, 2)}" text-anchor="middle" '
                f'font-size="10" fill="#2c3e50">{escape(str(rel.relation_type))}</text>'
            )

        # Nodes: circle, truncated name inside, type label underneath.
        for entity in entities:
            x, y = entity_positions[entity.id]
            color = type_colors.get(entity.type, type_colors["default"])
            # Truncate BEFORE escaping so an entity reference is never cut in half.
            label = escape(str(entity.name or "")[:8])
            svg_parts.append(
                f'<circle cx="{x}" cy="{y}" r="{node_r}" fill="{color}" stroke="white" stroke-width="3"/>'
            )
            svg_parts.append(
                f'<text x="{x}" y="{round(y + 5, 2)}" text-anchor="middle" font-size="12" '
                f'font-weight="bold" fill="white">{label}</text>'
            )
            svg_parts.append(
                f'<text x="{x}" y="{round(y + 55, 2)}" text-anchor="middle" font-size="10" '
                f'fill="#7f8c8d">{escape(str(entity.type))}</text>'
            )

        # Legend in the top-right corner.
        legend_x = width - 150
        legend_y = 80
        svg_parts.append(
            f'<rect x="{legend_x-10}" y="{legend_y-20}" width="140" '
            f'height="{len(type_colors)*25+10}" fill="white" stroke="#bdc3c7" rx="5"/>'
        )
        svg_parts.append(
            f'<text x="{legend_x}" y="{legend_y}" font-size="12" font-weight="bold" '
            f'fill="#2c3e50">实体类型</text>'
        )
        for i, (etype, color) in enumerate(type_colors.items()):
            if etype == "default":
                continue
            y_pos = legend_y + 25 + i * 20
            svg_parts.append(f'<circle cx="{legend_x+10}" cy="{y_pos}" r="8" fill="{color}"/>')
            svg_parts.append(
                f'<text x="{legend_x+25}" y="{y_pos+4}" font-size="10" fill="#2c3e50">{etype}</text>'
            )

        svg_parts.append('</svg>')
        return '\n'.join(svg_parts)

    def export_knowledge_graph_png(self, project_id: str, entities: List["ExportEntity"],
                                   relations: List["ExportRelation"]) -> bytes:
        """Export the knowledge graph as a PNG image.

        Returns:
            PNG bytes when ``cairosvg`` can be imported. Otherwise, as a
            best-effort fallback, the base64-encoded SVG markup (NOT a PNG);
            callers that need a real image must have ``cairosvg`` installed.
        """
        svg_bytes = self.export_knowledge_graph_svg(project_id, entities, relations).encode('utf-8')
        try:
            import cairosvg
        except ImportError:
            # No rasteriser available: hand back the SVG, base64-encoded.
            return base64.b64encode(svg_bytes)
        return cairosvg.svg2png(bytestring=svg_bytes)

    # ------------------------------------------------------------------
    # Tabular exports
    # ------------------------------------------------------------------
    @staticmethod
    def _excel_cell(value: Any) -> Any:
        """Return *value* in a form a spreadsheet cell can hold.

        Containers are serialised to JSON text because openpyxl rejects
        dict/list/tuple cell values; everything else is passed through.
        """
        if isinstance(value, (dict, list, tuple)):
            return json.dumps(value, ensure_ascii=False, default=str)
        return value

    def export_entities_excel(self, entities: List["ExportEntity"]) -> bytes:
        """Export entities as an Excel workbook with one sheet, '实体列表'.

        Each entity attribute becomes its own '属性:<name>' column. Column
        widths are fitted to the longest cell, capped at 50.

        Returns:
            The .xlsx file content as bytes.

        Raises:
            ImportError: If pandas is not installed.
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for Excel export")

        base_columns = ['ID', '名称', '类型', '定义', '别名', '提及次数']
        data = []
        for e in entities:
            row = {
                'ID': e.id,
                '名称': e.name,
                '类型': e.type,
                '定义': e.definition,
                '别名': ', '.join(e.aliases or []),
                '提及次数': e.mention_count
            }
            for attr_name, attr_value in (e.attributes or {}).items():
                row[f'属性:{attr_name}'] = self._excel_cell(attr_value)
            data.append(row)

        # With no rows pandas would emit no header at all; keep the fixed
        # columns so an empty export is still self-describing.
        df = pd.DataFrame(data) if data else pd.DataFrame(columns=base_columns)

        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name='实体列表', index=False)

            worksheet = writer.sheets['实体列表']
            for column in worksheet.columns:
                widest = max((len(str(cell.value)) for cell in column), default=0)
                worksheet.column_dimensions[column[0].column_letter].width = min(widest + 2, 50)

        return output.getvalue()

    def export_entities_csv(self, entities: List["ExportEntity"]) -> str:
        """Export entities as CSV text.

        The header holds the fixed columns followed by one '属性:<name>'
        column per attribute key found on any entity (sorted by key).
        Entities lacking an attribute get an empty cell.

        Returns:
            The CSV document as a string.
        """
        # Sort once; the same order is used for the header and every row.
        attr_names = sorted({name for e in entities for name in (e.attributes or {})})

        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(
            ['ID', '名称', '类型', '定义', '别名', '提及次数'] + [f'属性:{a}' for a in attr_names]
        )
        for e in entities:
            attrs = e.attributes or {}
            writer.writerow(
                [e.id, e.name, e.type, e.definition, ', '.join(e.aliases or []), e.mention_count]
                + [attrs.get(a, '') for a in attr_names]
            )
        return output.getvalue()

    def export_relations_csv(self, relations: List["ExportRelation"]) -> str:
        """Export relations as CSV text, one relation per row.

        Returns:
            The CSV document as a string.
        """
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(['ID', '源实体', '目标实体', '关系类型', '置信度', '证据'])
        writer.writerows(
            [r.id, r.source, r.target, r.relation_type, r.confidence, r.evidence]
            for r in relations
        )
        return output.getvalue()

    # ------------------------------------------------------------------
    # Markdown
    # ------------------------------------------------------------------
    @staticmethod
    def _md_cell(value: Any) -> str:
        """Make *value* safe for a Markdown table cell (no raw '|' or line breaks)."""
        text = str(value).replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
        return text.replace('|', '\\|')

    def export_transcript_markdown(self, transcript: "ExportTranscript",
                                   entities_map: Dict[str, "ExportEntity"]) -> str:
        """Export one transcript as a Markdown document.

        The document contains the full content, an optional per-segment
        listing and an optional table of entity mentions.

        Args:
            transcript: The transcript to render.
            entities_map: Entities keyed by id, used to resolve the name and
                type of each mention; unresolved mentions fall back to the
                mention's own 'entity_name' and the type 'Unknown'.

        Returns:
            The Markdown text.
        """
        lines = [
            f"# {transcript.name}",
            "",
            f"**类型**: {transcript.type}",
            f"**ID**: {transcript.id}",
            "",
            "---",
            "",
            "## 内容",
            "",
            transcript.content,
            "",
            "---",
            "",
        ]

        if transcript.segments:
            lines.extend(["## 分段详情", ""])
            for seg in transcript.segments:
                speaker = seg.get('speaker', 'Unknown')
                # A key that is present but None must not break the float format.
                start = seg.get('start') or 0
                end = seg.get('end') or 0
                text = seg.get('text', '')
                lines.append(f"**[{start:.1f}s - {end:.1f}s] {speaker}**: {text}")
                lines.append("")

        if transcript.entity_mentions:
            lines.extend([
                "",
                "## 实体提及",
                "",
                "| 实体 | 类型 | 位置 | 上下文 |",
                "|------|------|------|--------|",
            ])
            for mention in transcript.entity_mentions:
                entity = entities_map.get(mention.get('entity_id', ''))
                entity_name = entity.name if entity else mention.get('entity_name', 'Unknown')
                entity_type = entity.type if entity else 'Unknown'
                position = mention.get('position', '')
                context = mention.get('context') or ''
                # Only mark the context as shortened when it really was.
                if len(context) > 50:
                    context = context[:50] + '...'
                cells = (entity_name, entity_type, position, context)
                lines.append("| " + " | ".join(self._md_cell(c) for c in cells) + " |")

        return '\n'.join(lines)

    # ------------------------------------------------------------------
    # PDF report
    # ------------------------------------------------------------------
    @staticmethod
    def _report_table_style(font_name: str, header_font_size: int, align: str,
                            valign: Optional[str] = None):
        """Build the shared look of the report tables (dark header, grey body, grid)."""
        commands = [
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#34495e')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), align),
            ('FONTNAME', (0, 0), (-1, -1), font_name),
            ('FONTSIZE', (0, 0), (-1, 0), header_font_size),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#ecf0f1')),
            ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#bdc3c7')),
        ]
        if valign is not None:
            commands.append(('VALIGN', (0, 0), (-1, -1), valign))
        return TableStyle(commands)

    def export_project_report_pdf(self, project_id: str, project_name: str,
                                  entities: List["ExportEntity"],
                                  relations: List["ExportRelation"],
                                  transcripts: List["ExportTranscript"],
                                  summary: str = "") -> bytes:
        """Export a project report as a PDF document.

        The report has a title block, a statistics table, the optional
        summary, the 50 most-mentioned entities and the first 100 relations.
        ``project_id`` is accepted for interface symmetry but not printed.

        Returns:
            The PDF file content as bytes.

        Raises:
            ImportError: If reportlab is not installed.
        """
        if not REPORTLAB_AVAILABLE:
            raise ImportError("reportlab is required for PDF export")

        # The report text is Chinese; register a CID font that has the glyphs.
        font = self._PDF_FONT
        pdfmetrics.registerFont(UnicodeCIDFont(font))

        output = io.BytesIO()
        doc = SimpleDocTemplate(
            output,
            pagesize=A4,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=18
        )

        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontName=font,
            fontSize=24,
            leading=30,
            spaceAfter=30,
            textColor=colors.HexColor('#2c3e50')
        )
        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading2'],
            fontName=font,
            fontSize=16,
            leading=20,
            spaceAfter=12,
            textColor=colors.HexColor('#34495e')
        )
        subtitle_style = ParagraphStyle('CustomSubtitle', parent=styles['Heading2'], fontName=font)
        body_style = ParagraphStyle('CustomBody', parent=styles['Normal'], fontName=font,
                                    wordWrap='CJK')
        cell_style = ParagraphStyle('CustomCell', parent=body_style, fontSize=9, leading=12)

        def cell(text):
            # Paragraph cells wrap inside their column; the text is escaped
            # because Paragraph interprets XML-like markup.
            return Paragraph(escape(str(text)), cell_style)

        story = []

        # Title block
        story.append(Paragraph("InsightFlow 项目报告", title_style))
        story.append(Paragraph(f"项目名称: {escape(str(project_name))}", subtitle_style))
        story.append(Paragraph(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M')}", body_style))
        story.append(Spacer(1, 0.3*inch))

        # Statistics overview
        story.append(Paragraph("项目概览", heading_style))
        stats_data = [
            ['指标', '数值'],
            ['实体数量', str(len(entities))],
            ['关系数量', str(len(relations))],
            ['文档数量', str(len(transcripts))],
        ]
        type_counts = {}
        for e in entities:
            type_counts[e.type] = type_counts.get(e.type, 0) + 1
        for etype, count in sorted(type_counts.items()):
            stats_data.append([f'{etype} 实体', str(count)])

        stats_table = Table(stats_data, colWidths=[3*inch, 2*inch])
        stats_table.setStyle(self._report_table_style(font, 12, 'CENTER'))
        story.append(stats_table)
        story.append(Spacer(1, 0.3*inch))

        # Summary
        if summary:
            story.append(Paragraph("项目总结", heading_style))
            story.append(Paragraph(escape(summary).replace('\n', '<br/>'), body_style))
            story.append(Spacer(1, 0.3*inch))

        # Entity list (top 50 by mention count)
        if entities:
            story.append(PageBreak())
            story.append(Paragraph("实体列表", heading_style))

            entity_data = [['名称', '类型', '提及次数', '定义']]
            ranked = sorted(entities, key=lambda x: x.mention_count or 0, reverse=True)[:50]
            for e in ranked:
                definition = e.definition or ''
                if len(definition) > 100:
                    definition = definition[:100] + '...'
                entity_data.append([cell(e.name), cell(e.type), str(e.mention_count), cell(definition)])

            entity_table = Table(entity_data, colWidths=[1.5*inch, 1*inch, 1*inch, 2.5*inch],
                                 repeatRows=1)
            entity_table.setStyle(self._report_table_style(font, 10, 'LEFT', valign='TOP'))
            story.append(entity_table)

        # Relation list (first 100)
        if relations:
            story.append(PageBreak())
            story.append(Paragraph("关系列表", heading_style))

            relation_data = [['源实体', '关系', '目标实体', '置信度']]
            for r in relations[:100]:
                confidence = f"{r.confidence:.2f}" if r.confidence is not None else ''
                relation_data.append([cell(r.source), cell(r.relation_type), cell(r.target), confidence])

            # 6 inches in total so the table fits between the 1-inch page margins.
            relation_table = Table(relation_data,
                                   colWidths=[1.85*inch, 1.4*inch, 1.85*inch, 0.9*inch],
                                   repeatRows=1)
            relation_table.setStyle(self._report_table_style(font, 10, 'LEFT'))
            story.append(relation_table)

        doc.build(story)
        return output.getvalue()

    # ------------------------------------------------------------------
    # JSON
    # ------------------------------------------------------------------
    def export_project_json(self, project_id: str, project_name: str,
                            entities: List["ExportEntity"],
                            relations: List["ExportRelation"],
                            transcripts: List["ExportTranscript"]) -> str:
        """Export the complete project data as pretty-printed JSON.

        Returns:
            A JSON string (non-ASCII characters kept as-is, 2-space indent)
            with the keys 'project_id', 'project_name', 'export_time',
            'entities', 'relations' and 'transcripts'.
        """
        data = {
            "project_id": project_id,
            "project_name": project_name,
            "export_time": datetime.now().isoformat(),
            "entities": [
                {
                    "id": e.id,
                    "name": e.name,
                    "type": e.type,
                    "definition": e.definition,
                    "aliases": e.aliases,
                    "mention_count": e.mention_count,
                    "attributes": e.attributes
                }
                for e in entities
            ],
            "relations": [
                {
                    "id": r.id,
                    "source": r.source,
                    "target": r.target,
                    "relation_type": r.relation_type,
                    "confidence": r.confidence,
                    "evidence": r.evidence
                }
                for r in relations
            ],
            "transcripts": [
                {
                    "id": t.id,
                    "name": t.name,
                    "type": t.type,
                    "content": t.content,
                    "segments": t.segments,
                    # Part of the transcript record, so part of a full dump.
                    "entity_mentions": t.entity_mentions
                }
                for t in transcripts
            ]
        }
        return json.dumps(data, ensure_ascii=False, indent=2)
# Module-wide ExportManager instance, created lazily by get_export_manager().
_export_manager = None


def get_export_manager(db_manager=None):
    """Return the shared ExportManager, creating it on first use.

    Args:
        db_manager: Optional database manager. It is handed to the instance
            when the instance is created. If the instance already exists but
            has no database manager yet, a non-None value is adopted, so a
            caller that supplies one is not silently ignored. An already
            configured database manager is never replaced.

    Returns:
        The single module-wide ExportManager instance.
    """
    global _export_manager
    if _export_manager is None:
        _export_manager = ExportManager(db_manager)
    elif db_manager is not None and _export_manager.db is None:
        # The first caller did not provide a db manager; take this one.
        _export_manager.db = db_manager
    return _export_manager