Phase 5: 完成导出功能
- 新增 export_manager.py 导出管理模块 - 知识图谱导出 SVG/PNG - 实体数据导出 Excel/CSV - 关系数据导出 CSV - 项目报告导出 PDF - 转录文本导出 Markdown - 项目完整数据导出 JSON - 前端添加导出面板和功能 - 更新依赖: pandas, openpyxl, reportlab, cairosvg
This commit is contained in:
572
backend/export_manager.py
Normal file
572
backend/export_manager.py
Normal file
@@ -0,0 +1,572 @@
|
||||
"""
|
||||
InsightFlow Export Module - Phase 5
|
||||
支持导出知识图谱、项目报告、实体数据和转录文本
|
||||
"""
|
||||
|
||||
import os
|
||||
import io
|
||||
import json
|
||||
import base64
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
PANDAS_AVAILABLE = True
|
||||
except ImportError:
|
||||
PANDAS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import inch
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
REPORTLAB_AVAILABLE = True
|
||||
except ImportError:
|
||||
REPORTLAB_AVAILABLE = False
|
||||
|
||||
|
||||
@dataclass
class ExportEntity:
    """Plain record describing one entity as consumed by the exporters."""

    # Entity identifier; the SVG exporter keys node positions by it.
    id: str
    # Display name (the SVG exporter shows only its first 8 characters).
    name: str
    # Category label, e.g. "PERSON" or "ORGANIZATION"; selects the node colour.
    type: str
    # Free-text definition of the entity.
    definition: str
    # Alternative names; joined with ", " in the tabular exports.
    aliases: List[str]
    # Number of mentions; the PDF report sorts entities by it.
    mention_count: int
    # Extra name -> value attributes, exported as "属性:<name>" columns.
    attributes: Dict[str, Any]
|
||||
|
||||
|
||||
@dataclass
class ExportRelation:
    """Plain record describing one directed relation between two entities."""

    # Relation identifier.
    id: str
    # Reference to the entity the relation starts from.
    source: str
    # Reference to the entity the relation points to.
    target: str
    # Label of the relation; drawn at the middle of the edge in the SVG export.
    relation_type: str
    # Confidence score; the PDF report prints it with two decimals.
    confidence: float
    # Supporting text for the relation.
    evidence: str
|
||||
|
||||
|
||||
@dataclass
class ExportTranscript:
    """Plain record describing one transcript/document as consumed by the exporters."""

    # Transcript identifier.
    id: str
    # Title; used as the top-level heading of the Markdown export.
    name: str
    # Source kind: audio/document.
    type: str
    # Full text of the transcript.
    content: str
    # Segment dicts; the Markdown export reads 'speaker', 'start', 'end', 'text'.
    segments: List[Dict]
    # Mention dicts; the Markdown export reads 'entity_id', 'entity_name',
    # 'position' and 'context'.
    entity_mentions: List[Dict]
|
||||
|
||||
|
||||
class ExportManager:
    """Export manager: turns project data into the supported export formats."""

    def __init__(self, db_manager=None):
        # Optional database manager handle, stored as-is on the instance.
        self.db = db_manager
|
||||
|
||||
def export_knowledge_graph_svg(self, project_id: str, entities: List["ExportEntity"],
                               relations: List["ExportRelation"]) -> str:
    """
    Render the knowledge graph as a standalone SVG document.

    Entities are spread evenly on a circle around the canvas centre (a single
    entity is placed at the centre). Each relation is drawn as an arrow with
    its type shown at the midpoint, and a colour legend is drawn in the
    top-right corner.

    Args:
        project_id: Shown in the title of the drawing.
        entities: Nodes to draw; ``id``, ``name`` and ``type`` are read.
        relations: Edges to draw. ``source`` / ``target`` may hold either an
            entity id or an entity name; relations whose endpoints cannot be
            resolved to a drawn entity are skipped.

    Returns:
        SVG markup as a string.
    """
    import math
    from xml.sax.saxutils import escape

    # Canvas geometry
    width = 1200
    height = 800
    center_x = width / 2
    center_y = height / 2
    radius = 300
    node_radius = 35

    # Fill colour per entity type; "default" is used for unknown types
    type_colors = {
        "PERSON": "#FF6B6B",
        "ORGANIZATION": "#4ECDC4",
        "LOCATION": "#45B7D1",
        "PRODUCT": "#96CEB4",
        "TECHNOLOGY": "#FFEAA7",
        "EVENT": "#DDA0DD",
        "CONCEPT": "#98D8C8",
        "default": "#BDC3C7"
    }

    # Circular layout. The previous formula multiplied the radius by a second
    # pixel factor and pushed almost every node far outside the canvas.
    positions_by_id = {}
    positions_by_name = {}
    total = len(entities)
    for index, entity in enumerate(entities):
        if total == 1:
            point = (center_x, center_y)
        else:
            # Start at 12 o'clock and walk clockwise
            angle = math.tau * index / total - math.pi / 2
            point = (
                round(center_x + radius * math.cos(angle), 1),
                round(center_y + radius * math.sin(angle), 1),
            )
        positions_by_id[entity.id] = point
        # First entity wins when several share a name
        positions_by_name.setdefault(entity.name, point)

    def locate(reference):
        """Resolve a relation endpoint given as an entity id or an entity name."""
        if reference in positions_by_id:
            return positions_by_id[reference]
        return positions_by_name.get(reference)

    # Every piece of data-derived text is XML-escaped so that names containing
    # '<', '>' or '&' cannot break (or inject markup into) the document.
    svg_parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<defs>',
        '  <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">',
        '    <polygon points="0 0, 10 3.5, 0 7" fill="#7f8c8d"/>',
        '  </marker>',
        '</defs>',
        f'<rect width="{width}" height="{height}" fill="#f8f9fa"/>',
        f'<text x="{center_x}" y="30" text-anchor="middle" font-size="20" font-weight="bold" fill="#2c3e50">知识图谱 - {escape(str(project_id))}</text>',
    ]

    # Relation edges (drawn first so that the nodes are painted on top)
    for rel in relations:
        start = locate(rel.source)
        end = locate(rel.target)
        if start is None or end is None:
            continue
        x1, y1 = start
        x2, y2 = end

        # Pull the arrow tip back so it stops just outside the target node
        dx = x2 - x1
        dy = y2 - y1
        dist = math.hypot(dx, dy)
        if dist > 0:
            offset = node_radius + 5
            x2 = round(x2 - dx * offset / dist, 1)
            y2 = round(y2 - dy * offset / dist, 1)

        svg_parts.append(
            f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" '
            f'stroke="#7f8c8d" stroke-width="2" marker-end="url(#arrowhead)" opacity="0.6"/>'
        )

        # Relation label on a small white plate at the midpoint
        mid_x = round((x1 + x2) / 2, 1)
        mid_y = round((y1 + y2) / 2, 1)
        svg_parts.append(
            f'<rect x="{mid_x - 30}" y="{mid_y - 10}" width="60" height="20" '
            f'fill="white" stroke="#bdc3c7" rx="3"/>'
        )
        svg_parts.append(
            f'<text x="{mid_x}" y="{mid_y + 5}" text-anchor="middle" '
            f'font-size="10" fill="#2c3e50">{escape(str(rel.relation_type))}</text>'
        )

    # Entity nodes
    for entity in entities:
        x, y = positions_by_id[entity.id]
        color = type_colors.get(entity.type, type_colors["default"])

        svg_parts.append(
            f'<circle cx="{x}" cy="{y}" r="{node_radius}" fill="{color}" stroke="white" stroke-width="3"/>'
        )
        # Name, cut to 8 characters so it fits inside the circle
        svg_parts.append(
            f'<text x="{x}" y="{y + 5}" text-anchor="middle" font-size="12" '
            f'font-weight="bold" fill="white">{escape(str(entity.name)[:8])}</text>'
        )
        # Type label below the circle
        svg_parts.append(
            f'<text x="{x}" y="{y + 55}" text-anchor="middle" font-size="10" '
            f'fill="#7f8c8d">{escape(str(entity.type))}</text>'
        )

    # Legend (the "default" fallback colour is not listed)
    legend_entries = [(etype, color) for etype, color in type_colors.items() if etype != "default"]
    legend_x = width - 150
    legend_y = 80
    legend_height = len(legend_entries) * 20 + 40
    svg_parts.append(
        f'<rect x="{legend_x - 10}" y="{legend_y - 20}" width="140" height="{legend_height}" '
        f'fill="white" stroke="#bdc3c7" rx="5"/>'
    )
    svg_parts.append(
        f'<text x="{legend_x}" y="{legend_y}" font-size="12" font-weight="bold" fill="#2c3e50">实体类型</text>'
    )
    for row, (etype, color) in enumerate(legend_entries):
        y_pos = legend_y + 25 + row * 20
        svg_parts.append(f'<circle cx="{legend_x + 10}" cy="{y_pos}" r="8" fill="{color}"/>')
        svg_parts.append(
            f'<text x="{legend_x + 25}" y="{y_pos + 4}" font-size="10" fill="#2c3e50">{etype}</text>'
        )

    svg_parts.append('</svg>')
    return '\n'.join(svg_parts)
|
||||
|
||||
def export_knowledge_graph_png(self, project_id: str, entities: List["ExportEntity"],
                               relations: List["ExportRelation"]) -> bytes:
    """
    Render the knowledge graph as a PNG image.

    The SVG produced by ``export_knowledge_graph_svg`` is rasterised with
    cairosvg.

    Returns:
        PNG image bytes. When cairosvg cannot be imported, the base64-encoded
        SVG markup is returned instead.
        NOTE(review): that fallback payload is not a PNG; callers that label
        the result as image/png should check for it.
    """
    svg_bytes = self.export_knowledge_graph_svg(project_id, entities, relations).encode('utf-8')
    try:
        import cairosvg
        return cairosvg.svg2png(bytestring=svg_bytes)
    except ImportError:
        # No cairosvg available: hand back the SVG as base64
        return base64.b64encode(svg_bytes)
|
||||
|
||||
def export_entities_excel(self, entities: List["ExportEntity"]) -> bytes:
    """
    Export entity data as an Excel workbook.

    One row is written per entity on a sheet named '实体列表'. Besides the fixed
    columns, each attribute becomes an '属性:<name>' column. Column widths are
    fitted to their content and capped at 50.

    Args:
        entities: Entities to export. An empty list yields a sheet that
            contains only the fixed header row.

    Returns:
        The .xlsx file content as bytes.

    Raises:
        ImportError: If pandas is not installed.
    """
    if not PANDAS_AVAILABLE:
        raise ImportError("pandas is required for Excel export")

    base_columns = ['ID', '名称', '类型', '定义', '别名', '提及次数']

    # One dict per entity; attribute columns are added on demand
    rows = []
    for e in entities:
        row = {
            'ID': e.id,
            '名称': e.name,
            '类型': e.type,
            '定义': e.definition,
            '别名': ', '.join(e.aliases),
            '提及次数': e.mention_count
        }
        for attr_name, attr_value in e.attributes.items():
            row[f'属性:{attr_name}'] = attr_value
        rows.append(row)

    # Without rows pandas would emit a sheet with no header at all
    df = pd.DataFrame(rows) if rows else pd.DataFrame(columns=base_columns)

    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='实体列表', index=False)

        # Fit each column to its longest cell; empty cells do not count
        worksheet = writer.sheets['实体列表']
        for column in worksheet.columns:
            longest = max(
                (len(str(cell.value)) for cell in column if cell.value is not None),
                default=0,
            )
            column_letter = column[0].column_letter
            worksheet.column_dimensions[column_letter].width = min(longest + 2, 50)

    return output.getvalue()
|
||||
|
||||
def export_entities_csv(self, entities: List["ExportEntity"]) -> str:
    """
    Export entity data as CSV text.

    The header holds the fixed columns followed by one '属性:<name>' column for
    every attribute name found on any entity, in sorted order. Entities that
    lack an attribute get an empty cell in that column.

    Returns:
        CSV content as a string.
    """
    import csv

    # Union of all attribute names, in a stable (sorted) column order
    attr_names = sorted({key for entity in entities for key in entity.attributes.keys()})

    buffer = io.StringIO()
    csv_writer = csv.writer(buffer)

    # Header row
    fixed_headers = ['ID', '名称', '类型', '定义', '别名', '提及次数']
    csv_writer.writerow(fixed_headers + [f'属性:{name}' for name in attr_names])

    # Data rows
    for entity in entities:
        fixed_cells = [
            entity.id,
            entity.name,
            entity.type,
            entity.definition,
            ', '.join(entity.aliases),
            entity.mention_count,
        ]
        attr_cells = [entity.attributes.get(name, '') for name in attr_names]
        csv_writer.writerow(fixed_cells + attr_cells)

    return buffer.getvalue()
|
||||
|
||||
def export_relations_csv(self, relations: List["ExportRelation"]) -> str:
    """
    Export relation data as CSV text.

    Writes a header row followed by one row per relation, in input order.

    Returns:
        CSV content as a string.
    """
    import csv

    rows = [['ID', '源实体', '目标实体', '关系类型', '置信度', '证据']]
    rows.extend(
        [rel.id, rel.source, rel.target, rel.relation_type, rel.confidence, rel.evidence]
        for rel in relations
    )

    buffer = io.StringIO()
    csv.writer(buffer).writerows(rows)
    return buffer.getvalue()
|
||||
|
||||
def export_transcript_markdown(self, transcript: "ExportTranscript",
                               entities_map: Dict[str, "ExportEntity"]) -> str:
    """
    Export a transcript as a Markdown document.

    The document contains a header (name, type, id), the full content, an
    optional per-segment section and an optional table of entity mentions.

    Args:
        transcript: Transcript to export.
        entities_map: Entities keyed by id, used to resolve the name and type
            of each mention. Unknown ids fall back to the mention's own
            'entity_name' and the type 'Unknown'.

    Returns:
        Markdown content as a string.
    """

    def table_cell(value) -> str:
        """Make a value safe for a one-line Markdown table cell."""
        # A raw '|' or line break would split the row into extra cells/rows
        flattened = ' '.join(str(value).splitlines())
        return flattened.replace('|', '\\|')

    lines = [
        f"# {transcript.name}",
        "",
        f"**类型**: {transcript.type}",
        f"**ID**: {transcript.id}",
        "",
        "---",
        "",
        "## 内容",
        "",
        transcript.content,
        "",
        "---",
        "",
    ]

    if transcript.segments:
        lines.extend([
            "## 分段详情",
            "",
        ])
        for seg in transcript.segments:
            speaker = seg.get('speaker') or 'Unknown'
            # A stored null timestamp must not crash the ':.1f' format
            start = float(seg.get('start') or 0)
            end = float(seg.get('end') or 0)
            text = seg.get('text') or ''
            lines.append(f"**[{start:.1f}s - {end:.1f}s] {speaker}**: {text}")
            lines.append("")

    if transcript.entity_mentions:
        lines.extend([
            "",
            "## 实体提及",
            "",
            "| 实体 | 类型 | 位置 | 上下文 |",
            "|------|------|------|--------|",
        ])
        for mention in transcript.entity_mentions:
            entity = entities_map.get(mention.get('entity_id', ''))
            entity_name = entity.name if entity else mention.get('entity_name', 'Unknown')
            entity_type = entity.type if entity else 'Unknown'
            position = mention.get('position', '')

            # Add the ellipsis only when the context was actually cut
            context = mention.get('context') or ''
            if len(context) > 50:
                context = context[:50] + '...'

            lines.append(
                f"| {table_cell(entity_name)} | {table_cell(entity_type)} "
                f"| {table_cell(position)} | {table_cell(context)} |"
            )

    return '\n'.join(lines)
|
||||
|
||||
def export_project_report_pdf(self, project_id: str, project_name: str,
                              entities: List["ExportEntity"],
                              relations: List["ExportRelation"],
                              transcripts: List["ExportTranscript"],
                              summary: str = "") -> bytes:
    """
    Export the project report as a PDF document.

    The report consists of a title block, an overview table (entity, relation
    and document counts plus a per-type entity breakdown), the optional
    summary, the 50 most-mentioned entities and the first 100 relations.

    Args:
        project_id: Part of the exporter signature; not printed in the report.
        project_name: Shown in the title block.
        entities: Entities of the project.
        relations: Relations of the project.
        transcripts: Only their number is reported.
        summary: Optional summary text; the section is omitted when empty.

    Returns:
        PDF file content as bytes.

    Raises:
        ImportError: If reportlab is not installed.
    """
    if not REPORTLAB_AVAILABLE:
        raise ImportError("reportlab is required for PDF export")

    from xml.sax.saxutils import escape
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont

    # The report text is Chinese. The default Helvetica faces carry no CJK
    # glyphs, so every style and table uses reportlab's built-in Simplified
    # Chinese CID font instead (no font file has to be shipped).
    cjk_font = 'STSong-Light'
    pdfmetrics.registerFont(UnicodeCIDFont(cjk_font))

    output = io.BytesIO()
    doc = SimpleDocTemplate(
        output,
        pagesize=A4,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=18
    )

    # Styles
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontName=cjk_font,
        fontSize=24,
        spaceAfter=30,
        textColor=colors.HexColor('#2c3e50')
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontName=cjk_font,
        fontSize=16,
        spaceAfter=12,
        textColor=colors.HexColor('#34495e')
    )
    subtitle_style = ParagraphStyle('CustomSubtitle', parent=styles['Heading2'], fontName=cjk_font)
    body_style = ParagraphStyle('CustomBody', parent=styles['Normal'], fontName=cjk_font, wordWrap='CJK')
    cell_style = ParagraphStyle('CustomCell', parent=body_style, fontSize=9, leading=12)

    header_bg = colors.HexColor('#34495e')
    body_bg = colors.HexColor('#ecf0f1')
    grid_color = colors.HexColor('#bdc3c7')

    def markup_safe(text) -> str:
        """Escape text for Paragraph, which parses its input as XML-like markup."""
        return escape(str(text))

    def cell(text):
        """Wrap a table cell in a Paragraph so long values wrap inside the column."""
        return Paragraph(markup_safe(text), cell_style)

    def make_table(rows, col_widths, align, header_font_size, extra_commands=()):
        """Build a table with the shared header/body styling of this report."""
        # repeatRows=1 repeats the header row when the table spans pages
        table = Table(rows, colWidths=col_widths, repeatRows=1)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), header_bg),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), align),
            ('FONTNAME', (0, 0), (-1, -1), cjk_font),
            ('FONTSIZE', (0, 0), (-1, 0), header_font_size),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), body_bg),
            ('GRID', (0, 0), (-1, -1), 1, grid_color),
            *extra_commands,
        ]))
        return table

    story = []

    # Title block
    story.append(Paragraph("InsightFlow 项目报告", title_style))
    story.append(Paragraph(f"项目名称: {markup_safe(project_name)}", subtitle_style))
    story.append(Paragraph(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M')}", body_style))
    story.append(Spacer(1, 0.3*inch))

    # Overview statistics
    story.append(Paragraph("项目概览", heading_style))
    stats_data = [
        ['指标', '数值'],
        ['实体数量', str(len(entities))],
        ['关系数量', str(len(relations))],
        ['文档数量', str(len(transcripts))],
    ]

    # Entity count per type
    type_counts = {}
    for e in entities:
        type_counts[e.type] = type_counts.get(e.type, 0) + 1
    for etype, count in sorted(type_counts.items()):
        stats_data.append([f'{etype} 实体', str(count)])

    story.append(make_table(stats_data, [3*inch, 2*inch], 'CENTER', 12))
    story.append(Spacer(1, 0.3*inch))

    # Project summary
    if summary:
        story.append(Paragraph("项目总结", heading_style))
        story.append(Paragraph(markup_safe(summary).replace('\n', '<br/>'), body_style))
        story.append(Spacer(1, 0.3*inch))

    # Entity list, limited to the 50 most-mentioned entities
    if entities:
        story.append(PageBreak())
        story.append(Paragraph("实体列表", heading_style))

        entity_data = [['名称', '类型', '提及次数', '定义']]
        for e in sorted(entities, key=lambda x: x.mention_count, reverse=True)[:50]:
            definition = (e.definition[:100] + '...') if len(e.definition) > 100 else e.definition
            entity_data.append([cell(e.name), cell(e.type), str(e.mention_count), cell(definition)])

        story.append(make_table(
            entity_data,
            [1.5*inch, 1*inch, 1*inch, 2.5*inch],
            'LEFT',
            10,
            extra_commands=[('VALIGN', (0, 0), (-1, -1), 'TOP')],
        ))

    # Relation list, limited to the first 100 relations
    if relations:
        story.append(PageBreak())
        story.append(Paragraph("关系列表", heading_style))

        relation_data = [['源实体', '关系', '目标实体', '置信度']]
        for r in relations[:100]:
            confidence = f"{r.confidence:.2f}" if r.confidence is not None else ''
            relation_data.append([cell(r.source), cell(r.relation_type), cell(r.target), confidence])

        # Widths add up to 6 inches so the table fits between the page margins
        story.append(make_table(
            relation_data,
            [1.9*inch, 1.4*inch, 1.9*inch, 0.8*inch],
            'LEFT',
            10,
        ))

    doc.build(story)
    return output.getvalue()
|
||||
|
||||
def export_project_json(self, project_id: str, project_name: str,
                        entities: List["ExportEntity"],
                        relations: List["ExportRelation"],
                        transcripts: List["ExportTranscript"]) -> str:
    """
    Export the complete project data as a JSON document.

    The document carries the project id and name, the export timestamp
    (ISO format, local time) and the entity, relation and transcript lists.
    Transcript entity mentions are not included.

    Returns:
        JSON text, indented by two spaces, with non-ASCII characters kept as-is.
    """
    entity_records = []
    for entity in entities:
        entity_records.append({
            "id": entity.id,
            "name": entity.name,
            "type": entity.type,
            "definition": entity.definition,
            "aliases": entity.aliases,
            "mention_count": entity.mention_count,
            "attributes": entity.attributes,
        })

    relation_records = []
    for relation in relations:
        relation_records.append({
            "id": relation.id,
            "source": relation.source,
            "target": relation.target,
            "relation_type": relation.relation_type,
            "confidence": relation.confidence,
            "evidence": relation.evidence,
        })

    transcript_records = []
    for transcript in transcripts:
        transcript_records.append({
            "id": transcript.id,
            "name": transcript.name,
            "type": transcript.type,
            "content": transcript.content,
            "segments": transcript.segments,
        })

    payload = {
        "project_id": project_id,
        "project_name": project_name,
        "export_time": datetime.now().isoformat(),
        "entities": entity_records,
        "relations": relation_records,
        "transcripts": transcript_records,
    }
    return json.dumps(payload, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
# Module-wide export manager instance, created on first request
_export_manager = None


def get_export_manager(db_manager=None):
    """
    Return the shared export manager, creating it on the first call.

    ``db_manager`` is only used when the instance is created; on later calls
    the existing instance is returned and the argument is ignored.
    """
    global _export_manager
    if _export_manager is not None:
        return _export_manager
    _export_manager = ExportManager(db_manager)
    return _export_manager
|
||||
412
backend/main.py
412
backend/main.py
@@ -11,6 +11,7 @@ import json
|
||||
import httpx
|
||||
import uuid
|
||||
import re
|
||||
import io
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
@@ -67,6 +68,12 @@ try:
|
||||
except ImportError:
|
||||
REASONER_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from export_manager import get_export_manager, ExportEntity, ExportRelation, ExportTranscript
|
||||
EXPORT_AVAILABLE = True
|
||||
except ImportError:
|
||||
EXPORT_AVAILABLE = False
|
||||
|
||||
app = FastAPI(title="InsightFlow", version="0.3.0")
|
||||
|
||||
app.add_middleware(
|
||||
@@ -1911,6 +1918,411 @@ async def search_entities_by_attributes_endpoint(
|
||||
]
|
||||
|
||||
|
||||
# ==================== Export API ====================

from fastapi.responses import StreamingResponse, FileResponse


def _export_backend():
    """Return the DB manager, or answer HTTP 500 when DB or export support is missing."""
    if not DB_AVAILABLE or not EXPORT_AVAILABLE:
        raise HTTPException(status_code=500, detail="Export functionality not available")
    return get_db_manager()


def _export_project_or_404(project_id: str):
    """Return ``(db, project)`` for an existing project, or answer HTTP 404."""
    db = _export_backend()
    project = db.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    return db, project


def _export_entity(db, e, with_attributes: bool = True):
    """Convert one stored entity into an ExportEntity."""
    attributes = {}
    if with_attributes:
        # One attribute lookup per entity, as before
        attributes = {a.template_name: a.value for a in db.get_entity_attributes(e.id)}
    return ExportEntity(
        id=e.id,
        name=e.name,
        type=e.type,
        definition=e.definition or "",
        aliases=json.loads(e.aliases) if e.aliases else [],
        mention_count=e.mention_count,
        attributes=attributes
    )


def _export_entities(db, project_id: str):
    """Load all entities of a project in export form, attributes included."""
    return [_export_entity(db, e) for e in db.get_project_entities(project_id)]


def _export_relations(db, project_id: str):
    """Load all relations of a project in export form (endpoints given by name)."""
    return [
        ExportRelation(
            id=r.id,
            source=r.source_name,
            target=r.target_name,
            relation_type=r.relation_type,
            confidence=r.confidence,
            evidence=r.evidence or ""
        )
        for r in db.get_project_relations(project_id)
    ]


def _export_transcripts(db, project_id: str):
    """Load all transcripts of a project in export form, without entity mentions."""
    return [
        ExportTranscript(
            id=t.id,
            name=t.name,
            type=t.type,
            content=t.full_text or "",
            segments=json.loads(t.segments) if t.segments else [],
            entity_mentions=[]
        )
        for t in db.get_project_transcripts(project_id)
    ]


def _export_download(payload: bytes, media_type: str, filename: str):
    """Wrap in-memory bytes in a download response with the given file name."""
    return StreamingResponse(
        io.BytesIO(payload),
        media_type=media_type,
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )


@app.get("/api/v1/projects/{project_id}/export/graph-svg")
async def export_graph_svg_endpoint(project_id: str):
    """Export the knowledge graph as SVG."""
    db, _project = _export_project_or_404(project_id)
    entities = _export_entities(db, project_id)
    relations = _export_relations(db, project_id)

    svg_content = get_export_manager().export_knowledge_graph_svg(project_id, entities, relations)
    return _export_download(
        svg_content.encode('utf-8'),
        "image/svg+xml",
        f"insightflow-graph-{project_id}.svg",
    )


@app.get("/api/v1/projects/{project_id}/export/graph-png")
async def export_graph_png_endpoint(project_id: str):
    """Export the knowledge graph as PNG."""
    db, _project = _export_project_or_404(project_id)
    entities = _export_entities(db, project_id)
    relations = _export_relations(db, project_id)

    png_bytes = get_export_manager().export_knowledge_graph_png(project_id, entities, relations)

    # Without cairosvg the exporter returns base64-encoded SVG. Serving that
    # as image/png would give the user an unreadable file, so report it.
    if not png_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
        raise HTTPException(status_code=500, detail="PNG export requires cairosvg")

    return _export_download(png_bytes, "image/png", f"insightflow-graph-{project_id}.png")


@app.get("/api/v1/projects/{project_id}/export/entities-excel")
async def export_entities_excel_endpoint(project_id: str):
    """Export entity data as Excel."""
    db, _project = _export_project_or_404(project_id)
    entities = _export_entities(db, project_id)

    excel_bytes = get_export_manager().export_entities_excel(entities)
    return _export_download(
        excel_bytes,
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        f"insightflow-entities-{project_id}.xlsx",
    )


@app.get("/api/v1/projects/{project_id}/export/entities-csv")
async def export_entities_csv_endpoint(project_id: str):
    """Export entity data as CSV."""
    db, _project = _export_project_or_404(project_id)
    entities = _export_entities(db, project_id)

    csv_content = get_export_manager().export_entities_csv(entities)
    return _export_download(
        csv_content.encode('utf-8'),
        "text/csv",
        f"insightflow-entities-{project_id}.csv",
    )


@app.get("/api/v1/projects/{project_id}/export/relations-csv")
async def export_relations_csv_endpoint(project_id: str):
    """Export relation data as CSV."""
    db, _project = _export_project_or_404(project_id)
    relations = _export_relations(db, project_id)

    csv_content = get_export_manager().export_relations_csv(relations)
    return _export_download(
        csv_content.encode('utf-8'),
        "text/csv",
        f"insightflow-relations-{project_id}.csv",
    )


@app.get("/api/v1/projects/{project_id}/export/report-pdf")
async def export_report_pdf_endpoint(project_id: str):
    """Export the project report as PDF."""
    db, project = _export_project_or_404(project_id)
    entities = _export_entities(db, project_id)
    relations = _export_relations(db, project_id)
    transcripts = _export_transcripts(db, project_id)

    # The summary is optional: a failing reasoner must not block the export,
    # but the failure is logged instead of being swallowed silently.
    summary = ""
    if REASONER_AVAILABLE:
        try:
            reasoner = get_knowledge_reasoner()
            summary_result = reasoner.generate_project_summary(project_id, db)
            summary = summary_result.get("summary", "")
        except Exception:
            import logging
            logging.getLogger(__name__).exception(
                "Project summary generation failed for project %s; exporting without summary",
                project_id,
            )

    pdf_bytes = get_export_manager().export_project_report_pdf(
        project_id, project.name, entities, relations, transcripts, summary
    )
    return _export_download(pdf_bytes, "application/pdf", f"insightflow-report-{project_id}.pdf")


@app.get("/api/v1/projects/{project_id}/export/project-json")
async def export_project_json_endpoint(project_id: str):
    """Export the complete project data as JSON."""
    db, project = _export_project_or_404(project_id)
    entities = _export_entities(db, project_id)
    relations = _export_relations(db, project_id)
    transcripts = _export_transcripts(db, project_id)

    json_content = get_export_manager().export_project_json(
        project_id, project.name, entities, relations, transcripts
    )
    return _export_download(
        json_content.encode('utf-8'),
        "application/json",
        f"insightflow-project-{project_id}.json",
    )


@app.get("/api/v1/transcripts/{transcript_id}/export/markdown")
async def export_transcript_markdown_endpoint(transcript_id: str):
    """Export a transcript as Markdown."""
    db = _export_backend()
    transcript = db.get_transcript(transcript_id)
    if not transcript:
        raise HTTPException(status_code=404, detail="Transcript not found")

    # Entity mentions of this transcript
    mentions = db.get_transcript_entity_mentions(transcript_id)

    # Project entities keyed by id, used to resolve mention names and types
    # (attributes are not needed for the Markdown export)
    entities_map = {
        e.id: _export_entity(db, e, with_attributes=False)
        for e in db.get_project_entities(transcript.project_id)
    }

    export_transcript = ExportTranscript(
        id=transcript.id,
        name=transcript.name,
        type=transcript.type,
        content=transcript.full_text or "",
        segments=json.loads(transcript.segments) if transcript.segments else [],
        entity_mentions=[{
            "entity_id": m.entity_id,
            "entity_name": m.entity_name,
            "position": m.position,
            "context": m.context
        } for m in mentions]
    )

    markdown_content = get_export_manager().export_transcript_markdown(export_transcript, entities_map)
    return _export_download(
        markdown_content.encode('utf-8'),
        "text/markdown",
        f"insightflow-transcript-{transcript_id}.md",
    )
|
||||
|
||||
|
||||
# Serve frontend - MUST be last to not override API routes
|
||||
app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend")
|
||||
|
||||
|
||||
@@ -21,3 +21,9 @@ oss2==2.18.5
|
||||
|
||||
# Utilities
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# Export functionality
|
||||
pandas==2.2.0
|
||||
openpyxl==3.1.2
|
||||
reportlab==4.0.9
|
||||
cairosvg==2.7.1
|
||||
|
||||
Reference in New Issue
Block a user