diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..18b1e4e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install system deps
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python deps
+COPY backend/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy code
+COPY backend/ ./backend/
+COPY frontend/ ./frontend/
+
+EXPOSE 8000
+
+CMD ["python", "backend/main.py"]
diff --git a/README.md b/README.md
index a356e81..bd7bd7b 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,27 @@
-# insightflow
-音频与文档的领域知识构建平台
-将会议录音转化为结构化知识图谱
\ No newline at end of file
+# InsightFlow
+
+音频与文档的领域知识构建平台
+
+## 产品定位
+将会议录音和文档转化为结构化的知识图谱,通过人机回圈(Human-in-the-Loop)实现知识持续生长。
+
+## 核心特性
+- 🎙️ ASR 语音识别 + 热词注入
+- 🧠 LLM 实体抽取与解释
+- 🔗 双视图联动(文档视图 + 图谱视图)
+- 📈 知识生长(多文件实体对齐)
+
+## 技术栈
+- 前端: Next.js + Tailwind
+- 后端: Node.js / Python
+- 数据库: MySQL + Neo4j
+- ASR: Deepgram (Nova-3)
+- LLM: OpenAI / Kimi
+
+## 开发阶段
+- [ ] Phase 1: 骨架与单体分析 (MVP)
+- [ ] Phase 2: 交互与纠错工作台
+- [ ] Phase 3: 记忆与生长
+
+## 文档
+- [PRD v2.0](docs/PRD-v2.0.md)
diff --git a/backend/main.py b/backend/main.py
new file mode 100644
index 0000000..d013d40
--- /dev/null
+++ b/backend/main.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""
+InsightFlow Backend - Phase 1 MVP with Deepgram
+ASR: Deepgram (Nova-3)
+Speaker Diarization: Deepgram
+LLM: Kimi API for entity extraction
+"""
+
+import os
+import json
+import re
+import httpx
+from fastapi import FastAPI, File, UploadFile, HTTPException, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from typing import List, Optional
+from datetime import datetime
+from deepgram import DeepgramClient, PrerecordedOptions, FileSource
+
+app = FastAPI(title="InsightFlow", version="0.1.0")
+
+# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
+# rejected by browsers per the CORS spec; acceptable for local dev only.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Models
+class Entity(BaseModel):
+    id: str
+    name: str
+    type: str
+    start: int  # character offset into full_text (LLM-supplied, best effort)
+    end: int
+    definition: Optional[str] = None
+
+class TranscriptSegment(BaseModel):
+    start: float
+    end: float
+    text: str
+    speaker: Optional[str] = "Speaker A"
+
+class AnalysisResult(BaseModel):
+    transcript_id: str
+    segments: List[TranscriptSegment]
+    entities: List[Entity]
+    full_text: str
+    created_at: str
+
+# In-memory store: all analyses are lost on restart (MVP only).
+storage = {}
+
+# API Keys
+DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY", "")
+KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
+# NOTE(review): confirm this base URL and the "k2p5" model name against the
+# provider docs; Moonshot's public endpoint is https://api.moonshot.cn/v1.
+KIMI_BASE_URL = "https://api.kimi.com/coding"
+
+def transcribe_with_deepgram(audio_data: bytes, filename: str) -> dict:
+    """Transcribe audio with Deepgram (Nova-3), with speaker diarization.
+
+    Returns {"full_text": str, "segments": [{start, end, text, speaker}]}.
+    Raises HTTPException(500) if the API key is not configured.
+    """
+    if not DEEPGRAM_API_KEY:
+        raise HTTPException(status_code=500, detail="DEEPGRAM_API_KEY not configured")
+
+    deepgram = DeepgramClient(DEEPGRAM_API_KEY)
+
+    payload: FileSource = {
+        "buffer": audio_data,
+        # "audio/mpeg" is the registered MIME type; "audio/mp3" is not.
+        "mimetype": "audio/wav" if filename.endswith(".wav") else "audio/mpeg"
+    }
+
+    options = PrerecordedOptions(
+        model="nova-3",
+        language="zh",
+        smart_format=True,
+        diarize=True,  # speaker diarization
+        utterances=True,
+        punctuate=True,
+        paragraphs=True
+    )
+
+    response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)
+
+    # Parse results
+    result = response.results
+    full_text = result.channels[0].alternatives[0].transcript if result.channels else ""
+
+    # Extract speaker-labelled segments
+    segments = []
+    if result.utterances:
+        for u in result.utterances:
+            segments.append({
+                "start": u.start,
+                "end": u.end,
+                "text": u.transcript,
+                "speaker": f"Speaker {u.speaker}"
+            })
+
+    return {
+        "full_text": full_text,
+        "segments": segments
+    }
+
+def extract_entities_with_llm(text: str) -> List[Entity]:
+    """Extract key entities from meeting text via the Kimi chat API.
+
+    Best-effort: returns [] on any failure (missing key, HTTP error,
+    unparseable LLM output) rather than failing the whole analysis.
+    """
+    if not KIMI_API_KEY or not text:
+        return []
+
+    # Truncate to keep the prompt within the model's context limit.
+    # (Previously this note was written inside the f-string and was sent
+    # to the LLM as part of the prompt text.)
+    snippet = text[:3000]
+    prompt = f"""请从以下会议文本中提取关键实体(专有名词、项目名、技术术语、人名等),并以 JSON 格式返回:
+
+文本:{snippet}
+
+要求:
+1. 每个实体包含:name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER), start(起始字符位置), end(结束字符位置), definition(一句话定义)
+2. 只返回 JSON 数组,不要其他内容
+3. 确保 start/end 是字符在文本中的位置
+
+示例输出:
+[
+  {{"name": "Project Alpha", "type": "PROJECT", "start": 23, "end": 35, "definition": "Q3季度的核心项目"}},
+  {{"name": "K8s", "type": "TECH", "start": 37, "end": 40, "definition": "Kubernetes的缩写"}}
+]
+"""
+
+    try:
+        response = httpx.post(
+            f"{KIMI_BASE_URL}/v1/chat/completions",
+            headers={"Authorization": f"Bearer {KIMI_API_KEY}", "Content-Type": "application/json"},
+            json={
+                "model": "k2p5",
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.1
+            },
+            timeout=60.0
+        )
+        response.raise_for_status()
+        result = response.json()
+        content = result["choices"][0]["message"]["content"]
+
+        # Greedy match (first "[" to last "]") so a "]" appearing inside the
+        # array body cannot truncate it, as the old non-greedy \[.*?\] could.
+        json_match = re.search(r'\[.*\]', content, re.DOTALL)
+        if json_match:
+            entities_data = json.loads(json_match.group())
+            entities = []
+            for i, e in enumerate(entities_data):
+                entities.append(Entity(
+                    id=f"ent_{i+1}",
+                    name=e["name"],
+                    type=e.get("type", "OTHER"),
+                    # coerce: the LLM may emit offsets as strings
+                    start=int(e["start"]),
+                    end=int(e["end"]),
+                    definition=e.get("definition", "")
+                ))
+            return entities
+    except Exception as e:
+        print(f"LLM extraction failed: {e}")
+
+    return []
+
+@app.post("/api/v1/upload", response_model=AnalysisResult)
+async def upload_audio(file: UploadFile = File(...)):
+    """Upload an audio file, transcribe it and extract entities."""
+    content = await file.read()
+
+    # Deepgram transcription
+    print(f"Transcribing with Deepgram: {file.filename}")
+    dg_result = transcribe_with_deepgram(content, file.filename)
+
+    # Build segments; fall back to a single whole-text segment when Deepgram
+    # returned no utterances (empty list is falsy, so `or` kicks in).
+    segments = [
+        TranscriptSegment(**seg) for seg in dg_result["segments"]
+    ] or [TranscriptSegment(start=0, end=0, text=dg_result["full_text"], speaker="Speaker A")]
+
+    # LLM entity extraction
+    print("Extracting entities with LLM...")
+    entities = extract_entities_with_llm(dg_result["full_text"])
+
+    analysis = AnalysisResult(
+        transcript_id=os.urandom(8).hex(),
+        segments=segments,
+        entities=entities,
+        full_text=dg_result["full_text"],
+        created_at=datetime.now().isoformat()
+    )
+
+    storage[analysis.transcript_id] = analysis
+    print(f"Analysis complete: {analysis.transcript_id}, {len(entities)} entities found")
+    return analysis
+
+@app.get("/api/v1/transcripts/{transcript_id}", response_model=AnalysisResult)
+async def get_transcript(transcript_id: str):
+    if transcript_id not in storage:
+        raise HTTPException(status_code=404, detail="Transcript not found")
+    return storage[transcript_id]
+
+@app.get("/api/v1/transcripts")
+async def list_transcripts():
+    return list(storage.values())
+
+# Serve frontend (mounted last so the API routes above match first)
+app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend")
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..96004bb
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,7 @@
+fastapi==0.115.0
+uvicorn[standard]==0.32.0
+python-multipart==0.0.17
+deepgram-sdk==3.7.0
+httpx==0.27.2
+pydantic==2.9.2
+python-dotenv==1.0.1
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..1a280d8
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+version: '3.8'
+
+services:
+  insightflow:
+    build: .
+    ports:
+      - "18000:8000"
+    environment:
+      - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
+      - KIMI_API_KEY=${KIMI_API_KEY}
+    volumes:
+      - ./data:/app/data
+    restart: unless-stopped
diff --git a/frontend/EOF b/frontend/EOF
new file mode 100644
index 0000000..e69de29
diff --git a/frontend/app.js b/frontend/app.js
new file mode 100644
index 0000000..f187b72
--- /dev/null
+++ b/frontend/app.js
@@ -0,0 +1,11 @@
+const API_BASE = '/api/v1';
+async function upload(file) {
+  const formData = new FormData();
+  formData.append('file', file);
+  const res = await fetch(API_BASE + '/upload', {
+    method: 'POST',
+    body: formData
+  });
+  if (!res.ok) throw new Error('Upload failed: ' + res.status);
+  return await res.json();
+}
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..f6570ee
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,18 @@
+
+
+
+
+Phase 1 MVP - 音频转录与实体提取
+