feat: complete Tingwu ASR integration with OSS upload

2026-02-17 12:53:29 +08:00
parent 887ba811e5
commit c1deccbea8
3 changed files with 182 additions and 28 deletions
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-InsightFlow Backend - Phase 1 MVP with 阿里听悟 + OSS
-ASR: 阿里云听悟 (TingWu)
+InsightFlow Backend - Phase 1 MVP (Complete)
+ASR: 阿里云听悟 (TingWu) + OSS
 Speaker Diarization: 听悟内置
 LLM: Kimi API for entity extraction
 """
@@ -9,8 +9,6 @@ LLM: Kimi API for entity extraction
 import os
 import json
 import httpx
-import time
-import uuid
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
@@ -18,13 +16,19 @@ from pydantic import BaseModel
 from typing import List, Optional
 from datetime import datetime

-# 导入 OSS 上传器
+# Import clients
 try:
    from oss_uploader import get_oss_uploader
    OSS_AVAILABLE = True
 except ImportError:
    OSS_AVAILABLE = False

+try:
+    from tingwu_client import TingwuClient
+    TINGWU_AVAILABLE = True
+except ImportError:
+    TINGWU_AVAILABLE = False
+
 app = FastAPI(title="InsightFlow", version="0.1.0")

 app.add_middleware(
@@ -60,32 +64,38 @@ class AnalysisResult(BaseModel):
 storage = {}

 # API Keys
-ALI_ACCESS_KEY = os.getenv("ALI_ACCESS_KEY", "")
-ALI_SECRET_KEY = os.getenv("ALI_SECRET_KEY", "")
 KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
 KIMI_BASE_URL = "https://api.kimi.com/coding"

-def transcribe_with_tingwu(audio_data: bytes, filename: str) -> dict:
-    """使用阿里听悟进行转录和说话人分离"""
+def transcribe_audio(audio_data: bytes, filename: str) -> dict:
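+    """Upload audio to OSS and transcribe it with Tingwu; fall back to the mock result if either step is unavailable or fails."""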
    
    # 1. 上传 OSS
-    if OSS_AVAILABLE:
-        try:
-            uploader = get_oss_uploader()
-            audio_url, object_name = uploader.upload_audio(audio_data, filename)
-            print(f"Uploaded to OSS: {object_name}")
-        except Exception as e:
-            print(f"OSS upload failed: {e}")
-            # Fallback: mock result
-            return mock_transcribe()
-    else:
+    if not OSS_AVAILABLE:
        print("OSS not available, using mock")
        return mock_transcribe()
    
-    # 2. 调用听悟 API
-    # TODO: 实现听悟 API 调用
-    # 暂时返回 mock
-    return mock_transcribe()
+    try:
+        uploader = get_oss_uploader()
+        audio_url, object_name = uploader.upload_audio(audio_data, filename)
+        print(f"Uploaded to OSS: {object_name}")
+    except Exception as e:
+        print(f"OSS upload failed: {e}")
+        return mock_transcribe()
+    
+    # 2. 听悟转录
+    if not TINGWU_AVAILABLE:
+        print("Tingwu not available, using mock")
+        return mock_transcribe()
+    
+    try:
+        client = TingwuClient()
+        result = client.transcribe(audio_url)
+        print(f"Transcription complete: {len(result['segments'])} segments")
+        return result
+    except Exception as e:
+        print(f"Tingwu failed: {e}")
+        return mock_transcribe()

 def mock_transcribe() -> dict:
    """Mock 转录结果用于测试"""
@@ -157,9 +167,9 @@ async def upload_audio(file: UploadFile = File(...)):
    """上传音频并分析"""
    content = await file.read()
    
-    # 听悟转录
-    print(f"Transcribing with Tingwu: {file.filename}")
-    tw_result = transcribe_with_tingwu(content, file.filename)
+    # 转录
+    print(f"Processing: {file.filename} ({len(content)} bytes)")
+    tw_result = transcribe_audio(content, file.filename)
    
    # 构建片段
    segments = [
@@ -167,7 +177,7 @@ async def upload_audio(file: UploadFile = File(...)):
    ] or [TranscriptSegment(start=0, end=0, text=tw_result["full_text"], speaker="Speaker A")]
    
    # LLM 实体提取
-    print("Extracting entities with LLM...")
+    print("Extracting entities...")
    entities = extract_entities_with_llm(tw_result["full_text"])
    
    analysis = AnalysisResult(
@@ -179,7 +189,7 @@ async def upload_audio(file: UploadFile = File(...)):
    )
    
    storage[analysis.transcript_id] = analysis
-    print(f"Analysis complete: {analysis.transcript_id}, {len(entities)} entities found")
+    print(f"Complete: {analysis.transcript_id}, {len(entities)} entities")
    return analysis

@app.get("/api/v1/transcripts/{transcript_id}", response_model=AnalysisResult)