# Changelog: fix PEP8 line-length issues (>120 chars); add return-type
# annotations (__init__, _get_db); remove __pycache__ cache files;
# reformat long SQL query statements.
#!/usr/bin/env python3
"""
InsightFlow LLM Client - Phase 4

Client for the Kimi API; supports RAG question answering and Agent features.
"""

import json
import os
import re
from collections.abc import AsyncGenerator
from dataclasses import dataclass

import httpx

# Kimi API credentials and endpoint, read from the environment at import time.
KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
KIMI_BASE_URL = os.getenv("KIMI_BASE_URL", "https://api.kimi.com/coding")


@dataclass
class ChatMessage:
    """A single message in a chat conversation sent to the Kimi API."""

    role: str  # chat role, e.g. "system" / "user" / "assistant"
    content: str  # message text


@dataclass
class EntityExtractionResult:
    """An entity extracted from meeting text by the LLM."""

    name: str  # entity name as it appears in the text
    type: str  # category: PROJECT/TECH/PERSON/ORG/OTHER (prompt-defined)
    definition: str  # one-sentence definition produced by the LLM
    confidence: float  # extraction confidence score in [0, 1]


@dataclass
class RelationExtractionResult:
    """A directed relation between two extracted entities."""

    source: str  # source entity name
    target: str  # target entity name
    type: str  # relation type: belongs_to/works_with/depends_on/mentions/related
    confidence: float  # extraction confidence score in [0, 1]


class LLMClient:
    """Async client for the Kimi chat-completion API.

    Provides low-level chat calls (blocking and streaming) plus the
    higher-level helpers used by InsightFlow: entity/relation extraction,
    RAG question answering, agent command parsing and entity-evolution
    analysis.
    """

    def __init__(self, api_key: str | None = None, base_url: str | None = None) -> None:
        """Initialize the client.

        Args:
            api_key: Kimi API key; falls back to the KIMI_API_KEY env var.
            base_url: API base URL; falls back to the KIMI_BASE_URL env var.
        """
        self.api_key = api_key or KIMI_API_KEY
        self.base_url = base_url or KIMI_BASE_URL
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    async def chat(
        self, messages: list[ChatMessage], temperature: float = 0.3, stream: bool = False
    ) -> str:
        """Send a chat request and return the assistant's reply text.

        Args:
            messages: Conversation history to send.
            temperature: Sampling temperature forwarded to the API.
            stream: Forwarded to the API payload. NOTE(review): the response
                is still read as a single JSON body here, so callers wanting
                true streaming should use chat_stream() instead.

        Raises:
            ValueError: If no API key is configured.
            httpx.HTTPStatusError: On a non-2xx API response.
        """
        if not self.api_key:
            raise ValueError("KIMI_API_KEY not set")

        payload = {
            "model": "k2p5",
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "temperature": temperature,
            "stream": stream,
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.base_url}/v1/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=120.0,
            )
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]

    async def chat_stream(
        self, messages: list[ChatMessage], temperature: float = 0.3
    ) -> AsyncGenerator[str, None]:
        """Stream a chat reply, yielding content deltas as they arrive.

        Parses the server-sent-events stream ("data: ..." lines) and yields
        each chunk's text delta until the "[DONE]" sentinel.

        Raises:
            ValueError: If no API key is configured.
            httpx.HTTPStatusError: On a non-2xx API response.
        """
        if not self.api_key:
            raise ValueError("KIMI_API_KEY not set")

        payload = {
            "model": "k2p5",
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "temperature": temperature,
            "stream": True,
        }

        async with httpx.AsyncClient() as client:
            async with client.stream(
                "POST",
                f"{self.base_url}/v1/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=120.0,
            ) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if line.startswith("data: "):
                        data = line[6:]  # strip the "data: " SSE prefix
                        if data == "[DONE]":
                            break
                        try:
                            chunk = json.loads(data)
                            delta = chunk["choices"][0]["delta"]
                            if "content" in delta:
                                yield delta["content"]
                        except (json.JSONDecodeError, KeyError, IndexError):
                            # Best-effort: skip malformed or non-content chunks.
                            pass

    async def extract_entities_with_confidence(
        self, text: str
    ) -> tuple[list[EntityExtractionResult], list[RelationExtractionResult]]:
        """Extract entities and relations from meeting text, with confidence.

        Returns:
            A (entities, relations) tuple; both lists are empty when the
            model output contains no parseable JSON object.
        """
        prompt = f"""从以下会议文本中提取关键实体和它们之间的关系,以 JSON 格式返回:

文本:{text[:3000]}

要求:
1. entities: 每个实体包含 name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER),
   definition(一句话定义), confidence(置信度0-1)
2. relations: 每个关系包含 source(源实体名), target(目标实体名),
   type(关系类型: belongs_to/works_with/depends_on/mentions/related), confidence(置信度0-1)
3. 只返回 JSON 对象,格式: {{"entities": [...], "relations": [...]}}

示例:
{{
  "entities": [
    {{"name": "Project Alpha", "type": "PROJECT", "definition": "核心项目",
      "confidence": 0.95}},
    {{"name": "K8s", "type": "TECH", "definition": "Kubernetes容器编排平台",
      "confidence": 0.88}}
  ],
  "relations": [
    {{"source": "Project Alpha", "target": "K8s", "type": "depends_on",
      "confidence": 0.82}}
  ]
}}"""

        messages = [ChatMessage(role="user", content=prompt)]
        content = await self.chat(messages, temperature=0.1)

        # Grab the outermost JSON object. BUGFIX: the previous pattern
        # r"\{{.*?\}}" required literal double braces (an f-string brace
        # escape leaked into a plain raw string) and its non-greedy match
        # stopped at the first "}", truncating nested JSON. Greedy
        # first-"{"-to-last-"}" captures the whole object.
        json_match = re.search(r"\{.*\}", content, re.DOTALL)
        if not json_match:
            return [], []

        try:
            data = json.loads(json_match.group())
            entities = [
                EntityExtractionResult(
                    name=e["name"],
                    type=e.get("type", "OTHER"),
                    definition=e.get("definition", ""),
                    confidence=e.get("confidence", 0.8),
                )
                for e in data.get("entities", [])
            ]
            relations = [
                RelationExtractionResult(
                    source=r["source"],
                    target=r["target"],
                    type=r.get("type", "related"),
                    confidence=r.get("confidence", 0.8),
                )
                for r in data.get("relations", [])
            ]
            return entities, relations
        except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
            # Narrowed from bare `except Exception` to the failure modes a
            # malformed model reply can actually produce; stays best-effort.
            print(f"Parse extraction result failed: {e}")
            return [], []

    async def rag_query(self, query: str, context: str, project_context: dict) -> str:
        """Answer a user question (RAG) grounded in project context.

        Args:
            query: The user's question.
            context: Retrieved context text (truncated to 4000 chars).
            project_context: Project metadata serialized into the prompt.
        """
        prompt = f"""你是一个专业的项目分析助手。基于以下项目信息回答问题:

## 项目信息
{json.dumps(project_context, ensure_ascii=False, indent=2)}

## 相关上下文
{context[:4000]}

## 用户问题
{query}

请用中文回答,保持简洁专业。如果信息不足,请明确说明。"""

        messages = [
            ChatMessage(
                role="system", content="你是一个专业的项目分析助手,擅长从会议记录中提取洞察。"
            ),
            ChatMessage(role="user", content=prompt),
        ]

        return await self.chat(messages, temperature=0.3)

    async def agent_command(self, command: str, project_context: dict) -> dict:
        """Parse a natural-language instruction into a structured operation.

        Returns:
            A dict with "intent", "params" and "explanation" keys; an
            {"intent": "unknown", ...} dict when parsing fails.
        """
        prompt = f"""解析以下用户指令,转换为结构化操作:

## 项目信息
{json.dumps(project_context, ensure_ascii=False, indent=2)}

## 用户指令
{command}

请分析指令意图,返回 JSON 格式:
{{
    "intent": "merge_entities|answer_question|edit_entity|create_relation|unknown",
    "params": {{
        // 根据 intent 不同,参数不同
    }},
    "explanation": "对用户指令的解释"
}}

意图说明:
- merge_entities: 合并实体,params 包含 source_names(源实体名列表), target_name(目标实体名)
- answer_question: 回答问题,params 包含 question(问题内容)
- edit_entity: 编辑实体,params 包含 entity_name(实体名), field(字段), value(新值)
- create_relation: 创建关系,params 包含 source(源实体), target(目标实体), relation_type(关系类型)
"""

        messages = [ChatMessage(role="user", content=prompt)]
        content = await self.chat(messages, temperature=0.1)

        # Same BUGFIX as extract_entities_with_confidence: greedy match of
        # the outermost JSON object instead of the broken r"\{{.*?\}}".
        json_match = re.search(r"\{.*\}", content, re.DOTALL)
        if not json_match:
            return {"intent": "unknown", "explanation": "无法解析指令"}

        try:
            return json.loads(json_match.group())
        except (json.JSONDecodeError, KeyError, TypeError):
            return {"intent": "unknown", "explanation": "解析失败"}

    async def analyze_entity_evolution(self, entity_name: str, mentions: list[dict]) -> str:
        """Analyze how an entity (and attitudes toward it) evolved over time.

        Args:
            entity_name: Name of the entity to analyze.
            mentions: Mention records; each dict is expected to carry
                "created_at" and "text_snippet" keys (missing keys are
                tolerated via .get()).
        """
        mentions_text = "\n".join(
            [
                f"[{m.get('created_at', '未知时间')}] {m.get('text_snippet', '')}"
                for m in mentions[:20]
            ]  # cap the number of mentions included in the prompt
        )

        prompt = f"""分析实体 "{entity_name}" 在项目中的演变和态度变化:

## 提及记录
{mentions_text}

请分析:
1. 该实体的角色/重要性变化
2. 相关方对它的态度变化
3. 关键时间节点
4. 总结性洞察

用中文回答,结构清晰。"""

        messages = [ChatMessage(role="user", content=prompt)]
        return await self.chat(messages, temperature=0.3)


# Singleton instance
# Lazily created, process-wide client; access it via get_llm_client().
_llm_client: LLMClient | None = None


def get_llm_client() -> LLMClient:
    """Return the shared LLMClient, creating it on first use."""
    global _llm_client
    if _llm_client is None:
        _llm_client = LLMClient()
    return _llm_client