马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?立即注册
×
对话一长,Token 烧得肉疼。那怎么办,做压缩
Java实现代码
- public class ContextCompactSystem {
- // --- 配置 ---
- private static final Path WORKDIR = Paths.get(System.getProperty("user.dir"));
- private static final Path TRANSCRIPT_DIR = WORKDIR.resolve(".transcripts"); // 新增:对话存档目录
- private static final Gson gson = new GsonBuilder().setPrettyPrinting().create();
-
- // 压缩参数
- private static final int THRESHOLD_TOKENS = 50000; // 触发自动压缩的 token 阈值
- private static final int KEEP_RECENT = 3; // 保留的最近工具结果数量
-
- // --- 工具枚举 ---
- public enum ToolType {
- BASH("bash", "Run a shell command."),
- READ_FILE("read_file", "Read file contents."),
- WRITE_FILE("write_file", "Write content to file."),
- EDIT_FILE("edit_file", "Replace exact text in file."),
- COMPACT("compact", "Trigger manual conversation compression."); // 新增:手动压缩工具
- public final String name;
- public final String description;
- ToolType(String name, String description) { this.name = name; this.description = description; }
- }
- // ... 省略相同的 ToolExecutor 接口和基础工具实现
-
- // --- 三层次压缩系统 ---
-
- /**
- * Layer 1: 微观压缩 - 静默替换旧的工具结果
- */
- private static List<Map<String, Object>> microCompact(List<Map<String, Object>> messages) {
- // 收集所有的 tool_result 条目
- List<ToolResultInfo> toolResults = new ArrayList<>();
-
- for (int msgIdx = 0; msgIdx < messages.size(); msgIdx++) {
- Map<String, Object> msg = messages.get(msgIdx);
- if ("user".equals(msg.get("role"))) {
- Object content = msg.get("content");
- if (content instanceof List) {
- @SuppressWarnings("unchecked")
- List<Map<String, Object>> contentList = (List<Map<String, Object>>) content;
-
- for (int partIdx = 0; partIdx < contentList.size(); partIdx++) {
- Map<String, Object> part = contentList.get(partIdx);
- if ("tool_result".equals(part.get("type"))) {
- toolResults.add(new ToolResultInfo(msgIdx, partIdx, part));
- }
- }
- }
- }
- }
-
- if (toolResults.size() <= KEEP_RECENT) {
- return messages;
- }
-
- // 从先前的 assistant 消息中映射 tool_use_id 到 tool_name
- Map<String, String> toolNameMap = new HashMap<>();
- for (Map<String, Object> msg : messages) {
- if ("assistant".equals(msg.get("role"))) {
- Object content = msg.get("content");
- if (content instanceof List) {
- @SuppressWarnings("unchecked")
- List<Map<String, Object>> contentList = (List<Map<String, Object>>) content;
-
- for (Map<String, Object> block : contentList) {
- if ("tool_use".equals(block.get("type"))) {
- String toolId = (String) block.get("id");
- String toolName = (String) block.get("name");
- toolNameMap.put(toolId, toolName);
- }
- }
- }
- }
- }
-
- // 清除旧的结果(保留最近的 KEEP_RECENT 个)
- List<ToolResultInfo> toClear = toolResults.subList(0, toolResults.size() - KEEP_RECENT);
-
- for (ToolResultInfo info : toClear) {
- Map<String, Object> result = info.result;
- Object content = result.get("content");
-
- if (content instanceof String && ((String) content).length() > 100) {
- String toolId = (String) result.get("tool_use_id");
- String toolName = toolNameMap.getOrDefault(toolId, "unknown");
- result.put("content", "[Previous: used " + toolName + "]"); // 静默替换
- }
- }
-
- return messages;
- }
-
- /**
- * Layer 2: 自动压缩 - 保存完整对话并生成摘要
- */
- private static List<Map<String, Object>> autoCompact(List<Map<String, Object>> messages) throws IOException {
- // 保存完整对话到磁盘
- Files.createDirectories(TRANSCRIPT_DIR);
- Path transcriptPath = TRANSCRIPT_DIR.resolve("transcript_" + System.currentTimeMillis() + ".jsonl");
-
- try (BufferedWriter writer = Files.newBufferedWriter(transcriptPath)) {
- for (Map<String, Object> msg : messages) {
- writer.write(gson.toJson(msg));
- writer.newLine();
- }
- }
-
- System.out.println("[transcript saved: " + transcriptPath + "]");
-
- // 调用 LLM 生成摘要
- String conversationText = gson.toJson(messages);
- if (conversationText.length() > 80000) {
- conversationText = conversationText.substring(0, 80000);
- }
-
- String summary = simulateLLMSummary(conversationText);
-
- // 用摘要替换整个对话历史
- List<Map<String, Object>> compressedMessages = new ArrayList<>();
-
- compressedMessages.add(Map.of(
- "role", "user",
- "content", "[Conversation compressed. Transcript: " + transcriptPath + "]\n\n" + summary
- ));
-
- compressedMessages.add(Map.of(
- "role", "assistant",
- "content", "Understood. I have the context from the summary. Continuing."
- ));
-
- return compressedMessages;
- }
-
- /**
- * Layer 3: 手动压缩工具
- * 当 Agent 主动调用 compact 工具时触发
- */
- private static String handleCompactTool(Map<String, Object> args) {
- String focus = (String) args.get("focus");
- String focusMsg = focus != null ? " Focus: " + focus : "";
- return "Manual compression requested." + focusMsg;
- }
-
- /**
- * 估算 token 数量
- * 简单实现:约 4 个字符对应 1 个 token
- */
- private static int estimateTokens(List<Map<String, Object>> messages) {
- String messagesStr = gson.toJson(messages);
- return messagesStr.length() / 4;
- }
-
- // --- 工具处理器映射 ---
- private static final Map<String, ToolExecutor> TOOL_HANDLERS = new HashMap<>();
-
- static {
- // ... 省略基础工具注册
-
- TOOL_HANDLERS.put(ToolType.COMPACT.name, ContextCompactSystem::handleCompactTool);
- }
-
- // --- Agent 主循环(集成了三层压缩)---
- public static void agentLoop(List<Map<String, Object>> messages) {
- while (true) {
- try {
- // Layer 1: 每次调用前进行微观压缩
- messages = microCompact(messages);
-
- // Layer 2: 如果 token 数超过阈值,自动压缩
- if (estimateTokens(messages) > THRESHOLD_TOKENS) {
- System.out.println("[auto_compact triggered]");
- messages = autoCompact(messages);
- }
-
- // ... 省略相同的 LLM 调用逻辑
-
- boolean manualCompact = false;
- for (Map<String, Object> block : content) {
- if ("tool_use".equals(block.get("type"))) {
- String toolName = (String) block.get("name");
-
- // 检查是否是 compact 工具
- if (ToolType.COMPACT.name.equals(toolName)) {
- manualCompact = true; // 标记手动压缩
- }
-
- // ... 执行工具
- }
- }
-
- // Layer 3: 如果调用了 compact 工具,执行手动压缩
- if (manualCompact) {
- System.out.println("[manual compact]");
- messages = autoCompact(messages);
- }
-
- } catch (Exception e) {
- System.err.println("Error in agent loop: " + e.getMessage());
- e.printStackTrace();
- return;
- }
- }
- }
-
- // --- 辅助类和方法 ---
- private static class ToolResultInfo {
- int msgIndex;
- int partIndex;
- Map<String, Object> result;
-
- ToolResultInfo(int msgIndex, int partIndex, Map<String, Object> result) {
- this.msgIndex = msgIndex;
- this.partIndex = partIndex;
- this.result = result;
- }
- }
- }
复制代码 三层次压缩系统架构
解决长期对话中的上下文长度限制问题:通过三层渐进式压缩策略,在不丢失关键信息的前提下大幅度缩减上下文长度,实现无限长对话的能力。- // 压缩流程
- while (true) {
- // Layer 1: 每次调用前进行微观压缩
- messages = microCompact(messages);
-
- // Layer 2: 如果 token 数超过阈值,自动压缩
- if (estimateTokens(messages) > THRESHOLD_TOKENS) {
- messages = autoCompact(messages);
- }
-
- // Layer 3: 如果调用了 compact 工具,执行手动压缩
- if (manualCompact) {
- messages = autoCompact(messages);
- }
- }
复制代码
- 分层压缩:微观、自动、手动三层策略,粒度从细到粗
- 智能触发:基于 token 估算自动判断压缩时机
- 渐进保留:保留最近的关键信息,确保连续性
- 可恢复性:压缩前保存完整对话,避免信息丢失
微观压缩:无感地进行轻量级压缩
- private static List<Map<String, Object>> microCompact(List<Map<String, Object>> messages) {
- // Layer 1 sketch: collect every tool_result part (the scanning loop is not shown in this excerpt)
- List<ToolResultInfo> toolResults = new ArrayList<>();
-
- // Nothing to do while there are at most KEEP_RECENT results; they all stay complete
- if (toolResults.size() <= KEEP_RECENT) {
- return messages;
- }
-
- // Replace the old results with a placeholder (toClear, result and toolName are not declared in this excerpt)
- for (ToolResultInfo info : toClear) {
- result.put("content", "[Previous: used " + toolName + "]");
- }
- }
复制代码
- 存档优先:压缩前先完整保存,避免信息丢失
- 智能摘要:用 LLM 生成高质量的对话摘要
- 上下文重置:大幅缩减上下文,但保留核心信息
- 路径嵌入:在消息中包含存档路径,便于调试
- 结构完整:保持 user-assistant 对话结构
手动压缩:给予 Agent 主动控制权
- /**
- * Layer 2: 自动压缩 - 保存完整对话并生成摘要
- */
- private static List<Map<String, Object>> autoCompact(List<Map<String, Object>> messages) throws IOException {
- // 1. 保存完整对话到磁盘
- Files.createDirectories(TRANSCRIPT_DIR);
- Path transcriptPath = TRANSCRIPT_DIR.resolve("transcript_" + System.currentTimeMillis() + ".jsonl");
-
- try (BufferedWriter writer = Files.newBufferedWriter(transcriptPath)) {
- for (Map<String, Object> msg : messages) {
- writer.write(gson.toJson(msg));
- writer.newLine();
- }
- }
- // 存档保护:完整对话保存到文件,随时可查
- // JSONL格式:每行一个消息,便于处理和加载
-
- // 2. 调用 LLM 生成摘要
- String conversationText = gson.toJson(messages);
- if (conversationText.length() > 80000) {
- conversationText = conversationText.substring(0, 80000);
- }
- String summary = simulateLLMSummary(conversationText);
-
- // 3. 用摘要替换整个对话历史
- List<Map<String, Object>> compressedMessages = new ArrayList<>();
-
- compressedMessages.add(Map.of(
- "role", "user",
- "content", "[Conversation compressed. Transcript: " + transcriptPath + "]\n\n" + summary
- ));
- // 上下文重置:用单条消息包含存档位置和摘要
- // 完整可追溯:存档路径包含在上下文中
-
- compressedMessages.add(Map.of(
- "role", "assistant",
- "content", "Understood. I have the context from the summary. Continuing."
- ));
- // 连续性保持:添加assistant确认,维持对话结构
-
- return compressedMessages;
- }
复制代码- /**
- * Layer 3: 手动压缩工具
- * 当 Agent 主动调用 compact 工具时触发
- */
- private static String handleCompactTool(Map<String, Object> args) {
- String focus = (String) args.get("focus");
- String focusMsg = focus != null ? " Focus: " + focus : "";
- return "Manual compression requested." + focusMsg;
- // Agent控制:Agent可以根据需要主动压缩
- // 参数化:可以指定摘要焦点,指导LLM关注特定方面
- }
复制代码
- Agent自主控制:Agent可以主动管理上下文长度
- 任务驱动压缩:在合适的时间点(如任务切换时)触发压缩
- 聚焦摘要:可以指定摘要重点,优化信息保留
- 无缝集成:与自动压缩共享底层机制
架构演进与价值
从 TaskSystem 到 ContextCompactSystem 的升级:
| 维度 | TaskSystem | ContextCompactSystem |
| --- | --- | --- |
| 对话长度 | 受上下文限制 | 支持无限长对话 |
| 信息保存 | 全量存储 | 智能摘要+存档 |
| 控制方式 | 被动限制 | 自动+主动压缩 |
| 长期记忆 | 任务文件 | 对话存档+摘要 |
| 上下文优化 | 无 | 三层智能压缩 |
免责声明:如果侵犯了您的权益,请联系站长及时删除侵权内容,谢谢合作!qidao123.com:ToB企服之家,中国第一个企服评测及软件市场,开放入驻,技术点评得现金. |