返回笔记首页

大模型前端项目——六大难点技术实现方案

主题配置

技术栈:Vue3 Composition API + <script setup> + 纯 JavaScript


一、流式输出处理

核心问题

SSE 流式数据到达时,每个 token 都触发一次响应式更新,100ms 内可能触发几十次 re-render,同时 Markdown 内容不完整时(比如代码块只输出了开头的三个反引号)会导致渲染闪烁或布局错乱。


1.1 SSE 连接与断线重连

javascript
// composables/useSSE.js
import { ref, onUnmounted } from 'vue'

/**
 * Split buffered SSE text into the complete lines received so far and the
 * trailing partial line, which must wait for the next network chunk.
 *
 * @param {string} buffered - Text accumulated from the stream.
 * @returns {{ lines: string[], rest: string }} Complete lines and the unfinished tail.
 */
function splitSSELines(buffered) {
    const lines = buffered.split(/\r?\n/)
    const rest = lines.pop() ?? ''
    return { lines, rest }
}

/**
 * Interpret one line of an SSE body.
 *
 * @param {string} line - A single complete line (no line terminator).
 * @returns {{ done: boolean, token: string } | null} `null` for lines that
 *   are not `data:` lines or whose payload is not valid JSON; `done: true`
 *   for the `[DONE]` sentinel; otherwise the text found at
 *   `choices[0].delta.content` ('' when absent).
 */
function parseSSEDataLine(line) {
    if (!line.startsWith('data:')) return null
    const raw = line.slice(5).trim()
    if (raw === '[DONE]') return { done: true, token: '' }
    try {
        const parsed = JSON.parse(raw)
        return { done: false, token: parsed.choices?.[0]?.delta?.content ?? '' }
    } catch {
        // Malformed payload: skip it, as the stream may still carry valid events.
        return null
    }
}

/**
 * Composable that POSTs a JSON body and accumulates the streamed answer.
 *
 * Differences from a naive reader:
 * - lines are buffered across network chunks, so a `data:` line split over
 *   two reads is parsed once complete instead of being dropped;
 * - `[DONE]` ends the whole read loop;
 * - a newer `startStream` call aborts the previous one, and a superseded
 *   stream can no longer reset `loading`;
 * - `loading` stays true while an automatic retry is scheduled, and `stop()`
 *   cancels that retry.
 *
 * @returns {{ content, loading, error, startStream: Function, stop: Function }}
 *   `content` is the text received so far, `error` the last error message.
 */
export function useSSE() {
    const content = ref('')
    const loading = ref(false)
    const error = ref(null)

    let abortController = null
    let retryTimer = null
    let retryCount = 0
    const MAX_RETRY = 3
    const RETRY_DELAY = 1500

    function clearRetryTimer() {
        if (retryTimer !== null) {
            clearTimeout(retryTimer)
            retryTimer = null
        }
    }

    // One attempt; used both for the initial call and for automatic retries.
    async function runStream(url, body) {
        abortController?.abort()
        const controller = new AbortController()
        abortController = controller

        content.value = ''
        loading.value = true
        error.value = null
        let willRetry = false

        try {
            const res = await fetch(url, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(body),
                signal: controller.signal,
            })

            if (!res.ok) throw new Error(`HTTP ${res.status}`)

            const reader = res.body.getReader()
            const decoder = new TextDecoder()
            let pending = ''
            let sawDone = false

            while (!sawDone) {
                const { done, value } = await reader.read()
                pending += done
                    ? decoder.decode()
                    : decoder.decode(value, { stream: true })

                const { lines, rest } = splitSSELines(pending)
                // At end of stream the tail is final, so parse it as well.
                const ready = done ? [...lines, rest] : lines
                pending = done ? '' : rest

                for (const line of ready) {
                    const event = parseSSEDataLine(line)
                    if (event === null) continue
                    if (event.done) {
                        sawDone = true
                        break
                    }
                    content.value += event.token
                }

                if (done) break
            }

            if (sawDone) {
                // Best-effort release of the connection; a failure here does
                // not affect the content already received.
                reader.cancel().catch(() => {})
            }

            retryCount = 0
        } catch (err) {
            if (err.name === 'AbortError') return

            error.value = err.message
            if (retryCount < MAX_RETRY) {
                retryCount++
                willRetry = true
                retryTimer = setTimeout(() => {
                    retryTimer = null
                    runStream(url, body)
                }, RETRY_DELAY * retryCount)
            }
        } finally {
            // Only the current attempt may clear `loading`, and not while a
            // retry is pending.
            if (abortController === controller && !willRetry) {
                loading.value = false
            }
        }
    }

    /**
     * Start streaming. Cancels any in-flight stream or pending retry first.
     *
     * @param {string} url - Endpoint to POST to.
     * @param {*} body - JSON-serializable request payload.
     * @returns {Promise<void>} Settles when this attempt ends (not its retries).
     */
    function startStream(url, body) {
        clearRetryTimer()
        retryCount = 0
        return runStream(url, body)
    }

    /** Abort the active stream and cancel any scheduled retry. */
    function stop() {
        clearRetryTimer()
        abortController?.abort()
        loading.value = false
    }

    onUnmounted(stop)

    return { content, loading, error, startStream, stop }
}

1.2 批量合并 token 更新,避免频繁 re-render

核心思路:token 到达后先放进缓冲队列,用 requestAnimationFrame 每帧统一 flush 一次,而不是每个 token 都直接写入响应式变量。

javascript
// composables/useStreamBuffer.js
import { ref, onUnmounted } from 'vue'

/**
 * Composable that batches streamed tokens and publishes them at most once
 * per animation frame.
 *
 * @returns {{ displayContent, push: Function, reset: Function }}
 *   `displayContent` holds the published text, `push(token)` queues text,
 *   `reset()` clears both the queue and the published text.
 */
export function useStreamBuffer() {
    const displayContent = ref('')
    let pendingText = ''
    let frameHandle = null

    // Cancels the scheduled frame, if there is one.
    const cancelScheduledFrame = () => {
        if (frameHandle) {
            cancelAnimationFrame(frameHandle)
        }
    }

    // Frame callback: publish whatever accumulated since the last frame.
    const publishPending = () => {
        if (pendingText !== '') {
            displayContent.value += pendingText
            pendingText = ''
        }
        frameHandle = null
    }

    const push = (token) => {
        pendingText += token
        if (frameHandle) return
        frameHandle = requestAnimationFrame(publishPending)
    }

    const reset = () => {
        pendingText = ''
        displayContent.value = ''
        if (frameHandle) {
            cancelScheduledFrame()
            frameHandle = null
        }
    }

    // Avoid a frame callback firing after the owning component is gone.
    onUnmounted(cancelScheduledFrame)

    return { displayContent, push, reset }
}

使用时,SSE 每收到一个 token 就调用 push(token),而不是直接写 content.value += token;页面上渲染 displayContent 即可。


1.3 流式 Markdown 容错渲染

流式场景下 Markdown 内容随时处于"残缺"状态,直接用 marked 渲染会出现代码块未闭合导致后续内容全被吞掉的问题。解决方案是在渲染前对内容做补全处理。

javascript
// utils/streamMarkdown.js

/**
 * Patch Markdown that is still being streamed so that unfinished constructs
 * do not break rendering.
 *
 * - An odd number of ``` markers means a fenced code block is still open.
 *   The fence is closed and nothing else is appended: the tail of the text
 *   is code, and any characters after the closing fence would stop it from
 *   being recognised as a fence.
 * - Otherwise, text inside complete fenced blocks is ignored and an unclosed
 *   inline-code backtick and/or an unclosed `**` is closed.
 *
 * @param {string | null | undefined} text - Markdown received so far.
 * @returns {string} The text with closing markers appended where needed;
 *   '' for null/undefined input.
 */
export function patchIncompleteMarkdown(text) {
    let result = text ?? ''

    // 1. Open fenced code block: close it and stop.
    const fenceCount = (result.match(/```/g) || []).length
    if (fenceCount % 2 !== 0) {
        return result + '\n```'
    }

    // Inline markers only count outside complete fenced blocks.
    let prose = result.replace(/```[\s\S]*?```/g, '')

    // 2. Unclosed inline code (a lone backtick).
    const inlineTicks = (prose.match(/(?<!`)`(?!`)/g) || []).length
    if (inlineTicks % 2 !== 0) {
        result += '`'
        prose += '`'
    }

    // 3. Unclosed bold; `**` inside inline code is literal text, so drop
    //    inline code spans before counting.
    const proseWithoutInlineCode = prose.replace(/`[^`]*`/g, '')
    const boldMarks = (proseWithoutInlineCode.match(/\*\*/g) || []).length
    if (boldMarks % 2 !== 0) {
        result += '**'
    }

    return result
}

在组件里结合 marked 使用:

vue
<!-- components/StreamMessage.vue -->
<script setup>
import { computed } from 'vue'
import { marked } from 'marked'
import { patchIncompleteMarkdown } from '@/utils/streamMarkdown'

// content:   raw Markdown of the message (may be incomplete while streaming)
// streaming: true while tokens are still arriving
const props = defineProps({
    content: { type: String, default: '' },
    streaming: Boolean,
})

// HTML for the message body. While streaming, the Markdown is patched first
// so that unfinished constructs do not swallow the rest of the message.
//
// SECURITY NOTE(review): the HTML produced here is injected with v-html and
// is NOT sanitized in this component. If `content` can contain untrusted
// text (model output, user input), run the result through an HTML sanitizer
// before rendering.
const renderedHtml = computed(() => {
    // An explicit null bypasses the prop default, so guard here as well.
    const source = props.content ?? ''
    const markdown = props.streaming
        ? patchIncompleteMarkdown(source)
        : source
    return marked.parse(markdown)
})
</script>

<template>
    <div class="message-content" v-html="renderedHtml" />
</template>

二、上下文与会话管理

2.1 前端侧 token 预估

调用 API 之前估算本次请求的 token 数,超出阈值时提示用户。这里用一个轻量的估算方法(不引入 tiktoken-wasm 的简化版):

javascript
// utils/tokenEstimator.js

/**
 * Rough token estimate for a piece of text.
 *
 * Heuristic used here:
 * - each character in the CJK Unified Ideographs block (U+4E00..U+9FFF,
 *   both ends included) counts as 1 token;
 * - every other character counts as 0.25 token (about 4 characters per token).
 *
 * The total is rounded up. This is an approximation intended for front-end
 * warnings only; its accuracy has not been measured against a real tokenizer.
 *
 * @param {string} text - Text to estimate.
 * @returns {number} Estimated token count; 0 for empty or non-string input.
 */
export function estimateTokens(text) {
    if (typeof text !== 'string' || text === '') return 0

    let count = 0
    // for...of walks code points, so read the code point rather than a UTF-16 unit.
    for (const char of text) {
        const code = char.codePointAt(0)
        const isCjkIdeograph = code >= 0x4e00 && code <= 0x9fff
        count += isCjkIdeograph ? 1 : 0.25
    }
    return Math.ceil(count)
}

/**
 * Estimate the token cost of a list of chat messages: the estimate of each
 * message's `content` plus a fixed overhead of 4 tokens per message.
 *
 * @param {Array<{ content: string }>} messages - Messages to measure.
 * @returns {number} Sum of the per-message estimates.
 */
export function estimateMessagesTokens(messages) {
    const PER_MESSAGE_OVERHEAD = 4
    let total = 0
    messages.forEach((message) => {
        total = total + estimateTokens(message.content) + PER_MESSAGE_OVERHEAD
    })
    return total
}
javascript
// composables/useContextManager.js
import { ref, computed } from 'vue'
import { estimateMessagesTokens } from '@/utils/tokenEstimator'

const CONTEXT_LIMIT = 8000 // Token budget enforced by this module; kept conservative (the model's real window may be far larger, e.g. 128k)
const WARN_THRESHOLD = 0.8 // Fraction of CONTEXT_LIMIT above which usage counts as "near the limit" (80%)

/**
 * Composable that tracks the messages of one conversation, estimates their
 * token usage and trims old messages when the budget is exceeded.
 *
 * @param {object} [options]
 * @param {number} [options.contextLimit=CONTEXT_LIMIT] - Token budget.
 * @param {number} [options.warnThreshold=WARN_THRESHOLD] - Fraction of the
 *   budget above which `isNearLimit` becomes true.
 * @returns {{ messages, tokenUsage, isNearLimit, isOverLimit,
 *   addMessage: Function, getMessagesForAPI: Function }}
 */
export function useContextManager({
    contextLimit = CONTEXT_LIMIT,
    warnThreshold = WARN_THRESHOLD,
} = {}) {
    const messages = ref([])
    let lastId = 0

    const tokenUsage = computed(() => estimateMessagesTokens(messages.value))
    const isNearLimit = computed(
        () => tokenUsage.value / contextLimit > warnThreshold
    )
    const isOverLimit = computed(() => tokenUsage.value > contextLimit)

    // Timestamp-based id that stays unique (and increasing) even when several
    // messages are added within the same millisecond.
    function nextMessageId() {
        lastId = Math.max(Date.now(), lastId + 1)
        return lastId
    }

    /**
     * Append a message to the conversation.
     *
     * @param {string} role - Message role, e.g. 'system' | 'user' | 'assistant'.
     * @param {string} content - Message text.
     */
    function addMessage(role, content) {
        messages.value.push({ role, content, id: nextMessageId() })
    }

    /**
     * When over budget, drop the oldest non-system messages until the
     * estimate fits or only two non-system messages remain. System messages
     * are always kept and end up first in the list.
     */
    function truncateIfNeeded() {
        if (!isOverLimit.value) return

        const systemMessages = messages.value.filter((m) => m.role === 'system')
        const others = messages.value.filter((m) => m.role !== 'system')

        while (
            others.length > 2 &&
            estimateMessagesTokens([...systemMessages, ...others]) > contextLimit
        ) {
            others.shift()
        }

        messages.value = [...systemMessages, ...others]
    }

    /**
     * Trim the history if needed and return it in the `{ role, content }`
     * shape (internal ids are stripped).
     *
     * @returns {Array<{ role: string, content: string }>}
     */
    function getMessagesForAPI() {
        truncateIfNeeded()
        return messages.value.map(({ role, content }) => ({ role, content }))
    }

    return {
        messages,
        tokenUsage,
        isNearLimit,
        isOverLimit,
        addMessage,
        getMessagesForAPI,
    }
}

2.2 多会话状态隔离

javascript
// stores/sessionStore.js
import { reactive, ref } from 'vue'

// Module-level state: declared outside useSessionStore, so every caller of
// the composable shares the same Map and the same current-session ref.
const sessions = reactive(new Map()) // sessionId -> { messages, title, createdAt }
const currentSessionId = ref(null) // id of the active session; null when there is none

/**
 * Store for chat sessions backed by the module-level `sessions` map and
 * `currentSessionId` ref, with optional persistence to localStorage under
 * the key 'chat_sessions'.
 *
 * @returns {object} The shared state plus the session operations below.
 */
export function useSessionStore() {
    /**
     * Create an empty session, make it current and return its id.
     * The id is timestamp-based; a numeric suffix is added if that id is
     * already taken, so an existing session is never overwritten.
     */
    function createSession() {
        const createdAt = Date.now()
        let id = `session_${createdAt}`
        let suffix = 1
        while (sessions.has(id)) {
            id = `session_${createdAt}_${suffix++}`
        }
        sessions.set(id, {
            id,
            title: '新对话',
            messages: [],
            createdAt,
        })
        currentSessionId.value = id
        return id
    }

    function getSession(id) {
        return sessions.get(id)
    }

    function getCurrentSession() {
        return sessions.get(currentSessionId.value)
    }

    /**
     * Delete a session. If it was the current one, the most recently
     * inserted remaining session becomes current (or null when none is left).
     */
    function deleteSession(id) {
        sessions.delete(id)
        if (currentSessionId.value === id) {
            const remaining = [...sessions.keys()]
            currentSessionId.value = remaining[remaining.length - 1] ?? null
        }
    }

    /**
     * Append a message to a session; unknown ids are ignored.
     * The title is derived from the FIRST user message (first 20 characters,
     * followed by '...' when longer), even when system messages precede it.
     */
    function addMessageToSession(id, message) {
        const session = sessions.get(id)
        if (!session) return
        session.messages.push(message)

        const isFirstUserMessage =
            message.role === 'user' &&
            session.messages.findIndex((m) => m.role === 'user') ===
                session.messages.length - 1
        if (isFirstUserMessage && typeof message.content === 'string') {
            // Slice by code point so an emoji is never cut in half.
            const chars = Array.from(message.content)
            session.title =
                chars.slice(0, 20).join('') + (chars.length > 20 ? '...' : '')
        }
    }

    /**
     * Write all sessions to localStorage.
     *
     * @returns {boolean} false when the write failed (e.g. quota exceeded or
     *   storage unavailable); the in-memory state is unaffected.
     */
    function persist() {
        try {
            localStorage.setItem(
                'chat_sessions',
                JSON.stringify(Object.fromEntries(sessions))
            )
            return true
        } catch (err) {
            console.warn('[sessionStore] failed to persist sessions', err)
            return false
        }
    }

    /**
     * Load sessions from localStorage and select the last one.
     * Corrupted or malformed data is ignored; entries without a `messages`
     * array are skipped.
     */
    function restore() {
        const raw = localStorage.getItem('chat_sessions')
        if (!raw) return

        let plain
        try {
            plain = JSON.parse(raw)
        } catch {
            // Corrupted JSON: keep the current in-memory state.
            return
        }
        if (plain === null || typeof plain !== 'object' || Array.isArray(plain)) {
            return
        }

        for (const [id, session] of Object.entries(plain)) {
            if (session && Array.isArray(session.messages)) {
                sessions.set(id, session)
            }
        }
        const ids = [...sessions.keys()]
        currentSessionId.value = ids[ids.length - 1] ?? null
    }

    return {
        sessions,
        currentSessionId,
        createSession,
        getSession,
        getCurrentSession,
        deleteSession,
        addMessageToSession,
        persist,
        restore,
    }
}

三、Function Calling / Tool Use 可视化

3.1 解析 Function Calling 响应结构

OpenAI / 兼容格式的 Function Calling 响应里,tool_calls 字段在流式下是分段到达的,需要合并:

javascript
// utils/toolCallParser.js

/**
 * Merge one streamed delta into the tool calls accumulated so far.
 *
 * In streaming mode each tool call arrives in fragments: `id` and `type`
 * replace the stored value, while `function.name` and `function.arguments`
 * are string fragments that are concatenated per call index.
 *
 * The function is non-mutating: neither `accumulated` nor the entries it
 * holds are modified, so earlier snapshots stay valid.
 *
 * @param {Array<object>} accumulated - Tool calls merged so far.
 * @param {object | null | undefined} delta - Delta possibly carrying `tool_calls`.
 * @returns {Array<object>} A new array with the delta applied, or
 *   `accumulated` itself when the delta carries no tool calls.
 */
export function mergeStreamingToolCalls(accumulated, delta) {
    const deltaCalls = delta?.tool_calls
    if (!deltaCalls) return accumulated

    const result = [...accumulated]

    deltaCalls.forEach((deltaCall, position) => {
        const { id, type, function: fn } = deltaCall
        // NOTE(review): falls back to the position inside this delta when the
        // provider omits `index` — confirm against the API actually used.
        const index = deltaCall.index ?? position

        const previous = result[index]
        // Copy before changing so the caller's previous state is untouched.
        const merged = previous
            ? { ...previous, function: { ...previous.function } }
            : { id: '', type: 'function', function: { name: '', arguments: '' } }

        if (id) merged.id = id
        if (type) merged.type = type
        if (fn?.name) merged.function.name += fn.name
        if (fn?.arguments) merged.function.arguments += fn.arguments

        result[index] = merged
    })

    return result
}

/**
 * Parse a tool call's `arguments` string as JSON.
 *
 * @param {string} argumentsStr - JSON text, possibly still incomplete.
 * @returns {*} The parsed value, or null when the text is not valid JSON.
 */
export function parseToolArguments(argumentsStr) {
    let parsed = null
    try {
        parsed = JSON.parse(argumentsStr)
    } catch {
        // Not (yet) valid JSON: report null to the caller.
        parsed = null
    }
    return parsed
}

3.2 工具调用状态机

每个工具调用有完整的生命周期:pending -> calling -> running -> success / error。注意:下方示例代码只实现了 pending、running、success、error 之间的切换,calling 状态需要在接入实际调用逻辑时自行补充。

javascript
// composables/useToolCallState.js
import { reactive } from 'vue'

/**
 * Composable tracking the lifecycle of tool calls, keyed by tool call id.
 *
 * Statuses written by this code: 'pending' (register), 'running'
 * (startExecution), 'success' (setSuccess), 'error' (setError).
 *
 * @returns {{ toolCalls, register: Function, startExecution: Function,
 *   setSuccess: Function, setError: Function, getDuration: Function }}
 */
export function useToolCallState() {
    const toolCalls = reactive(new Map()) // toolCallId -> state object

    // JSON-decode the arguments; fall back to the raw input when that fails.
    const decodeArguments = (raw) => {
        try {
            return JSON.parse(raw)
        } catch {
            return raw
        }
    }

    // Look up a tracked call and hand it to `mutate`; unknown ids are ignored.
    const withCall = (id, mutate) => {
        const entry = toolCalls.get(id)
        if (entry) mutate(entry)
    }

    const register = (toolCall) => {
        const { id } = toolCall
        toolCalls.set(id, {
            id,
            name: toolCall.function.name,
            args: decodeArguments(toolCall.function.arguments),
            status: 'pending',
            result: null,
            error: null,
            startTime: null,
            endTime: null,
        })
    }

    const startExecution = (id) =>
        withCall(id, (entry) => {
            entry.status = 'running'
            entry.startTime = Date.now()
        })

    const setSuccess = (id, result) =>
        withCall(id, (entry) => {
            entry.status = 'success'
            entry.result = result
            entry.endTime = Date.now()
        })

    const setError = (id, error) =>
        withCall(id, (entry) => {
            entry.status = 'error'
            entry.error = error
            entry.endTime = Date.now()
        })

    // Elapsed milliseconds between start and end, or null when either is missing.
    const getDuration = (id) => {
        const entry = toolCalls.get(id)
        const hasBothTimestamps = Boolean(entry?.startTime && entry?.endTime)
        return hasBothTimestamps ? entry.endTime - entry.startTime : null
    }

    return {
        toolCalls,
        register,
        startExecution,
        setSuccess,
        setError,
        getDuration,
    }
}

3.3 工具调用可视化组件

vue
<!-- components/ToolCallCard.vue -->
<script setup>
import { computed } from 'vue'

// call: state object of one tool call — { id, name, args, status, result, error }
const props = defineProps({
    call: { type: Object, required: true },
})

// Human-readable label for the status; unknown statuses are shown as-is.
const statusLabel = computed(() => {
    const map = {
        pending: '等待执行',
        calling: '调用中...',
        running: '执行中...',
        success: '完成',
        error: '失败',
    }
    return map[props.call.status] ?? props.call.status
})

// Pretty-printed input arguments.
const argsJson = computed(() => {
    return JSON.stringify(props.call.args, null, 2)
})

// Result as display text. Only null/undefined count as "no result", so
// legitimate falsy results such as 0, false or '' are still displayed.
const resultJson = computed(() => {
    const { result } = props.call
    if (result === null || result === undefined) return null
    return typeof result === 'string'
        ? result
        : JSON.stringify(result, null, 2)
})
</script>

<template>
    <div class="tool-call-card" :class="`status-${call.status}`">
        <div class="tool-header">
            <span class="tool-name">🔧 {{ call.name }}</span>
            <span class="tool-status">{{ statusLabel }}</span>
        </div>

        <details class="tool-args">
            <summary>入参</summary>
            <pre><code>{{ argsJson }}</code></pre>
        </details>

        <div v-if="call.status === 'running'" class="tool-spinner">
            执行中...
        </div>

        <details v-if="call.status === 'success'" class="tool-result">
            <summary>返回结果</summary>
            <pre><code>{{ resultJson }}</code></pre>
        </details>

        <div v-if="call.status === 'error'" class="tool-error">
            ❌ {{ call.error }}
        </div>
    </div>
</template>

3.4 "暂停审批"交互

让用户在 Agent 执行工具前确认,适用于写操作(发邮件、删除文件等):

javascript
// composables/useApprovalGate.js
import { ref } from 'vue'

/**
 * Composable implementing a "pause for approval" gate: the agent awaits
 * `requestApproval(toolCall)` and the UI settles it via `approve()` or
 * `reject()`.
 *
 * Only one request can be pending at a time. Issuing a new request while
 * another is pending rejects the older one, so no caller is left awaiting a
 * promise that can never settle.
 *
 * @returns {{ pendingApproval, requestApproval: Function, approve: Function, reject: Function }}
 */
export function useApprovalGate() {
    const pendingApproval = ref(null) // { toolCall, resolve, reject }

    // Remove and return the pending request (null when there is none).
    function takePending() {
        const pending = pendingApproval.value
        pendingApproval.value = null
        return pending
    }

    /**
     * Ask the user to approve a tool call.
     *
     * @param {object} toolCall - The call awaiting approval (shown by the UI).
     * @returns {Promise<boolean>} Resolves to true on approval; rejects with
     *   an Error when the user declines or the request is superseded.
     */
    function requestApproval(toolCall) {
        takePending()?.reject(new Error('审批请求已被新的请求取代'))
        return new Promise((resolve, reject) => {
            pendingApproval.value = { toolCall, resolve, reject }
        })
    }

    /** Approve the pending request, if any. */
    function approve() {
        takePending()?.resolve(true)
    }

    /** Decline the pending request, if any. */
    function reject() {
        takePending()?.reject(new Error('用户拒绝执行'))
    }

    return { pendingApproval, requestApproval, approve, reject }
}
vue
<!-- In the parent component -->
<script setup>
import { useApprovalGate } from '@/composables/useApprovalGate'
// `pendingApproval` is null until requestApproval() is called; the modal
// below is rendered only while a request is pending. approve() / reject()
// settle that request and clear it.
// NOTE(review): the template reads `toolCall.name` and `toolCall.args`, so it
// assumes the object passed to requestApproval() has those fields — confirm
// at the call site.
const { pendingApproval, approve, reject } = useApprovalGate()
</script>

<template>
    <div v-if="pendingApproval" class="approval-modal">
        <p>
            即将执行工具:<strong>{{ pendingApproval.toolCall.name }}</strong>
        </p>
        <pre>{{ JSON.stringify(pendingApproval.toolCall.args, null, 2) }}</pre>
        <button @click="approve">确认执行</button>
        <button @click="reject">取消</button>
    </div>
</template>

四、RAG 知识库交互

4.1 大文件分片上传

javascript
// composables/useChunkUpload.js
import { ref } from 'vue'

const CHUNK_SIZE = 2 * 1024 * 1024 // 2 MB per chunk

/**
 * Composable for uploading a large file in sequential chunks, resuming from
 * the chunk count the server reports as already uploaded.
 *
 * @param {object} [options]
 * @param {number} [options.chunkSize=CHUNK_SIZE] - Bytes per chunk.
 * @returns {{ progress, uploading, error, upload: Function }}
 *   `progress` is a 0-100 integer, `error` the last failure message.
 */
export function useChunkUpload({ chunkSize = CHUNK_SIZE } = {}) {
    const progress = ref(0)
    const uploading = ref(false)
    const error = ref(null)

    /**
     * Upload `file` to `uploadUrl`, one chunk per POST (multipart form with
     * chunk, fileId, chunkIndex, totalChunks and fileName fields).
     *
     * @param {File|Blob} file - File to upload; `name`, `size` and
     *   `lastModified` are used to build the file id.
     * @param {string} uploadUrl - Chunk endpoint; `${uploadUrl}/status` is
     *   queried first for resume information.
     * @returns {Promise<boolean>} true when every chunk was accepted, false
     *   otherwise (details in `error`).
     */
    async function upload(file, uploadUrl) {
        uploading.value = true
        error.value = null
        progress.value = 0

        const totalChunks = Math.ceil(file.size / chunkSize)
        const fileId = `${file.name}_${file.size}_${file.lastModified}`

        // Ask the server how many chunks it already has (resumable upload).
        let startChunk = 0
        try {
            const res = await fetch(
                `${uploadUrl}/status?fileId=${encodeURIComponent(fileId)}`
            )
            if (res.ok) {
                const data = await res.json()
                const uploaded = Number(data?.uploadedChunks)
                if (Number.isInteger(uploaded) && uploaded > 0) {
                    startChunk = Math.min(uploaded, totalChunks)
                }
            }
        } catch {
            // No resume support (or status unavailable): start from the beginning.
        }

        if (totalChunks > 0) {
            // Reflect chunks that are already on the server.
            progress.value = Math.round((startChunk / totalChunks) * 100)
        }

        try {
            for (let i = startChunk; i < totalChunks; i++) {
                const start = i * chunkSize
                const end = Math.min(start + chunkSize, file.size)

                const formData = new FormData()
                formData.append('chunk', file.slice(start, end))
                formData.append('fileId', fileId)
                formData.append('chunkIndex', String(i))
                formData.append('totalChunks', String(totalChunks))
                formData.append('fileName', file.name)

                const res = await fetch(uploadUrl, {
                    method: 'POST',
                    body: formData,
                })
                // fetch only rejects on network errors; an HTTP error status
                // must be treated as a failed chunk explicitly.
                if (!res.ok) {
                    throw new Error(
                        `Chunk ${i + 1}/${totalChunks} upload failed: HTTP ${res.status}`
                    )
                }

                progress.value = Math.round(((i + 1) / totalChunks) * 100)
            }
            return true
        } catch (err) {
            error.value = err.message
            return false
        } finally {
            uploading.value = false
        }
    }

    return { progress, uploading, error, upload }
}

4.2 引用溯源高亮

RAG 回答中包含引用段落的索引,点击后跳转到源文档并高亮对应段落。

vue
<!-- components/CitationMessage.vue -->
<script setup>
import { ref } from 'vue'

// content:   answer text containing citation markers such as [1] [2]
// citations: [{ index: 1, docId: 'xxx', chunkText: '...' }]
const props = defineProps({
    content: { type: String, default: '' },
    citations: { type: Array, default: () => [] },
})

// Citation currently shown in the side panel (null = panel closed).
const activeCitation = ref(null)

const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
}

// Escape characters that are significant in HTML.
function escapeHtml(text) {
    return text.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch])
}

// Turn [n] markers into clickable spans. The text is escaped first because
// the result is injected with v-html: without escaping, any markup in the
// answer would be interpreted as HTML by the browser.
// NOTE(review): this assumes `content` is plain text. If callers pass
// pre-rendered HTML on purpose, sanitize it instead of escaping it.
function renderWithCitations(text) {
    return escapeHtml(text ?? '').replace(/\[(\d+)\]/g, (match, num) => {
        return `<span class="citation-mark" data-index="${num}">${match}</span>`
    })
}

// Event delegation: a single listener on the body resolves the clicked marker.
function handleClick(e) {
    const mark = e.target.closest('.citation-mark')
    if (!mark) return
    const index = Number.parseInt(mark.dataset.index, 10)
    activeCitation.value =
        (props.citations ?? []).find((c) => c.index === index) ?? null
}
</script>

<template>
    <div>
        <div
            class="message-body"
            @click="handleClick"
            v-html="renderWithCitations(content)"
        />

        <div v-if="activeCitation" class="citation-panel">
            <div class="citation-header">
                <span>引用来源 [{{ activeCitation.index }}]</span>
                <button @click="activeCitation = null">关闭</button>
            </div>
            <blockquote class="citation-text">
                {{ activeCitation.chunkText }}
            </blockquote>
            <a
                :href="`/docs/${activeCitation.docId}`"
                target="_blank"
                rel="noopener noreferrer"
                >查看原文档 →</a
            >
        </div>
    </div>
</template>

五、Prompt 工程化管理

5.1 Prompt 模板变量插值

javascript
// utils/promptTemplate.js

/**
 * Parse a prompt template that uses the {{variableName}} syntax.
 *
 * @param {string} template - Template text.
 * @returns {{ variables: string[], render: Function }}
 *   `variables` lists each placeholder name once, in order of first
 *   appearance. `render(vars)` substitutes the values found in `vars`;
 *   placeholders whose value is missing or undefined are left untouched.
 */
export function parseTemplate(template) {
    const PLACEHOLDER = /\{\{(\w+)\}\}/g

    const variables = [
        ...new Set(Array.from(template.matchAll(PLACEHOLDER), (m) => m[1])),
    ]

    function render(vars = {}) {
        const values = vars ?? {}
        return template.replace(PLACEHOLDER, (placeholder, key) => {
            // Own properties only: a placeholder such as {{constructor}} or
            // {{toString}} must not resolve to something inherited from
            // Object.prototype.
            const provided =
                Object.hasOwn(values, key) && values[key] !== undefined
            return provided ? String(values[key]) : placeholder
        })
    }

    return { variables, render }
}

5.2 Prompt 版本管理

javascript
// composables/usePromptVersions.js
import { ref, computed } from 'vue'

/**
 * Composable that keeps the version history of one prompt in localStorage
 * under the key `prompt_versions_${promptId}`.
 *
 * @param {string|number} promptId - Identifier used to build the storage key.
 * @returns {{ versions, currentVersion, currentVersionIndex,
 *   save: Function, switchTo: Function, diff: Function }}
 *   `currentVersion` is undefined while the history is empty.
 */
export function usePromptVersions(promptId) {
    const versions = ref([])
    const currentVersionIndex = ref(0)

    const currentVersion = computed(
        () => versions.value[currentVersionIndex.value]
    )

    const STORAGE_KEY = `prompt_versions_${promptId}`

    // Load the stored history and select the newest version. Anything that
    // is not a JSON array is ignored, so `versions` always stays an array.
    function load() {
        const raw = localStorage.getItem(STORAGE_KEY)
        if (!raw) return

        let stored
        try {
            stored = JSON.parse(raw)
        } catch {
            // Corrupted data: start with an empty history.
            return
        }
        if (!Array.isArray(stored)) return

        versions.value = stored
        currentVersionIndex.value = Math.max(0, stored.length - 1)
    }

    /**
     * Append a new version, make it current and persist the history.
     *
     * @param {string} content - Prompt text of the new version.
     * @param {string} [description=''] - Optional change note.
     * @returns {object} The stored version record.
     */
    function save(content, description = '') {
        const version = {
            id: Date.now(),
            content,
            description,
            createdAt: new Date().toISOString(),
        }
        versions.value.push(version)
        currentVersionIndex.value = versions.value.length - 1
        localStorage.setItem(STORAGE_KEY, JSON.stringify(versions.value))
        return version
    }

    /** Select a version by index; out-of-range indexes are ignored. */
    function switchTo(index) {
        if (index >= 0 && index < versions.value.length) {
            currentVersionIndex.value = index
        }
    }

    /**
     * Return the two versions split into lines, as input for a line-level
     * diff (no comparison is computed here). A missing version yields [''].
     */
    function diff(indexA, indexB) {
        const a = versions.value[indexA]?.content ?? ''
        const b = versions.value[indexB]?.content ?? ''
        const aLines = a.split('\n')
        const bLines = b.split('\n')
        return { aLines, bLines }
    }

    load()

    return {
        versions,
        currentVersion,
        currentVersionIndex,
        save,
        switchTo,
        diff,
    }
}

5.3 多模型对比面板

vue
<!-- components/ModelCompare.vue -->
<script setup>
import { ref, reactive } from 'vue'
import { useSSE } from '@/composables/useSSE'

// models: names of the models to compare side by side
const props = defineProps({
    models: {
        type: Array,
        default: () => ['gpt-4o', 'gpt-3.5-turbo'],
    },
})

const prompt = ref('')
// model name -> { content, loading }
const results = reactive({})
// model name -> AbortController of that model's in-flight request
const streams = {}

// Return the result slot of a model, creating it on first use so models
// added to the prop later do not break rendering.
function ensureResult(model) {
    if (!results[model]) {
        results[model] = { content: '', loading: false }
    }
    return results[model]
}

for (const model of props.models) {
    ensureResult(model)
}

// Send the current prompt to every model at once.
function runCompare() {
    const question = prompt.value
    if (!question.trim()) return

    for (const model of props.models) {
        const result = ensureResult(model)
        result.content = ''
        result.loading = true

        // Deliberately not awaited: the requests run independently, and
        // startModelStream handles its own errors.
        startModelStream(model, question)
    }
}

// Stream one model's answer into its result slot.
async function startModelStream(model, question = prompt.value) {
    // A new run supersedes whatever this model was still streaming.
    streams[model]?.abort()
    const abortController = new AbortController()
    streams[model] = abortController
    const result = ensureResult(model)
    const isCurrent = () => streams[model] === abortController

    try {
        const res = await fetch('/api/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                model,
                messages: [{ role: 'user', content: question }],
            }),
            signal: abortController.signal,
        })

        if (!res.ok) throw new Error(`HTTP ${res.status}`)

        const reader = res.body.getReader()
        const decoder = new TextDecoder()
        // Text of the last, still incomplete line: a `data:` line can be
        // split across two network chunks.
        let pending = ''
        let finished = false

        while (!finished) {
            const { done, value } = await reader.read()
            pending += done
                ? decoder.decode()
                : decoder.decode(value, { stream: true })

            const lines = pending.split(/\r?\n/)
            pending = done ? '' : lines.pop()

            for (const line of lines) {
                if (!line.startsWith('data:')) continue
                const raw = line.slice(5).trim()
                if (raw === '[DONE]') {
                    finished = true
                    break
                }
                try {
                    const parsed = JSON.parse(raw)
                    const token = parsed.choices?.[0]?.delta?.content ?? ''
                    result.content += token
                } catch {
                    /* malformed payload: skip this line */
                }
            }

            if (done) break
        }

        if (finished) {
            // Best-effort release of the connection after the end marker.
            reader.cancel().catch(() => {})
        }
    } catch (err) {
        if (err.name !== 'AbortError' && isCurrent()) {
            result.content = `请求失败:${err.message}`
        }
    } finally {
        // A superseded request must not reset the state of its successor.
        if (isCurrent()) {
            result.loading = false
            delete streams[model]
        }
    }
}

// Abort every in-flight request.
function stopAll() {
    for (const ctrl of Object.values(streams)) {
        ctrl.abort()
    }
}
</script>

<template>
    <div class="compare-container">
        <textarea v-model="prompt" placeholder="输入 Prompt..." rows="4" />
        <div class="compare-actions">
            <button @click="runCompare">同时发送</button>
            <button @click="stopAll">停止</button>
        </div>
        <div class="compare-grid">
            <div v-for="model in models" :key="model" class="compare-col">
                <div class="model-label">{{ model }}</div>
                <div class="model-output">
                    <span v-if="results[model]?.loading" class="loading-dot"
                        >●</span
                    >
                    {{ results[model]?.content }}
                </div>
            </div>
        </div>
    </div>
</template>

六、性能与工程化

6.1 动态高度虚拟列表

聊天消息的高度是动态的(受内容、图片加载影响),不能用固定高度虚拟列表。方案:渲染后测量并缓存高度,滚动时动态计算偏移。

vue
<!-- components/VirtualMessageList.vue -->
<script setup>
import { ref, computed, onMounted, nextTick, watch } from 'vue'

// messages:            full message list; each item needs a unique `id`
// estimatedItemHeight: height assumed for rows that were not measured yet
// buffer:              extra rows rendered above and below the viewport
const props = defineProps({
    messages: Array,
    estimatedItemHeight: { type: Number, default: 80 },
    buffer: { type: Number, default: 5 },
})

const containerRef = ref(null)
const heightCache = ref({}) // messageId -> measured height
const scrollTop = ref(0)
const containerHeight = ref(600)

// Height of one message: measured value if known, otherwise the estimate.
function heightOf(msg) {
    return heightCache.value[msg.id] ?? props.estimatedItemHeight
}

// Top offset of every message.
const offsets = computed(() => {
    const result = []
    let top = 0
    for (const msg of props.messages) {
        result.push(top)
        top += heightOf(msg)
    }
    return result
})

// Total scrollable height: bottom edge of the last message.
const totalHeight = computed(() => {
    const last = props.messages.length - 1
    if (last < 0) return 0
    return offsets.value[last] + heightOf(props.messages[last])
})

// Index range (inclusive) of the rows to render.
const visibleRange = computed(() => {
    const lastIndex = props.messages.length - 1
    const viewportBottom = scrollTop.value + containerHeight.value

    const firstVisible = offsets.value.findLastIndex(
        (offset) => offset <= scrollTop.value
    )
    const firstBelow = offsets.value.findIndex(
        (offset) => offset >= viewportBottom
    )

    return {
        start: Math.max(0, firstVisible - props.buffer),
        // -1 means no row starts below the viewport, i.e. the list must be
        // rendered through to its end. The sentinel has to be checked BEFORE
        // the buffer is added; otherwise it turns into `buffer - 1` and the
        // list goes blank once scrolled to the bottom.
        end:
            firstBelow === -1
                ? lastIndex
                : Math.min(lastIndex, firstBelow + props.buffer),
    }
})

const visibleMessages = computed(() => {
    return props.messages
        .slice(visibleRange.value.start, visibleRange.value.end + 1)
        .map((msg, i) => ({
            ...msg,
            _top: offsets.value[visibleRange.value.start + i],
        }))
})

function onScroll(e) {
    scrollTop.value = e.target.scrollTop
}

// Measure the rendered rows and cache their real heights.
// Returns true when at least one cached height changed.
function measureItems() {
    if (!containerRef.value) return false
    const items = containerRef.value.querySelectorAll('[data-msg-id]')
    let changed = false
    for (const el of items) {
        const id = el.dataset.msgId
        const h = el.getBoundingClientRect().height
        if (heightCache.value[id] !== h) {
            heightCache.value[id] = h
            changed = true
        }
    }
    return changed
}

onMounted(() => {
    containerHeight.value = containerRef.value?.clientHeight ?? 600
    nextTick(measureItems)
})

// Rows that scroll into view were positioned using the estimate; measure
// them once they are in the DOM so the following offsets become exact.
// A primitive key makes the watcher fire only when the range really changes.
watch(
    () => `${visibleRange.value.start}:${visibleRange.value.end}`,
    () => {
        measureItems()
    },
    { flush: 'post' }
)

watch(
    () => props.messages.length,
    async () => {
        await nextTick()
        measureItems()
        // Scroll to the bottom when a new message arrives.
        if (containerRef.value) {
            containerRef.value.scrollTop = totalHeight.value
        }
    }
)
</script>

<template>
    <div
        ref="containerRef"
        class="virtual-list"
        style="overflow-y: auto; position: relative;"
        @scroll="onScroll"
    >
        <!-- Spacer that gives the scroll container its full height -->
        <div :style="{ height: totalHeight + 'px', position: 'relative' }">
            <div
                v-for="msg in visibleMessages"
                :key="msg.id"
                :data-msg-id="msg.id"
                :style="{
                    position: 'absolute',
                    top: msg._top + 'px',
                    width: '100%',
                }"
            >
                <slot :message="msg" />
            </div>
        </div>
    </div>
</template>

6.2 代码高亮按需懒加载

不在主 bundle 里引入全量 highlight.js,而是等代码块进入可视区后,再异步加载 highlight.js 核心和对应的语言包:

javascript
// utils/lazyHighlight.js

// Languages already registered with highlight.js in this session.
const loadedLanguages = new Set()

// Common code-fence labels -> highlight.js language module names.
const LANGUAGE_ALIASES = {
    js: 'javascript',
    ts: 'typescript',
    py: 'python',
    sh: 'bash',
    yml: 'yaml',
    md: 'markdown',
    html: 'xml',
    'c++': 'cpp',
    'c#': 'csharp',
}

/**
 * Normalize a code-fence language label into a highlight.js module name.
 *
 * The label comes from Markdown content and ends up inside a dynamic import
 * path, so anything that is not a plain identifier (letters, digits, `_`,
 * `-`) is rejected.
 *
 * @param {*} language - Raw label, e.g. 'JS', 'python', undefined.
 * @returns {string | null} Module name, or null when the label is missing
 *   or not a safe identifier.
 */
function resolveLanguageName(language) {
    if (typeof language !== 'string') return null
    const label = language.trim().toLowerCase()
    const name = Object.hasOwn(LANGUAGE_ALIASES, label)
        ? LANGUAGE_ALIASES[label]
        : label
    return /^[a-z0-9_-]+$/.test(name) ? name : null
}

/**
 * Highlight code, loading the highlight.js core and the language definition
 * on demand.
 *
 * @param {string} code - Source code to highlight.
 * @param {string} [language] - Language label from the code fence.
 * @returns {Promise<string>} Highlighted HTML. Falls back to highlight.js
 *   auto-detection when the language is missing, unsafe or cannot be loaded.
 */
export async function highlightCode(code, language) {
    const { default: hljs } = await import('highlight.js/lib/core')

    let lang = resolveLanguageName(language)

    if (lang && !loadedLanguages.has(lang)) {
        try {
            const langModule = await import(
                `highlight.js/lib/languages/${lang}`
            )
            hljs.registerLanguage(lang, langModule.default)
            loadedLanguages.add(lang)
        } catch {
            // Unknown language module: fall back to auto-detection.
            lang = null
        }
    }

    return lang
        ? hljs.highlight(code, { language: lang }).value
        : hljs.highlightAuto(code).value
}
vue
<!-- components/CodeBlock.vue -->
<script setup>
import { ref, onMounted, watch } from 'vue'
import { highlightCode } from '@/utils/lazyHighlight'
import { useIntersectionObserver } from '@vueuse/core'

// code:     source text of the block
// language: language label from the code fence (optional)
const props = defineProps({
    code: String,
    language: String,
})

const containerRef = ref(null)
const highlighted = ref('') // highlighted HTML; '' means "show plain text"
const loaded = ref(false) // true once the block has entered the viewport

// Sequence number of the latest highlight request, used to drop stale results.
let requestSeq = 0

// Highlight the current code. On failure the plain-text fallback stays
// visible instead of leaving an unhandled promise rejection.
async function runHighlight() {
    const seq = ++requestSeq
    try {
        const html = await highlightCode(props.code ?? '', props.language)
        if (seq === requestSeq) highlighted.value = html
    } catch (err) {
        if (seq === requestSeq) highlighted.value = ''
        console.warn('[CodeBlock] highlighting failed, showing plain text', err)
    }
}

// Highlighting is deferred until the block first becomes visible.
const { stop } = useIntersectionObserver(containerRef, ([entry]) => {
    if (entry.isIntersecting && !loaded.value) {
        loaded.value = true
        stop()
        runHighlight()
    }
})

// Once visible, keep the highlight in sync when the code changes (e.g.
// while the block is still being streamed).
watch(
    () => [props.code, props.language],
    () => {
        if (loaded.value) runHighlight()
    }
)
</script>

<template>
    <div ref="containerRef">
        <pre v-if="highlighted"><code v-html="highlighted" /></pre>
        <pre v-else><code>{{ code }}</code></pre>
    </div>
</template>

6.3 多模态内容懒加载预览

vue
<!-- components/MessageAttachment.vue -->
<script setup>
import { ref, computed } from 'vue'
import { useIntersectionObserver } from '@vueuse/core'

const props = defineProps({
    type: String, // 'image' | 'file' | 'audio'
    url: String,
    name: String,
    size: Number, // size in bytes (optional)
})

const containerRef = ref(null)
// Becomes true once the attachment has entered the viewport (or, for audio,
// when the user asks for it).
const shouldLoad = ref(false)

const { stop } = useIntersectionObserver(containerRef, ([entry]) => {
    if (entry.isIntersecting) {
        shouldLoad.value = true
        stop()
    }
})

// Human-readable size. Returns '' only when the size is unknown; a real
// size of 0 is shown as '0 B'.
function formatSize(bytes) {
    if (bytes === null || bytes === undefined || Number.isNaN(bytes)) return ''
    if (bytes < 1024) return bytes + ' B'
    if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB'
    return (bytes / (1024 * 1024)).toFixed(1) + ' MB'
}

const sizeLabel = computed(() => formatSize(props.size))
</script>

<template>
    <div ref="containerRef" class="attachment">
        <!-- Image: loaded only after entering the viewport -->
        <template v-if="type === 'image'">
            <img v-if="shouldLoad" :src="url" :alt="name" loading="lazy" />
            <div v-else class="img-placeholder" />
        </template>

        <!-- File: the size is shown only when it is known -->
        <template v-else-if="type === 'file'">
            <a
                :href="url"
                target="_blank"
                rel="noopener noreferrer"
                class="file-link"
            >
                📎 {{ name }}<template v-if="sizeLabel"> ({{ sizeLabel }})</template>
            </a>
        </template>

        <!-- Audio -->
        <template v-else-if="type === 'audio'">
            <audio v-if="shouldLoad" controls :src="url" />
            <button v-else @click="shouldLoad = true">加载音频</button>
        </template>
    </div>
</template>

附:关键 Composable 依赖关系

plain
useSessionStore          ← 管理所有会话,持久化
  └── useContextManager  ← 单个会话的 token 管理与截断
        └── useSSE       ← 流式请求
              └── useStreamBuffer  ← 批量合并 token 更新

useToolCallState         ← 管理工具调用生命周期
  └── useApprovalGate    ← 写操作人工审批

useChunkUpload           ← 分片上传知识库文档
usePromptVersions        ← Prompt 版本历史

每个 composable 职责单一,可以独立测试,也可以按需组合到具体业务页面。