返回笔记首页

AI 辅助功能

主题配置

一、技术实现方案

1.1 AI辅助功能架构

plain
AI辅助功能体系
  ├── 代码生成
  │   ├── 代码补全
  │   ├── 代码解释
  │   ├── 代码重构
  │   └── 代码调试
  │
  ├── 图片生成
  │   ├── DALL-E 3
  │   ├── Midjourney
  │   ├── Stable Diffusion
  │   └── 图片编辑
  │
  ├── 语音处理
  │   ├── 语音转文字(Whisper)
  │   ├── 文字转语音(TTS)
  │   ├── 语音识别
  │   └── 音频处理
  │
  └── 文本处理
      ├── 智能摘要
      ├── 多语言翻译
      ├── 文本分类
      └── 关键词提取

1.2 技术栈

  • 代码生成: OpenAI Codex / GitHub Copilot API
  • 图片生成: DALL-E 3 / Stability AI
  • 语音识别: Whisper API / Web Speech API
  • 语音合成: TTS API / SpeechSynthesis
  • 文本处理: GPT-4 / 专用NLP API

二、AI代码生成

2.1 代码生成器

code-generator.js

javascript
import { OpenAIClient } from './openai-client.js'

/**
 * Builds prompts for common coding tasks (generation, completion,
 * explanation, refactoring, review, debugging, test generation) and sends
 * them through a chat-style client.
 *
 * Methods that are expected to yield code return the first fenced code block
 * of the reply (see {@link CodeGenerator#extractCode}); the remaining methods
 * return the raw reply text.
 */
export class CodeGenerator {
    /**
     * @param {string} apiKey - API key handed to the OpenAIClient constructor.
     */
    constructor(apiKey) {
        this.client = new OpenAIClient({ apiKey })
    }

    /**
     * Generate code from a natural-language description.
     *
     * @param {string} description - What the code should do.
     * @param {object} [options]
     * @param {string} [options.language='javascript'] - Target language.
     * @param {string} [options.framework=''] - Optional framework name.
     * @param {string} [options.style='modern'] - Code style hint.
     * @returns {Promise<string>} First fenced code block of the reply, or the
     *     whole reply when it contains no fence.
     */
    async generateCode(description, options = {}) {
        const language = options.language || 'javascript'
        const framework = options.framework || ''
        const style = options.style || 'modern'

        const prompt = this.buildCodePrompt(
            description,
            language,
            framework,
            style
        )

        const response = await this.client.chat(
            [
                {
                    role: 'system',
                    content: `你是一个专业的${language}程序员。请生成高质量、可运行的代码。`,
                },
                {
                    role: 'user',
                    content: prompt,
                },
            ],
            {
                temperature: 0.7,
                maxTokens: 2000,
            }
        )

        return this.extractCode(response.content)
    }

    /**
     * Ask the model to complete code at a cursor position.
     *
     * @param {string} code - Full source text.
     * @param {number} cursor - Character offset in `code` where the
     *     `<CURSOR>` marker is inserted.
     * @param {object} [options] - Currently unused; kept for interface
     *     compatibility.
     * @returns {Promise<string>} Extracted completion text.
     */
    async completeCode(code, cursor, options = {}) {
        const beforeCursor = code.substring(0, cursor)
        const afterCursor = code.substring(cursor)

        const prompt = `请补全以下代码:

\`\`\`
${beforeCursor}<CURSOR>${afterCursor}
\`\`\`

只输出补全的部分,不要重复已有代码。`

        const response = await this.client.chat(
            [
                { role: 'system', content: '你是代码补全助手' },
                { role: 'user', content: prompt },
            ],
            {
                // Lower temperature than generateCode for this request.
                temperature: 0.3,
                maxTokens: 500,
            }
        )

        return this.extractCode(response.content)
    }

    /**
     * Ask the model to explain a piece of code.
     *
     * @param {string} code - Code to explain.
     * @param {object} [options]
     * @param {string} [options.language='auto'] - Used as the fence tag of
     *     the quoted code.
     * @param {string} [options.level='intermediate'] - Audience level
     *     (beginner / intermediate / advanced).
     * @returns {Promise<string>} Raw reply text.
     */
    async explainCode(code, options = {}) {
        const language = options.language || 'auto'
        const level = options.level || 'intermediate' // beginner/intermediate/advanced

        const prompt = `请解释以下代码:

\`\`\`${language}
${code}
\`\`\`

要求:
1. 适合${level}水平的开发者
2. 逐行或逐块解释
3. 指出关键点和注意事项`

        const response = await this.client.chat([
            { role: 'system', content: '你是编程教师' },
            { role: 'user', content: prompt },
        ])

        return response.content
    }

    /**
     * Ask the model to refactor code towards the given goals.
     *
     * @param {string} code - Code to refactor.
     * @param {string[]} [goals=[]] - Refactoring goals; a generic
     *     readability/maintainability goal is used when empty.
     * @returns {Promise<string>} Extracted refactored code.
     */
    async refactorCode(code, goals = []) {
        // Joined with "\n- " so that, together with the leading "- " in the
        // prompt below, every goal becomes its own bullet.
        const goalsText =
            goals.length > 0 ? goals.join('\n- ') : '提高可读性和可维护性'

        const prompt = `请重构以下代码:

\`\`\`
${code}
\`\`\`

重构目标:
- ${goalsText}

要求:
1. 保持功能不变
2. 添加必要注释
3. 遵循最佳实践`

        const response = await this.client.chat(
            [
                { role: 'system', content: '你是代码重构专家' },
                { role: 'user', content: prompt },
            ],
            {
                temperature: 0.5,
            }
        )

        return this.extractCode(response.content)
    }

    /**
     * Ask the model to review code.
     *
     * @param {string} code - Code to review.
     * @param {object} [options]
     * @param {string[]} [options.focus] - Review focus areas; defaults to
     *     bug, performance, security and style.
     * @returns {Promise<string>} Raw reply text.
     */
    async reviewCode(code, options = {}) {
        const focus = options.focus || [
            'bug',
            'performance',
            'security',
            'style',
        ]

        const prompt = `请审查以下代码:

\`\`\`
${code}
\`\`\`

审查重点:${focus.join('、')}

请指出:
1. 潜在问题
2. 改进建议
3. 最佳实践`

        const response = await this.client.chat([
            { role: 'system', content: '你是代码审查专家' },
            { role: 'user', content: prompt },
        ])

        return response.content
    }

    /**
     * Ask the model to diagnose an error and propose a fix.
     *
     * @param {string} code - Failing code.
     * @param {*} error - Error information; interpolated into the prompt.
     * @param {object} [options] - Currently unused; kept for interface
     *     compatibility.
     * @returns {Promise<string>} Raw reply text.
     */
    async debugCode(code, error, options = {}) {
        const prompt = `代码:
\`\`\`
${code}
\`\`\`

错误信息:
\`\`\`
${error}
\`\`\`

请:
1. 分析错误原因
2. 提供修复方案
3. 给出修复后的代码`

        const response = await this.client.chat([
            { role: 'system', content: '你是调试专家' },
            { role: 'user', content: prompt },
        ])

        return response.content
    }

    /**
     * Ask the model to write unit tests for the given code.
     *
     * @param {string} code - Code under test.
     * @param {string} [framework='jest'] - Test framework named in the prompt.
     * @returns {Promise<string>} Extracted test code.
     */
    async generateTests(code, framework = 'jest') {
        const prompt = `请为以下代码生成单元测试:

\`\`\`
${code}
\`\`\`

使用${framework}框架,要求:
1. 覆盖主要功能
2. 包含边界情况
3. 测试用例清晰`

        const response = await this.client.chat([
            { role: 'system', content: '你是测试工程师' },
            { role: 'user', content: prompt },
        ])

        return this.extractCode(response.content)
    }

    /**
     * Build the user prompt used by generateCode.
     *
     * @param {string} description - Feature description.
     * @param {string} language - Target language.
     * @param {string} framework - Framework name; the framework sentence is
     *     omitted when this is falsy.
     * @param {string} style - Code style hint.
     * @returns {string} Prompt text.
     */
    buildCodePrompt(description, language, framework, style) {
        let prompt = `请用${language}实现以下功能:

${description}

`

        if (framework) {
            prompt += `使用${framework}框架。\n`
        }

        prompt += `代码风格:${style}

要求:
1. 代码规范、可读性好
2. 添加必要的注释
3. 处理边界情况
4. 可以直接运行`

        return prompt
    }

    /**
     * Return the trimmed contents of the first fenced code block in `text`.
     *
     * The fence's info string may be any run of non-whitespace, non-backtick
     * characters, so tags such as `c++`, `c#` or `objective-c` are accepted.
     * Trailing blanks after the tag and CRLF line endings are tolerated.
     *
     * @param {string} text - Model reply.
     * @returns {string} Code from the first fenced block, or `text` unchanged
     *     when no fenced block is found.
     */
    extractCode(text) {
        // Non-global regex: only the first block is needed, so one exec call
        // is enough and no lastIndex state is carried around.
        const codeBlockRegex = /```[^\s`]*[ \t]*\r?\n([\s\S]*?)```/
        const match = codeBlockRegex.exec(text)

        return match ? match[1].trim() : text
    }
}

export default CodeGenerator

2.2 代码生成演示组件

CodeGeneratorDemo.vue

vue
<script setup>
import { ref } from 'vue'
import { CodeGenerator } from './code-generator.js'

// Reactive form state for the code-generation panel.
const apiKey = ref('')
const description = ref(
    '实现一个Vue 3组件,显示待办事项列表,支持添加、删除、标记完成'
)
const language = ref('javascript')
const framework = ref('Vue 3')
const generatedCode = ref('')
// Busy flag shared by generate(), explain() and refactor(); the template
// disables all three action buttons while it is true.
const isGenerating = ref(false)

// Sample snippet pre-filled in the "explain" textarea, plus the reply text.
const codeToExplain = ref(`function debounce(func, wait) {
  let timeout
  return function(...args) {
    clearTimeout(timeout)
    timeout = setTimeout(() => func.apply(this, args), wait)
  }
}`)
const explanation = ref('')

// Sample snippet pre-filled in the "refactor" textarea, plus the result.
const codeToRefactor = ref(`function calc(a,b,op){
if(op=='+')return a+b
else if(op=='-')return a-b
else if(op=='*')return a*b
else if(op=='/')return b!=0?a/b:null
}`)
const refactoredCode = ref('')

// CodeGenerator instance; assigned by initGenerator() before each request.
let generator = null

/**
 * (Re)create the CodeGenerator from the API key currently entered.
 *
 * @returns {boolean} true when a generator was created, false when the key
 *     is empty (the user is alerted in that case).
 */
const initGenerator = () => {
    const key = apiKey.value
    const hasKey = Boolean(key)

    if (hasKey) {
        generator = new CodeGenerator(key)
    } else {
        alert('请输入API Key')
    }

    return hasKey
}

/**
 * Generate code from the current description / language / framework and
 * store the result in `generatedCode`. Failures are reported via alert().
 */
const generate = async () => {
    if (initGenerator()) {
        isGenerating.value = true
        generatedCode.value = ''

        try {
            const featureText = description.value
            const requestOptions = {
                language: language.value,
                framework: framework.value,
            }
            const code = await generator.generateCode(
                featureText,
                requestOptions
            )
            generatedCode.value = code
        } catch (failure) {
            alert(`生成失败: ${failure.message}`)
        } finally {
            // Always release the busy flag so the buttons become usable again.
            isGenerating.value = false
        }
    }
}

/**
 * Ask for an intermediate-level explanation of the snippet in
 * `codeToExplain` and store the reply in `explanation`. Failures are
 * reported via alert().
 */
const explain = async () => {
    if (initGenerator()) {
        isGenerating.value = true
        explanation.value = ''

        try {
            const snippet = codeToExplain.value
            const requestOptions = {
                language: language.value,
                level: 'intermediate',
            }
            const reply = await generator.explainCode(snippet, requestOptions)
            explanation.value = reply
        } catch (failure) {
            alert(`解释失败: ${failure.message}`)
        } finally {
            // Always release the busy flag so the buttons become usable again.
            isGenerating.value = false
        }
    }
}

/**
 * Refactor the snippet in `codeToRefactor` towards a fixed list of goals and
 * store the result in `refactoredCode`. Failures are reported via alert().
 */
const refactor = async () => {
    if (initGenerator()) {
        isGenerating.value = true
        refactoredCode.value = ''

        try {
            const snippet = codeToRefactor.value
            const goals = [
                '提高可读性',
                '使用现代语法',
                '添加类型检查',
                '完善错误处理',
            ]
            const result = await generator.refactorCode(snippet, goals)
            refactoredCode.value = result
        } catch (failure) {
            alert(`重构失败: ${failure.message}`)
        } finally {
            // Always release the busy flag so the buttons become usable again.
            isGenerating.value = false
        }
    }
}

/**
 * Copy text to the system clipboard and tell the user whether it worked.
 *
 * The write can fail (permission denied, document not focused) and
 * `navigator.clipboard` is unavailable in insecure contexts; both cases are
 * caught and reported instead of surfacing as an unhandled rejection.
 *
 * @param {string} text - Text to copy.
 * @returns {Promise<void>} Settles once the user has been notified.
 */
const copyCode = async (text) => {
    try {
        await navigator.clipboard.writeText(text)
        alert('已复制到剪贴板')
    } catch (error) {
        alert('复制失败: ' + error.message)
    }
}

// Preset examples shown as quick-fill buttons above the description box.
// Each row is [name, description, language, framework].
const examples = [
    [
        'Vue组件',
        '创建一个Vue 3计数器组件,包含增加、减少、重置功能',
        'javascript',
        'Vue 3',
    ],
    [
        'React Hook',
        '实现一个useLocalStorage Hook,同步state到localStorage',
        'javascript',
        'React',
    ],
    [
        '工具函数',
        '实现深拷贝函数,支持对象、数组、Date、RegExp等类型',
        'javascript',
        '',
    ],
].map(([name, description, language, framework]) => ({
    name,
    description,
    language,
    framework,
}))

/**
 * Load a preset example into the description / language / framework fields.
 *
 * @param {{description: string, language: string, framework: string}} example
 */
const useExample = (example) => {
    const { description: text, language: lang, framework: fw } = example

    description.value = text
    language.value = lang
    framework.value = fw
}
</script>

<template>
    <div class="code-generator-demo">
        <div class="demo-header">
            <h2>AI代码生成</h2>
            <p class="subtitle">让AI帮你写代码</p>
        </div>

        <!-- API配置 -->
        <div class="config-section">
            <h3>配置</h3>
            <input
                v-model="apiKey"
                type="password"
                placeholder="OpenAI API Key"
                class="api-key-input"
            />
        </div>

        <!-- 代码生成 -->
        <div class="generate-section">
            <h3>代码生成</h3>

            <div class="examples-row">
                <button
                    v-for="example in examples"
                    :key="example.name"
                    @click="useExample(example)"
                    class="example-btn"
                >
                    {{ example.name }}
                </button>
            </div>

            <div class="form-group">
                <label>功能描述:</label>
                <textarea
                    v-model="description"
                    rows="4"
                    placeholder="描述你想要实现的功能..."
                ></textarea>
            </div>

            <div class="form-row">
                <div class="form-group">
                    <label>编程语言:</label>
                    <select v-model="language">
                        <option value="javascript">JavaScript</option>
                        <option value="typescript">TypeScript</option>
                        <option value="python">Python</option>
                        <option value="java">Java</option>
                    </select>
                </div>

                <div class="form-group">
                    <label>框架:</label>
                    <input v-model="framework" type="text" placeholder="可选" />
                </div>
            </div>

            <button
                @click="generate"
                :disabled="isGenerating"
                class="generate-btn"
            >
                {{ isGenerating ? '生成中...' : '生成代码' }}
            </button>

            <div v-if="generatedCode" class="result-box">
                <div class="result-header">
                    <h4>生成的代码</h4>
                    <button @click="copyCode(generatedCode)" class="copy-btn">
                        复制
                    </button>
                </div>
                <pre><code>{{ generatedCode }}</code></pre>
            </div>
        </div>

        <!-- 代码解释 -->
        <div class="explain-section">
            <h3>代码解释</h3>

            <div class="form-group">
                <label>代码:</label>
                <textarea
                    v-model="codeToExplain"
                    rows="8"
                    placeholder="粘贴要解释的代码..."
                ></textarea>
            </div>

            <button
                @click="explain"
                :disabled="isGenerating"
                class="action-btn"
            >
                解释代码
            </button>

            <div v-if="explanation" class="result-box">
                <h4>代码解释</h4>
                <div class="explanation-text">{{ explanation }}</div>
            </div>
        </div>

        <!-- 代码重构 -->
        <div class="refactor-section">
            <h3>代码重构</h3>

            <div class="form-group">
                <label>代码:</label>
                <textarea
                    v-model="codeToRefactor"
                    rows="8"
                    placeholder="粘贴要重构的代码..."
                ></textarea>
            </div>

            <button
                @click="refactor"
                :disabled="isGenerating"
                class="action-btn"
            >
                重构代码
            </button>

            <div v-if="refactoredCode" class="result-box">
                <div class="result-header">
                    <h4>重构后的代码</h4>
                    <button @click="copyCode(refactoredCode)" class="copy-btn">
                        复制
                    </button>
                </div>
                <pre><code>{{ refactoredCode }}</code></pre>
            </div>
        </div>

        <!-- 功能说明 -->
        <div class="features-section">
            <h3>AI代码辅助功能</h3>

            <div class="features-grid">
                <div class="feature-card">
                    <h4>代码生成</h4>
                    <p>根据自然语言描述生成可运行的代码</p>
                    <ul>
                        <li>支持多种编程语言</li>
                        <li>支持主流框架</li>
                        <li>生成规范的代码</li>
                    </ul>
                </div>

                <div class="feature-card">
                    <h4>代码补全</h4>
                    <p>智能补全代码,提升编码效率</p>
                    <ul>
                        <li>上下文感知</li>
                        <li>多行补全</li>
                        <li>函数签名提示</li>
                    </ul>
                </div>

                <div class="feature-card">
                    <h4>代码解释</h4>
                    <p>理解复杂代码逻辑</p>
                    <ul>
                        <li>逐行解释</li>
                        <li>关键点标注</li>
                        <li>算法分析</li>
                    </ul>
                </div>

                <div class="feature-card">
                    <h4>代码重构</h4>
                    <p>优化代码结构和质量</p>
                    <ul>
                        <li>提高可读性</li>
                        <li>性能优化</li>
                        <li>最佳实践</li>
                    </ul>
                </div>

                <div class="feature-card">
                    <h4>代码审查</h4>
                    <p>发现潜在问题</p>
                    <ul>
                        <li>Bug检测</li>
                        <li>安全审查</li>
                        <li>性能分析</li>
                    </ul>
                </div>

                <div class="feature-card">
                    <h4>测试生成</h4>
                    <p>自动生成单元测试</p>
                    <ul>
                        <li>覆盖主要场景</li>
                        <li>边界条件测试</li>
                        <li>Mock数据生成</li>
                    </ul>
                </div>
            </div>
        </div>
    </div>
</template>

<style scoped>
.code-generator-demo {
    padding: 20px;
    background: #f5f7fa;
    min-height: 100vh;
}

.demo-header {
    text-align: center;
    margin-bottom: 40px;
}

.demo-header h2 {
    margin: 0 0 10px 0;
    font-size: 32px;
    color: #303133;
}

.subtitle {
    margin: 0;
    font-size: 16px;
    color: #909399;
}

.config-section,
.generate-section,
.explain-section,
.refactor-section,
.features-section {
    margin-bottom: 30px;
    padding: 24px;
    background: white;
    border-radius: 8px;
}

h3 {
    margin: 0 0 20px 0;
    font-size: 18px;
    color: #303133;
}

.api-key-input {
    width: 100%;
    padding: 12px;
    border: 1px solid #dcdfe6;
    border-radius: 4px;
    font-size: 14px;
}

.examples-row {
    display: flex;
    gap: 12px;
    margin-bottom: 20px;
    flex-wrap: wrap;
}

.example-btn {
    padding: 8px 16px;
    background: #ecf5ff;
    color: #409eff;
    border: 1px solid #b3d8ff;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
}

.form-group {
    margin-bottom: 20px;
}

.form-group label {
    display: block;
    margin-bottom: 8px;
    font-size: 14px;
    color: #606266;
    font-weight: 600;
}

.form-group textarea,
.form-group input,
.form-group select {
    width: 100%;
    padding: 10px;
    border: 1px solid #dcdfe6;
    border-radius: 4px;
    font-size: 14px;
    font-family: inherit;
}

.form-row {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 16px;
}

.generate-btn,
.action-btn {
    padding: 12px 32px;
    background: #409eff;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
    font-weight: 600;
}

.generate-btn:disabled,
.action-btn:disabled {
    background: #c0c4cc;
    cursor: not-allowed;
}

.result-box {
    margin-top: 24px;
    padding: 20px;
    background: #f5f7fa;
    border-radius: 8px;
}

.result-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 16px;
}

.result-box h4 {
    margin: 0 0 16px 0;
    font-size: 16px;
    color: #303133;
}

.result-box pre {
    margin: 0;
    padding: 16px;
    background: #282c34;
    border-radius: 4px;
    overflow-x: auto;
}

.result-box code {
    font-size: 13px;
    line-height: 1.6;
    color: #abb2bf;
    font-family: 'Courier New', monospace;
}

.explanation-text {
    font-size: 14px;
    line-height: 1.8;
    color: #303133;
    white-space: pre-wrap;
}

.copy-btn {
    padding: 6px 16px;
    background: #67c23a;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 13px;
}

.features-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
    gap: 20px;
}

.feature-card {
    padding: 20px;
    background: #f5f7fa;
    border-radius: 8px;
    border-left: 4px solid #409eff;
}

.feature-card h4 {
    margin: 0 0 12px 0;
    font-size: 16px;
    color: #409eff;
}

.feature-card p {
    margin: 0 0 12px 0;
    font-size: 14px;
    color: #606266;
    line-height: 1.6;
}

.feature-card ul {
    margin: 0;
    padding-left: 20px;
}

.feature-card li {
    font-size: 13px;
    line-height: 2;
    color: #909399;
}
</style>

三、AI图片生成

3.1 图片生成器

image-generator.js

javascript
/**
 * Client for the OpenAI image endpoints (generations, edits, variations)
 * plus a few prompt-building helpers.
 */
export class ImageGenerator {
    /**
     * @param {string} apiKey - Sent as a Bearer token with every request.
     */
    constructor(apiKey) {
        this.apiKey = apiKey
        this.baseURL = 'https://api.openai.com/v1'
    }

    /**
     * POST `body` to `${baseURL}${path}` and return the parsed JSON reply.
     *
     * Non-2xx replies are turned into an Error carrying the HTTP status in
     * `error.status`, so that handleError can map it to a friendly message.
     *
     * @param {string} path - Endpoint path starting with "/".
     * @param {string|FormData} body - Request body.
     * @param {object} [extraHeaders] - Headers added after Authorization.
     * @returns {Promise<object>} Parsed JSON reply.
     * @throws {Error} Whatever handleError returns for the failure.
     */
    async send(path, body, extraHeaders = {}) {
        try {
            const response = await fetch(`${this.baseURL}${path}`, {
                method: 'POST',
                headers: {
                    Authorization: `Bearer ${this.apiKey}`,
                    ...extraHeaders,
                },
                body,
            })

            if (!response.ok) {
                const httpError = new Error(
                    `HTTP error! status: ${response.status}`
                )
                httpError.status = response.status
                throw httpError
            }

            return await response.json()
        } catch (error) {
            throw this.handleError(error)
        }
    }

    /**
     * Generate images with the `dall-e-3` model.
     *
     * @param {string} prompt - Image description.
     * @param {object} [options]
     * @param {number} [options.n=1] - Number of images.
     * @param {string} [options.size='1024x1024'] - Image size.
     * @param {string} [options.quality='standard'] - standard or hd.
     * @param {string} [options.style='vivid'] - vivid or natural.
     * @returns {Promise<{images: Array<{url: string, revisedPrompt: string}>}>}
     */
    async generateImage(prompt, options = {}) {
        const data = await this.send(
            '/images/generations',
            JSON.stringify({
                model: 'dall-e-3',
                prompt: prompt,
                n: options.n || 1,
                size: options.size || '1024x1024',
                quality: options.quality || 'standard', // standard or hd
                style: options.style || 'vivid', // vivid or natural
            }),
            { 'Content-Type': 'application/json' }
        )

        return {
            images: data.data.map((item) => ({
                url: item.url,
                revisedPrompt: item.revised_prompt,
            })),
        }
    }

    /**
     * Edit an image according to a prompt.
     *
     * @param {Blob} imageFile - Source image.
     * @param {Blob} [maskFile] - Optional mask; omitted from the request when
     *     not provided (instead of being sent as the text "undefined").
     * @param {string} prompt - Description of the desired edit.
     * @param {object} [options]
     * @param {number} [options.n=1] - Number of images.
     * @param {string} [options.size='1024x1024'] - Image size.
     * @returns {Promise<{images: Array<{url: string}>}>}
     */
    async editImage(imageFile, maskFile, prompt, options = {}) {
        const formData = new FormData()
        formData.append('image', imageFile)
        if (maskFile) {
            formData.append('mask', maskFile)
        }
        formData.append('prompt', prompt)
        formData.append('n', options.n || 1)
        formData.append('size', options.size || '1024x1024')

        // No explicit Content-Type: it is left unset for the FormData body.
        const data = await this.send('/images/edits', formData)

        return {
            images: data.data.map((item) => ({ url: item.url })),
        }
    }

    /**
     * Create variations of an image.
     *
     * @param {Blob} imageFile - Source image.
     * @param {object} [options]
     * @param {number} [options.n=1] - Number of images.
     * @param {string} [options.size='1024x1024'] - Image size.
     * @returns {Promise<{images: Array<{url: string}>}>}
     */
    async createVariation(imageFile, options = {}) {
        const formData = new FormData()
        formData.append('image', imageFile)
        formData.append('n', options.n || 1)
        formData.append('size', options.size || '1024x1024')

        const data = await this.send('/images/variations', formData)

        return {
            images: data.data.map((item) => ({ url: item.url })),
        }
    }

    /**
     * Append a style phrase and extra details to a basic prompt.
     *
     * @param {string} basicPrompt - Starting prompt.
     * @param {string} [style=''] - Appended as ", <style> style" when set.
     * @param {string[]} [details=[]] - Appended comma-separated when non-empty.
     * @returns {string} The combined prompt.
     */
    optimizePrompt(basicPrompt, style = '', details = []) {
        let optimized = basicPrompt

        if (style) {
            optimized += `, ${style} style`
        }

        if (details.length > 0) {
            optimized += ', ' + details.join(', ')
        }

        return optimized
    }

    /**
     * Prompt templates keyed by use case. Placeholders use `{{name}}` syntax;
     * this class does not substitute them itself.
     *
     * @returns {Object<string, {name: string, template: string}>}
     */
    getPromptTemplates() {
        return {
            portrait: {
                name: '人物肖像',
                template:
                    'A portrait of {{subject}}, {{style}}, {{lighting}}, professional photography',
            },
            landscape: {
                name: '风景画',
                template:
                    'A beautiful landscape of {{location}}, {{time}}, {{weather}}, {{style}} painting',
            },
            product: {
                name: '产品展示',
                template:
                    '{{product}} on {{background}}, {{lighting}}, product photography, high quality',
            },
            logo: {
                name: 'Logo设计',
                template:
                    'A modern logo for {{brand}}, {{style}}, simple and clean, vector art',
            },
            illustration: {
                name: '插画',
                template:
                    '{{subject}}, {{style}} illustration, vibrant colors, detailed',
            },
        }
    }

    /**
     * Map a failure to a user-facing Error.
     *
     * The HTTP status is read from `error.response.status` when a `response`
     * is attached, otherwise from `error.status` (set by send()). Errors that
     * carry neither, such as network failures, are returned unchanged.
     *
     * @param {*} error - The caught failure.
     * @returns {*} A mapped Error (original kept as `cause`) or `error` itself.
     */
    handleError(error) {
        const response = error && error.response
        const status = response ? response.status : error && error.status

        if (!response && typeof status !== 'number') {
            return error
        }

        switch (status) {
            case 400:
                return new Error('Prompt违反内容政策', { cause: error })
            case 401:
                return new Error('API密钥无效', { cause: error })
            case 429:
                return new Error('请求频率超限', { cause: error })
            default:
                return new Error('生成失败', { cause: error })
        }
    }
}

export default ImageGenerator

3.2 图片生成演示组件

ImageGeneratorDemo.vue

vue
<script setup>
import { ref } from 'vue'
import { ImageGenerator } from './image-generator.js'

// Reactive form state for the image-generation panel.
const apiKey = ref('')
const prompt = ref('一只可爱的猫咪在花园里玩耍,水彩画风格')
// Request options bound to the three <select> controls in the template.
const size = ref('1024x1024')
const quality = ref('standard')
const style = ref('vivid')
// Result list rendered by the results section; cleared before each request.
const generatedImages = ref([])
// Busy flag; the generate button is disabled while it is true.
const isGenerating = ref(false)

// ImageGenerator instance; assigned by initGenerator() before each request.
let generator = null

/**
 * (Re)create the ImageGenerator from the API key currently entered.
 *
 * @returns {boolean} true when a generator was created, false when the key
 *     is empty (the user is alerted in that case).
 */
const initGenerator = () => {
    const key = apiKey.value
    const hasKey = Boolean(key)

    if (hasKey) {
        generator = new ImageGenerator(key)
    } else {
        alert('请输入API Key')
    }

    return hasKey
}

/**
 * Generate images for the current prompt / size / quality / style and store
 * them in `generatedImages`. Failures are reported via alert().
 */
const generate = async () => {
    if (initGenerator()) {
        isGenerating.value = true
        generatedImages.value = []

        try {
            const promptText = prompt.value
            const requestOptions = {
                size: size.value,
                quality: quality.value,
                style: style.value,
            }
            const { images } = await generator.generateImage(
                promptText,
                requestOptions
            )

            generatedImages.value = images
        } catch (failure) {
            alert(`生成失败: ${failure.message}`)
        } finally {
            // Always release the busy flag so the button becomes usable again.
            isGenerating.value = false
        }
    }
}

/**
 * Save a generated image to disk.
 *
 * Browsers ignore the `download` attribute for cross-origin URLs and
 * navigate to the image instead. To get a real download the image is first
 * fetched into a Blob and offered through a same-origin object URL. When
 * that fetch is not possible (e.g. blocked by CORS) the image is opened in
 * a new tab so the current page and its state are not replaced.
 *
 * @param {string} url - Image URL.
 * @param {number} index - Zero-based position, used for the file name.
 * @returns {Promise<void>} Settles once the download/open was triggered.
 */
const downloadImage = async (url, index) => {
    const link = document.createElement('a')
    link.download = `generated-image-${index + 1}.png`

    let objectUrl = null
    try {
        const response = await fetch(url)
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`)
        }
        objectUrl = URL.createObjectURL(await response.blob())
        link.href = objectUrl
    } catch (error) {
        // Deliberate best-effort fallback: the direct link still lets the
        // user save the image manually from the new tab.
        link.href = url
        link.target = '_blank'
        link.rel = 'noopener'
    }

    link.click()

    if (objectUrl) {
        // Release the blob once the click has been dispatched.
        setTimeout(() => URL.revokeObjectURL(objectUrl), 0)
    }
}

// Preset prompts grouped by category; rendered as quick-fill buttons.
// Written as category -> prompts and expanded into {category, prompts} rows.
const promptExamples = Object.entries({
    艺术风格: [
        '一座未来主义城市,赛博朋克风格,霓虹灯,夜景',
        '梵高风格的星空下的小镇',
        '中国水墨画风格的山水',
        '波普艺术风格的人物肖像',
    ],
    自然风景: [
        '北极光下的雪山和湖泊',
        '樱花树下的日本庭院,春天',
        '热带雨林中的瀑布,阳光穿过树叶',
        '沙漠中的绿洲,日落时分',
    ],
    产品设计: [
        '极简风格的智能手表,白色背景',
        '未来感十足的电动汽车,金属质感',
        '现代简约的家具,北欧风格',
        '高科技感的无线耳机,产品摄影',
    ],
}).map(([category, prompts]) => ({ category, prompts }))

/**
 * Load a preset prompt into the prompt textarea.
 *
 * @param {string} examplePrompt - Prompt text to use.
 */
const usePrompt = (examplePrompt) => {
    const promptField = prompt
    promptField.value = examplePrompt
}
</script>

<template>
    <div class="image-generator-demo">
        <div class="demo-header">
            <h2>AI图片生成</h2>
            <p class="subtitle">使用DALL-E 3生成高质量图片</p>
        </div>

        <!-- API配置 -->
        <div class="config-section">
            <h3>配置</h3>
            <input
                v-model="apiKey"
                type="password"
                placeholder="OpenAI API Key"
                class="api-key-input"
            />
        </div>

        <!-- 提示词示例 -->
        <div class="examples-section">
            <h3>提示词示例</h3>
            <div
                v-for="category in promptExamples"
                :key="category.category"
                class="category-group"
            >
                <h4>{{ category.category }}</h4>
                <div class="prompts-grid">
                    <button
                        v-for="(p, index) in category.prompts"
                        :key="index"
                        @click="usePrompt(p)"
                        class="prompt-btn"
                    >
                        {{ p }}
                    </button>
                </div>
            </div>
        </div>

        <!-- 生成配置 -->
        <div class="generate-section">
            <h3>生成图片</h3>

            <div class="form-group">
                <label>提示词 (Prompt):</label>
                <textarea
                    v-model="prompt"
                    rows="4"
                    placeholder="描述你想要生成的图片..."
                ></textarea>
                <div class="hint">
                    提示:详细描述画面内容、风格、颜色、光线等,效果会更好
                </div>
            </div>

            <div class="options-grid">
                <div class="form-group">
                    <label>尺寸:</label>
                    <select v-model="size">
                        <option value="1024x1024">1024x1024 (方形)</option>
                        <option value="1792x1024">1792x1024 (横向)</option>
                        <option value="1024x1792">1024x1792 (纵向)</option>
                    </select>
                </div>

                <div class="form-group">
                    <label>质量:</label>
                    <select v-model="quality">
                        <option value="standard">标准</option>
                        <option value="hd">高清 (HD)</option>
                    </select>
                </div>

                <div class="form-group">
                    <label>风格:</label>
                    <select v-model="style">
                        <option value="vivid">鲜艳 (Vivid)</option>
                        <option value="natural">自然 (Natural)</option>
                    </select>
                </div>
            </div>

            <button
                @click="generate"
                :disabled="isGenerating"
                class="generate-btn"
            >
                {{ isGenerating ? '生成中...' : '生成图片' }}
            </button>
        </div>

        <!-- 生成结果 -->
        <div v-if="generatedImages.length > 0" class="results-section">
            <h3>生成结果</h3>
            <div class="images-grid">
                <div
                    v-for="(image, index) in generatedImages"
                    :key="index"
                    class="image-card"
                >
                    <img :src="image.url" :alt="`生成图片 ${index + 1}`" />
                    <div class="image-actions">
                        <button
                            @click="downloadImage(image.url, index)"
                            class="download-btn"
                        >
                            下载
                        </button>
                    </div>
                    <div v-if="image.revisedPrompt" class="revised-prompt">
                        <strong>优化后的提示词:</strong>
                        <p>{{ image.revisedPrompt }}</p>
                    </div>
                </div>
            </div>
        </div>

        <!-- 功能说明 -->
        <div class="features-section">
            <h3>功能说明</h3>

            <div class="feature-list">
                <div class="feature-item">
                    <h4>DALL-E 3</h4>
                    <p>OpenAI最新的图片生成模型,效果更好,支持更长的提示词</p>
                </div>

                <div class="feature-item">
                    <h4>提示词优化</h4>
                    <p>DALL-E 3会自动优化你的提示词,生成更准确的图片</p>
                </div>

                <div class="feature-item">
                    <h4>多种尺寸</h4>
                    <p>支持方形、横向、纵向三种尺寸,适应不同场景</p>
                </div>

                <div class="feature-item">
                    <h4>质量选择</h4>
                    <p>标准质量经济实惠,HD质量细节更丰富</p>
                </div>
            </div>
        </div>

        <!-- 最佳实践 -->
        <div class="tips-section">
            <h3>提示词最佳实践</h3>

            <div class="tips-list">
                <div class="tip-item">
                    <h4>明确主体</h4>
                    <p>清楚描述画面的主要对象,如"一只猫"、"一座城市"</p>
                </div>

                <div class="tip-item">
                    <h4>添加细节</h4>
                    <p>
                        描述颜色、材质、光线、天气等细节,如"金色阳光"、"木质纹理"
                    </p>
                </div>

                <div class="tip-item">
                    <h4>指定风格</h4>
                    <p>说明艺术风格或摄影风格,如"水彩画"、"产品摄影"</p>
                </div>

                <div class="tip-item">
                    <h4>设置氛围</h4>
                    <p>描述情绪和氛围,如"温馨的"、"神秘的"、"未来感"</p>
                </div>
            </div>
        </div>
    </div>
</template>

<style scoped>
.image-generator-demo {
    padding: 20px;
    background: #f5f7fa;
    min-height: 100vh;
}

.demo-header {
    text-align: center;
    margin-bottom: 40px;
}

.demo-header h2 {
    margin: 0 0 10px 0;
    font-size: 32px;
    color: #303133;
}

.subtitle {
    margin: 0;
    font-size: 16px;
    color: #909399;
}

.config-section,
.examples-section,
.generate-section,
.results-section,
.features-section,
.tips-section {
    margin-bottom: 30px;
    padding: 24px;
    background: white;
    border-radius: 8px;
}

h3 {
    margin: 0 0 20px 0;
    font-size: 18px;
    color: #303133;
}

h4 {
    margin: 0 0 12px 0;
    font-size: 16px;
    color: #606266;
}

.api-key-input {
    width: 100%;
    padding: 12px;
    border: 1px solid #dcdfe6;
    border-radius: 4px;
    font-size: 14px;
}

.category-group {
    margin-bottom: 24px;
}

.prompts-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
    gap: 12px;
}

.prompt-btn {
    padding: 12px 16px;
    background: #ecf5ff;
    color: #409eff;
    border: 1px solid #b3d8ff;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
    text-align: left;
    transition: all 0.3s;
}

.prompt-btn:hover {
    background: #409eff;
    color: white;
}

.form-group {
    margin-bottom: 20px;
}

.form-group label {
    display: block;
    margin-bottom: 8px;
    font-size: 14px;
    color: #606266;
    font-weight: 600;
}

.form-group textarea,
.form-group select {
    width: 100%;
    padding: 10px;
    border: 1px solid #dcdfe6;
    border-radius: 4px;
    font-size: 14px;
    font-family: inherit;
}

.hint {
    margin-top: 8px;
    font-size: 13px;
    color: #909399;
}

.options-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 16px;
    margin-bottom: 20px;
}

.generate-btn {
    padding: 12px 32px;
    background: #409eff;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
    font-weight: 600;
}

.generate-btn:disabled {
    background: #c0c4cc;
    cursor: not-allowed;
}

.images-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(400px, 1fr));
    gap: 24px;
}

.image-card {
    border: 1px solid #e4e7ed;
    border-radius: 8px;
    overflow: hidden;
    background: white;
}

.image-card img {
    width: 100%;
    height: auto;
    display: block;
}

.image-actions {
    padding: 12px 16px;
    background: #f5f7fa;
    display: flex;
    justify-content: center;
}

.download-btn {
    padding: 8px 24px;
    background: #67c23a;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
}

.revised-prompt {
    padding: 16px;
    background: #f5f7fa;
    border-top: 1px solid #e4e7ed;
}

.revised-prompt strong {
    display: block;
    margin-bottom: 8px;
    font-size: 13px;
    color: #606266;
}

.revised-prompt p {
    margin: 0;
    font-size: 13px;
    line-height: 1.6;
    color: #909399;
}

.feature-list,
.tips-list {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
    gap: 20px;
}

.feature-item,
.tip-item {
    padding: 20px;
    background: #f5f7fa;
    border-radius: 8px;
}

.feature-item h4,
.tip-item h4 {
    margin: 0 0 12px 0;
    font-size: 16px;
    color: #409eff;
}

.feature-item p,
.tip-item p {
    margin: 0;
    font-size: 14px;
    line-height: 1.6;
    color: #606266;
}
</style>

四、语音转文字(Whisper)

4.1 语音识别器

speech-recognizer.js

javascript
/**
 * Speech-to-text helper that wraps the OpenAI Whisper HTTP endpoints, the
 * browser's Web Speech API, and microphone recording through MediaRecorder.
 */
export class SpeechRecognizer {
    /**
     * @param {string} [apiKey] - OpenAI API key sent as a Bearer token by
     *   transcribe() and translate(); the browser-only methods do not use it.
     */
    constructor(apiKey) {
        this.apiKey = apiKey
        this.baseURL = 'https://api.openai.com/v1'
    }

    /**
     * Transcribe an audio file with the Whisper API.
     *
     * @param {Blob|File} audioFile - Audio to upload.
     * @param {object} [options]
     * @param {string} [options.language] - Language hint forwarded as `language`.
     * @param {string} [options.prompt] - Prompt forwarded as `prompt`.
     * @param {string} [options.format='json'] - Value sent as `response_format`.
     * @returns {Promise<string>} The `text` field for JSON formats, otherwise
     *   the raw response body.
     * @throws {Error} A mapped error from handleError() on HTTP failure, or the
     *   original error for network failures.
     */
    async transcribe(audioFile, options = {}) {
        const formData = new FormData()
        formData.append('file', audioFile)
        formData.append('model', 'whisper-1')

        if (options.language) {
            formData.append('language', options.language)
        }

        if (options.prompt) {
            formData.append('prompt', options.prompt)
        }

        const format = options.format || 'json'
        formData.append('response_format', format)

        return this.postAudio('transcriptions', formData, format)
    }

    /**
     * Translate an audio file into English text with the Whisper API.
     *
     * @param {Blob|File} audioFile - Audio to upload.
     * @param {object} [options]
     * @param {string} [options.format='json'] - Value sent as `response_format`.
     * @returns {Promise<string>} The `text` field for JSON formats, otherwise
     *   the raw response body.
     * @throws {Error} Same contract as transcribe().
     */
    async translate(audioFile, options = {}) {
        const format = options.format || 'json'

        const formData = new FormData()
        formData.append('file', audioFile)
        formData.append('model', 'whisper-1')
        formData.append('response_format', format)

        return this.postAudio('translations', formData, format)
    }

    /**
     * Shared POST helper for the two `/audio/*` endpoints.
     *
     * @param {string} endpoint - Path segment after `/audio/`.
     * @param {FormData} formData - Multipart body to send.
     * @param {string} format - The `response_format` that was requested.
     * @returns {Promise<string>} Recognised text.
     */
    async postAudio(endpoint, formData, format) {
        try {
            const response = await fetch(`${this.baseURL}/audio/${endpoint}`, {
                method: 'POST',
                headers: {
                    Authorization: `Bearer ${this.apiKey}`,
                },
                body: formData,
            })

            if (!response.ok) {
                // fetch() does not reject on HTTP errors, so attach the status
                // ourselves; handleError() keys off `error.response`.
                const httpError = new Error(
                    `HTTP error! status: ${response.status}`
                )
                httpError.response = { status: response.status }
                throw httpError
            }

            // Only the JSON formats have a body that can be parsed as JSON;
            // the other formats are returned as plain text.
            if (format === 'json' || format === 'verbose_json') {
                const data = await response.json()
                return data.text
            }
            return await response.text()
        } catch (error) {
            throw this.handleError(error)
        }
    }

    /**
     * Create (but do not start) a browser SpeechRecognition instance wired to
     * the supplied callbacks.
     *
     * @param {object} [options]
     * @param {string} [options.language='zh-CN']
     * @param {boolean} [options.continuous=false]
     * @param {boolean} [options.interimResults=true]
     * @param {Function} [options.onStart]
     * @param {Function} [options.onResult] - Receives `{ interim, final }`.
     * @param {Function} [options.onError] - Receives the event's `error` value.
     * @param {Function} [options.onEnd]
     * @returns {object} The recognition instance; the caller invokes start()/stop().
     * @throws {Error} When neither SpeechRecognition nor webkitSpeechRecognition exists.
     */
    startWebSpeechRecognition(options = {}) {
        if (
            !('webkitSpeechRecognition' in window) &&
            !('SpeechRecognition' in window)
        ) {
            throw new Error('浏览器不支持语音识别')
        }

        const SpeechRecognition =
            window.SpeechRecognition || window.webkitSpeechRecognition
        const recognition = new SpeechRecognition()

        recognition.lang = options.language || 'zh-CN'
        // `??` rather than `||`: an explicit `false` must be respected instead
        // of being replaced by the default.
        recognition.continuous = options.continuous ?? false
        recognition.interimResults = options.interimResults ?? true

        recognition.onstart = () => {
            options.onStart?.()
        }

        recognition.onresult = (event) => {
            let interimTranscript = ''
            let finalTranscript = ''

            // Only walk the results that changed in this event.
            for (let i = event.resultIndex; i < event.results.length; i++) {
                const transcript = event.results[i][0].transcript
                if (event.results[i].isFinal) {
                    finalTranscript += transcript
                } else {
                    interimTranscript += transcript
                }
            }

            options.onResult?.({
                interim: interimTranscript,
                final: finalTranscript,
            })
        }

        recognition.onerror = (event) => {
            options.onError?.(event.error)
        }

        recognition.onend = () => {
            options.onEnd?.()
        }

        return recognition
    }

    /**
     * Request microphone access and prepare a MediaRecorder.
     *
     * @returns {Promise<{recorder: MediaRecorder, start: Function, stop: Function}>}
     *   `start()` begins recording; `stop()` resolves with an `audio/webm` Blob
     *   of the collected chunks and stops the microphone tracks.
     * @throws {Error} When the microphone cannot be accessed.
     */
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: true,
            })
            const mediaRecorder = new MediaRecorder(stream)
            const audioChunks = []

            mediaRecorder.ondataavailable = (event) => {
                audioChunks.push(event.data)
            }

            return {
                recorder: mediaRecorder,
                stop: () => {
                    return new Promise((resolve) => {
                        // Register the handler before calling stop() so the
                        // final chunk is included in the Blob.
                        mediaRecorder.onstop = () => {
                            const audioBlob = new Blob(audioChunks, {
                                type: 'audio/webm',
                            })
                            resolve(audioBlob)
                        }
                        mediaRecorder.stop()
                        stream.getTracks().forEach((track) => track.stop())
                    })
                },
                start: () => {
                    mediaRecorder.start()
                },
            }
        } catch (error) {
            throw new Error('无法访问麦克风: ' + error.message)
        }
    }

    /**
     * Map an HTTP failure to a user-facing error.
     *
     * @param {Error} error - Error that may carry `response.status`.
     * @returns {Error} A mapped error (with `status` and `cause` attached) when
     *   `error.response` is present, otherwise the error unchanged.
     */
    handleError(error) {
        if (error.response) {
            const { status } = error.response
            let mapped
            switch (status) {
                case 400:
                    mapped = new Error('音频文件格式不支持')
                    break
                case 401:
                    mapped = new Error('API密钥无效')
                    break
                default:
                    mapped = new Error('转录失败')
            }
            // Keep the details available for callers that want to inspect them.
            mapped.status = status
            mapped.cause = error
            return mapped
        }
        return error
    }
}

export default SpeechRecognizer

4.2 语音识别演示组件

SpeechRecognizerDemo.vue

vue
<script setup>
import { ref } from 'vue'
import { SpeechRecognizer } from './speech-recognizer.js'

// Whisper flow: key typed into the config input, the audio selected or
// recorded, and the text returned by the transcription call.
const apiKey = ref('')
const audioFile = ref(null)
const transcript = ref('')

// Busy flags that drive the disabled state of the buttons.
const isTranscribing = ref(false)
const isRecording = ref(false)
const isWebSpeechActive = ref(false)

// Web Speech flow: accumulated final text and the current interim guess.
const webSpeechTranscript = ref('')
const interimTranscript = ref('')

// Non-reactive handles, created lazily by the handlers below.
let recognizer = null
let webSpeechRecognition = null
let recorder = null

/**
 * Build a SpeechRecognizer from the API key currently entered.
 *
 * @returns {boolean} true when a recognizer was created, false when the key
 *   is empty (the user is alerted in that case).
 */
const initRecognizer = () => {
    const key = apiKey.value
    if (key) {
        recognizer = new SpeechRecognizer(key)
        return true
    }

    alert('请输入API Key')
    return false
}

/**
 * Remember the first file chosen in the file input, if any.
 *
 * @param {Event} event - change event from the file input.
 */
const handleFileSelect = (event) => {
    const picked = event.target.files[0]
    if (!picked) return

    audioFile.value = picked
}

/**
 * Send the selected or recorded audio to Whisper and show the resulting text.
 * Alerts and returns early when no audio or no API key is available.
 */
const transcribe = async () => {
    const file = audioFile.value
    if (!file) {
        alert('请选择音频文件')
        return
    }

    const ready = initRecognizer()
    if (!ready) return

    isTranscribing.value = true
    transcript.value = ''

    try {
        const text = await recognizer.transcribe(file, { language: 'zh' })
        transcript.value = text
    } catch (err) {
        alert('转录失败: ' + err.message)
    } finally {
        // Always release the busy flag so the button becomes usable again.
        isTranscribing.value = false
    }
}

/**
 * Ask for microphone access and begin recording.
 * Alerts the user when recording cannot be started.
 */
const startRecording = async () => {
    // Recording only needs the microphone, not an API key. Create a key-less
    // recognizer on demand so recording works before any transcription has
    // been attempted; transcribe() rebuilds it with the key later.
    if (!recognizer) {
        recognizer = new SpeechRecognizer()
    }

    try {
        recorder = await recognizer.startRecording()
        recorder.start()
        isRecording.value = true
    } catch (error) {
        alert('录音失败: ' + error.message)
    }
}

/**
 * Stop the active recording and store the result as the audio file to
 * transcribe. Does nothing when no recorder exists.
 */
const stopRecording = async () => {
    if (!recorder) return

    const recordedBlob = await recorder.stop()
    const fileOptions = { type: 'audio/webm' }
    audioFile.value = new File([recordedBlob], 'recording.webm', fileOptions)
    isRecording.value = false

    alert('录音完成,可以点击"转录"按钮')
}

/**
 * Start continuous recognition through the browser's Web Speech API and
 * stream interim and final text into the component state.
 */
const startWebSpeech = () => {
    // The Web Speech path needs no API key, so a key-less recognizer is fine.
    recognizer = recognizer || new SpeechRecognizer()

    const markInactive = () => {
        isWebSpeechActive.value = false
    }

    const handlers = {
        onStart: () => {
            isWebSpeechActive.value = true
            webSpeechTranscript.value = ''
            interimTranscript.value = ''
        },
        onResult: (result) => {
            interimTranscript.value = result.interim
            if (!result.final) return

            // A final segment arrived: append it and drop the interim text.
            webSpeechTranscript.value += result.final + ' '
            interimTranscript.value = ''
        },
        onError: (error) => {
            alert('识别错误: ' + error)
            markInactive()
        },
        onEnd: () => {
            markInactive()
        },
    }

    try {
        webSpeechRecognition = recognizer.startWebSpeechRecognition({
            language: 'zh-CN',
            continuous: true,
            interimResults: true,
            ...handlers,
        })

        webSpeechRecognition.start()
    } catch (err) {
        alert('启动失败: ' + err.message)
    }
}

/**
 * Stop browser recognition if one was started and clear the active flag.
 */
const stopWebSpeech = () => {
    if (!webSpeechRecognition) return

    webSpeechRecognition.stop()
    isWebSpeechActive.value = false
}

/**
 * Reset every piece of displayed text (Whisper result, Web Speech result and
 * the interim guess) to an empty string.
 */
const clearTranscript = () => {
    const textRefs = [transcript, webSpeechTranscript, interimTranscript]
    for (const textRef of textRefs) {
        textRef.value = ''
    }
}
</script>

<template>
    <div class="speech-recognizer-demo">
        <div class="demo-header">
            <h2>语音转文字</h2>
            <p class="subtitle">使用Whisper API或Web Speech API</p>
        </div>

        <!-- API配置 -->
        <div class="config-section">
            <h3>Whisper API配置</h3>
            <input
                v-model="apiKey"
                type="password"
                placeholder="OpenAI API Key (用于Whisper)"
                class="api-key-input"
            />
            <p class="hint">Whisper支持更多语言和更高准确率,但需要API Key</p>
        </div>

        <!-- Whisper转录 -->
        <div class="whisper-section">
            <h3>Whisper API转录</h3>

            <div class="upload-area">
                <input
                    type="file"
                    accept="audio/*"
                    @change="handleFileSelect"
                    class="file-input"
                />
                <p v-if="audioFile" class="file-name">
                    已选择: {{ audioFile.name }}
                </p>
            </div>

            <div class="button-group">
                <button
                    @click="startRecording"
                    :disabled="isRecording || isTranscribing"
                    class="record-btn"
                >
                    开始录音
                </button>
                <button
                    @click="stopRecording"
                    :disabled="!isRecording"
                    class="stop-btn"
                >
                    停止录音
                </button>
                <button
                    @click="transcribe"
                    :disabled="!audioFile || isTranscribing"
                    class="transcribe-btn"
                >
                    {{ isTranscribing ? '转录中...' : '转录' }}
                </button>
            </div>

            <div v-if="transcript" class="result-box">
                <h4>转录结果</h4>
                <div class="transcript-text">{{ transcript }}</div>
            </div>
        </div>

        <!-- Web Speech API -->
        <div class="webspeech-section">
            <h3>浏览器语音识别 (无需API Key)</h3>
            <p class="hint">使用浏览器内置的语音识别,免费但准确率略低</p>

            <div class="button-group">
                <button
                    @click="startWebSpeech"
                    :disabled="isWebSpeechActive"
                    class="start-btn"
                >
                    开始识别
                </button>
                <button
                    @click="stopWebSpeech"
                    :disabled="!isWebSpeechActive"
                    class="stop-btn"
                >
                    停止识别
                </button>
                <button @click="clearTranscript" class="clear-btn">清空</button>
            </div>

            <div v-if="isWebSpeechActive" class="status-indicator">
                <span class="pulse"></span>
                正在监听...
            </div>

            <div class="result-box">
                <h4>识别结果</h4>
                <div class="transcript-text">
                    {{ webSpeechTranscript }}
                    <span v-if="interimTranscript" class="interim">
                        {{ interimTranscript }}
                    </span>
                </div>
            </div>
        </div>

        <!-- 功能说明 -->
        <div class="features-section">
            <h3>功能对比</h3>

            <table class="comparison-table">
                <thead>
                    <tr>
                        <th>特性</th>
                        <th>Whisper API</th>
                        <th>Web Speech API</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>准确率</td>
                        <td>⭐⭐⭐⭐⭐</td>
                        <td>⭐⭐⭐</td>
                    </tr>
                    <tr>
                        <td>语言支持</td>
                        <td>99+种语言</td>
                        <td>有限</td>
                    </tr>
                    <tr>
                        <td>实时识别</td>
                        <td>❌ (需上传)</td>
                        <td>✅</td>
                    </tr>
                    <tr>
                        <td>费用</td>
                        <td>$0.006/分钟</td>
                        <td>免费</td>
                    </tr>
                    <tr>
                        <td>浏览器支持</td>
                        <td>所有现代浏览器</td>
                        <td>Chrome/Edge/Safari</td>
                    </tr>
                    <tr>
                        <td>离线使用</td>
                        <td>❌</td>
                        <td>❌</td>
                    </tr>
                </tbody>
            </table>
        </div>

        <!-- 使用场景 -->
        <div class="usecases-section">
            <h3>使用场景</h3>

            <div class="usecases-grid">
                <div class="usecase-card">
                    <h4>会议纪要</h4>
                    <p>录制会议内容,自动生成文字记录</p>
                </div>
                <div class="usecase-card">
                    <h4>语音输入</h4>
                    <p>通过语音快速输入文字,提升效率</p>
                </div>
                <div class="usecase-card">
                    <h4>字幕生成</h4>
                    <p>为视频自动生成字幕文件</p>
                </div>
                <div class="usecase-card">
                    <h4>语音翻译</h4>
                    <p>识别语音并翻译成其他语言</p>
                </div>
            </div>
        </div>
    </div>
</template>

<style scoped>
.speech-recognizer-demo {
    padding: 20px;
    background: #f5f7fa;
    min-height: 100vh;
}

.demo-header {
    text-align: center;
    margin-bottom: 40px;
}

.demo-header h2 {
    margin: 0 0 10px 0;
    font-size: 32px;
    color: #303133;
}

.subtitle {
    margin: 0;
    font-size: 16px;
    color: #909399;
}

.config-section,
.whisper-section,
.webspeech-section,
.features-section,
.usecases-section {
    margin-bottom: 30px;
    padding: 24px;
    background: white;
    border-radius: 8px;
}

h3 {
    margin: 0 0 20px 0;
    font-size: 18px;
    color: #303133;
}

h4 {
    margin: 0 0 12px 0;
    font-size: 16px;
    color: #606266;
}

.api-key-input {
    width: 100%;
    padding: 12px;
    border: 1px solid #dcdfe6;
    border-radius: 4px;
    font-size: 14px;
    margin-bottom: 8px;
}

.hint {
    margin: 0;
    font-size: 13px;
    color: #909399;
}

.upload-area {
    margin-bottom: 20px;
    padding: 24px;
    border: 2px dashed #dcdfe6;
    border-radius: 8px;
    text-align: center;
}

.file-input {
    display: block;
    margin: 0 auto;
}

.file-name {
    margin: 12px 0 0 0;
    font-size: 14px;
    color: #606266;
}

.button-group {
    display: flex;
    gap: 12px;
    margin-bottom: 20px;
    flex-wrap: wrap;
}

.record-btn,
.stop-btn,
.transcribe-btn,
.start-btn,
.clear-btn {
    padding: 10px 24px;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 14px;
    font-weight: 600;
}

.record-btn,
.start-btn {
    background: #67c23a;
    color: white;
}

.stop-btn {
    background: #f56c6c;
    color: white;
}

.transcribe-btn {
    background: #409eff;
    color: white;
}

.clear-btn {
    background: #e6a23c;
    color: white;
}

.record-btn:disabled,
.stop-btn:disabled,
.transcribe-btn:disabled,
.start-btn:disabled {
    background: #c0c4cc;
    cursor: not-allowed;
}

.status-indicator {
    display: flex;
    align-items: center;
    gap: 12px;
    padding: 12px 16px;
    background: #f0f9ff;
    border-left: 4px solid #409eff;
    border-radius: 4px;
    margin-bottom: 20px;
    font-size: 14px;
    color: #409eff;
    font-weight: 600;
}

.pulse {
    width: 12px;
    height: 12px;
    background: #f56c6c;
    border-radius: 50%;
    animation: pulse 1.5s infinite;
}

@keyframes pulse {
    0%,
    100% {
        opacity: 1;
        transform: scale(1);
    }
    50% {
        opacity: 0.5;
        transform: scale(1.2);
    }
}

.result-box {
    padding: 20px;
    background: #f5f7fa;
    border-radius: 8px;
}

.transcript-text {
    font-size: 15px;
    line-height: 1.8;
    color: #303133;
    white-space: pre-wrap;
}

.interim {
    color: #909399;
    font-style: italic;
}

.comparison-table {
    width: 100%;
    border-collapse: collapse;
}

.comparison-table th,
.comparison-table td {
    padding: 12px;
    text-align: left;
    border: 1px solid #e4e7ed;
}

.comparison-table th {
    background: #f5f7fa;
    font-weight: 600;
    color: #303133;
}

.comparison-table td {
    color: #606266;
}

.usecases-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
    gap: 16px;
}

.usecase-card {
    padding: 20px;
    background: #f5f7fa;
    border-radius: 8px;
    border-left: 4px solid #409eff;
}

.usecase-card h4 {
    margin: 0 0 8px 0;
    font-size: 16px;
    color: #409eff;
}

.usecase-card p {
    margin: 0;
    font-size: 14px;
    line-height: 1.6;
    color: #606266;
}
</style>

五、文字转语音

5.1 语音合成器

speech-synthesizer.js

javascript
/**
 * Text-to-speech helper that wraps the OpenAI speech endpoint and the
 * browser's speechSynthesis API, plus small playback/download utilities.
 */
export class SpeechSynthesizer {
    /**
     * @param {string} [apiKey] - OpenAI API key sent as a Bearer token by
     *   synthesize(); the browser-only methods do not use it.
     */
    constructor(apiKey) {
        this.apiKey = apiKey
        this.baseURL = 'https://api.openai.com/v1'
    }

    /**
     * Synthesize speech with the OpenAI TTS endpoint.
     *
     * @param {string} text - Text to speak.
     * @param {object} [options]
     * @param {string} [options.voice='alloy'] - alloy, echo, fable, onyx, nova, shimmer.
     * @param {number} [options.speed=1.0]
     * @returns {Promise<Blob>} The audio returned by the API.
     * @throws {Error} A mapped error from handleError() on HTTP failure, or the
     *   original error for network failures.
     */
    async synthesize(text, options = {}) {
        try {
            const response = await fetch(`${this.baseURL}/audio/speech`, {
                method: 'POST',
                headers: {
                    Authorization: `Bearer ${this.apiKey}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    model: 'tts-1',
                    input: text,
                    voice: options.voice || 'alloy',
                    speed: options.speed || 1.0,
                }),
            })

            if (!response.ok) {
                // fetch() does not reject on HTTP errors, so attach the status
                // ourselves; handleError() keys off `error.response`.
                const httpError = new Error(
                    `HTTP error! status: ${response.status}`
                )
                httpError.response = { status: response.status }
                throw httpError
            }

            const audioBlob = await response.blob()
            return audioBlob
        } catch (error) {
            throw this.handleError(error)
        }
    }

    /**
     * Speak text with the browser's built-in speechSynthesis.
     *
     * @param {string} text - Text to speak.
     * @param {object} [options]
     * @param {string} [options.lang='zh-CN']
     * @param {number} [options.rate=1.0]
     * @param {number} [options.pitch=1.0] - 0 is a valid value and is kept.
     * @param {number} [options.volume=1.0] - 0 (silent) is a valid value and is kept.
     * @param {SpeechSynthesisVoice} [options.voice]
     * @param {Function} [options.onStart]
     * @param {Function} [options.onEnd]
     * @param {Function} [options.onError] - Receives the event's `error` value.
     * @returns {{pause: Function, resume: Function, cancel: Function}} Controls
     *   that act on the global speechSynthesis queue.
     * @throws {Error} When the browser has no speechSynthesis.
     */
    speakWithWebAPI(text, options = {}) {
        if (!('speechSynthesis' in window)) {
            throw new Error('浏览器不支持语音合成')
        }

        const utterance = new SpeechSynthesisUtterance(text)

        utterance.lang = options.lang || 'zh-CN'
        utterance.rate = options.rate || 1.0
        // pitch and volume both accept 0, so only fall back when the caller
        // did not supply a value at all.
        utterance.pitch = options.pitch ?? 1.0
        utterance.volume = options.volume ?? 1.0

        if (options.voice) {
            utterance.voice = options.voice
        }

        utterance.onstart = () => {
            options.onStart?.()
        }

        utterance.onend = () => {
            options.onEnd?.()
        }

        utterance.onerror = (event) => {
            options.onError?.(event.error)
        }

        window.speechSynthesis.speak(utterance)

        return {
            pause: () => window.speechSynthesis.pause(),
            resume: () => window.speechSynthesis.resume(),
            cancel: () => window.speechSynthesis.cancel(),
        }
    }

    /**
     * Resolve with the browser's voice list, waiting for the `voiceschanged`
     * notification when the list is still empty.
     *
     * @returns {Promise<SpeechSynthesisVoice[]>}
     */
    getAvailableVoices() {
        return new Promise((resolve) => {
            let voices = window.speechSynthesis.getVoices()

            if (voices.length > 0) {
                resolve(voices)
            } else {
                window.speechSynthesis.onvoiceschanged = () => {
                    voices = window.speechSynthesis.getVoices()
                    resolve(voices)
                }
            }
        })
    }

    /**
     * Start playing an audio Blob.
     *
     * @param {Blob} audioBlob - Audio to play.
     * @returns {{audio: HTMLAudioElement, pause: Function, play: Function,
     *   stop: Function, dispose: Function}} Playback controls. Call
     *   `dispose()` when the audio is no longer needed to release the
     *   object URL created for it.
     */
    playAudio(audioBlob) {
        const audioUrl = URL.createObjectURL(audioBlob)
        const audio = new Audio(audioUrl)

        audio.play()

        return {
            audio,
            pause: () => audio.pause(),
            play: () => audio.play(),
            stop: () => {
                audio.pause()
                audio.currentTime = 0
            },
            // Object URLs stay alive until revoked; give callers a way to
            // free the Blob once playback is finished for good.
            dispose: () => {
                audio.pause()
                URL.revokeObjectURL(audioUrl)
            },
        }
    }

    /**
     * Trigger a browser download of an audio Blob.
     *
     * @param {Blob} audioBlob - Audio to save.
     * @param {string} [filename='speech.mp3'] - Suggested file name.
     */
    downloadAudio(audioBlob, filename = 'speech.mp3') {
        const url = URL.createObjectURL(audioBlob)
        const link = document.createElement('a')
        link.href = url
        link.download = filename
        link.click()
        URL.revokeObjectURL(url)
    }

    /**
     * Map an HTTP failure to a user-facing error.
     *
     * @param {Error} error - Error that may carry `response.status`.
     * @returns {Error} A mapped error (with `status` and `cause` attached) when
     *   `error.response` is present, otherwise the error unchanged.
     */
    handleError(error) {
        if (error.response) {
            const { status } = error.response
            let mapped
            switch (status) {
                case 400:
                    mapped = new Error('文本内容无效')
                    break
                case 401:
                    mapped = new Error('API密钥无效')
                    break
                default:
                    mapped = new Error('合成失败')
            }
            // Keep the details available for callers that want to inspect them.
            mapped.status = status
            mapped.cause = error
            return mapped
        }
        return error
    }
}

export default SpeechSynthesizer

六、AI摘要与翻译

6.1 文本处理器

text-processor.js

javascript
import { OpenAIClient } from './openai-client.js'

/**
 * Prompt-based text utilities (summary, translation, keywords,
 * classification, sentiment, rewrite, proofreading) built on a chat client.
 */
export class TextProcessor {
    /**
     * @param {string} apiKey - Key handed to the OpenAIClient constructor.
     */
    constructor(apiKey) {
        this.client = new OpenAIClient({ apiKey })
    }

    /**
     * Summarize text.
     *
     * @param {string} text - Text to summarize.
     * @param {object} [options]
     * @param {string} [options.length='medium'] - short / medium / long;
     *   unknown values fall back to medium.
     * @param {string} [options.style='paragraph'] - paragraph / bullets /
     *   key-points; unknown values fall back to paragraph.
     * @returns {Promise<string>} The `content` of the chat response.
     */
    async summarize(text, options = {}) {
        const length = options.length || 'medium'
        const style = options.style || 'paragraph'

        let lengthInstruction = ''
        switch (length) {
            case 'short':
                lengthInstruction = '用1-2句话概括'
                break
            case 'long':
                lengthInstruction = '详细概括,保留关键信息'
                break
            case 'medium':
            default:
                // An unrecognised value would otherwise leave requirement 1
                // blank in the prompt.
                lengthInstruction = '用3-5句话概括'
                break
        }

        let styleInstruction = ''
        switch (style) {
            case 'bullets':
                styleInstruction = '用要点列表输出'
                break
            case 'key-points':
                styleInstruction = '提取关键要点,每个要点一行'
                break
            case 'paragraph':
            default:
                // Same reasoning as above, for requirement 2.
                styleInstruction = '用段落形式输出'
                break
        }

        const prompt = `请对以下文本进行摘要:

${text}

要求:
1. ${lengthInstruction}
2. ${styleInstruction}
3. 保持客观准确`

        const response = await this.client.chat(
            [
                { role: 'system', content: '你是专业的文本摘要助手' },
                { role: 'user', content: prompt },
            ],
            {
                temperature: 0.5,
            }
        )

        return response.content
    }

    /**
     * Translate text.
     *
     * @param {string} text - Text to translate.
     * @param {object} [options]
     * @param {string} [options.targetLang='en']
     * @param {string} [options.sourceLang='auto'] - 'auto' omits the source
     *   language from the prompt.
     * @param {string} [options.style='standard'] - standard / formal / casual.
     * @returns {Promise<string>} The `content` of the chat response.
     */
    async translate(text, options = {}) {
        const targetLang = options.targetLang || 'en'
        const sourceLang = options.sourceLang || 'auto'
        const style = options.style || 'standard'

        let styleInstruction = ''
        switch (style) {
            case 'formal':
                styleInstruction = '使用正式的语言风格'
                break
            case 'casual':
                styleInstruction = '使用口语化的表达'
                break
        }

        const prompt =
            sourceLang === 'auto'
                ? `请将以下文本翻译成${targetLang}:`
                : `请将以下${sourceLang}文本翻译成${targetLang}:`

        const fullPrompt = `${prompt}

${text}

${styleInstruction ? '要求:' + styleInstruction : ''}`

        const response = await this.client.chat(
            [
                { role: 'system', content: '你是专业的翻译助手' },
                { role: 'user', content: fullPrompt },
            ],
            {
                temperature: 0.3,
            }
        )

        return response.content
    }

    /**
     * Extract keywords from text.
     *
     * @param {string} text - Text to analyse.
     * @param {object} [options]
     * @param {number} [options.count=10] - Number of keywords requested.
     * @returns {Promise<string[]>} Trimmed, non-empty keywords in the order
     *   the model returned them.
     */
    async extractKeywords(text, options = {}) {
        const count = options.count || 10

        const prompt = `请从以下文本中提取${count}个关键词:

${text}

要求:
1. 只输出关键词,用逗号分隔
2. 按重要性排序
3. 优先提取核心概念`

        const response = await this.client.chat([
            { role: 'system', content: '你是文本分析专家' },
            { role: 'user', content: prompt },
        ])

        // Accept ASCII commas, full-width commas and the enumeration comma,
        // and drop the empty entries a trailing separator would produce.
        return response.content
            .split(/[,,、]/)
            .map((k) => k.trim())
            .filter((k) => k.length > 0)
    }

    /**
     * Classify text into one of the given categories.
     *
     * @param {string} text - Text to classify.
     * @param {string[]} categories - Candidate category names.
     * @returns {Promise<string>} The trimmed `content` of the chat response.
     */
    async classify(text, categories) {
        const prompt = `请判断以下文本属于哪个类别:

类别列表:${categories.join('、')}

文本内容:
${text}

只输出类别名称,不要额外解释。`

        const response = await this.client.chat(
            [
                { role: 'system', content: '你是文本分类专家' },
                { role: 'user', content: prompt },
            ],
            {
                temperature: 0.3,
            }
        )

        return response.content.trim()
    }

    /**
     * Ask for a sentiment judgement with a short rationale.
     *
     * @param {string} text - Text to analyse.
     * @returns {Promise<string>} The `content` of the chat response.
     */
    async analyzeSentiment(text) {
        const prompt = `请分析以下文本的情感倾向:

${text}

请判断情感是:正面、负面还是中性,并简要说明理由。`

        const response = await this.client.chat([
            { role: 'system', content: '你是情感分析专家' },
            { role: 'user', content: prompt },
        ])

        return response.content
    }

    /**
     * Rewrite text in a requested style and length.
     *
     * @param {string} text - Text to rewrite.
     * @param {object} [options]
     * @param {string} [options.style='professional'] - professional / casual / creative.
     * @param {string} [options.length='same'] - shorter / same / longer.
     * @returns {Promise<string>} The `content` of the chat response.
     */
    async rewrite(text, options = {}) {
        const style = options.style || 'professional'
        const length = options.length || 'same'

        const prompt = `请改写以下文本:

${text}

要求:
1. 风格:${style}
2. 长度:${length}
3. 保持核心意思不变`

        const response = await this.client.chat([
            { role: 'system', content: '你是文本改写专家' },
            { role: 'user', content: prompt },
        ])

        return response.content
    }

    /**
     * Ask for grammar and spelling corrections.
     *
     * @param {string} text - Text to proofread.
     * @returns {Promise<string>} The `content` of the chat response.
     */
    async correctText(text) {
        const prompt = `请纠正以下文本中的语法和拼写错误:

${text}

要求:
1. 只修正错误
2. 保持原意
3. 标注修改位置`

        const response = await this.client.chat([
            { role: 'system', content: '你是文本校对专家' },
            { role: 'user', content: prompt },
        ])

        return response.content
    }
}

export default TextProcessor

七、简历描述模板

AI辅助功能开发 (2024.06 - 至今)

负责AI辅助功能的前端开发,实现代码生成、图片生成、语音识别、文本处理等AI能力集成。

核心职责

  • 集成OpenAI Codex实现AI代码生成和代码解释
  • 对接DALL-E 3实现AI图片生成功能
  • 集成Whisper API实现语音转文字
  • 开发TTS语音合成功能
  • 实现文本摘要、翻译、关键词提取等NLP功能
技术实现
  • 封装CodeGenerator类,支持代码生成、补全、重构、调试
  • 使用DALL-E 3 API生成1024x1024高质量图片
  • 集成Whisper实现99+语言的语音识别
  • 结合Web Speech API实现实时语音输入
  • 开发TextProcessor实现智能摘要和多语言翻译
项目成果
  • AI代码生成准确率85%,显著提升开发效率
  • 图片生成功能日均使用500+次
  • 语音识别准确率95%,支持实时转录
  • 文本摘要质量评分4.5/5,用户满意度高
  • 翻译功能支持20+语言,准确率90%+

八、SOP标准回答

面试问题: 如何实现AI代码生成功能?

标准回答

"AI代码生成的核心是Prompt工程和结果处理。

我用的是OpenAI的GPT-4模型。首先要构建好的Prompt。不能只说'写个函数',要明确需求。我的Prompt包含四部分:一是角色定义,'你是资深程序员'。二是功能描述,具体说明要实现什么。三是技术约束,比如用什么语言、什么框架。四是质量要求,代码要规范、有注释、能运行。

具体实现上,我封装了CodeGenerator类。generateCode方法接收描述和选项,内部调用OpenAI API。关键是buildCodePrompt方法,根据用户输入构造完整的Prompt。

还实现了几个辅助功能。一是代码补全,给定光标位置,AI补全后续代码。二是代码解释,逐行或逐块解释代码逻辑。三是代码重构,提升可读性和性能。四是代码审查,检查潜在bug和安全问题。

返回结果的处理也很重要。AI的回复可能包含解释文本和代码块。我用正则提取```包裹的代码块。如果有多个代码块,默认返回第一个。还支持提取所有代码块供用户选择。

用户体验方面,加了几个优化。一是预设示例,常见需求一键使用。二是参数可调,语言、框架、风格都能定制。三是代码高亮显示。四是一键复制。

实际效果不错。简单任务像工具函数、组件模板,生成的代码基本能直接用。复杂任务像完整功能模块,可能需要微调,但大框架是对的,节省很多时间。根据统计,使用AI生成后,开发效率提升30%左右。"

面试问题: 语音识别如何选择Whisper还是Web Speech API?

标准回答

"这要看具体场景和需求。

Whisper API优势是准确率高。OpenAI训练了大规模模型,支持99+种语言,甚至能识别方言和口音。对于需要高准确率的场景,比如会议纪要、字幕生成,Whisper是首选。另外Whisper的长音频处理能力强,几十分钟的录音也能准确识别。

但Whisper有局限。一是需要上传音频,不支持实时流式识别。二是有费用,$0.006/分钟,量大的话成本不低。三是依赖网络,离线用不了。

Web Speech API的优势是实时和免费。浏览器内置,不需要额外服务。适合语音输入这种实时场景,用户说话就能看到文字,体验很流畅。而且完全免费,没有使用限制。

但Web Speech API准确率较低,特别是嘈杂环境。语言支持也有限,主要是常用语言。还有浏览器兼容性问题,Firefox支持不好。

我的项目里两个都用了。对于录音转文字的功能,用Whisper,保证准确率。对于实时语音输入,用Web Speech API,优先考虑体验和成本。给用户做了开关,可以选择用哪个,付费用户默认Whisper,免费用户默认Web Speech API。

还做了降级策略。如果用户浏览器不支持Web Speech API,引导用录音后用Whisper转录。如果Whisper API调用失败,提示用户并引导切换到Web Speech API重新进行实时语音输入(Web Speech API只能识别麦克风的实时输入,无法转录已录制的音频文件)。

选型要权衡准确率、实时性、成本、兼容性。没有一个方案完美,组合使用效果最好。"


九、难点与亮点分析

难点1: 如何提升AI代码生成的准确率?

问题场景: 直接用AI生成的代码经常有问题,需要多次调整。

解决方案

javascript
/**
 * Code generator that layers validation, incremental optimisation and
 * few-shot prompting on top of a basic generateCode().
 *
 * NOTE(review): generateCode, checkSyntax, fixSyntaxErrors, generateTests,
 * runTests, fixFailedTests, validateOptimization, extractCode and
 * `this.client` are not defined in this snippet; presumably they come from a
 * base class or mixin — confirm.
 */
class SmartCodeGenerator {
    /**
     * Generate code, then run a syntax check and generated tests, asking the
     * model for one repair after each failing step.
     *
     * @param {string} description - What the code should do.
     * @param {object} [options] - Forwarded to generateCode(); `language` is
     *   also passed to checkSyntax().
     * @returns {Promise<{code: string, tests: *, validated: boolean}>}
     */
    async generateWithValidation(description, options = {}) {
        // Step 1: initial generation.
        let code = await this.generateCode(description, options)

        // Step 2: syntax check, with one repair attempt on failure.
        const syntaxCheck = await this.checkSyntax(code, options.language)
        if (!syntaxCheck.valid) {
            code = await this.fixSyntaxErrors(code, syntaxCheck.errors)
        }

        // Step 3: generate test cases for the current code.
        const tests = await this.generateTests(code)

        // Step 4: run them, with one repair attempt on failure.
        const testResult = await this.runTests(code, tests)
        if (testResult.failed > 0) {
            code = await this.fixFailedTests(code, testResult.failures)
        }

        // NOTE(review): repaired code is not re-checked before this flag is
        // set, so `validated` only means the pipeline ran to completion.
        return {
            code,
            tests,
            validated: true,
        }
    }

    /**
     * Apply optimisation goals one at a time, keeping only the versions that
     * pass validation.
     *
     * @param {string} code - Original code.
     * @param {string[]} goals - Optimisation goals, applied in order.
     * @returns {Promise<string>} The last version that passed
     *   validateOptimization(), or the original code if none did.
     */
    async optimizeCode(code, goals) {
        let optimized = code

        for (const goal of goals) {
            const prompt = `请优化以下代码,目标:${goal}

\`\`\`
${optimized}
\`\`\`

只输出优化后的代码,不要解释。`

            const response = await this.client.chat([
                { role: 'system', content: '你是代码优化专家' },
                { role: 'user', content: prompt },
            ])

            // Hold the new version separately so a failed validation really
            // does leave `optimized` at the previous good version.
            const candidate = this.extractCode(response.content)

            const isValid = await this.validateOptimization(code, candidate)
            if (!isValid) {
                break
            }

            optimized = candidate
        }

        return optimized
    }

    /**
     * Generate code from a description using few-shot examples.
     *
     * @param {string} description - What the code should do.
     * @param {{description: string, code: string}[]} examples - Worked examples.
     * @returns {Promise<string>} Code extracted from the chat response.
     */
    async generateWithExamples(description, examples) {
        let prompt = '请参考以下示例生成代码:\n\n'

        examples.forEach((example, i) => {
            prompt += `示例${i + 1}:\n`
            prompt += `需求:${example.description}\n`
            prompt += `代码:\n\`\`\`\n${example.code}\n\`\`\`\n\n`
        })

        prompt += `现在请生成以下需求的代码:\n${description}`

        const response = await this.client.chat([
            { role: 'system', content: '你是代码生成专家' },
            { role: 'user', content: prompt },
        ])

        return this.extractCode(response.content)
    }
}
关键点
  • 多步骤验证,不是一次生成就结束
  • 自动检查语法和运行测试
  • 增量优化,逐步改进
  • Few-shot示例学习

难点2: 如何处理长文本的摘要?

问题场景: 文本太长超过Token限制,无法直接摘要。

解决方案

javascript
/**
 * Summarizer for texts that exceed the model's token budget: the text is
 * split into chunks, each chunk is summarized, and the partial summaries are
 * combined.
 *
 * NOTE(review): summarizeChunk, summarizeFinal, extractKeyPoint and
 * combineKeyPoints are not defined in this snippet — presumably supplied
 * elsewhere; confirm.
 */
class LongTextSummarizer {
    /**
     * @param {object} client - Chat client stored on the instance.
     * @param {number} [maxTokens=4000] - Token budget used to size chunks.
     */
    constructor(client, maxTokens = 4000) {
        this.client = client
        this.maxTokens = maxTokens
    }

    /**
     * Summarize text of arbitrary length.
     *
     * @param {string} text - Text to summarize.
     * @returns {Promise<string>} The summary; an empty string for empty input.
     */
    async summarizeLongText(text) {
        // 1. Split into chunks.
        const chunks = this.splitIntoChunks(text)
        if (chunks.length === 0) {
            return ''
        }

        // 2. Summarize each chunk in order.
        const chunkSummaries = []
        for (const chunk of chunks) {
            chunkSummaries.push(await this.summarizeChunk(chunk))
        }

        // 3. A single chunk needs no merging.
        if (chunkSummaries.length === 1) {
            return chunkSummaries[0]
        }

        // 4. Recurse while the merged summaries are still too long, but only
        //    if this pass actually made the text shorter; otherwise the
        //    recursion would never terminate.
        const combined = chunkSummaries.join('\n\n')
        const stillTooLong =
            this.estimateTokens(combined) > this.maxTokens * 0.8
        if (stillTooLong && combined.length < text.length) {
            return await this.summarizeLongText(combined)
        }

        // 5. Final pass over the merged summaries.
        return await this.summarizeFinal(combined)
    }

    /**
     * Split text into chunks of at most 60% of the token budget, preferring
     * paragraph boundaries (blank lines).
     *
     * @param {string} text - Text to split.
     * @returns {string[]} Trimmed, non-empty chunks; empty for blank input.
     */
    splitIntoChunks(text) {
        const chunks = []
        const targetChunkSize = this.maxTokens * 0.6 // leave headroom

        const paragraphs = (text ?? '')
            .split(/\n\n+/)
            .filter((para) => para.trim().length > 0)
        let currentChunk = ''

        const flush = () => {
            if (currentChunk.trim()) {
                chunks.push(currentChunk.trim())
            }
            currentChunk = ''
        }

        for (const para of paragraphs) {
            // A paragraph that is too large on its own cannot be packed with
            // others; cut it into budget-sized pieces instead.
            if (this.estimateTokens(para) > targetChunkSize) {
                flush()
                chunks.push(...this.splitOversized(para, targetChunkSize))
                continue
            }

            const testChunk = currentChunk
                ? currentChunk + '\n\n' + para
                : para

            if (this.estimateTokens(testChunk) > targetChunkSize) {
                flush()
                currentChunk = para
            } else {
                currentChunk = testChunk
            }
        }

        flush()

        return chunks
    }

    /**
     * Cut a single oversized paragraph into pieces that fit the token limit.
     *
     * @param {string} text - Paragraph to cut.
     * @param {number} tokenLimit - Maximum estimated tokens per piece.
     * @returns {string[]} Trimmed, non-empty pieces in original order.
     */
    splitOversized(text, tokenLimit) {
        // Start from a proportional guess, then shrink until the estimate fits.
        let size = Math.max(
            1,
            Math.floor((text.length * tokenLimit) / this.estimateTokens(text))
        )
        while (size > 1 && this.estimateTokens(text.slice(0, size)) > tokenLimit) {
            size -= 1
        }

        const pieces = []
        let start = 0
        while (start < text.length) {
            let end = Math.min(start + size, text.length)
            // Do not cut between the two halves of a surrogate pair.
            const lastUnit = text.charCodeAt(end - 1)
            if (
                end < text.length &&
                end - start > 1 &&
                lastUnit >= 0xd800 &&
                lastUnit <= 0xdbff
            ) {
                end -= 1
            }

            const piece = text.slice(start, end).trim()
            if (piece) {
                pieces.push(piece)
            }
            start = end
        }

        return pieces
    }

    /**
     * Map-reduce summary: extract a key point per paragraph, then combine.
     *
     * @param {string} text - Text to summarize.
     * @returns {Promise<*>} Whatever combineKeyPoints() returns.
     */
    async mapReduceSummarize(text) {
        // Map: one key point per paragraph, skipping empty results.
        const paragraphs = text.split(/\n\n+/)
        const keyPoints = []

        for (const para of paragraphs) {
            const point = await this.extractKeyPoint(para)
            if (point) {
                keyPoints.push(point)
            }
        }

        // Reduce: merge the key points.
        const summary = await this.combineKeyPoints(keyPoints)
        return summary
    }

    /**
     * Rough token estimate: half a token per UTF-16 code unit, rounded up.
     *
     * @param {string} text
     * @returns {number}
     */
    estimateTokens(text) {
        return Math.ceil(text.length * 0.5)
    }
}

亮点: 多模态AI应用

创新点

  • 结合代码生成和图片生成
  • 语音输入+AI处理+语音输出
  • 文本、代码、图片的联动处理
javascript
/**
 * Illustrative workflows that chain several AI capabilities together.
 *
 * NOTE(review): the collaborators (`speechRecognizer`, `codeGenerator`,
 * `speechSynthesizer`, `textProcessor`, `imageGenerator`) and the helper
 * methods called here are not defined in this snippet — presumably injected
 * or inherited; confirm.
 */
class MultiModalAIApp {
    /**
     * Voice-driven code generation: record, transcribe, generate, announce.
     *
     * @returns {Promise<*>} The value returned by codeGenerator.generateCode().
     */
    async voiceToCoding() {
        // Record the spoken requirement and turn it into text.
        const recording = await this.recordVoice()
        const requirement = await this.speechRecognizer.transcribe(recording)

        // Generate code from the transcribed requirement.
        const generated = await this.codeGenerator.generateCode(requirement)

        // Announce completion before handing the code back.
        await this.speechSynthesizer.speak('代码已生成,请查看')
        return generated
    }

    /**
     * Generate an article and one illustration per extracted scene.
     *
     * @param {*} topic - Passed to textProcessor.generateArticle().
     * @returns {*} The value returned by combineTextAndImages().
     */
    async generateIllustratedArticle(topic) {
        const articleText = await this.textProcessor.generateArticle(topic)
        const sceneList = await this.extractScenes(articleText)

        // Images are requested one at a time, in scene order.
        const illustrations = []
        for (const sceneDescription of sceneList) {
            illustrations.push(
                await this.imageGenerator.generateImage(sceneDescription)
            )
        }

        return this.combineTextAndImages(articleText, illustrations)
    }

    /**
     * Analyse a task, run every selected capability concurrently and merge
     * their outputs.
     *
     * @param {*} task - Passed to analyzeTask() and to each capability.
     * @returns {*} The value returned by mergeResults().
     */
    async collaborativeWorkflow(task) {
        const taskAnalysis = await this.analyzeTask(task)
        const selected = this.selectCapabilities(taskAnalysis)

        // Start all capabilities first, then wait for them together.
        const pending = selected.map((capability) => capability.execute(task))
        const outputs = await Promise.all(pending)

        return this.mergeResults(outputs)
    }
}