apps/webgl/frontend/src/lib/extract.ts

/**
 * Extracts a runnable HTML document from raw LLM output.
 *
 * The model (gemma) usually emits bare HTML, but occasionally it:
 *  - wraps the document in a ```html ... ``` markdown fence, or
 *  - adds a sentence or two of chatter before/after the document.
 *
 * The extraction is best-effort and works on truncated (still streaming)
 * output as well: an opening fence without its closing fence is tolerated.
 *
 * @param raw - Raw text produced by the model; may be empty.
 * @returns The extracted HTML, trimmed. An empty string when nothing that
 *   looks like markup could be found, so the caller can report the failure.
 */
export function extractHtml(raw: string): string {
  if (!raw) return ''
  let s = raw.trim()

  // 1. Prefer the content of a markdown code fence. The language tag is
  //    matched case-insensitively so ```Html / ```HTML behave like ```html.
  const fence = s.match(/```(?:html)?\s*\n([\s\S]*?)```/i)
  const fenced = fence?.[1]?.trim()
  if (fenced) {
    s = fenced
  } else if (s.startsWith('```')) {
    // Unclosed fence at the very start (stream truncated / only the opening
    // line arrived): drop the opening fence line and any dangling closer.
    s = s.replace(/^```[a-zA-Z]*\s*\n?/, '').replace(/```\s*$/, '').trim()
  } else {
    // Unclosed fence after some chatter: drop everything up to and including
    // the opening fence line. Only a fence that sits on its own line is
    // accepted, so inline backticks in prose are left alone.
    const open = s.match(/\n```(?:html)?[ \t]*\r?\n/i)
    if (open && open.index !== undefined) {
      s = s.slice(open.index + open[0].length).replace(/```\s*$/, '').trim()
    }
  }

  // 2. Start at the first doctype or <html tag, discarding leading chatter.
  //    Any `<!doctype html ...>` form is accepted (extra whitespace, legacy
  //    identifiers) so the doctype is kept instead of being cut off.
  const startMatch = s.match(/<!doctype\s+html[^>]*>|<html[\s>]/i)
  if (startMatch && startMatch.index !== undefined && startMatch.index > 0) {
    s = s.slice(startMatch.index)
  }

  // 3. End at the last </html>, discarding trailing chatter. A greedy,
  //    case-insensitive regex is used rather than lower-casing the string:
  //    toLowerCase() can change the string length (e.g. U+0130), which would
  //    make indices computed on the lowered copy wrong for the original.
  const upToClose = s.match(/^[\s\S]*<\/html>/i)
  if (upToClose) {
    s = upToClose[0]
  }

  // Fallback sanity check: the result must contain at least something that
  // looks like a tag; otherwise treat it as "nothing extracted".
  return /<[a-z!][\s\S]*>/i.test(s) ? s.trim() : ''
}
webgl: 新 app — 说需求→gemma 流式生成可交互 WebGL 小程序左 sidebar 聊天说需求，右主区运行/代码两 tab。后端 SSE 流式把 gemma 生成的自包含纯原生 WebGL HTML 一段段回吐，前端实时显示代码、写完丢进沙箱 iframe 跑。完全无状态（迭代靠前端回传 current_code）。给儿子体验 WebGL。 2026-06-03 22:21:33 +01:00			`/**`
			`* 从 LLM 生成的原始文本里抠出可运行的 HTML 文档。`
			`*`
			`* gemma 大多数时候会乖乖只吐 HTML，但偶尔会：`
			* - 用 ```html ... ``` 围栏包起来
			`* - 在前面/后面加一两句寒暄`
			`* 这里尽量稳地把中间那坨 HTML 提出来。提不出来就返回空串（让调用方报错）。`
			`*/`
			`export function extractHtml(raw: string): string {`
			`if (!raw) return ''`
			`let s = raw.trim()`

			`// 1. 优先取 markdown 代码围栏里的内容`
			const fence = s.match(/```(?:html\|HTML)?\s\n([\s\S]?)```/)
			`if (fence && fence[1].trim()) {`
			`s = fence[1].trim()`
			} else if (s.startsWith('```')) {
			`// 围栏没闭合（流式被截断 / 只有开头）—— 去掉开头那行围栏`
			s = s.replace(/^```[a-zA-Z]\s\n?/, '').replace(/```\s*$/, '').trim()
			`}`

			`// 2. 从第一个 <!doctype 或 <html 开始截（丢掉前面的寒暄）`
			`const startMatch = s.match(/<!doctype html>\|<html[\s>]/i)`
			`if (startMatch && startMatch.index !== undefined && startMatch.index > 0) {`
			`s = s.slice(startMatch.index)`
			`}`

			`// 3. 截到最后一个 </html> 结束（丢掉后面的寒暄）`
			`const endIdx = s.toLowerCase().lastIndexOf('</html>')`
			`if (endIdx !== -1) {`
			`s = s.slice(0, endIdx + '</html>'.length)`
			`}`

			`// 兜底：至少得像个 HTML（有标签），否则当作没提到`
			`return /<[a-z!][\s\S]*>/i.test(s) ? s.trim() : ''`
			`}`