validate-ddl.mjs 13.1 KB
// lib/validate-ddl.mjs — docs/03 表格 ↔ DDL(V1.sql)一致性 5 维校验
// 替换 db-init/scripts/validate.sh(跨平台、纯 Node、零外部依赖)。
//
// 用法(CLI):node lib/validate-ddl.mjs <docs03Path> <ddlPath>
//   退出码 0 = 一致;1 = 存在差异(diff 明细打印到 stderr);2 = 用法/路径错误。
// 程序内:import { parseDocsTables, parseDDL, diffSchema } from './validate-ddl.mjs'
//
// 5 维 diff:
//   1) 表集合(missingTables / extraTables)
//   2) 列名(columnMismatches,side: 'docs'|'ddl')
//   3) 列类型(typeMismatches)
//   4) 索引(indexMismatches,side: 'docs'|'ddl')
//   5) 外键(foreignKeyMismatches,side: 'docs'|'ddl')
//
// 数据结构(解析结果):Map<tableName, {
//   columns: Map<colName, type>, indexes: Set<string>, foreignKeys: Set<string> }>

// ── 解析 docs/03 markdown 表定义 ─────────────────────────────────
// 约定:每张表一节,节标题形如  ## `表名`  或  ## `表名` — 业务含义
// 节内的 markdown 表格首列是列名(可含反引号),次列是类型。
// 跳过表头行(列/字段/类型等标签)与分隔行(---)。
// 形如「## 一、全局约定」这类非反引号标题不视为表。
export function parseDocsTables(text) {
  const tables = new Map()
  const lines = String(text).split('\n')
  // 反引号包裹的表名:## `name` 或 ## `name` — purpose
  const headerRe = /^##\s+`([^`]+)`/
  let current = null // { columns: Map }

  for (const raw of lines) {
    const line = raw.replace(/\r$/, '')
    const h2 = line.match(headerRe)
    if (h2) {
      current = { columns: new Map(), indexes: new Set(), foreignKeys: new Set() }
      tables.set(h2[1].trim(), current)
      continue
    }
    // 任何其它二级(或更高)非反引号标题 → 结束当前表块(如 ## 一、全局约定)
    if (/^##\s/.test(line) && !headerRe.test(line)) {
      current = null
      continue
    }
    if (!current) continue
    // markdown 表格行:以 | 开头
    if (!/^\s*\|/.test(line)) continue
    const cells = splitMarkdownRow(line)
    if (cells.length < 2) continue
    const name = stripTicks(cells[0])
    const type = stripTicks(cells[1])
    // 跳过分隔行(---)、表头标签行、空名行
    if (!name) continue
    if (isSeparatorCell(name)) continue
    if (isHeaderLabel(name)) continue
    current.columns.set(name, type)
  }
  return tables
}

// ── 解析 CREATE TABLE DDL ────────────────────────────────────────
// 提取每个 CREATE TABLE 的:列名→类型、索引名集合、外键描述集合。
export function parseDDL(text) {
  const tables = new Map()
  const src = String(text)
  // 抓取 CREATE TABLE <name> ( <body> ) ;name 可带反引号;body 到匹配的右括号
  const createRe = /CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?([A-Za-z0-9_]+)`?\s*\(/gi
  let m
  while ((m = createRe.exec(src)) !== null) {
    const tableName = m[1]
    const bodyStart = createRe.lastIndex - 1 // 指向 '('
    const body = extractBalancedParens(src, bodyStart)
    if (body == null) continue
    const parsed = parseTableBody(body)
    tables.set(tableName, parsed)
    // 继续从 body 之后扫描
    createRe.lastIndex = bodyStart + body.length + 2
  }
  return tables
}

function parseTableBody(body) {
  const columns = new Map()
  const indexes = new Set()
  const foreignKeys = new Set()
  for (const itemRaw of splitTopLevelCommas(body)) {
    const item = itemRaw.trim()
    if (!item) continue
    const upper = item.toUpperCase()

    // 外键约束(可带前缀 CONSTRAINT <name>)
    if (/\bFOREIGN\s+KEY\b/i.test(item)) {
      const fk = item.match(/FOREIGN\s+KEY\s*\(([^)]*)\)\s*REFERENCES\s+`?([A-Za-z0-9_]+)`?\s*\(([^)]*)\)/i)
      if (fk) {
        const fromCols = fk[1].replace(/`/g, '').replace(/\s+/g, '')
        const refTable = fk[2]
        const toCols = fk[3].replace(/`/g, '').replace(/\s+/g, '')
        foreignKeys.add(`${fromCols}->${refTable}(${toCols})`)
      } else {
        foreignKeys.add(item)
      }
      continue
    }

    // PRIMARY KEY (...)
    if (/^PRIMARY\s+KEY/i.test(item)) {
      indexes.add('PRIMARY')
      continue
    }
    // UNIQUE [KEY|INDEX] <name> (...) / KEY <name> (...) / INDEX <name> (...)
    if (/^(UNIQUE\s+(KEY|INDEX)|KEY|INDEX|FULLTEXT\s+KEY|SPATIAL\s+KEY)\b/i.test(item)) {
      const nameMatch = item.match(/^(?:UNIQUE\s+(?:KEY|INDEX)|KEY|INDEX|FULLTEXT\s+KEY|SPATIAL\s+KEY)\s+`?([A-Za-z0-9_]+)`?/i)
      indexes.add(nameMatch ? nameMatch[1] : item)
      continue
    }
    // CONSTRAINT <name> 但非外键(如 UNIQUE/CHECK 约束)→ 当索引/约束记
    if (/^CONSTRAINT\b/i.test(upper)) {
      const cn = item.match(/^CONSTRAINT\s+`?([A-Za-z0-9_]+)`?/i)
      indexes.add(cn ? cn[1] : item)
      continue
    }
    // CHECK (...)
    if (/^CHECK\b/i.test(upper)) continue

    // 普通列:<name> <type> ...   name 可带反引号;type 取到第一个属性关键字/逗号前
    const col = item.match(/^`?([A-Za-z0-9_]+)`?\s+(.+)$/s)
    if (!col) continue
    const name = col[1]
    const type = extractType(col[2])
    columns.set(name, type)
  }
  return { columns, indexes, foreignKeys }
}

// 从列定义剩余部分提取类型(含括号内长度),到下一个属性关键字前停止。
function extractType(rest) {
  const s = rest.trim()
  // 类型形如 varchar(100) / decimal(10,2) / int unsigned / bigint
  const m = s.match(/^([A-Za-z]+(?:\s+(?:unsigned|signed|zerofill))*)\s*(\([^)]*\))?/i)
  if (!m) return s.split(/\s+/)[0]
  let type = m[1].trim()
  // 仅保留基础类型词 + 括号;丢弃 unsigned/signed 这类修饰以贴近 docs/03 写法(docs 一般只写基础类型)
  const base = type.split(/\s+/)[0]
  return base + (m[2] ? m[2].replace(/\s+/g, '') : '')
}

// ── 5 维 diff ────────────────────────────────────────────────────
export function diffSchema(docsTables, ddlTables) {
  const diff = {
    missingTables: [],       // docs 有、DDL 无
    extraTables: [],         // DDL 有、docs 无
    columnMismatches: [],    // { table, column, side: 'docs'|'ddl' }
    typeMismatches: [],      // { table, column, docsType, ddlType }
    indexMismatches: [],     // { table, index, side: 'docs'|'ddl' }
    foreignKeyMismatches: [],// { table, foreignKey, side: 'docs'|'ddl' }
    hasDiff: false,
  }

  const docNames = new Set(docsTables.keys())
  const ddlNames = new Set(ddlTables.keys())

  for (const t of docNames) if (!ddlNames.has(t)) diff.missingTables.push(t)
  for (const t of ddlNames) if (!docNames.has(t)) diff.extraTables.push(t)
  diff.missingTables.sort()
  diff.extraTables.sort()

  // 仅对共有表做列/类型/索引/外键比对
  for (const t of [...docNames].filter(n => ddlNames.has(n)).sort()) {
    const d = docsTables.get(t)
    const s = ddlTables.get(t)

    // 维度 2/3:列名 + 列类型
    for (const [col, dType] of d.columns) {
      if (!s.columns.has(col)) {
        diff.columnMismatches.push({ table: t, column: col, side: 'docs' })
      } else {
        const sType = s.columns.get(col)
        if (!typesEqual(dType, sType)) {
          diff.typeMismatches.push({ table: t, column: col, docsType: dType, ddlType: sType })
        }
      }
    }
    for (const col of s.columns.keys()) {
      if (!d.columns.has(col)) diff.columnMismatches.push({ table: t, column: col, side: 'ddl' })
    }

    // 维度 4:索引
    const dIdx = d.indexes || new Set()
    const sIdx = s.indexes || new Set()
    for (const ix of dIdx) if (!sIdx.has(ix)) diff.indexMismatches.push({ table: t, index: ix, side: 'docs' })
    for (const ix of sIdx) if (!dIdx.has(ix)) diff.indexMismatches.push({ table: t, index: ix, side: 'ddl' })

    // 维度 5:外键
    const dFk = d.foreignKeys || new Set()
    const sFk = s.foreignKeys || new Set()
    for (const fk of dFk) if (!sFk.has(fk)) diff.foreignKeyMismatches.push({ table: t, foreignKey: fk, side: 'docs' })
    for (const fk of sFk) if (!dFk.has(fk)) diff.foreignKeyMismatches.push({ table: t, foreignKey: fk, side: 'ddl' })
  }

  diff.hasDiff = diff.missingTables.length > 0 || diff.extraTables.length > 0 ||
    diff.columnMismatches.length > 0 || diff.typeMismatches.length > 0 ||
    diff.indexMismatches.length > 0 || diff.foreignKeyMismatches.length > 0
  return diff
}

// ── 工具函数 ─────────────────────────────────────────────────────
function stripTicks(s) {
  return String(s).replace(/`/g, '').trim()
}

function splitMarkdownRow(line) {
  // 去掉首尾管道再按 | 切分
  let t = line.trim()
  if (t.startsWith('|')) t = t.slice(1)
  if (t.endsWith('|')) t = t.slice(0, -1)
  return t.split('|').map(c => c.trim())
}

function isSeparatorCell(cell) {
  // 形如 --- / :--- / ---: / :---:
  return /^:?-{1,}:?$/.test(cell.trim())
}

function isHeaderLabel(cell) {
  // 表头标签:列 / 字段 / 字段名 / 类型 / 列名(避免把表头行当列)
  return ['列', '字段', '字段名', '列名', '类型', 'name', 'type', 'column'].includes(cell.trim())
}

// 提取从 openIdx(指向 '(')开始的平衡括号内部内容(不含最外层括号)。
function extractBalancedParens(src, openIdx) {
  if (src[openIdx] !== '(') return null
  let depth = 0
  for (let i = openIdx; i < src.length; i++) {
    const ch = src[i]
    if (ch === '(') depth++
    else if (ch === ')') {
      depth--
      if (depth === 0) return src.slice(openIdx + 1, i)
    }
  }
  return null
}

// 在顶层(括号深度 0)按逗号切分 DDL body,保护 varchar(100) / decimal(10,2) 内的逗号。
function splitTopLevelCommas(body) {
  const out = []
  let depth = 0
  let buf = ''
  for (let i = 0; i < body.length; i++) {
    const ch = body[i]
    if (ch === '(') { depth++; buf += ch }
    else if (ch === ')') { depth--; buf += ch }
    else if (ch === ',' && depth === 0) { out.push(buf); buf = '' }
    else buf += ch
  }
  if (buf.trim()) out.push(buf)
  return out
}

// 类型相等比较:大小写不敏感、忽略空白。
function typesEqual(a, b) {
  const norm = (x) => String(x).toLowerCase().replace(/\s+/g, '')
  return norm(a) === norm(b)
}

// ── 报告(供 CLI 与外部复用)────────────────────────────────────
export function formatDiff(diff) {
  const out = []
  if (diff.missingTables.length) {
    out.push('=== 维度1 表集合:docs/03 有但 DDL 无 ===')
    for (const t of diff.missingTables) out.push(`  - ${t}`)
  }
  if (diff.extraTables.length) {
    out.push('=== 维度1 表集合:DDL 有但 docs/03 无 ===')
    for (const t of diff.extraTables) out.push(`  - ${t}`)
  }
  if (diff.columnMismatches.length) {
    out.push('=== 维度2 列名 ===')
    for (const m of diff.columnMismatches) {
      out.push(`  - ${m.table}.${m.column} 仅在 ${m.side === 'docs' ? 'docs/03' : 'DDL'}`)
    }
  }
  if (diff.typeMismatches.length) {
    out.push('=== 维度3 列类型 ===')
    for (const m of diff.typeMismatches) {
      out.push(`  - ${m.table}.${m.column}: docs/03=${m.docsType} ≠ DDL=${m.ddlType}`)
    }
  }
  if (diff.indexMismatches.length) {
    out.push('=== 维度4 索引 ===')
    for (const m of diff.indexMismatches) {
      out.push(`  - ${m.table} 索引 ${m.index} 仅在 ${m.side === 'docs' ? 'docs/03' : 'DDL'}`)
    }
  }
  if (diff.foreignKeyMismatches.length) {
    out.push('=== 维度5 外键 ===')
    for (const m of diff.foreignKeyMismatches) {
      out.push(`  - ${m.table} 外键 ${m.foreignKey} 仅在 ${m.side === 'docs' ? 'docs/03' : 'DDL'}`)
    }
  }
  return out.join('\n')
}

// ── CLI 入口 ─────────────────────────────────────────────────────
// 用 pathToFileURL 做比较:路径含空格/非 ASCII 时 import.meta.url 是百分号编码,
// 而 process.argv[1] 是原始路径,直接 `file://${argv1}` 拼接永远不相等。
const { pathToFileURL } = await import('node:url')
const isCliEntry = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href
if (isCliEntry) {
  const { readFileSync, existsSync } = await import('node:fs')
  const [docsPath, ddlPath] = process.argv.slice(2)
  if (!docsPath || !ddlPath) {
    console.error('用法: node lib/validate-ddl.mjs <docs/03 path> <V1.sql path>')
    process.exit(2)
  }
  if (!existsSync(docsPath)) { console.error(`validate-ddl: docs 不存在: ${docsPath}`); process.exit(2) }
  if (!existsSync(ddlPath)) { console.error(`validate-ddl: DDL 不存在: ${ddlPath}`); process.exit(2) }

  const docsTables = parseDocsTables(readFileSync(docsPath, 'utf8'))
  const ddlTables = parseDDL(readFileSync(ddlPath, 'utf8'))
  const diff = diffSchema(docsTables, ddlTables)

  if (diff.hasDiff) {
    console.error(formatDiff(diff))
    process.exit(1)
  }
  console.log('validate-ddl: ✓ docs/03 与 DDL 在 5 维(表/列/类型/索引/外键)一致')
  process.exit(0)
}