From 047d358b89a6c15a9d25a3428303717e59ff3bac Mon Sep 17 00:00:00 2001 From: zichun Date: Sat, 9 May 2026 16:43:53 +0800 Subject: [PATCH] search: keep code identifiers whole (drop CamelCase splitter) --- en/mkdocs.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/en/mkdocs.yml b/en/mkdocs.yml index bbff20b..b355804 100644 --- a/en/mkdocs.yml +++ b/en/mkdocs.yml @@ -36,13 +36,17 @@ theme: icon: material/brightness-4 name: Switch to light mode -# CJK-aware search: regex separator includes word boundaries plus CJK punctuation; -# for true Chinese tokenization, jieba is invoked by the catalog generator at index time +# Search separator: whitespace + common punctuation + dots + HTML entities + CJK punctuation. +# CamelCase splitter removed — code-identifier searches like "BusinessBaseServiceImpl" or +# "MyBatis" now match the whole identifier instead of being chopped into [Business, Base, +# Service, Impl] (which produced 1.9k spurious matches and lost the ranked exact hit). +# Lunr supports wildcard suffixes (e.g. `Service*`) for partial-token search if needed. +# For true Chinese tokenization, jieba is invoked by the catalog generator at index time # (see scripts/gen_catalog.py). Mid-term improvement: a custom mkdocs plugin to feed # jieba-segmented terms into lunr. plugins: - search: - separator: '[\s\-,;:!=\[\]()"`/]+|(?!\b)(?=[A-Z][a-z])|\.(?!\d)|&[lg]t;|[\u3000-\u303f\uff00-\uffef]' + separator: '[\s\-,;:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|[\u3000-\u303f\uff00-\uffef]' markdown_extensions: - admonition -- libgit2 0.22.2