From 047d358b89a6c15a9d25a3428303717e59ff3bac Mon Sep 17 00:00:00 2001
From: zichun <zhuzichunhogan@gmail.com>
Date: Sat, 9 May 2026 16:43:53 +0800
Subject: [PATCH] search: keep code identifiers whole (drop CamelCase splitter)

---
 en/mkdocs.yml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/en/mkdocs.yml b/en/mkdocs.yml
index bbff20b..b355804 100644
--- a/en/mkdocs.yml
+++ b/en/mkdocs.yml
@@ -36,13 +36,17 @@ theme:
         icon: material/brightness-4
         name: Switch to light mode
 
-# CJK-aware search: regex separator includes word boundaries plus CJK punctuation;
-# for true Chinese tokenization, jieba is invoked by the catalog generator at index time
+# Search separator: whitespace, common punctuation, dots not followed by a digit, &lt;/&gt;, CJK punctuation.
+# CamelCase splitter removed — code-identifier searches like "BusinessBaseServiceImpl" or
+# "MyBatis" now match the whole identifier instead of being chopped into [Business, Base,
+# Service, Impl] (which produced 1.9k spurious matches and lost the ranked exact hit).
+# Lunr supports wildcard suffixes (e.g. `Service*`) for partial-token search if needed.
+# For true Chinese tokenization, jieba is invoked by the catalog generator at index time
 # (see scripts/gen_catalog.py). Mid-term improvement: a custom mkdocs plugin to feed
 # jieba-segmented terms into lunr.
 plugins:
   - search:
-      separator: '[\s\-,;:!=\[\]()"`/]+|(?!\b)(?=[A-Z][a-z])|\.(?!\d)|&[lg]t;|[\u3000-\u303f\uff00-\uffef]'
+      separator: '[\s\-,;:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|[\u3000-\u303f\uff00-\uffef]'
 
 markdown_extensions:
   - admonition
--
libgit2 0.22.2