diff --git a/en/mkdocs.yml b/en/mkdocs.yml index bbff20b..b355804 100644 --- a/en/mkdocs.yml +++ b/en/mkdocs.yml @@ -36,13 +36,17 @@ theme: icon: material/brightness-4 name: Switch to light mode -# CJK-aware search: regex separator includes word boundaries plus CJK punctuation; -# for true Chinese tokenization, jieba is invoked by the catalog generator at index time +# Search separator: whitespace + common punctuation + dots + HTML entities + CJK punctuation. +# CamelCase splitter removed — code-identifier searches like "BusinessBaseServiceImpl" or +# "MyBatis" now match the whole identifier instead of being chopped into [Business, Base, +# Service, Impl] (which produced 1.9k spurious matches and lost the ranked exact hit). +# Lunr supports wildcard suffixes (e.g. `Service*`) for partial-token search if needed. +# For true Chinese tokenization, jieba is invoked by the catalog generator at index time # (see scripts/gen_catalog.py). Mid-term improvement: a custom mkdocs plugin to feed # jieba-segmented terms into lunr. plugins: - search: - separator: '[\s\-,;:!=\[\]()"`/]+|(?!\b)(?=[A-Z][a-z])|\.(?!\d)|&[lg]t;|[\u3000-\u303f\uff00-\uffef]' + separator: '[\s\-,;:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|[\u3000-\u303f\uff00-\uffef]' markdown_extensions: - admonition