From 2358a2bae446328adfcec0d4a1668fa6844e703a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20J=C3=B6rns?= Date: Tue, 20 May 2025 11:45:14 +0200 Subject: [PATCH] conf.py: tweak SearchEnglish to be hyphen-friendly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This modifies the default indexer split() and js splitQuery() methods to support searching for words with hyphens. While this might not be an ideal, rock-solid, and fully future-proof solution, it at least allows searching for strings including hyphens, such as 'bitbake-layers', 'send-error-report', or 'oe-core'. Below is a slightly more detailed explanation of the two modifications done: 1) The default split regex in the sphinx-doc SearchLanguage base class is: | _word_re = re.compile(r'\w+') which we simply extend to include hyphens '-'. This will result in a searchindex.js that contains words with hyphens, too. 2) The 'searchtool.js' code notes for its splitQuery() implementation: | /** | * Default splitQuery function. Can be overridden in ``sphinx.search`` with a | * custom function per language. | * | * The regular expression works by splitting the string on consecutive characters | * that are not Unicode letters, numbers, underscores, or emoji characters. | * This is the same as ``\W+`` in Python, preserving the surrogate pair area. | */ | if (typeof splitQuery === "undefined") { | var splitQuery = (query) => query | .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) | .filter(term => term) // remove remaining empty strings | } The hook for this is documented in the sphinx-docs 'SearchLanguage' base class. | .. attribute:: js_splitter_code | | Return splitter function of JavaScript version. The function should be | named as ``splitQuery``. And it should take a string and return list of | strings. | | .. versionadded:: 3.0 We use this to define a custom splitQuery() function whose split regex is the default one with the hyphen '-' added to the set of word characters, so that hyphens no longer act as separators. 
We extend SearchEnglish (which extends SearchLanguage) here to retain the stemmer code and stopwords for English. [YOCTO #14534] (From yocto-docs rev: d4a98ee19e0cbd6be96923dc72faee143a6b294b) Signed-off-by: Enrico Jörns Signed-off-by: Antonin Godard Signed-off-by: Richard Purdie --- documentation/conf.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/documentation/conf.py b/documentation/conf.py index 2aceeb8e79..ad60d91139 100644 --- a/documentation/conf.py +++ b/documentation/conf.py @@ -13,6 +13,7 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # import os +import re import sys import datetime try: @@ -173,6 +174,24 @@ latex_elements = { 'preamble': '\\usepackage[UTF8]{ctex}\n\\setcounter{tocdepth}{2}', } + +from sphinx.search import SearchEnglish +from sphinx.search import languages +class DashFriendlySearchEnglish(SearchEnglish): + + # Accept words that can include hyphens + _word_re = re.compile(r'[\w\-]+') + + js_splitter_code = """ +function splitQuery(query) { + return query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}-]+/gu) + .filter(term => term.length > 0); +} +""" + +languages['en'] = DashFriendlySearchEnglish + # Make the EPUB builder prefer PNG to SVG because of issues rendering Inkscape SVG from sphinx.builders.epub3 import Epub3Builder Epub3Builder.supported_image_types = ['image/png', 'image/gif', 'image/jpeg']