Bradley Kirton's Blog

Published on March 29, 2024

Go home

Token highlighting for web search

This snippet wraps the tokens of the provided text in span tags and applies the Tailwind font-bold class to matched tokens.

import dataclasses
import re

from django.utils.html import format_html


@dataclasses.dataclass
class Token:
    """
    Models a token: one piece of scanned text together with its category.
    """

    # Category of the piece. highlight_matched_tokens produces the values
    # "match", "whitespace" and "nonmatch".
    token_type: str
    # The substring of the scanned text that this token covers.
    value: str


def highlight_matched_tokens(text: str, matches: set[str]) -> str:
    """
    Apply highlighting to the matches found within the provided text.

    The text is scanned left to right and split into three kinds of pieces:
    matched tokens, runs of whitespace and runs of other non-whitespace
    characters. Every piece is wrapped in a ``<span>``; matched pieces also
    receive the ``font-bold`` class. Matching is case-insensitive. Because a
    non-matching run consumes all characters up to the next whitespace, a
    match is only recognised where the scanner starts a new piece (for
    example at the start of the text or directly after whitespace).

    Each piece is HTML-escaped before it is wrapped, so markup contained in
    ``text`` is rendered literally instead of being injected into the page.

    :param text: A string of text to be highlighted.
    :param matches: A set of tokens to highlight. Empty strings are ignored.
    :returns: The text with the appropriate styling applied, marked as safe
        HTML.
    """
    # Imported here because the module's top-level imports only provide
    # format_html.
    from django.utils.safestring import mark_safe

    def make_token(token_type: str):
        # re.Scanner calls each action as action(scanner, matched_text).
        return lambda _scanner, value: Token(token_type, value)

    # Drop empty strings: an empty pattern produces a zero-length match, at
    # which point re.Scanner stops and the rest of the text would be lost.
    # Try longer matches first so that "foobar" wins over "foo" regardless of
    # the set's iteration order; the secondary key keeps the order stable.
    candidates = sorted(
        (match for match in matches if match),
        key=lambda match: (-len(match), match),
    )

    lexicon = [(re.escape(match), make_token("match")) for match in candidates]
    # Fallbacks come last so that matches take precedence; together they
    # cover every character, so the scanner never leaves a remainder.
    lexicon += [
        (r"\s+", make_token("whitespace")),
        (r"\S+", make_token("nonmatch")),
    ]

    # re.Scanner is an undocumented part of the re module, hence the ignore.
    scanner = re.Scanner(lexicon, re.UNICODE | re.IGNORECASE)  # type: ignore
    tokens, _ = scanner.scan(text)

    # format_html escapes token.value, so each span is safe on its own and
    # the joined result can be marked safe as a whole.
    spans = [
        format_html('<span class="font-bold">{}</span>', token.value)
        if token.token_type == "match"
        else format_html("<span>{}</span>", token.value)
        for token in tokens
    ]
    return mark_safe("".join(spans))