from folkugat_web.config import search as config
from folkugat_web.model.search import NGrams
from folkugat_web.utils import groupby


def get_all_ngrams(text: str) -> list[tuple[int, str]]:
    """Enumerate every contiguous substring of *text* tagged with its length.

    Lengths run from ``config.MIN_NGRAM_LENGTH`` up to ``len(text)``
    inclusive; non-positive lengths are skipped.

    Returns:
        ``(length, substring)`` pairs, ordered by length first and by start
        offset second. The list is empty when *text* is shorter than the
        smallest allowed length.
    """
    text_length = len(text)
    pairs: list[tuple[int, str]] = []
    for size in range(config.MIN_NGRAM_LENGTH, text_length + 1):
        # A non-positive size can only occur if the configured minimum is
        # itself non-positive; such sizes never yield an n-gram.
        if size <= 0:
            continue
        for start in range(text_length - size + 1):
            pairs.append((size, text[start:start + size]))
    return pairs


def get_text_ngrams(*texts: str) -> NGrams:
    """Collect the n-grams of every word found in *texts*.

    Each text is split on whitespace and every resulting word is lowercased
    before its n-grams are generated with ``get_all_ngrams``. The pairs are
    then grouped by n-gram length, with duplicates inside a group removed.

    Returns:
        ``dict(groupby(...))`` over the ``(length, substring)`` pairs.
        NOTE(review): presumably a mapping from length to a list of distinct
        substrings; this relies on ``folkugat_web.utils.groupby`` yielding
        ``(key, group_fn(group))`` pairs -- confirm against that helper.
        The order inside each list comes from a ``set`` and is unspecified.
    """
    words = [token.lower() for text in texts for token in text.split()]
    tagged = [pair for word in words for pair in get_all_ngrams(word)]
    grouped = groupby(
        tagged,
        key_fn=lambda pair: pair[0],
        # Going through a set drops repeated substrings of the same length.
        group_fn=lambda members: list({fragment for _, fragment in members}),
    )
    return dict(grouped)