Initial commit
This commit is contained in:
19
folkugat_web/services/ngrams.py
Normal file
19
folkugat_web/services/ngrams.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import operator
|
||||
|
||||
from folkugat_web.config import search as config
|
||||
from folkugat_web.utils import groupby
|
||||
|
||||
|
||||
def get_all_ngrams(text):
    """Return every contiguous slice of ``text`` paired with its length.

    Slice lengths run from ``config.MIN_NGRAM_LENGTH`` up to ``len(text)``
    inclusive; non-positive lengths are skipped.  The result is a list of
    ``(length, slice)`` tuples ordered by length first and then by start
    position.  If ``text`` is shorter than the minimum length the list is
    empty.
    """
    total = len(text)
    ngrams = []
    for size in range(config.MIN_NGRAM_LENGTH, total + 1):
        # A non-positive configured minimum must not yield empty slices.
        if not size > 0:
            continue
        for start in range(total - size + 1):
            ngrams.append((size, text[start:start + size]))
    return ngrams
|
||||
|
||||
def get_text_ngrams(*texts):
    """Collect the n-grams of every word found in ``texts``, grouped by length.

    Each text is split on whitespace and every word is lower-cased before
    its n-grams are generated with ``get_all_ngrams``.  The resulting
    ``(length, ngram)`` pairs are handed to the project ``groupby`` helper,
    keyed on the length, and each group is reduced to a list of distinct
    n-gram strings (the order inside a list is not defined, since it goes
    through a ``set``).

    Returns the ``dict`` built from whatever ``groupby`` yields --
    presumably ``{length: [ngram, ...]}``; confirm against
    ``folkugat_web.utils.groupby``.
    """
    words = []
    for text in texts:
        for word in text.split():
            words.append(word.lower())

    word_ngrams = []
    for word in words:
        word_ngrams.extend(get_all_ngrams(word))

    def distinct_ngrams(group):
        # Drop duplicates produced by repeated words or repeated substrings.
        return list(set(item[1] for item in group))

    by_length = operator.itemgetter(0)
    grouped = groupby(word_ngrams, key_fn=by_length, group_fn=distinct_ngrams)
    return dict(grouped)
|
||||
Reference in New Issue
Block a user