In [ ]:
import heapq
import itertools
from collections import Counter

import editdistance
import nltk

# Load the Brown corpus once; it supplies both the training data and
# the candidate vocabulary.
corpus = list(nltk.corpus.brown.words())
unigrams = corpus
bigrams = list(nltk.bigrams(corpus))

# Relative-frequency language models. Keeping them as Counters matters:
# a lookup of an unseen token/bigram returns 0 instead of raising.
unigram_lm = Counter({w: c / len(unigrams) for w, c in Counter(unigrams).items()})
bigram_lm = Counter({b: c / len(bigrams) for b, c in Counter(bigrams).items()})

# Vocabulary used to generate spelling-correction candidates.
dictionary = set(corpus)

# Exponent weighting the language model against the error model in score_sent.
lm_lambda = 0.001

def error_model(query, word):
    """Channel-model score in [0, 1]: 1 minus the normalized edit distance.

    Identical strings score 1.0; strings sharing no characters score 0.0.
    Normalizing by the longer string keeps the score length-independent.
    """
    longest = max(len(query), len(word))
    if longest == 0:
        # Two empty strings match perfectly; also avoids division by zero.
        return 1.0
    return 1 - editdistance.eval(query, word) / longest

def language_model(words):
    """Probability of a word sequence under an interpolated n-gram model.

    A single word is scored by the unigram model alone. Longer sequences
    multiply, for each adjacent pair, an even mix of the bigram and the
    unigram probability of the first word of the pair, then fold in the
    unigram probability of the final word.

    NOTE(review): bigram_lm holds joint pair frequencies, not conditional
    P(next | prev) — presumably intentional for this demo; verify if the
    model is meant to be a proper interpolated Markov chain.
    """
    if len(words) == 1:
        return unigram_lm[words[0]]
    prob = 1
    for prev, cur in zip(words, words[1:]):
        prob *= bigram_lm[prev, cur] * 0.5 + unigram_lm[prev] * 0.5
    prob *= unigram_lm[words[-1]]
    return prob

def score_sent(sent):
    """Score a candidate sentence: product of per-word error-model scores
    times the language-model probability raised to lm_lambda.

    `sent` is a sequence of (word, error_score) pairs as produced by the
    candidate-generation step.
    """
    score = 1
    # Unpack directly instead of indexing with range(len(...)); this also
    # stops the loop variable from leaking past the loop.
    for _word, error_score in sent:
        score *= error_score
    # lm_lambda < 1 damps the language model's influence on the ranking.
    score *= language_model([word for word, _score in sent]) ** lm_lambda
    return score

# Interactive loop: read a query, propose per-token candidates, then rank
# whole-sentence combinations by error model x language model.
while True:
    query = input('\nquery: ').split()

    # For each query token keep the 10 dictionary words with the highest
    # error-model score. heapq.nlargest is O(n log 10) over the vocabulary,
    # versus fully sorting it, and is documented as equivalent to
    # sorted(..., reverse=True)[:10].
    candidate_words = []
    for token in query:
        scored = ((word, error_model(token, word)) for word in dictionary)
        candidate_words.append(heapq.nlargest(10, scored, key=lambda x: x[1]))

    print('\ncandidate words:')
    for c in candidate_words:
        print(c)

    # Cartesian product: 10 ** len(query) candidate sentences.
    candidate_sentences = list(itertools.product(*candidate_words))
    sents_with_scores = [(sent, score_sent(sent)) for sent in candidate_sentences]

    print('\ndid you mean...')
    for sent, score in heapq.nlargest(10, sents_with_scores, key=lambda x: x[1]):
        print(score, '\t', ' '.join(word for word, score in sent))