Source code for cardbuilder.input.word

from enum import Enum
from typing import Optional, List, Iterable

from cardbuilder.common import Language
from cardbuilder.common.util import Shared
from cardbuilder.exceptions import CardBuilderUsageException


class WordForm(Enum):
    PHONETICALLY_EQUIVALENT = 1
    LEMMA = 2


[docs]class Word: """ The class representing words we want to build flashcards for. One word can correspond to multiple string forms of the same word, all of which can be used for lookup. """ form_map = { WordForm.PHONETICALLY_EQUIVALENT: { Language.ENGLISH: lambda input_form: input_form.lower(), #TODO: false advertising (what if it's uppercase in the dictionary?) Language.JAPANESE: lambda input_form: ''.join(x['hira'] for x in Shared.get_kakasi().convert(input_form)) }, WordForm.LEMMA: { Language.ENGLISH: lambda input_form: Shared.get_spacy(Language.ENGLISH)(input_form)[0].lemma_, Language.JAPANESE: lambda input_form: Shared.get_spacy(Language.JAPANESE)(input_form)[0].lemma_, } } def __init__(self, input_form: str, lang: Language, additional_forms: Optional[List[WordForm]] = None): """ Args: input_form: the original form of the word found in user input or a WordList lang: the language of the word. additional_forms: the types of other forms this word should include. """ self.input_form = input_form self.lang = lang if additional_forms is not None: self.additional_forms = additional_forms else: self.additional_forms = [] self._formset = [self.input_form] # instantiate a list to preserve order, but use it like a set for form in self.additional_forms: if self.lang not in self.form_map[form]: raise CardBuilderUsageException('Unsupported form {} for language {}'.format(form.name, self.lang)) self._formset.append(self.form_map[form][self.lang](self.input_form)) def __contains__(self, form: str) -> bool: """ Args: form: a string representing a concrete word form. Returns: whether or not the given form is an applicable form of this word. """ return form in self._formset def __iter__(self) -> Iterable: """ Yields: a string representing each applicable form of the word. """ return iter(self._formset) def __repr__(self): return '<Word: {}>'.format(str(self)) def __str__(self): return self.input_form