Newer
Older
#: The directory in which wordlists are stored
SRC_DIR = os.path.dirname(__file__)

#: A regular expression matching exactly two ASCII letters (a-z, A-Z).
#: The pattern is anchored with ``\Z`` rather than ``$`` because ``$``
#: also matches just before a trailing newline, which would let a value
#: like "en\n" pass as a valid two-letter code.
RE_ASCII_CHARS = re.compile(r'^[a-zA-Z]{2}\Z')
def get_wordlist(path):
    """Parse file at `path` and build a word list of it.

    A wordlist is expected to contain lines of format::

        <NUMS><TAB><WORD>\\n

    for instance::

        136512\\tTerm

    Lines without a tab character are skipped, as are lines whose term
    (the text between the first and second tab, stripped of surrounding
    whitespace) is empty.

    Returns the list of terms in file order.
    """
    result = []
    with open(path, 'r') as fd:
        # Iterate the file object directly so the whole file is not
        # loaded into memory at once.
        for line in fd:
            if '\t' not in line:
                continue
            # Only the field right after the first tab is the term; any
            # further tab-separated fields are ignored.
            term = line.split('\t')[1].strip()
            if term:  # do not accept empty strings
                result.append(term)
    return result
def get_wordlist_path(lang):
    """Return the absolute path of the wordlist file for language `lang`.

    `lang` has to match `RE_ASCII_CHARS` (a two-letter code); any other
    string raises a ValueError. The file name has the form
    ``wordlist_<lang>.asc``, is lowercased, and is looked up in
    `SRC_DIR`.
    """
    # Reject anything that is not a valid code before building a path.
    if RE_ASCII_CHARS.match(lang) is None:
        raise ValueError("Not a valid language code: %s" % lang)
    filename = ('wordlist_%s.asc' % lang).lower()
    return os.path.abspath(os.path.join(SRC_DIR, filename))