Rename RE_LANG_CODE to RE_WORDLIST_NAME.

We will drop the assumption that wordlists are generally organized by language. There are, for instance, many different wordlists available in English alone, serving many different purposes. Therefore we will support different names for different wordlists, and these names do not necessarily depend (only) on a language.

Rename RE_LANG_CODE to RE_WORDLIST_NAME.
3eb7f54c · ulif · 25faa4dd · 3eb7f54c · 3eb7f54c
Commit 3eb7f54c authored 9 years ago by ulif
--- a/diceware/__init__.py
+++ b/diceware/__init__.py
@@ -28,9 +28,9 @@ __version__ = pkg_resources.get_distribution('diceware').version
 WORDLISTS_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'wordlists'))

-#: A regular expression matching 2 consecutive ASCII chars. We
-#: consider this to represent some language/country code.
-RE_LANG_CODE = re.compile('^[a-zA-Z]{2}$')
+#: A regular expression matching allowed wordlist names. We
+#: allow names that cannot easily mess up filesystems.
+RE_WORDLIST_NAME = re.compile('^[a-zA-Z0-9_-]+$')

 #: Special chars inserted on demand
 SPECIAL_CHARS = r"~!#$%^&*()-=+[]\{}:;" + r'"' + r"'<>?/0123456789"
@@ -148,7 +148,7 @@ def get_wordlist_path(lang):
    The `lang` string is a 2-char country code. Invalid codes raise a
    ValueError.
    """
-    if not RE_LANG_CODE.match(lang):
+    if not RE_WORDLIST_NAME.match(lang):
        raise ValueError("Not a valid language code: %s" % lang)
    basename = 'wordlist_%s.txt' % lang
    return os.path.join(WORDLISTS_DIR, basename.lower())

--- a/tests/test_diceware.py
+++ b/tests/test_diceware.py
@@ -5,7 +5,7 @@ import pytest
 import sys
 from io import StringIO
 from diceware import (
-    WORDLISTS_DIR, RE_LANG_CODE, SPECIAL_CHARS, get_wordlist,
+    WORDLISTS_DIR, RE_WORDLIST_NAME, SPECIAL_CHARS, get_wordlist,
    get_wordlist_path, insert_special_char, get_passphrase,
    handle_options, main, __version__, print_version, get_random_sources,
    )
@@ -73,17 +73,22 @@ class Test_GetWordList(object):

 class TestDicewareModule(object):

-    def test_re_lang_code(self):
-        # RE_LANG_CODE really works
+    def test_re_wordlist_name(self):
+        # RE_WORDLIST_NAME really works
        # valid stuff
-        assert RE_LANG_CODE.match('de') is not None
-        assert RE_LANG_CODE.match('DE') is not None
-        assert RE_LANG_CODE.match('vb') is not None
+        assert RE_WORDLIST_NAME.match('de') is not None
+        assert RE_WORDLIST_NAME.match('DE') is not None
+        assert RE_WORDLIST_NAME.match('vb') is not None
+        assert RE_WORDLIST_NAME.match('8k') is not None
+        assert RE_WORDLIST_NAME.match('original') is not None
+        assert RE_WORDLIST_NAME.match('with_underscore') is not None
+        assert RE_WORDLIST_NAME.match('u') is not None
        # invalid stuff
-        assert RE_LANG_CODE.match('de_DE') is None
-        assert RE_LANG_CODE.match('u1') is None
-        assert RE_LANG_CODE.match('u') is None
-        assert RE_LANG_CODE.match('dea') is None
+        assert RE_WORDLIST_NAME.match('with space') is None
+        assert RE_WORDLIST_NAME.match('"with quotation marks"') is None
+        assert RE_WORDLIST_NAME.match("'with quotation marks'") is None
+        assert RE_WORDLIST_NAME.match('with.dot') is None
+        assert RE_WORDLIST_NAME.match('with/slash') is None

    def test_get_random_sources(self):
        # we can get a dict of random sources registered as entry_points.