From 6e7c5a93c6bf8ac2b0d84ecd1c5d8070650fe788 Mon Sep 17 00:00:00 2001
From: ulif <uli@gnufix.de>
Date: Sat, 25 Jul 2015 14:23:17 +0200
Subject: [PATCH] Add regexp for numbered wlist entries.

We need this regular expression to strip entries in wordlists like the
original 7776-terms wordlist from diceware.com.
---
 diceware/wordlist.py   |  3 +++
 tests/test_wordlist.py | 15 +++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/diceware/wordlist.py b/diceware/wordlist.py
index 77caee0..4347c33 100644
--- a/diceware/wordlist.py
+++ b/diceware/wordlist.py
@@ -26,6 +26,9 @@ WORDLISTS_DIR = os.path.abspath(
 #: allow names that cannot easily mess up filesystems.
 RE_WORDLIST_NAME = re.compile('^[a-zA-Z0-9_-]+$')
 
+#: A regular expression matching numbered entries in wordlists.
+RE_NUMBERED_WORDLIST_ENTRY = re.compile('^[0-9]+\s+([^\s]+)$')
+
 
 def get_wordlist_names():
     """Get a all names of wordlists stored locally.
diff --git a/tests/test_wordlist.py b/tests/test_wordlist.py
index 1815b38..d9fd16f 100644
--- a/tests/test_wordlist.py
+++ b/tests/test_wordlist.py
@@ -1,8 +1,9 @@
 import os
 import pytest
 from diceware.wordlist import (
-    WORDLISTS_DIR, RE_WORDLIST_NAME, get_wordlist, get_signed_wordlist,
-    get_wordlist_path, get_wordlist_names, is_signed_wordlist,
+    WORDLISTS_DIR, RE_WORDLIST_NAME, RE_NUMBERED_WORDLIST_ENTRY, get_wordlist,
+    get_signed_wordlist, get_wordlist_path, get_wordlist_names,
+    is_signed_wordlist,
     )
 
 
@@ -81,6 +82,16 @@ class TestWordlistModule(object):
         assert RE_WORDLIST_NAME.match('with.dot') is None
         assert RE_WORDLIST_NAME.match('with/slash') is None
 
+    def test_re_numbered_wordlist_entry(self):
+        assert RE_NUMBERED_WORDLIST_ENTRY.match('11111 a') is not None
+        assert RE_NUMBERED_WORDLIST_ENTRY.match(
+            '11111 a').groups() == ('a', )
+        assert RE_NUMBERED_WORDLIST_ENTRY.match('12211\t 1') is not None
+        assert RE_NUMBERED_WORDLIST_ENTRY.match(
+            '12211\t 1').groups() == ('1', )
+        assert RE_NUMBERED_WORDLIST_ENTRY.match('12a11 foo') is None
+        assert RE_NUMBERED_WORDLIST_ENTRY.match('foo bar') is None
+
     def test_get_wordlist_path(self):
         # we can get valid wordlist paths
         assert os.path.exists(get_wordlist_path('en_8k'))
-- 
GitLab