Password strength meter

XKCD-compatible password strength meter in Python. Tr0ub4dor&3 has approx 28 bits of entropy and correcthorsebatterystaple has approx 44.

Last modified
Lines 234

Parent directory Download CGIread sitemap Main page

Quick links: __missing__ get_transformations get_word_list listdict main measure measure_word

  1. #!/usr/bin/python
  2. import math
  3. import string
  4. import sys
  5. def get_transformations():
  6.     '''
  7.     Return two dictionaries for substring substitution: reverse, forward
  8.     
  9.     `reverse` will map passwordish input to possible lowercase outputs.
  10.     `forward` will map lowercase input to possible passwordish outputs.
  11.     
  12.     Ex:
  13.         reverse['$'] -> ['s']
  14.         forward['s'] -> ['S', 's', 'Z', 'z', '$']
  15.     '''
  16.     class listdict(dict):
  17.         def __missing__(self, key):
  18.             self[key] = []
  19.             return self[key]
  20.     # Dictionary of substring subsition 
  21.     transformations = listdict({
  22.         "ate": ["8",],
  23.         "for": ["4",],
  24.         "to": ["2",],
  25.         "too": ["2",],
  26.         "a": ["4", "@", "/-\\", "^",],
  27.         "c": ["k",],
  28.         "e": ["3",],
  29.         "i": ["1", "l", "|",],
  30.         "l": ["1", "l", "|",],
  31.         "o": ["0",],
  32.         "s": ["$", "z", "Z", "5",],
  33.         "t": ["7",],
  34.         "z": ["s", "S", "2",],
  35.     })
  36.     for key in range(ord("a"), ord("z")+1):
  37.         transformations[chr(key)].append(chr(key))
  38.         transformations[chr(key)].append(chr(key).upper())
  39.     reverse_lookup = listdict()
  40.     for key in transformations:
  41.         for value in transformations[key]:
  42.             reverse_lookup[value].append(key)
  43.     return dict(reverse_lookup), dict(transformations)
  44. def get_word_list():
  45.     '''
  46.     Return a list of lowercase English words that are long enough.
  47.     '''
  48.     s = open('/usr/share/dict/words').read() + 'troubador'
  49.     words = set()
  50.     for line in s.split('\n'):
  51.         if len(line) > 3 and "'" not in line:
  52.             words.add(line.lower())
  53.     return words
  54. def measure_word(password, comb=1, word_list=None):
  55.     '''
  56.     Decide if `password` begins with an English word.
  57.     
  58.     Return: None
  59.         Password does not begin with an English word.
  60.         
  61.     Return: bits, remaining_password
  62.         Password does begin with an English word.
  63.         `bits` is the entropy of the measuered part of the password.
  64.         `remaining_password` is the unmeasured part (after the word).
  65.     '''
  66.     BITS_PER_CHAR = 1.9
  67.     BONUS_BITS = 1.0
  68.     
  69.     if word_list is None:
  70.         word_list = get_word_list()
  71.     
  72.     candidate = None
  73.     for key in sorted(reverse, key=lambda x: len(x), reverse=True):
  74.         if password.startswith(key):
  75.             for value in reverse[key]:
  76.                 new_word_list = []
  77.                 for word in word_list:
  78.                     if word == value:
  79.                         # Must set to variable
  80.                         # Ex.  "hello" would otherwise match "hell"
  81.                         # because "l" is in the word list, but so is "lo" which does
  82.                         # not match "l".
  83.                         # Set the variable but keep searching. In the next level,
  84.                         # "o" is in the word list and that's the time to return.
  85.                         candidate = math.log(comb * 2**BONUS_BITS, 2), password[len(key):]
  86.                     if word.startswith(value):
  87.                         new_word_list.append(word[len(value):])
  88.                 if new_word_list:
  89.                     if key == value:
  90.                         comb_factor = 1
  91.                     else:
  92.                         comb_factor = len(forward[value])
  93.                     buf = measure_word(
  94.                         password[len(key):],
  95.                         comb * comb_factor * 2**BITS_PER_CHAR,
  96.                         new_word_list
  97.                     )
  98.                     if buf is not None:
  99.                         return buf
  100.     else:
  101.         # No better match has been found.
  102.         return candidate
  103. def measure(password):
  104.     '''
  105.     Return how many bits of entropy there are in the password.
  106.     '''
  107.     MAGIC_FACTOR = 0.6
  108.     bits = 0
  109.     misc_chars = ''
  110.     words_cache = {}
  111.     chars_cache = set()
  112.     while password:
  113.         # Check if it is known to not be a word.
  114.         for char in chars_cache:
  115.             if password[0] == char:
  116.                 password = password[1:]
  117.                 misc_chars += char
  118.                 break
  119.         else:
  120.             # Check if it is known to be a word.
  121.             for word in words_cache:
  122.                 if password.startswith(word):
  123.                     password = password[len(word):]
  124.                     words_cache[word] += 1
  125.                     break
  126.             else:
  127.                 # Not cached, measure word.
  128.                 tmp = measure_word(password)
  129.                 if tmp is not None:
  130.                     wordlen = len(password) - len(tmp[1])
  131.                     word = password[:wordlen]
  132.                     words_cache[word] = 1
  133.                     # First instance of word.
  134.                     bits += tmp[0]
  135.                     password = tmp[1]
  136.                 else:
  137.                     # Not a word.
  138.                     misc_chars += password[0]
  139.                     chars_cache.add(password[0])
  140.                     password = password[1:]
  141.     # Repeated words.
  142.     for key in words_cache:
  143.         bits += math.log(words_cache[key], 2)
  144.     # Deal with misc_chars.
  145.     if misc_chars:
  146.         types = {
  147.             "upper": string.uppercase,
  148.             "lower": string.lowercase,
  149.             "digits": string.digits,
  150.             "punctuation": string.punctuation,
  151.         }
  152.         # 26 + 26 + 10 + something
  153.         expected_set = 0
  154.         for key in types:
  155.             value = filter(lambda x: x in types[key], misc_chars)
  156.             if value:
  157.                 expected_set += len(types[key])
  158.         if expected_set == 0:
  159.             expected_set = 128
  160.         # Decide whether the miscellaneous characters are random or not.
  161.         poor = False
  162.         if len(misc_chars) > expected_set:
  163.             for ch in set(misc_chars):
  164.                 if float(misc_chars.count(ch))/len(misc_chars) > 2.0/expected_set:
  165.                     poor = True
  166.                     break
  167.         else:
  168.             if float(len(misc_chars))/len(set(misc_chars)) > 2.0:
  169.                 poor = True
  170.         if poor:
  171.             bits += (
  172.                 # Repetitive
  173.                 math.log(len(misc_chars))
  174.                 # Predictable set
  175.                 * math.log(len(set(misc_chars)), 2)
  176.                 # xkcd factor for "Tr0ub4dor&3"
  177.                 * MAGIC_FACTOR
  178.                 # Some credit for at least trying
  179.                 + 4
  180.             )
  181.         else:
  182.             bits += len(misc_chars) * math.log(expected_set, 2) * MAGIC_FACTOR
  183.     return bits
  184. def main():
  185.     messages = {
  186.         10: "Are you kidding me?",
  187.         20: "Vulnerable",
  188.         30: "Can't you do better than that?",
  189.         40: "Almost good enough",
  190.         50: "Good enough",
  191.         60: "Good",
  192.         70: "Very good",
  193.         80: "Secure",
  194.         90: "Awesome",
  195.     }
  196.     try:
  197.         while True:
  198.             sys.stderr.write('\nMeasure password strength: ')
  199.             sys.stderr.flush()
  200.             
  201.             line = sys.stdin.readline()
  202.             if not line:
  203.                 break
  204.             bits = measure(line.rstrip('\n'))
  205.             
  206.             sys.stdout.write('~{} bits: '.format(int(bits + 0.5)))
  207.             for level in sorted(messages):
  208.                 if level > bits:
  209.                     sys.stdout.write(messages[level] + '\n')
  210.                     break
  211.             else:
  212.                 sys.stdout.write('Speachless\n\n')
  213.     except KeyboardInterrupt:
  214.         pass
  215.     sys.stderr.write('\n')
  216. reverse, forward = get_transformations()
  217. if __name__ == '__main__':
  218.     main()