XHTML minify

This simple XHTML minifier removes only indentation and comments. The reason I wrote this is that the other HTML minifiers caused validation errors.

Last modified
Lines 100

Parent directory Download CGIread sitemap Main page

Quick links: engine

  1. #!/usr/bin/python
  2. '''Minify XHTML
  3. Simple filter.
  4. This simple XHTML minifier removes only indentation and comments. The reason
  5. I wrote this is that the other HTML minifiers caused validation errors.
  6. '''
  7. #import sys
  8. no_minify = [
  9.     ('<pre', '</pre>')          # Start/stop patterns.
  10. ]
  11. def engine(get_input, set_output):
  12.     '''get_input is a function that returns a string (one line) with a
  13.     trailing newline.
  14.     set_output is a function that takes one argument: string.
  15.     
  16.     Returns when get_input returns empty string.
  17.     '''
  18.     nop_mode = False
  19.     comment = False
  20.     in_str = get_input()
  21.     if not in_str:
  22.         return
  23.     #Comment
  24.     #   Leave?
  25.     #No comment
  26.     #   Does a comment begin on this line?
  27.     #   nop_mode
  28.     #           Leave nop_mode?
  29.     #   normal mode
  30.     #           Enter nop_mode?
  31.     #   Did a comment begin on this line?
  32.     while True:
  33.         # Get input it before the loop and in the end, simplifies
  34.         # nop_mode changes.
  35.         if comment:
  36.             # Leave comment mode?
  37.             ptr = in_str.find('-->')
  38.             if ptr >= 0:
  39.                 in_str = in_str[ptr + 3:]
  40.                 comment = False
  41.                 continue
  42.         else:
  43.             # Does a comment begin on this line?
  44.             comment_ptr = in_str.find('<!--')
  45.             if comment_ptr >= 0:
  46.                 comment = True
  47.                 in_str,comment_str = in_str[:comment_ptr],in_str[comment_ptr:]
  48.             if nop_mode:
  49.                 # Input must not be modified.
  50.                 
  51.                 # Get back to normal mode?
  52.                 ptrs = filter(lambda x: x >= 0,
  53.                             [in_str.find(stop) for start, stop in no_minify])
  54.                 if not len(ptrs):
  55.                     set_output(in_str)
  56.                 else:
  57.                     set_output(in_str[:min(ptrs)])
  58.                     in_str = in_str[min(ptrs):]
  59.                     nop_mode = False
  60.                     # Make it believe it came from a new line.
  61.                     continue
  62.             else:
  63.                 # Preserve the newline and remove all leading whitespace.
  64.                 
  65.                 # Get into NOP mode?
  66.                 ptrs = filter(lambda x: x >= 0,
  67.                             [in_str.find(start) for start, stop in no_minify])
  68.                 if not len(ptrs):
  69.                     for ptr, c in enumerate(in_str):
  70.                         if c not in ' \t':
  71.                             set_output(in_str[ptr:])
  72.                             break
  73.                 else:
  74.                     for ptr, c in enumerate(in_str):
  75.                         if c not in ' \t':
  76.                             set_output(in_str[ptr:min(ptrs)])
  77.                             break
  78.                     in_str = in_str[min(ptrs):]
  79.                     nop_mode = True
  80.                     continue
  81.             # Did a comment begin on this line?
  82.             if comment:
  83.                 # Loop in comment mode using the rest of the string instead
  84.                 # of a new line.
  85.                 in_str = comment_str
  86.                 continue
  87.         in_str = get_input()
  88.         if not in_str:
  89.             break
  90. if __name__ == '__main__':
  91.     import sys
  92.     engine(sys.stdin.readline, sys.stdout.write)