XHTML minify
This simple XHTML minifier removes only indentation and comments. The reason I wrote this is that the other HTML minifiers caused validation errors.
Last modified | |
Lines | 100 |
Parent directory Download CGIread sitemap Main page
Quick links: engine
#!/usr/bin/python
'''Minify XHTML
Simple filter.
This simple XHTML minifier removes only indentation and comments. The reason
I wrote this is that the other HTML minifiers caused validation errors.
'''
#import sys
# Regions whose content must be passed through untouched.  Each entry is a
# (start marker, stop marker) pair; the start marker deliberately omits the
# closing '>' so that tags carrying attributes are matched as well.
no_minify = [
    ('<pre', '</pre>'),
]
def engine(get_input, set_output, patterns=None):
    '''Minify XHTML line by line: drop comments and leading indentation.

    get_input is a function that returns a string (one line) with a
    trailing newline.  An empty string signals end of input.
    set_output is a function that takes one argument: string.
    patterns is an optional sequence of (start, stop) marker pairs that
    delimit regions which must not be minified.  When omitted, the
    module-level no_minify list is used.

    Outside no-minify regions the leading spaces and tabs of every piece
    of text are removed (this includes text that follows a comment on the
    same line); the newline is preserved.  Inside such regions the text is
    forwarded unchanged.  <!-- ... --> comments are removed everywhere,
    also inside no-minify regions and when they span several lines.

    Any stop marker ends a no-minify region, not only the one paired with
    the start marker that opened it.  Markers must be non-empty, and no
    start marker may be a prefix of a stop marker (or vice versa),
    otherwise the mode would flip forever without consuming input.

    Returns when get_input returns empty string.
    '''
    if patterns is None:
        patterns = no_minify
    start_markers = [start for start, _stop in patterns]
    stop_markers = [stop for _start, stop in patterns]

    def emit(text, verbatim):
        # Forward one piece of text.  Indentation is stripped unless the
        # piece belongs to a no-minify region; empty pieces are dropped.
        if not verbatim:
            text = text.lstrip(' \t')
        if text:
            set_output(text)

    nop_mode = False  # Inside a no-minify region?
    comment = False   # Inside a comment?
    while True:
        in_str = get_input()
        if not in_str:
            return
        # Consume the line piece by piece.  Every mode change restarts
        # this inner loop on the unconsumed remainder, as if that
        # remainder had come from a new line.
        while True:
            if comment:
                # Leave comment mode?
                end = in_str.find('-->')
                if end < 0:
                    # The comment continues on the next line.
                    break
                in_str = in_str[end + 3:]
                comment = False
                continue
            # Does a comment begin on this line?  Only the text in front
            # of it is looked at now; the comment itself stays attached to
            # in_str so it cannot get lost when the mode changes below.
            comment_ptr = in_str.find('<!--')
            if comment_ptr < 0:
                head = in_str
            else:
                head = in_str[:comment_ptr]
            # In normal mode look for a region start, in nop mode for a
            # region stop.  A list (not filter()) is required here because
            # the result is tested for emptiness and passed to min().
            if nop_mode:
                markers = stop_markers
            else:
                markers = start_markers
            ptrs = [ptr for ptr in [head.find(marker) for marker in markers]
                    if ptr >= 0]
            if ptrs:
                # Output what precedes the marker under the current mode,
                # then flip the mode.  The marker itself is handled by the
                # next pass.
                cut = min(ptrs)
                emit(head[:cut], nop_mode)
                in_str = in_str[cut:]
                nop_mode = not nop_mode
                continue
            emit(head, nop_mode)
            if comment_ptr < 0:
                # Nothing left on this line.
                break
            # Loop in comment mode using the rest of the string instead
            # of a new line.
            in_str = in_str[comment_ptr:]
            comment = True
if __name__ == '__main__':
    # Used as a command line filter: XHTML is read line by line from
    # standard input and the minified result is written to standard output.
    import sys
    read_line = sys.stdin.readline
    write = sys.stdout.write
    engine(read_line, write)