mkhtml - navigation maker
It is a simple HTML/CGI preprocessor that automagically generates the navigation for every page from a tree.
Last modified | |
Lines | 668 |
Parent directory Download CGIread sitemap Main page
Quick links: fetchurls fixurl flat_nav flatten getname main mk_makefile mk_navigation my_parse_args navigation search substitute title
#!/usr/bin/python2
import os
import sys
import argparse
import time
# New MACRO syntax:
# %MACRO("arg","arg")%
# %%
# Complete user documentation for the tool.  my_parse_args() prints this
# string verbatim when the --doc option is given.
global_doc = '''
Goals
=====
- All pages' navigation gets regenerated automagically if you
add/remove/renames pages.
- Navigation is autogenerated, footer is autoincluded,
with autogenerated timestamps.
- make(1)
Commandline usage
=================
<script_name> --conf=<sitecfg> --makemake
Generate the Makefile, the Makefile does this by itself.
<script_name> --conf=<sitecfg> <filename> ...
Turn 'foo.html.src' into 'foo.html', which will have
autogenerated navigation and footer.
make
Update whatever is necessary.
Input format
============
Macro preprocessor.
<script_name> --conf=<sitecfg> <filename> ...
Where filename SHOULD end with '.src', for automation using make,
the filename MUST end with '.src'.
And the footer file whose path/name is stored in the configuration
file.
'foo.html' is generated from 'foo.html.src' and the footer can be
included with a macro.
Macros are expanded using Python's builtin str.replace(src, dest)
'__X__H1__FOO__' becomes '__X<h1 id="title">...</h1>FOO__'.
Looks undefined != is undefined.
__H1__ <h1 id="title">
Name for the page,
from the navigation tree.
</h1>
__TITLE__ <title>Same as __H1__</title>
__TIMESTAMP__ Time when output was generated.
__MODIFIED__ Time of last modification of the source file.
__LINK_ARG__ URL-escaped absolute URL for this page,
http://validator.w3.org/check?uri=__LINK_ARG__
__OUT_T__ 'output' if OUTPUT file does not have execute
permission, 'script' if output file has
execute permission.
'output generated <LONG AGO>' would be
misleading if it `said` by a CGI script that
generates FRESH content.
__URI__ Server-relative URI, eg. /foo.html
__FOOTER__ This is the ONLY macro that cannot be used in
the footer-file.
Configuration file
==================
<script_name> --conf=<conf_file> [args...]
The configuration file contains a Python datastructure, it is read
using Python's own parser. WARNING: eval() is capable of running
evil() code.
The datastructure is a dictionary <dict> {}, (a hash if you're
more familiar with perl).
Keys
----
'tree' Main navigation, a tree structure.
tuple () of (URL, name, children):
`URL` is a absolute URL without protocol
and host. The root of the webserver
is '/'.
`name` is the default title and h1 fo
the page. NOTE: You can set title
and h1 manually, don't forget that.
`children` is either None or a list []
of tuples () of (URL, name, children).
'subs' - Extra pages that don't fit in the
navigation, they are always included in
the navigation and they are always on the
bottom.
- ex. Privacy policy, Terms of use, etc.
- List [] of tuples () of (URL, name).
- Look at the documentation for 'tree', for
information about `URL` and `name`.
'hidden' - Extra pages that shouldn't be shown in the
navigation.
- ex: Error pages
- List [] of tuples () of (URL, name).
- Look at the documentation for 'tree', for
'conf' Path to <conf_file>. string
'script' Path to <script_name>. string
'footer' Path to the footer file. string
'host' Hostname, no protocol nor trailing slash.
ex. 'example.com', '10.1.2.3'
string
NOTE: You probably need to pipe the output into a pager.
./mkhtml --conf= --doc | less
'''
def fixurl(url):
    '''Translate a site URL into the relative path used in the Makefile.

    The query string (everything from the first '?') is dropped and the
    leading slash(es) are stripped.  A URL with a trailing slash names a
    directory: every name in the module-level conf['index-names'] is
    tried in order and the first one for which os.stat() succeeds wins.

    Returns the relative path as a string.  Returns None, after printing
    a complaint, when a directory URL has no existing index file.
    '''
    real_url = url.split('?')[0]
    relative = real_url.lstrip('/')
    if not real_url.endswith('/'):
        return relative
    for index_name in conf['index-names']:
        path = relative + index_name
        try:
            os.stat(path)
        except OSError:
            # Only "this candidate cannot be stat()ed" means "try the
            # next name".  A bare except would also swallow
            # KeyboardInterrupt and hide real programming errors.
            continue
        return path
    print('Shit! Did you forgot to create a dummy index file for {}?'.format(url))
    # Made explicit: callers receive None for an unresolvable directory.
    return None
def my_parse_args():
    '''
    Parse the command line with argparse and return the namespace.

    The namespace has `conf` (string), `files` (list, may be empty),
    `mknav` (True when --makemake was given) and `print_doc` (True when
    --doc was given).

    --doc prints global_doc.  When there is nothing to do (neither
    --makemake nor any file) the process exits: status 0 if --doc was
    given, otherwise the help text is printed and the status is 1.
    '''
    cli = argparse.ArgumentParser(
        description='''Tool to generate navigation to [X]HTML
(from <filename>.src) and generate a Makefile for automatic
invocation of this tool.''')
    cli.add_argument(
        '--conf',
        dest='conf',
        required=True,
        action='store',
        help='''Path to the file that contains the navigation and
miscellaneous configuration. WARNING: The content of the file is
passed to eval()''')
    cli.add_argument(
        'files',
        nargs='*',
        help='''The filename(s) must end with '.src', it is a compiler
pattern and the outfile is <filename> - '.src'.''')
    cli.add_argument(
        '--makemake',
        dest='mknav',
        action='store_true',
        help='''Generate the Makefile instead,
needed when new pages are added.''')
    cli.add_argument(
        '--doc',
        dest='print_doc',
        action='store_true',
        help='''Print out the doc-string for complete documentation.
Add "--conf=" as an argument. ./mkhtml --conf= --doc''')

    parsed = cli.parse_args()
    if parsed.print_doc:
        print(global_doc)

    has_work = parsed.mknav or parsed.files
    if has_work:
        return parsed

    # NO-OP invocation: printing the documentation counts as success,
    # anything else is a usage error.
    if not parsed.print_doc:
        cli.print_help()
        sys.exit(1)
    sys.exit(0)
def mk_makefile(conf):
    '''Write 'Makefile' (in the current directory) for the whole site.

    `conf` is the site configuration dictionary.  The keys read here are
    'tree', 'subs', 'hidden', 'script', 'conf' and 'includes' (a dict
    whose values are file paths).

    Every URL of the tree, the subs and the hidden pages is passed
    through fixurl() to obtain the make target; each target depends on
    '<target>.src' plus the configuration, the script and the includes.
    '''
    script = conf['script']
    sitecfg = conf['conf']
    includes = [conf['includes'][key] for key in conf['includes']]

    # Find all urls: depth-first through the tree, then subs and hidden.
    urls = []

    def fetchurls(nodes):
        for url, _name, children in nodes:
            urls.append(url)
            if children is not None:
                fetchurls(children)

    fetchurls([conf['tree']])
    for url, _name in conf['subs'] + conf['hidden']:
        urls.append(url)

    # A real list, not map(): the targets are walked twice below and a
    # Python 3 map object would already be exhausted the second time,
    # silently dropping every per-page rule.
    targets = [fixurl(url) for url in urls]

    # Build the complete text first so that a failure while resolving or
    # joining the targets does not leave a truncated Makefile behind.
    header = '''# Autogenerated by mkhtml ({})
CONF = {}
SCRIPT = {}
#FOOTER = <left-curly><right-curly>
INCLUDES = {}
CMD = $(SCRIPT) --conf=$(CONF)
DEPS = $(CONF) $(SCRIPT) $(INCLUDES)
all : Makefile {}
Makefile : $(CONF) $(SCRIPT)
\t$(CMD) --makemake
\tmake
'''.format(
        sys.argv[0],
        sitecfg,
        script,
        ' \\\n\t\t'.join(includes),
        ' \\\n\t\t'.join(targets)
    )
    rule = '''{0} : {0}.src $(DEPS)
\t$(CMD) {0}.src
'''

    # `with` closes the file even when a write fails.
    with open('Makefile', 'w') as makefile:
        makefile.write(header)
        for target in targets:
            makefile.write(rule.format(target))
def mk_navigation(tree, subs, hidden, url, navconf):
    '''
    Automagically translate the tree of pages into the navigation
    for a specific page.

    XHTML, my_name = mk_navigation(tree, subs, hidden, url, navconf)

    Arguments
    =========
    `tree`      Main navigation: a tuple (URL, name, children) where
                `children` is None or a list [] of such tuples.
    `subs`      [(URL, name), ...], always shown, as the last row.
    `hidden`    [(URL, name), ...], never shown as links, but they
                have a name and get the top-level row.
    `url`       URL of the page the navigation is generated for.
    `navconf`   dict.  navconf['navdiv'] is 'div', 'nav-div' or 'nav'
                and selects the wrapping element; navconf['active']
                is the tag name used for the entry of the page itself.

    Returns
    =======
    `XHTML`     The navigation code.
    `my_name`   Name of `url`, for the default <h1> and <title>.

    Raises
    ======
    ValueError  `url` is not in `subs`/`hidden` and not in `tree`.
    KeyError    navconf['navdiv'] is not one of the values above.

    Example
    =======
    tree = (
        '/', 'Home', [
            ('/foo.html', 'foo', None),
            ('/bar.html', 'bar', [
                ('/baz.html', 'baz', None),
            ]),
        ]
    )
    subs = [
        ('/contact.html', 'contact'),
        ('/policy.html', 'policy'),
    ]
    url = '/baz.html'

    gives three rows (head first, then the subs of that head):

        Home | foo | bar
        bar  | BAZ
             | contact | policy

    Implementation
    ==============
    rows = [row, ...]       # This looks like the output.
    row = (heads, subs)     # Two lists of URLs.
    names[URL] = name

    1. Choose the rows on the path from the root to `url`.
       NOTE: Not if the page is one of the subs/hidden, those only
       get the top level.
    2. Add the subs as the last row.
    3. Get the names for the URLs.
    4. Generate XHTML: one <p class="row"> per row.  The entry whose
       URL equals `url` is not a link (no selfreferences), it uses
       the navconf['active'] tag instead.
    '''

    def flat_nav(tree, URL):
        '''
        Return the rows on the way to `URL` in the tree `tree`.

        # 1. Generate path, eg. ['/', '/bar.html', '/baz.html']
        # 2. Flatten navigation tree into rows [([head],[sub,...]),...]
        # 3. Select the rows whose head is on the path.
        '''
        path = []

        def search(node, URL):
            # Enter node.
            path.append(node[0])
            # Is it the right one?  Query strings are ignored.
            if node[0].split('?')[0] == URL.split('?')[0]:
                return True
            if node[2] is not None:
                for child in node[2]:
                    if search(child, URL):
                        # [grand,...]child matches, bail out.
                        return True
                    # Clean up after child.
                    path.pop()
            # No children, or no [grand,...]children were correct.
            return False

        # Explicit check instead of `assert search(...)`: an assert is
        # removed under `python -O`, and then the search itself would
        # never run and `path` would silently stay empty.
        if not search(tree, URL):
            raise ValueError(
                'URL "{}" is not in the navigation tree'.format(URL))

        # 2. flatten
        flat = []

        def flatten(node):
            # Add self; a leaf keeps None so it can be told apart below.
            if node[2] is not None:
                children = [child[0] for child in node[2]]
            else:
                children = None
            flat.append(([node[0]], children))
            # Recurse into children.
            if node[2] is not None:
                for child in node[2]:
                    flatten(child)

        flatten(tree)

        # 3. Select using `path` and `flat`
        out = []
        for path_item in path:
            for flat_item in flat:
                if flat_item[0][0] == path_item:
                    # NOTE: The last item in `path` MAY have no
                    # children, such a row is not shown.
                    if flat_item[1] is not None:
                        out.append(flat_item)
                    break
            else:
                raise ValueError(
                    'Bogus URL "{}", step 1.3., cannot happen!'.format(URL))
        return out

    # 1. Choose the rows.  Do not find the full path for subs/hidden.
    rows = []
    if url not in [pair[0] for pair in subs + hidden]:
        # This part is tricky enough to deserve its own function.
        rows.extend(flat_nav(tree, url))
    else:
        rows.append((
            [tree[0]],  # Root node.
            # Remove grandchildren.  `or []`: a root without children
            # has None here and must not crash the sub pages.
            [child[0] for child in (tree[2] or [])]
        ))

    # 2. Add the subs
    rows.append(([], [pair[0] for pair in subs]))

    # 3. Get names for the URLs
    names = {}

    def getname(node):
        # add self.
        names[node[0]] = node[1]
        # Recurse into children.
        if node[2] is not None:
            for child in node[2]:
                getname(child)

    getname(tree)
    # Subs and hidden come last, so they win over a tree entry with the
    # same URL.
    for pair in subs + hidden:
        names[pair[0]] = pair[1]

    # 4. Generate XHTML.
    # NOTE: DO NOT generate selfreferences
    navdiv_start = {
        'div': '<div',
        'nav-div': '<nav><div',
        'nav': '<nav',
    }[navconf['navdiv']]
    navdiv_end = {
        'div': '</div>',
        'nav-div': '</div></nav>',
        'nav': '</nav>',
    }[navconf['navdiv']]
    # '\n' huh? why? Because __NAVIGATION__ is indented in .src
    div_s = (
        '\n<!-- BEGIN autogenerated navigation -->\n' +
        navdiv_start +
        ' id="navigation"><div id="nav_inner">' +
        '\n<p><a href="#content" class="textonly">Skip navigation</a></p>'
        '\n{}\n' +
        '<hr class="textonly"/>\n' +
        '</div>' +
        navdiv_end +
        '\n<!-- END autogenerated navigation -->\n')
    p_s = '<p class="row">\n{}\n</p>'
    active = navconf['active']
    # links_s[is_sub][link_url == url].format(URL, name)
    # The first .format() only fills in the `active` tag name and puts
    # the '{0}'/'{1}' placeholders back for the second one.
    links_s = [
        [
            (
                '<span class="textonly" translate="no">[</span>' +
                '<a class="head" href="{0}">{1}</a>' +
                '<span class="textonly" translate="no">]</span>\n' +
                '>>'
            ).format('{0}', '{1}'),
            (
                '<span class="textonly" translate="no">]</span>' +
                '<{0} class="head active">{1}</{0}>' +
                '<span class="textonly" translate="no">[</span>\n' +
                '>>'
            ).format(active, '{1}'),
        ],
        [
            (
                '<span class="textonly" translate="no">[</span>' +
                '<a class="sub" href="{0}">{1}</a>' +
                '<span class="textonly" translate="no">]</span>'
            ).format('{0}', '{1}'),
            (
                '<span class="textonly" translate="no">]</span>' +
                '<{0} class="sub active">{1}</{0}>' +
                '<span class="textonly" translate="no">[</span>'
            ).format(active, '{1}'),
        ],
    ]

    big_chunks = []
    for row in rows:
        chunks = []
        # is_sub is 0 for the heads and 1 for the subs of the row.
        for is_sub, links in enumerate(row[:2]):
            for link_URL in links:
                chunks.append(links_s[is_sub][link_URL == url].format(
                    link_URL,
                    # Spaces become '\xc2\xa0' so a name is not wrapped.
                    # NOTE(review): as a Python 2 byte string (see the
                    # python2 shebang) this is the UTF-8 encoding of a
                    # no-break space; confirm before running on Python 3.
                    names[link_URL].replace(' ', '\xc2\xa0'),
                ))
        big_chunks.append(p_s.format('\n'.join(chunks)))
    return div_s.format('\n'.join(big_chunks)), names[url]
def substitute(substitutes, s):
    '''Expand macros in `s` and return the result.

    `substitutes` is a sequence of (name, replacement) entries.  For
    each entry, in order, every occurrence of '__<name>__' is replaced
    using str.replace(), so a later entry also sees the text inserted by
    an earlier one.
    '''
    expanded = s
    for entry in substitutes:
        name, replacement = entry[0], entry[1]
        marker = '__{}__'.format(name)
        expanded = expanded.replace(marker, replacement)
    return expanded
def main():
    '''
    Entry point: read the configuration, then do what was asked.

    1. Parse the command line with my_parse_args().
    2. eval() the configuration file into the module-level `conf`.
    3. With --makemake: write the Makefile with mk_makefile().
    4. For every input file 'foo.src': expand the macros and write the
       result to 'foo'.  The macros are the autogenerated ones (TITLE,
       NAVIGATION, H1, TIMESTAMP, MODIFIED, LINK_ARG, OUT_T, URI), the
       upper-cased conf['macros'] (plus HOST, SCHEME and SITE) and the
       upper-cased conf['includes'], whose files are macro-expanded
       themselves before being inserted.
    '''
    global conf
    args = my_parse_args()
    # WARNING: eval() runs whatever code the configuration file
    # contains; only use configuration files you trust.
    with open(args.conf) as conf_file:
        conf = eval(conf_file.read())
    tree = conf['tree']
    subs = conf['subs']
    hidden = conf['hidden']
    macros = conf['macros']
    includes = conf['includes']
    host_name = conf['host']
    macros['host'] = host_name
    macros['scheme'] = conf['scheme']
    macros['site'] = conf['scheme'] + '://' + host_name
    navconf = conf['navconf']
    navconf['site'] = macros['site']
    if args.mknav:
        mk_makefile(conf)
    for filename in args.files:
        out_name = filename[:-len('.src')]
        this_url = '/' + out_name
        for index_name in conf['index-names']:
            if this_url.endswith('/' + index_name):
                # Cut off the trailing index name only.  str.replace()
                # would also remove the same text from directory names
                # earlier in the URL.
                this_url = this_url[:len(this_url) - len(index_name)]
        # <output_type> generated at __TIMESTAMP__
        if os.access(out_name, os.X_OK):
            output_type = 'CGI script'
        else:
            output_type = 'page'
        xhtml_nav, myname = mk_navigation(tree, subs, hidden, this_url, navconf)
        # NOTE(review): 'http://' is hard-coded here although
        # conf['scheme'] exists; confirm whether that is intended.
        link_arg = (
            'http://' + host_name +
            this_url.replace('&', '%26').replace('?', '%3f').replace('=', '%3d')
        )
        # '%Y-%m-%d' is the portable spelling of '%F'.
        date_format = '%Y-%m-%d'
        autofill = [
            ('TITLE', '<title>{}</title>'.format(myname)),
            ('NAVIGATION', xhtml_nav),
            ('H1', '<h1 id="title">{}</h1>'.format(myname)),
            ('TIMESTAMP', time.strftime(date_format, time.gmtime())),
            ('MODIFIED', time.strftime(
                date_format,
                time.gmtime(os.stat(filename).st_mtime)
            )),
            ('LINK_ARG', link_arg),
            ('OUT_T', output_type),
            ('URI', this_url),
        ]
        for macro in macros:
            autofill.append((macro.upper(), macros[macro]))
        # Includes are expanded with the macros collected so far and are
        # only added afterwards, so one include cannot pull in another.
        autofill_extend = []
        for include in includes:
            with open(includes[include]) as include_file:
                content = substitute(autofill, include_file.read())
            autofill_extend.append((include.upper(), content))
        autofill.extend(autofill_extend)
        # Read and expand before opening the output for writing, so a
        # failure does not leave an empty output file behind.
        with open(filename) as in_f:
            expanded = substitute(autofill, in_f.read())
        with open(out_name, 'w') as out_f:
            out_f.write(expanded)
# Module-level site configuration.  It starts out empty; main() rebinds
# it to the dictionary read from the --conf file, and fixurl() reads
# conf['index-names'] from it.
conf = dict()

# Run the tool only when this file is executed as a script.
if __name__ == '__main__':
    main()