#!/usr/bin/python2
import os
import sys
import argparse
import time

# New MACRO syntax:
#   %MACRO("arg","arg")%
#   %%

global_doc = '''
Goals
=====
- All pages' navigation gets regenerated automagically if you
  add/remove/rename pages.
- Navigation is autogenerated, include files (e.g. a footer) are
  autoincluded, with autogenerated timestamps.
- make(1)

Commandline usage
=================
mkhtml --conf=<conf> --makemake
    Generate the Makefile, the Makefile does this by itself.
mkhtml --conf=<conf> <filename>...
    Turn 'foo.html.src' into 'foo.html', which will have autogenerated
    navigation and footer.
make
    Update whatever is necessary.

Input format
============
Macro preprocessor.

    mkhtml --conf=<conf> <filename>...

Every filename MUST end with '.src'; the output file is the same name
without '.src' ('foo.html' is generated from 'foo.html.src').
Include files (for example a footer) are named in the configuration file
and can be pulled in with a macro.

Macros are expanded using Python's builtin str.replace(src, dest)
'__X__H1__FOO__' becomes '__X<h1>...</h1>FOO__'.
Looks undefined != is undefined.

__H1__
    Name for the page, from the navigation tree, as an h1 element.
__TITLE__
    Same name as __H1__, for the title.
__NAVIGATION__
    The autogenerated navigation.
__TIMESTAMP__
    Date (UTC, YYYY-MM-DD) when the output was generated.
__MODIFIED__
    Date (UTC, YYYY-MM-DD) of last modification of the source file.
__LINK_ARG__
    URL-escaped absolute URL for this page,
    http://validator.w3.org/check?uri=__LINK_ARG__
__OUT_T__
    'page' if the OUTPUT file does not have execute permission,
    'CGI script' if the output file has execute permission.
    'page generated <date>' would be misleading if it was said by a CGI
    script that generates FRESH content.
__URI__
    Server-relative URI, eg. /foo.html
__HOST__, __SCHEME__, __SITE__
    The configured host, the configured scheme, and scheme://host.
__<NAME>__
    For every entry in 'macros' and 'includes' (see below), with the key
    in upper case.  Include macros (eg. __FOOTER__) are the ONLY macros
    that cannot be used inside an include file.

Configuration file
==================
mkhtml --conf=<conf> [args...]

The configuration file contains a Python datastructure, it is read
using Python's own parser.
WARNING: eval() is capable of running evil() code.

The datastructure is a dictionary {}, (a hash if you're more familiar
with perl).

Keys
----
'tree'
    Main navigation, a tree structure.
    tuple () of (URL, name, children):
    `URL` is an absolute URL without protocol and host.
        The root of the webserver is '/'.
    `name` is the default title and h1 of the page.
        NOTE: You can set title and h1 manually, don't forget that.
    `children` is either None or a list [] of tuples () of
        (URL, name, children).
'subs'
    - Extra pages that don't fit in the navigation, they are always
      included in the navigation and they are always on the bottom.
    - ex. Privacy policy, Terms of use, etc.
    - List [] of tuples () of (URL, name).
    - Look at the documentation for 'tree', for information about
      `URL` and `name`.
'hidden'
    - Extra pages that shouldn't be shown in the navigation.
    - ex: Error pages
    - List [] of tuples () of (URL, name).
    - Look at the documentation for 'tree', for information about
      `URL` and `name`.
'conf'
    Path to the configuration file, as written into the Makefile.
    string
'script'
    Path to this script, as written into the Makefile.
    string
'includes'
    Dictionary {} of macro name -> path of a file to include, ex.
    {'footer': 'footer.inc'} makes __FOOTER__ available.
    ('footer' as a top-level key is no longer read by this script.)
'macros'
    Dictionary {} of macro name -> replacement string.
'index-names'
    List [] of directory index file names, ex. ['index.html'].
'host'
    Hostname, no protocol nor trailing slash.
    ex. 'example.com', '10.1.2.3'
    string
'scheme'
    URL scheme, ex. 'http' or 'https'.
    string
'navconf'
    Dictionary {} with
    'navdiv'  one of 'div', 'nav-div', 'nav': element(s) wrapping the
              navigation.
    'active'  element name used for the current page's (unlinked)
              entry, ex. 'b'.

NOTE: You probably need to pipe the output into a pager.
    ./mkhtml --conf=<conf> --doc | less
'''

# Spaces in page names are made non-breaking in the navigation.  A numeric
# character reference is used instead of the raw UTF-8 byte pair so that the
# output does not depend on the page's charset or on the Python major version.
NBSP = '&#160;'


def fixurl(url, index_names=None):
    '''Translate a server-relative URL into the path of its output file.

    The query string and the leading slash(es) are removed.  If the URL
    ends with a slash, the first name in `index_names` for which a file
    exists (relative to the current directory) is appended.

    `url`           Server-relative URL, eg. '/foo.html' or '/dir/'.
    `index_names`   Candidate index file names; defaults to
                    conf['index-names'] of the loaded configuration.

    Returns the relative path as a string.
    Raises ValueError if `url` names a directory and none of the index
    files exist (previously this printed a message and returned None,
    which crashed later while the Makefile was being written).
    '''
    real_url = url.split('?')[0]
    relative = real_url.lstrip('/')
    if not real_url.endswith('/'):
        return relative

    if index_names is None:
        index_names = conf['index-names']
    for index_name in index_names:
        path = relative + index_name
        try:
            os.stat(path)
        except OSError:
            continue
        return path
    raise ValueError(
        'No index file ({}) found for {}; did you forget to create a '
        'dummy index file?'.format(', '.join(index_names), url))


def my_parse_args(argv=None):
    '''Parse the command line.

    `argv`  List of arguments (without the program name); defaults to
            sys.argv[1:].

    Prints the full documentation if --doc was given.  If there is nothing
    to do (no files and no --makemake) the process exits: status 0 after
    --doc, otherwise the usage is printed and the status is 1.

    Returns the argparse namespace with `conf`, `files`, `mknav` and
    `print_doc`.
    '''
    parser = argparse.ArgumentParser(
        description='''Tool to generate navigation to [X]HTML (from .src)
        and generate a Makefile for automatic invocation of this tool.''')
    parser.add_argument(
        '--conf', dest='conf', required=True, action='store',
        help='''Path to the file that contains the navigation and
        miscellaneous configuration.
        WARNING: The content of the file is passed to eval()''')
    parser.add_argument(
        'files', nargs='*',
        help='''The filename(s) must end with '.src', it is a compiler
        pattern and the outfile is the filename without '.src'.''')
    parser.add_argument(
        '--makemake', dest='mknav', action='store_true',
        help='''Generate the Makefile instead, needed when new pages are
        added.''')
    parser.add_argument(
        '--doc', dest='print_doc', action='store_true',
        help='''Print out the doc-string for complete documentation.
        Add "--conf=<conf>" as an argument.
        ./mkhtml --conf=<conf> --doc''')
    args = parser.parse_args(argv)

    if args.print_doc:
        print(global_doc)

    if not args.mknav and not args.files:
        # NO-OP.
        if args.print_doc:
            sys.exit(0)
        parser.print_help()
        sys.exit(1)

    return args


def _all_urls(tree, extra):
    '''Return every URL in `tree` (depth first) followed by those in `extra`.'''
    urls = []

    def walk(node):
        urls.append(node[0])
        for child in node[2] or []:
            walk(child)

    walk(tree)
    urls.extend(pair[0] for pair in extra)
    return urls


def mk_makefile(conf):
    '''Write 'Makefile' in the current directory.

    The Makefile has one rule per page in conf['tree'], conf['subs'] and
    conf['hidden'] ('<page> : <page>.src'), and a rule that regenerates
    the Makefile itself when the configuration or the script changes.

    `conf`  The configuration dictionary.

    Raises ValueError (from fixurl) if a directory URL has no index file.
    The Makefile is only opened after all paths were resolved, so a
    failure does not leave a truncated Makefile behind.
    '''
    script = conf['script']
    sitecfg = conf['conf']
    includes = [conf['includes'][name] for name in conf['includes']]

    # Resolve URLs to output paths; drop duplicates (a page listed twice
    # would otherwise get two rules) while keeping the order.
    targets = []
    seen = set()
    for url in _all_urls(conf['tree'], conf['subs'] + conf['hidden']):
        target = fixurl(url, conf['index-names'])
        if target not in seen:
            seen.add(target)
            targets.append(target)

    continuation = ' \\\n\t\t'
    parts = ['''# Autogenerated by mkhtml ({})
CONF = {}
SCRIPT = {}
#FOOTER =
INCLUDES = {}
CMD = $(SCRIPT) --conf=$(CONF)
DEPS = $(CONF) $(SCRIPT) $(INCLUDES)

all : Makefile {}

Makefile : $(CONF) $(SCRIPT)
\t$(CMD) --makemake
\t$(MAKE)

'''.format(
        sys.argv[0],
        sitecfg,
        script,
        continuation.join(includes),
        continuation.join(targets),
    )]
    for target in targets:
        parts.append('''{0} : {0}.src $(DEPS)
\t$(CMD) {0}.src

'''.format(target))

    with open('Makefile', 'w') as makefile:
        makefile.write(''.join(parts))


def _find_path(tree, url):
    '''Return the URLs from the root of `tree` down to `url`, or None.

    URLs are compared without their query strings.
    '''
    target = url.split('?')[0]
    path = []

    def search(node):
        # Enter node.
        path.append(node[0])
        if node[0].split('?')[0] == target:
            return True
        for child in node[2] or []:
            if search(child):
                # [grand,...]child matches, bail out.
                return True
            # Clean up after child.
            path.pop()
        return False

    if search(tree):
        return path
    return None


def _flat_nav(tree, url):
    '''Return the navigation rows leading to `url` in `tree`.

    A row is ([head URL], [child URL, ...]).  There is one row for every
    node on the path from the root to `url` that has children (the last
    node on the path MAY have none).

    Raises ValueError if `url` is not in the tree.
    '''
    path = _find_path(tree, url)
    if path is None:
        raise ValueError(
            'URL "{}" is not in the navigation tree'.format(url))

    # URL -> list of child URLs (None for leaves); first occurrence wins.
    children_of = {}

    def flatten(node):
        if node[0] not in children_of:
            if node[2] is None:
                children_of[node[0]] = None
            else:
                children_of[node[0]] = [child[0] for child in node[2]]
        for child in node[2] or []:
            flatten(child)

    flatten(tree)

    return [([step], children_of[step])
            for step in path
            if children_of[step] is not None]


def _collect_names(tree):
    '''Return a dictionary URL -> name for every node in `tree`.'''
    names = {}

    def walk(node):
        names[node[0]] = node[1]
        for child in node[2] or []:
            walk(child)

    walk(tree)
    return names


def mk_navigation(tree, subs, hidden, url, navconf):
    '''Translate the tree of pages into the navigation for one page.

        XHTML, my_name = mk_navigation(tree, subs, hidden, url, navconf)

    `tree`      (URL, name, children), children is None or a list of
                such tuples.
    `subs`      [(URL, name), ...], always shown as the last row.
    `hidden`    [(URL, name), ...], never shown.
    `url`       URL of the page the navigation is generated for.
    `navconf`   {'navdiv': 'div' | 'nav-div' | 'nav', 'active': tag}

    `XHTML`     The navigation code.
    `my_name`   Name of the page `url`, the default for h1 and title.

    Raises ValueError if `url` is neither in `tree`, `subs` nor `hidden`.

    Example
    =======
        tree = ('/', 'Home', [
            ('/foo.html', 'foo', None),
            ('/bar.html', 'bar', [
                ('/baz.html', 'baz', None),
            ]),
        ])
        subs = [('/contact.html', 'contact'), ('/policy.html', 'policy')]
        url = '/baz.html'

    gives three rows, each one a <p class="row">:

        Home >> foo | bar
        bar  >> BAZ
                contact | policy

    Implementation
    ==============
    1. Choose the rows on the path from the root to `url`.
       NOTE: Not if the page is one of the subs/hidden, those only get
       the top level.
    2. Add the subs, that's easy.
    3. Get the names for the URLs.
    4. Generate XHTML.
       NOTE: The entry for `url` itself is NOT a link, it is rendered
       with the `active` element and with the brackets reversed.
    '''
    # 1. rows = [(heads, subs), ...], heads and subs are lists of URLs.
    rows = []
    if url not in [pair[0] for pair in subs + hidden]:
        rows.extend(_flat_nav(tree, url))
    else:
        # Root node and its children, without grandchildren.
        rows.append(([tree[0]], [child[0] for child in tree[2] or []]))

    # 2. Add the subs.
    rows.append(([], [pair[0] for pair in subs]))

    # 3. names[URL] = name
    names = _collect_names(tree)
    for pair in subs + hidden:
        names[pair[0]] = pair[1]

    # 4. Generate XHTML.
    navdiv_start = {
        'div': '<div',
        'nav-div': '<nav><div',
        'nav': '<nav',
    }[navconf['navdiv']]
    navdiv_end = {
        'div': '</div>',
        'nav-div': '</div></nav>',
        'nav': '</nav>',
    }[navconf['navdiv']]
    active = navconf['active']

    bracket_open = '<span class="textonly" translate="no">[</span>'
    bracket_close = '<span class="textonly" translate="no">]</span>'

    def render(link_url, is_sub):
        css_class = 'sub' if is_sub else 'head'
        label = names[link_url].replace(' ', NBSP)
        if link_url == url:
            # Self: no link, brackets reversed.
            html = (
                bracket_close +
                '<{0} class="{1} active">{2}</{0}>'.format(
                    active, css_class, label) +
                bracket_open)
        else:
            html = (
                bracket_open +
                '<a class="{0}" href="{1}">{2}</a>'.format(
                    css_class, link_url, label) +
                bracket_close)
        if not is_sub:
            html += '\n>>'
        return html

    paragraphs = []
    for heads, children in rows:
        links = [render(link_url, False) for link_url in heads]
        links.extend(render(link_url, True) for link_url in children)
        paragraphs.append('<p class="row">\n' + '\n'.join(links) + '\n</p>')

    # The leading '\n' is there because __NAVIGATION__ is indented in .src
    xhtml = (
        '\n<!-- BEGIN autogenerated navigation -->\n' +
        navdiv_start + ' id="navigation"><div id="nav_inner">' +
        '\n<p><a href="#content" class="textonly">Skip navigation</a></p>' +
        '\n' + '\n'.join(paragraphs) + '\n' +
        '<hr class="textonly"/>\n' +
        '</div>' + navdiv_end +
        '\n<!-- END autogenerated navigation -->\n')
    return xhtml, names[url]


def substitute(substitutes, s):
    '''Expand macros in `s`.

    `substitutes`   [(NAME, replacement), ...]; every '__NAME__' in `s`
                    is replaced, in list order, using str.replace.
    `s`             The text.

    Returns the expanded text.
    '''
    expanded = s
    for name, replacement in substitutes:
        expanded = expanded.replace('__{}__'.format(name), replacement)
    return expanded


def _page_url(out_name, index_names):
    '''Return the server-relative URL for the output file `out_name`.

    A trailing index file name is removed: 'dir/index.html' -> '/dir/'.
    Only the suffix is removed, not other occurrences of the name.
    '''
    page_url = '/' + out_name
    for index_name in index_names:
        if index_name and page_url.endswith('/' + index_name):
            return page_url[:-len(index_name)]
    return page_url


def _read_file(path):
    '''Return the content of the file `path`.'''
    with open(path) as handle:
        return handle.read()


def main(argv=None):
    '''Entry point: generate the Makefile and/or the requested pages.

    `argv`  List of arguments (without the program name); defaults to
            sys.argv[1:].

    Loads the configuration into the module-level `conf`, then for every
    'X.src' given on the command line writes 'X' with all macros expanded.
    Exits with an error message if a filename does not end with '.src'.
    '''
    global conf

    suffix = '.src'
    date_format = '%Y-%m-%d'    # Portable spelling of '%F'.

    args = my_parse_args(argv)

    # Refuse early: blindly cutting off four characters could make us
    # overwrite an unrelated file.
    for filename in args.files:
        if not filename.endswith(suffix):
            sys.exit("mkhtml: '{}' does not end with '{}'".format(
                filename, suffix))

    # SECURITY: the configuration file is executed by eval(), only use
    # configuration files you trust.
    conf = eval(_read_file(args.conf))

    tree = conf['tree']
    subs = conf['subs']
    hidden = conf['hidden']
    macros = conf['macros']
    includes = conf['includes']

    macros['host'] = host_name = conf['host']
    macros['scheme'] = scheme = conf['scheme']
    macros['site'] = site = scheme + '://' + host_name

    navconf = conf['navconf']
    navconf['site'] = site

    if args.mknav:
        mk_makefile(conf)

    for filename in args.files:
        out_name = filename[:-len(suffix)]
        this_url = _page_url(out_name, conf['index-names'])

        # <output_type> generated at __TIMESTAMP__
        if os.access(out_name, os.X_OK):
            output_type = 'CGI script'
        else:
            output_type = 'page'

        xhtml_nav, myname = mk_navigation(
            tree, subs, hidden, this_url, navconf)

        # Uses the configured scheme (was hard-coded to 'http://').
        link_arg = site + (
            this_url
            .replace('&', '%26')
            .replace('?', '%3f')
            .replace('=', '%3d'))

        autofill = [
            # NOTE(review): there is no closing tag here, presumably the
            # .src file supplies it -- confirm against the templates.
            ('TITLE', '<title>{}'.format(myname)),
            ('NAVIGATION', xhtml_nav),
            # NOTE(review): the markup in this literal was lost in the
            # copy of the file this was reviewed from; <h1> follows from
            # the macro name and the documentation -- confirm.
            ('H1', '<h1>{}</h1>'.format(myname)),
            ('TIMESTAMP', time.strftime(date_format, time.gmtime())),
            ('MODIFIED', time.strftime(
                date_format, time.gmtime(os.stat(filename).st_mtime))),
            ('LINK_ARG', link_arg),
            ('OUT_T', output_type),
            ('URI', this_url),
        ]
        for macro in macros:
            autofill.append((macro.upper(), macros[macro]))

        # Include files see the macros above, but not each other.
        autofill.extend([
            (include.upper(),
             substitute(autofill, _read_file(includes[include])))
            for include in includes
        ])

        # Expand first, open the output last: a failure must not leave a
        # truncated page behind.
        page = substitute(autofill, _read_file(filename))
        with open(out_name, 'w') as out_f:
            out_f.write(page)


conf = {}

if __name__ == '__main__':
    main()