mkhtml - navigation maker

It is a simple HTML/CGI preprocessor that automagically generates the navigation for every page from a tree.

Last modified
Lines 668

Parent directory Download CGIread sitemap Main page

Quick links: fetchurls fixurl flat_nav flatten getname main mk_makefile mk_navigation my_parse_args navigation search substitute title

  1. #!/usr/bin/python2
  2. import os
  3. import sys
  4. import argparse
  5. import time
  6. # New MACRO syntax:
  7. # %MACRO("arg","arg")%
  8. # %%
  9. global_doc = '''
  10. Goals
  11. =====
  12.     - All pages' navigation gets regenerated automagically if you
  13.         add/remove/renames pages.
  14.     - Navigation is autogenerated, footer is autoincluded,
  15.         with autogenerated timestamps.
  16.     - make(1)
  17. Commandline usage
  18. =================
  19.     <script_name> --conf=<sitecfg> --makemake
  20.         Generate the Makefile, the Makefile does this by itself.
  21.     <script_name> --conf=<sitecfg> <filename> ...
  22.         Turn 'foo.html.src' into 'foo.html', which will have
  23.         autogenerated navigation and footer.
  24.     
  25.     make
  26.         Update whatever is necessary.
  27. Input format
  28. ============
  29.     
  30.     Macro preprocessor.
  31.     
  32.     <script_name> --conf=<sitecfg> <filename> ...
  33.     Where filename SHOULD end with '.src', for automation using make,
  34.     the filename MUST end with '.src'.
  35.     
  36.     And the footer file whose path/name is stored in the configuration
  37.     file.
  38.     
  39.     'foo.html' is generated from 'foo.html.src' and the footer can be
  40.     included with a macro.
  41.     
  42.     Macros are expanded using Python's builtin str.replace(src, dest)
  43.     '__X__H1__FOO__' becomes '__X<h1 id="title">...</h1>FOO__'.
  44.     Looks undefined != is undefined.
  45.     __H1__              <h1 id="title">
  46.                             Name for the page,
  47.                             from the navigation tree.
  48.                         </h1>
  49.     
  50.     __TITLE__           <title>Same as __H1__</title>
  51.     
  52.     __TIMESTAMP__       Time when output was generated.
  53.     
  54.     __MODIFIED__        Time of last modification of the source file.
  55.     
  56.     __LINK_ARG__        URL-escaped absolute URL for this page,
  57.                         http://validator.w3.org/check?uri=__LINK_ARG__
  58.     
  59.     __OUT_T__           'output' if OUTPUT file does not have execute
  60.                         permission, 'script' if output file has
  61.                         execute permission.
  62.                         'output generated <LONG AGO>' would be
  63.                         misleading if it `said` by a CGI script that
  64.                         generates FRESH content.
  65.     
  66.     __URI__             Server-relative URI, eg. /foo.html
  67.     
  68.     __FOOTER__          This is the ONLY macro that cannot be used in
  69.                         the footer-file.
  70. Configuration file
  71. ==================
  72.     <script_name> --conf=<conf_file> [args...]
  73.     
  74.     The configuration file contains a Python datastructure, it is read
  75.     using Python's own parser.  WARNING: eval() is capable of running
  76.     evil() code.
  77.     
  78.     The datastructure is a dictionary <dict> {}, (a hash if you're
  79.     more familiar with perl).
  80.     
  81.     
  82.     Keys
  83.     ----
  84.     
  85.         'tree'          Main navigation, a tree structure.
  86.                         tuple () of (URL, name, children):
  87.                             `URL` is a absolute URL without protocol
  88.                                 and host. The root of the webserver
  89.                                 is '/'.
  90.                             `name` is the default title and h1 fo
  91.                                 the page. NOTE: You can set title
  92.                                 and h1 manually, don't forget that.
  93.                             `children` is either None or a list []
  94.                                 of tuples () of (URL, name, children).
  95.         
  96.         'subs'          - Extra pages that don't fit in the
  97.                             navigation, they are always included in
  98.                             the navigation and they are always on the
  99.                             bottom.
  100.                         - ex. Privacy policy, Terms of use, etc.
  101.                         - List [] of tuples () of (URL, name).
  102.                         - Look at the documentation for 'tree', for
  103.                             information about `URL` and `name`.
  104.         
  105.         'hidden'        - Extra pages that shouldn't be shown in the
  106.                             navigation.
  107.                         - ex: Error pages
  108.                         - List [] of tuples () of (URL, name).
  109.                         - Look at the documentation for 'tree', for
  110.         
  111.         'conf'          Path to <conf_file>. string
  112.         
  113.         'script'        Path to <script_name>. string
  114.         
  115.         'footer'        Path to the footer file. string
  116.         
  117.         'host'          Hostname, no protocol nor trailing slash.
  118.                         ex. 'example.com', '10.1.2.3'
  119.                         string
  120. NOTE: You probably need to pipe the output into a pager.
  121. ./mkhtml --conf= --doc | less
  122. '''
  123. def fixurl(url):
  124.         '''If trailing slash: append 'index.html' in the Makefile.
  125.         Remove first character (must be a slash).
  126.         '''
  127.         real_url = url.split('?')[0]
  128.         if real_url.endswith('/'):
  129.             for index_name in conf['index-names']:
  130.                 try:
  131.                     path = real_url.lstrip('/') + index_name
  132.                     os.stat(path)
  133.                     return path
  134.                 except:
  135.                     pass
  136.             #return real_url.lstrip('/') + 'index.html'
  137.             print('Shit! Did you forgot to create a dummy index file for {}?'.format(url))
  138.         else:
  139.             return real_url.lstrip('/')
  140. def my_parse_args():
  141.     '''
  142.     Uses argparse.
  143.     Use the source, Luke.
  144.     '''
  145.     parser = argparse.ArgumentParser(
  146.         description='''Tool to generate navigation to [X]HTML
  147. (from <filename>.src) and generate a Makefile for automatic
  148. invocation of this tool.''')
  149.     parser.add_argument('--conf', dest='conf', required=True, action='store',
  150.         help='''Path to the file that contains the navigation and
  151. miscellaneous configuration.  WARNING: The content of the file is
  152. passed to eval()''')
  153.     parser.add_argument('files',
  154.         help='''The filename(s) must end with '.src', it is a compiler
  155. pattern and the outfile is <filename> - '.src'.''', nargs='*')
  156.     parser.add_argument('--makemake', dest='mknav', action='store_true',
  157.         help='''Generate the Makefile instead,
  158. needed when new pages are added.''')
  159.     parser.add_argument('--doc', dest='print_doc', action='store_true',
  160.         help='''Print out the doc-string for complete documentation.
  161. Add "--conf=" as an argument. ./mkhtml --conf= --doc''')
  162.     args = parser.parse_args()
  163.     
  164.     if args.print_doc:
  165.         print(global_doc)
  166.     
  167.     if (not args.mknav) and (not args.files):
  168.         # NO-OP.
  169.         if args.print_doc:
  170.             sys.exit(0)
  171.         else:
  172.             parser.print_help()
  173.             sys.exit(1)
  174.     
  175.     return args
  176. def mk_makefile(conf):
  177.     tree = conf['tree']
  178.     subs = conf['subs'] + conf['hidden']
  179.     script = conf['script']
  180.     #footer = conf['footer']
  181.     sitecfg = conf['conf']
  182.     includes = [conf['includes'][x] for x in conf['includes']]
  183.     
  184.     makefile = open('Makefile', 'w')
  185.     
  186.     # Find all urls.
  187.     urls = []
  188.     def fetchurls(tree):
  189.         for url, ign, child in tree:
  190.             #print(url, ign)
  191.             urls.append(url)
  192.             if child is not None:
  193.                 fetchurls(child)
  194.     fetchurls([tree])
  195.     
  196.     for url, ign in subs:
  197.         urls.append(url)
  198.     
  199.     urls = map(fixurl, urls)
  200.     
  201.     makefile.write('''# Autogenerated by mkhtml ({})
  202. CONF = {}
  203. SCRIPT = {}
  204. #FOOTER = <left-curly><right-curly>
  205. INCLUDES = {}
  206. CMD = $(SCRIPT) --conf=$(CONF)
  207. DEPS = $(CONF) $(SCRIPT) $(INCLUDES)
  208. all : Makefile {}
  209. Makefile : $(CONF) $(SCRIPT)
  210. \t$(CMD) --makemake
  211. \tmake
  212. '''.format(
  213.         sys.argv[0],
  214.         sitecfg,
  215.         script,
  216.         ' \\\n\t\t'.join(includes),
  217.         ' \\\n\t\t'.join(urls)
  218.     ))
  219.     for url in urls:
  220.         makefile.write('''{0} : {0}.src $(DEPS)
  221. \t$(CMD) {0}.src
  222. '''.format(url))
  223.     makefile.close()
  224. def mk_navigation(tree, subs, hidden, url, navconf):
  225.     '''
  226.     Automagically translate the tree of pages into the navigation
  227.     for a specific page.
  228.     
  229.     XHTML, my_name = mk_navigation(root, url)
  230.     `XHTML`     The navigation code
  231.     `my_name`   Name for default <h1> and <title>
  232.     
  233.     tree = (url, title, children)
  234.     subs = [(url, title), ...]
  235.     
  236.     See Example and Implementation
  237.     
  238.     Example
  239.     =======
  240.     
  241.     Input
  242.     -----
  243.     
  244.     tree = (
  245.         '/', 'Home', [
  246.             ('/foo.html', 'foo', None),
  247.             ('/bar.html', 'bar', [
  248.                 ('/baz.html', 'foo', None),
  249.             ]),
  250.         ]
  251.     )
  252.     subs = [
  253.             ('/contact.html', 'contact'),
  254.             ('/policy.html', 'policy'),
  255.     ]
  256.     url = '/baz.html'
  257.     
  258.     
  259.     Output
  260.     ------
  261.     
  262.     Home | | foo | bar
  263.     bar | | BAZ
  264.     | contact | policy
  265.     
  266.     
  267.     XHTML fragment
  268.     --------------
  269.     
  270.     <div id="navigation"><div id="nav_inner">
  271.         <p class="row">
  272.             <span class="head1"><span class="head2"><a href="/">
  273.                 Home</a></span></span>
  274.             <span class="sub1"><span class="sub2"><a href="/foo.html">
  275.                 foo</a></span></span>
  276.             <span class="sub1"><span class="sub2"><a href="/bar.html">
  277.                 bar</a></span></span>
  278.         </p>
  279.         <p class="row">
  280.             <span class="head1"><span class="head2"><a href="/bar.html">
  281.                 bar</a></span></span>
  282.             <!-- Bold; self -->
  283.             <span class="sub1"><b class="sub2">baz</b></span>
  284.         </p>
  285.         <p class="row">
  286.             <!-- These have no parent/head -->
  287.             <span class="sub1"><span class="sub2"><a href="/contact.html">
  288.                 contact</a></span></span>
  289.             <span class="sub1"><span class="sub2"><a href="/policy.html">
  290.                 policy</a></span></span>
  291.         </p>
  292.     </div></div>
  293.     
  294.     
  295.     
  296.     Implementation
  297.     ==============
  298.     
  299.     tree        # This is the tree structure:
  300.                 #    (URL, name, children=<tree>)
  301.     subs        # These will always be used (contact, policy, etc.)
  302.     
  303.     rows = [row]        # This looks like the output
  304.         #row = (heads, subs)
  305.         #heads, subs = [''], ['']
  306.     names = {}          # Step 3.
  307.         #   names[URL] = name
  308.         #   type(names) = dict
  309.     
  310.     *.  Step 1 and 2 Will only add URLs
  311.     1.  Choose the correct path from the tree.
  312.         NOTE: Not if the page is one of the subs
  313.         NOTE: Subs will still need the top level.
  314.     2.  Add the subs, that's easy.
  315.     3.  Get the names for the URLs.
  316.     4.  Generate XHTML, NOTE: DO NOT generate selfreferences
  317.     
  318.     Step 1
  319.     ------
  320.     
  321.         1.      Find the path
  322.         2.      Make a flattened version containing the full tree.
  323.                 [/], [/foo.html, /bar.html]
  324.                 [/bar.html], [/baz.html]
  325.         3.      Use the path to select rows from the flattened version.
  326.                 NOTE: The last item in `path` MAY have no children.
  327.         
  328.         
  329.         Find the path
  330.         -------------
  331.         
  332.         # 1. Add self.
  333.         # 2. Return True if self == destination
  334.         # 3. Recursively crawl children.
  335.         # 4. Clean up after children that return False
  336.     
  337.     '''
  338.     # def mk_navigation(root, url):
  339.     def flat_nav(tree, URL):
  340.         '''
  341.         Return the rows to `URL` in the tree `tree`.
  342.         
  343.         magic('/baz.html')
  344.         path = ['/', '/bar.html', '/baz.html']
  345.         
  346.         # 1. Generate path
  347.         # 2. Flatten navigation tree into rows [([head],[sub,...]),...]
  348.         # 3. Select
  349.         '''
  350.         path = []
  351.         
  352.         # 1. Add self.
  353.         # 2. Return True if self == destination
  354.         # 3. Recursively crawl children.
  355.         # 4. Clean up after children that return False
  356.         
  357.             # Enter node.
  358.             path.append(tree[0])
  359.             
  360.             # Is it the right one?
  361.             if tree[0].split('?')[0] == URL.split('?')[0]:
  362.                 return True
  363.             
  364.             if tree[2] is not None:
  365.                 for child in tree[2]:
  366.                     # TIP: When recursing into children, verify that you
  367.                     # spelled 'child', not 'tree'.
  368.                     if search(child, URL):
  369.                         # [grand,...]child matches, bail out.
  370.                         return True
  371.                     else:
  372.                         # Clean up after child.
  373.                         path.pop(-1)
  374.                 else:
  375.                     # No [grand,...]children were correct.
  376.                     return False
  377.             else:
  378.                 # No children.
  379.                 return False
  380.         
  381.         # This will fail if URL is not found.
  382.         assert search(tree, URL)
  383.         
  384.         # 2. flatten
  385.         flat = []
  386.         def flatten(tree):
  387.             # Add self.
  388.             if tree[2] is not None:
  389.                 children = map(lambda x: x[0], tree[2])
  390.             else:
  391.                 children = None
  392.             flat.append(([tree[0]], children))
  393.             # Recurse into children.
  394.             if tree[2] is not None:
  395.                 for child in tree[2]:
  396.                     flatten(child)
  397.         flatten(tree)
  398.         
  399.         # 3. Select using `path` and `flat`
  400.         out = []
  401.         for path_item in path:
  402.             # Find item in flat.
  403.             for index, flat_item in enumerate(flat):
  404.                 if flat_item[0][0] == path_item:
  405.                     # 0'th head in flat_item
  406.                     if flat_item[1] is not None:
  407.                         # TODO: Disallow bogus parent/child.
  408.                         #       ``I am not one of my parent's children.``
  409.                         out.append(flat[index])
  410.                     break
  411.             else:
  412.                 raise ValueError(
  413.                     'Bogus URL "{}", step 1.3.,  cannot happen!'.format(URL))
  414.         #rows.extend(flat_nav(tree, URL))
  415.         #       rows = [row,...]   # This looks like the output
  416.         #               row = (heads, subs)
  417.         #               heads, subs = [URL, ...]
  418.         return out
  419.     
  420.     # def mk_navigation(root, url):
  421.     tree        # This is the tree structure:
  422.                 #    (URL, name, children=<tree>)
  423.     subs        # These will always be used
  424.     rows = []
  425.     #rows = [row,...]   # This looks like the output
  426.         #row = (heads, subs)
  427.         #heads, subs = [URL,...]
  428.     names = {}
  429.     #   names[URL] = name
  430.     #   type(names) = dict
  431.     
  432.     # Do not find full path for subs.
  433.     if url not in map(lambda x: x[0], subs + hidden):
  434.         # This part is tricky enough to deserve its own function.
  435.         rows.extend(flat_nav(tree, url))
  436.     else:
  437.         rows.append((
  438.             [tree[0]],                          # Root node.
  439.             map(lambda x: x[0], tree[2])        # Remove grandchildren.
  440.         ))
  441.     # Add the subs
  442.     rows.append(([], map(lambda x: x[0], subs)))
  443.     
  444.     # 3. Get names for the URLs
  445.     #   names[URL] = name
  446.     #   type(names) = dict
  447.     #names = {}
  448.     def getname(tree):
  449.         # add self.
  450.         names[tree[0]] = tree[1]
  451.         # Recurse into children.
  452.         if tree[2] is not None:
  453.             for child in tree[2]:
  454.                 getname(child)
  455.     # TIP: If you have a recursing function, USE IT.
  456.     getname(tree)
  457.     for pair in subs + hidden:
  458.         names[pair[0]] = pair[1]
  459.     # 4. Generate XHTML.
  460.     # NOTE: DO NOT generate selfreferences
  461.     # div_s     <div id="navigation">...</div>
  462.     # p_s       <p class="row">...</p>
  463.     #           '\n'.join(rows)
  464.     # link_s[is_sub][no_click]  links_s[2][2]
  465.     #           rows[row_n][is_sub][link_n]
  466.     #           link_s[is_sub][link_url==URL].format(URL, name)
  467.     # chunks    chunks of the output string
  468.     # big_chunks        <p>
  469.     
  470.     navdiv_start = {
  471.         'div': '<div',
  472.         'nav-div': '<nav><div',
  473.         'nav': '<nav',
  474.     }[navconf['navdiv']]
  475.     navdiv_end = {
  476.         'div': '</div>',
  477.         'nav-div': '</div></nav>',
  478.         'nav': '</nav>',
  479.     }[navconf['navdiv']]
  480.     
  481.     # '\n' huh? why? Because __NAVIGATION__ is indented in .src
  482.     div_s = (
  483.         '\n<!-- BEGIN autogenerated navigation -->\n' +
  484.         navdiv_start +
  485.         ' id="navigation"><div id="nav_inner">' +
  486.         '\n<p><a href="#content" class="textonly">Skip navigation</a></p>'
  487.         '\n{}\n' +
  488.         '<hr class="textonly"/>\n' +
  489.         '</div>' +
  490.         navdiv_end + 
  491.         '\n<!-- END autogenerated navigation -->\n')
  492.     p_s = '<p class="row">\n{}\n</p>'
  493.     
  494.     indent = ' ' * 8
  495.     active = navconf['active']
  496.     
  497.     links_s = [
  498.         [
  499.             (
  500.                 '<span class="textonly" translate="no">[</span>' +
  501.                 '<a class="head" href="{0}">{1}</a>' +
  502.                 '<span class="textonly" translate="no">]</span>\n' +
  503.                 '&gt;&gt;'
  504.             ).format('{0}', '{1}'),
  505.             (
  506.                 '<span class="textonly" translate="no">]</span>' +
  507.                 '<{0} class="head active">{1}</{0}>' +
  508.                 '<span class="textonly" translate="no">[</span>\n' +
  509.                 '&gt;&gt;'
  510.             ).format(active, '{1}'),
  511.         ],
  512.         [
  513.             (
  514.                 '<span class="textonly" translate="no">[</span>' +
  515.                 '<a class="sub" href="{0}">{1}</a>' +
  516.                 '<span class="textonly" translate="no">]</span>'
  517.             ).format('{0}', '{1}'),
  518.             (
  519.                 '<span class="textonly" translate="no">]</span>' +
  520.                 '<{0} class="sub active">{1}</{0}>' +
  521.                 '<span class="textonly" translate="no">[</span>'
  522.             ).format(active, '{1}'),
  523.         ],
  524.     ]
  525.     # '{2}<span class="textonly">|</span>\n' +
  526.     
  527.     chunks = []
  528.     big_chunks = []
  529.     
  530.     for row in rows:
  531.         for row_x in range(2):
  532.             for link_URL in row[row_x]:
  533.                 chunks.append(links_s[row_x][link_URL==url].format(
  534.                     #navconf['site'] + link_URL,
  535.                     link_URL,
  536.                     names[link_URL].replace(' ', '\xc2\xa0'),
  537.                 ))
  538.         big_chunks.append(p_s.format('\n'.join(chunks)))
  539.         chunks = []
  540.     return div_s.format('\n'.join(big_chunks)), names[url]
  541. def substitute(substitutes, s):
  542.     tmp = s
  543.     for pair in substitutes:
  544.         tmp = tmp.replace('__{}__'.format(pair[0]), pair[1])
  545.     return tmp
  546. def main():
  547.     '''
  548.     '''
  549.     global conf
  550.     args = my_parse_args()
  551.     conf = eval(open(args.conf).read())
  552.     tree = conf['tree']
  553.     subs = conf['subs']
  554.     hidden = conf['hidden']
  555.     makefile_conf = conf['conf']
  556.     makefile_script = conf['script']
  557.     macros = conf['macros']
  558.     includes = conf['includes']
  559.     macros['host'] = host_name = conf['host']
  560.     macros['scheme'] = scheme = conf['scheme']
  561.     macros['site'] = site = scheme + '://' + host_name
  562.     navconf = conf['navconf']
  563.     navconf['site'] = macros['site']
  564.     if args.mknav:
  565.         mk_makefile(conf)
  566.     for filename in args.files:
  567.         out_name = filename[:-len('.src')]
  568.         this_url = '/' + out_name
  569.         for index_name in conf['index-names']:
  570.             if this_url.endswith('/' + index_name):
  571.                 this_url = this_url.replace(index_name, '')
  572.         #tree = [root[:3]]
  573.         #subs = root[3]
  574.         
  575.         # <output_type> generated at __TIMESTAMP__
  576.         output_type = {
  577.                 False: 'page',
  578.                 True: 'CGI script'
  579.             }[os.access(out_name, os.X_OK)]
  580.         
  581.         xhtml_nav, myname = mk_navigation(tree, subs, hidden, this_url, navconf)
  582.         
  583.         link_arg = (
  584.             'http://' + host_name +
  585.             this_url.replace('&','%26').replace('?','%3f').replace('=','%3d')
  586.         )
  587.         
  588.         autofill = [
  589.             ('TITLE',           '<title>{}</title>'.format(myname)),
  590.             ('NAVIGATION',      xhtml_nav),
  591.             ('H1',              '<h1 id="title">{}</h1>'.format(myname)),
  592.             ('TIMESTAMP',       time.strftime('%F', time.gmtime())),
  593.             ('MODIFIED',        time.strftime(
  594.                                     '%F',
  595.                                     time.gmtime(os.stat(filename).st_mtime)
  596.                                 )
  597.             ),
  598.             ('LINK_ARG',        link_arg),
  599.             ('OUT_T',           output_type),
  600.             ('URI',             this_url),
  601.         ]
  602.         
  603.         for macro in macros:
  604.             autofill.append((macro.upper(), macros[macro]))
  605.         
  606.         #footer_file = open(footer)
  607.         #footer = substitute(autofill, footer_file.read())
  608.         #footer_file.close()
  609.         
  610.         #autofill.append(('FOOTER', footer))
  611.         
  612.         autofill_extend = []
  613.         for include in includes:
  614.             f = open(includes[include])
  615.             content = substitute(autofill, f.read())
  616.             f.close()
  617.             autofill_extend.append((include.upper(), content))
  618.         autofill.extend(autofill_extend)
  619.         
  620.         in_f = open(filename)
  621.         out_f = open(out_name, 'w')
  622.         out_f.write(substitute(autofill, in_f.read()))
  623.         out_f.close()
  624.         in_f.close()
# Module-level site configuration.  main() rebinds this global to the
# dictionary evaluated from the --conf file; fixurl() reads
# conf['index-names'] from it.
conf = {}
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()