source code of /sitemap.py.src

Last modified
Lines 199

Parent directory Download CGIread sitemap Main page

Quick links: HTML XML content lastmod_changefreq main pretty write_sitemap

  1. #!/usr/bin/python2
  2. # -*- coding: UTF-8 -*-
  3. import compressout
  4. import os
  5. import time
  6. import cgitb
  7. cgitb.enable()
  8. import sitemapdata
  9. def lastmod_changefreq(filename, mod_compressout):
  10.     changefreq = [
  11.         ('hourly',  21600),     # 6 hours
  12.         ('daily',   259200),    # 3 days
  13.         ('weekly',  1209600),   # 2 weeks
  14.         ('monthly', 5184000),   # 60 days
  15.         ('yearly',  -1)             # Stop
  16.     ]
  17.     last_modifed = os.stat(filename).st_mtime
  18.     mod_compressout.write_b('    <lastmod>{}</lastmod>\n'.format(
  19.         time.strftime('%Y-%m-%d', time.gmtime(last_modifed))
  20.     ))
  21.     # Use highest frequency that is at most 2/T
  22.     # T is the time between the previous change and the next change.
  23.     # As the next change has not yet occurred, assume that
  24.     # now = previous + T/2, ie. right in the middle.
  25.     time_delta = time.time() - last_modifed
  26.     changefreq_kw = None
  27.     for keyword, limit in changefreq:
  28.         if limit/2.0 > time_delta:
  29.             break
  30.         changefreq_kw = keyword
  31.         if limit < 0:
  32.             break
  33.     mod_compressout.write_b(
  34.         '    <changefreq>{}</changefreq>\n'.format(keyword)
  35.     )
  36. def pretty(text):
  37.     text = text.replace('\n', ' ')
  38.     while 2*' ' in text:
  39.         text = text.replace(2*' ', 1*' ')
  40.     text = text.replace('&', '&amp;')
  41.     text = text.replace('<', '&lt;')
  42.     text = text.replace('>', '&gt;')
  43.     return text.strip()
  44. def XML():
  45.     compressout.write_h('Content-Type: application/xml; charset=UTF-8\n\n')
  46.     compressout.write_b('<?xml version="1.0" encoding="UTF-8"?>\n')
  47.     compressout.write_b(
  48.         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
  49.     )
  50.     compressout.write_b(
  51.         'xmlns:i="http://www.google.com/schemas/sitemap-image/1.1"\n'
  52.     )
  53.     compressout.write_b(
  54.         'xmlns:v="http://www.google.com/schemas/sitemap-video/1.1"\n'
  55.     )
  56.     compressout.write_b('>\n\n')
  57.     # Flatten subsitemaps
  58.     for url in sitemapdata.sitemap:
  59.         if 'subsitemap' in url:
  60.             sitemapdata.sitemap.extend(url['subsitemap'])
  61.             del url['subsitemap']
  62.     for url in sitemapdata.sitemap:
  63.         compressout.write_b('<url>\n')
  64.         compressout.write_b('    <loc>{}</loc>\n'.format(
  65.             sitemapdata.site + url['URL']
  66.         ))
  67.         if 'priority' in url:
  68.             compressout.write_b('    <priority>{}</priority>\n'.format(
  69.                 url['priority']
  70.             ))
  71.         if 'file' in url:
  72.             lastmod_changefreq(url['file'], compressout)
  73.         if 'images' in url:
  74.             for image in url['images']:
  75.                 compressout.write_b('    <i:image>\n')
  76.                 compressout.write_b(
  77.                     '        <i:loc>{}</i:loc>\n'.format(
  78.                         sitemapdata.imgsite + image['URL']
  79.                     )
  80.                 )
  81.                 if 'description' in image:
  82.                     compressout.write_b(
  83.                         '        <i:caption>{}</i:caption>\n'.format(
  84.                             pretty(image['description'])
  85.                         )
  86.                     )
  87.                 # Image license.
  88.                 if 'license' in image:
  89.                     license = image['license']
  90.                 else:
  91.                     license = sitemapdata.imglicense
  92.                 if license is not None:
  93.                     compressout.write_b(
  94.                         '        <i:license>{}</i:license>\n'.format(
  95.                             license
  96.                         )
  97.                     )
  98.                 compressout.write_b('    </i:image>\n')
  99.         if 'videos' in url:
  100.             for video in url['videos']:
  101.                 compressout.write_b('    <v:video>\n')
  102.                 compressout.write_b(
  103.                     '        <v:title>{}</v:title>\n'.format(
  104.                         video['title']
  105.                     )
  106.                 )
  107.                 if 'content' in video:
  108.                     compressout.write_b(
  109.                         '        <v:content_loc>{}</v:content_loc>\n'.format(
  110.                             video['content']
  111.                         )
  112.                     )
  113.                 if 'player' in video:
  114.                     compressout.write_b(
  115.                         '        <v:player_loc>{}</v:player_loc>\n'.format(
  116.                             video['player']
  117.                         )
  118.                     )
  119.                 compressout.write_b(
  120.                     '        <v:description>{}</v:description>\n'.format(
  121.                         video['description']
  122.                     )
  123.                 )
  124.                 compressout.write_b(
  125.                     '        <v:thumbnail_loc>{}</v:thumbnail_loc>\n'.format(
  126.                         video['thumbnail']
  127.                     )
  128.                 )
  129.                 compressout.write_b('    </v:video>\n')
  130.         compressout.write_b('</url>\n')
  131.     compressout.write_b('\n</urlset>\n')
  132. def HTML():
  133.     if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
  134.         content_type = 'application/xhtml+xml'
  135.     else:
  136.         content_type = 'text/html'
  137.     compressout.write_h('Content-Type: {}; charset=UTF-8\n\n'.format(
  138.         content_type
  139.     ))
  140.     compressout.write_b('''__HTML5NC__''')
  141.     compressout.write_b('''
  142.         __TITLE__
  143.         <meta name="robots" content="noindex, follow"/>
  144.     </head>
  145.     <body>
  146.         __NAVIGATION__
  147.         <main><div id="content">
  148.             __H1__
  149. ''')
  150.     def write_sitemap(sitemap):
  151.         compressout.write_b('<ul>\n')
  152.         for url in sitemap:
  153.             compressout.write_b('<li><a href="{}">{}</a>'.format(
  154.                 url['URL'].replace('&', '&amp;').replace('"', '&quot;'),
  155.                 pretty(url['description'])
  156.             ))
  157.             if 'subsitemap' in url:
  158.                 write_sitemap(url['subsitemap'])
  159.             compressout.write_b('</li>\n')
  160.         compressout.write_b('</ul>\n')
  161.     write_sitemap(sitemapdata.sitemap)
  162.     compressout.write_b('''
  163.             <h2>Other sitemaps</h2>
  164.             <ul>
  165.     ''')
  166.     for URL, name in sitemapdata.html_sitemaps:
  167.         compressout.write_b('<li><a href="{}">{}</a></li>\n'.format(
  168.             URL.replace('&', '&amp;').replace('"', '&quot;'),
  169.             pretty(name)
  170.         ))
  171.     compressout.write_b('''
  172.             </ul>
  173.         </div></main>
  174.         __FOOTER__
  175.     </body>
  176. </html>
  177.     ''')
  178. def main():
  179.     os.chdir(sitemapdata.basedir)
  180.     compressout.init()
  181.     compressout.write_h('X-Robots-Tag: noindex, follow\n')
  182.     query_string = os.getenv('QUERY_STRING')
  183.     if query_string == 'xml':
  184.         XML()
  185.     else:
  186.         HTML()
  187.     compressout.done()
# Run the request handler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()