#!/usr/bin/python2
# -*- coding: UTF-8 -*-
"""CGI script serving a website sitemap.

``?xml`` in the query string yields a sitemaps.org-protocol XML sitemap
(with Google image/video extensions); anything else yields a
human-readable (X)HTML page.  Sitemap data comes from the project-local
``sitemapdata`` module; all output goes through ``compressout``
(headers via ``write_h``, body via ``write_b``).
"""

import os
import time

import cgitb
cgitb.enable()

import compressout
import sitemapdata


def lastmod_changefreq(filename, mod_compressout):
    """Write ``<lastmod>`` and ``<changefreq>`` elements for *filename*.

    ``<lastmod>`` is the file's mtime as a UTC ``YYYY-MM-DD`` date.
    ``<changefreq>`` is chosen from the file's age: use the highest
    frequency whose period T satisfies 2/T >= 1/age — as the next change
    has not yet occurred, assume now = previous + T/2, i.e. we are right
    in the middle of the interval.

    filename        -- path of the file backing the sitemap URL
    mod_compressout -- output module exposing ``write_b`` (normally
                       ``compressout``; passed in so it can be swapped)
    """
    changefreq = [
        ('hourly',    21600),   # 6 hours
        ('daily',    259200),   # 3 days
        ('weekly',  1209600),   # 2 weeks
        ('monthly', 5184000),   # 60 days
        ('yearly',       -1),   # sentinel: always matches (stop)
    ]
    last_modified = os.stat(filename).st_mtime
    mod_compressout.write_b('        <lastmod>{}</lastmod>\n'.format(
        time.strftime('%Y-%m-%d', time.gmtime(last_modified))
    ))
    time_delta = time.time() - last_modified
    # First keyword whose half-period exceeds the file's age; the
    # negative sentinel guarantees the loop always breaks at 'yearly'.
    for keyword, limit in changefreq:
        if limit < 0 or limit / 2.0 > time_delta:
            break
    mod_compressout.write_b(
        '        <changefreq>{}</changefreq>\n'.format(keyword)
    )


def pretty(text):
    """Normalize *text* for embedding in XML character data.

    Collapses newlines and runs of spaces into single spaces, escapes
    the XML metacharacters, and strips surrounding whitespace.
    """
    text = text.replace('\n', ' ')
    while 2 * ' ' in text:
        text = text.replace(2 * ' ', 1 * ' ')
    # '&' must be escaped first, or it would re-escape the others.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    return text.strip()


def XML():
    """Write the sitemap as sitemaps.org-protocol XML.

    Emits the Google image and video extension namespaces for entries
    carrying ``'images'`` / ``'videos'`` lists.
    """
    compressout.write_h('Content-Type: application/xml; charset=UTF-8\n\n')
    compressout.write_b('<?xml version="1.0" encoding="UTF-8"?>\n')
    compressout.write_b(
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
        '        xmlns:image='
        '"http://www.google.com/schemas/sitemap-image/1.1"\n'
        '        xmlns:video='
        '"http://www.google.com/schemas/sitemap-video/1.1">\n\n'
    )
    # Flatten subsitemaps.  Extending the list while iterating is
    # deliberate: appended entries are visited too, so arbitrarily
    # nested subsitemaps are flattened in one pass.
    for url in sitemapdata.sitemap:
        if 'subsitemap' in url:
            sitemapdata.sitemap.extend(url['subsitemap'])
            del url['subsitemap']
    for url in sitemapdata.sitemap:
        compressout.write_b('    <url>\n')
        compressout.write_b('        <loc>{}</loc>\n'.format(
            sitemapdata.site + url['URL']
        ))
        if 'priority' in url:
            compressout.write_b(
                '        <priority>{}</priority>\n'.format(url['priority'])
            )
        if 'file' in url:
            lastmod_changefreq(url['file'], compressout)
        if 'images' in url:
            for image in url['images']:
                compressout.write_b('        <image:image>\n')
                compressout.write_b(
                    '            <image:loc>{}</image:loc>\n'.format(
                        sitemapdata.imgsite + image['URL']
                    )
                )
                if 'description' in image:
                    compressout.write_b(
                        '            <image:caption>{}'
                        '</image:caption>\n'.format(
                            pretty(image['description'])
                        )
                    )
                # Image license: per-image override, else the site-wide
                # default; either may be None meaning "no license URL".
                if 'license' in image:
                    img_license = image['license']
                else:
                    img_license = sitemapdata.imglicense
                if img_license is not None:
                    compressout.write_b(
                        '            <image:license>{}'
                        '</image:license>\n'.format(img_license)
                    )
                compressout.write_b('        </image:image>\n')
        if 'videos' in url:
            for video in url['videos']:
                # 'title', 'description' and 'thumbnail' are required
                # keys; 'content' and 'player' are optional.
                compressout.write_b('        <video:video>\n')
                compressout.write_b(
                    '            <video:title>{}</video:title>\n'.format(
                        video['title']
                    )
                )
                if 'content' in video:
                    compressout.write_b(
                        '            <video:content_loc>{}'
                        '</video:content_loc>\n'.format(video['content'])
                    )
                if 'player' in video:
                    compressout.write_b(
                        '            <video:player_loc>{}'
                        '</video:player_loc>\n'.format(video['player'])
                    )
                compressout.write_b(
                    '            <video:description>{}'
                    '</video:description>\n'.format(video['description'])
                )
                compressout.write_b(
                    '            <video:thumbnail_loc>{}'
                    '</video:thumbnail_loc>\n'.format(video['thumbnail'])
                )
                compressout.write_b('        </video:video>\n')
        compressout.write_b('    </url>\n')
    compressout.write_b('</urlset>\n\n')


def HTML():
    """Write the sitemap as a human-readable (X)HTML page.

    Serves ``application/xhtml+xml`` when the client advertises support
    for it in the ``Accept`` header, falling back to ``text/html``.

    NOTE(review): the original page markup was lost to whitespace
    mangling; the structure below (nested link lists, an "Other
    sitemaps" section linking the XML variant) is a minimal
    reconstruction of the surviving text — confirm against the intended
    design.
    """
    if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
        content_type = 'application/xhtml+xml'
    else:
        content_type = 'text/html'
    compressout.write_h('Content-Type: {}; charset=UTF-8\n\n'.format(
        content_type
    ))
    compressout.write_b('''<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8"/>
<title>Sitemap</title>
</head>
<body>
<h1>Sitemap</h1>
''')

    def write_sitemap(sitemap):
        # Render one sitemap level as a list of links, recursing into
        # any 'subsitemap' entries as nested lists.
        compressout.write_b('<ul>\n')
        for url in sitemap:
            compressout.write_b('<li><a href="{0}">{0}</a>'.format(
                sitemapdata.site + url['URL']
            ))
            if 'subsitemap' in url:
                write_sitemap(url['subsitemap'])
            compressout.write_b('</li>\n')
        compressout.write_b('</ul>\n')

    write_sitemap(sitemapdata.sitemap)
    compressout.write_b('''<h2>Other sitemaps</h2>
<p><a href="?xml">XML sitemap</a></p>
</body>
</html>
''')


def main():
    """CGI entry point: dispatch on ``QUERY_STRING``.

    ``?xml`` selects the XML sitemap; anything else the HTML page.
    """
    os.chdir(sitemapdata.basedir)
    compressout.init()
    # Keep the sitemap itself out of search results, but let crawlers
    # follow the links it contains.
    compressout.write_h('X-Robots-Tag: noindex, follow\n')
    query_string = os.getenv('QUERY_STRING')
    if query_string == 'xml':
        XML()
    else:
        HTML()
    compressout.done()


if __name__ == '__main__':
    main()