source code of /sitemap.py.src
Last modified | |
Lines | 199 |
Parent directory Download CGIread sitemap Main page
Quick links: HTML XML content lastmod_changefreq main pretty write_sitemap
#!/usr/bin/python2
# -*- coding: UTF-8 -*-
import compressout
import os
import time
import cgitb
cgitb.enable()
import sitemapdata
def lastmod_changefreq(filename, mod_compressout):
    """Write the <lastmod> and <changefreq> elements for one file.

    filename        -- path passed to os.stat(); its st_mtime is taken as
                       the time of the last modification.
    mod_compressout -- object whose write_b() method receives the text.

    The change frequency is estimated from the age of the file.  The next
    change has not happened yet, so the present moment is assumed to lie
    in the middle of the interval between two changes: a keyword fits when
    half of its interval is still longer than the file's age.
    """
    # (keyword, interval in seconds), most frequent first.  The negative
    # interval marks the catch-all entry that ends the search.
    thresholds = (
        ('hourly', 21600),     # 6 hours
        ('daily', 259200),     # 3 days
        ('weekly', 1209600),   # 2 weeks
        ('monthly', 5184000),  # 60 days
        ('yearly', -1),        # Stop
    )

    mtime = os.stat(filename).st_mtime
    datestamp = time.strftime('%Y-%m-%d', time.gmtime(mtime))
    mod_compressout.write_b(' <lastmod>{}</lastmod>\n'.format(datestamp))

    age = time.time() - mtime
    # First keyword whose half-interval exceeds the age; the catch-all
    # entry guarantees that next() always finds one.
    chosen = next(
        name
        for name, interval in thresholds
        if interval < 0 or interval / 2.0 > age
    )
    mod_compressout.write_b(' <changefreq>{}</changefreq>\n'.format(chosen))
def pretty(text):
    """Return `text` on a single line, escaped for use as markup text.

    Newlines become spaces, runs of spaces are collapsed to one space,
    the characters ``&``, ``<`` and ``>`` are replaced by their entities
    and leading/trailing whitespace is stripped.
    """
    text = text.replace('\n', ' ')
    # Repeat until no double space is left, so runs of any length collapse.
    while 2*' ' in text:
        text = text.replace(2*' ', 1*' ')
    # '&' must be escaped first; otherwise the ampersands introduced by
    # the other two entities would be escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    return text.strip()
def XML():
    """Write the sitemap as an XML document through `compressout`.

    Emits the Content-Type header, the <urlset> root with the image and
    video extension namespaces, and one <url> element per entry of
    `sitemapdata.sitemap`, nested 'subsitemap' entries included.

    Keys read from each entry: 'URL' (required), 'priority', 'file',
    'images', 'videos', 'subsitemap'.  Images need 'URL' and may carry
    'description' and 'license'; videos need 'title', 'description' and
    'thumbnail' and may carry 'content' and 'player'.

    URLs, licences and video texts are entity-escaped so that characters
    such as '&' cannot produce a malformed document.  Image captions are
    passed through pretty() instead.  `sitemapdata.sitemap` itself is
    left unmodified.
    """
    # Function-scope import: nothing else in the module needs it.
    from xml.sax.saxutils import escape

    def write_element(tag, value):
        """Write ' <tag>value</tag>' with `value` XML-escaped."""
        # '{}'.format(value) stringifies the value the same way direct
        # interpolation would, so non-string values keep working.
        compressout.write_b(' <{0}>{1}</{0}>\n'.format(
            tag, escape('{}'.format(value))
        ))

    compressout.write_h('Content-Type: application/xml; charset=UTF-8\n\n')
    compressout.write_b('<?xml version="1.0" encoding="UTF-8"?>\n')
    compressout.write_b(
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
    )
    compressout.write_b(
        'xmlns:i="http://www.google.com/schemas/sitemap-image/1.1"\n'
    )
    compressout.write_b(
        'xmlns:v="http://www.google.com/schemas/sitemap-video/1.1"\n'
    )
    compressout.write_b('>\n\n')

    # Flatten subsitemaps level by level into a local list: top-level
    # entries first, then their children, then grandchildren, and so on.
    # The shared sitemap data is neither extended nor stripped of keys.
    urls = []
    level = list(sitemapdata.sitemap)
    while level:
        urls.extend(level)
        level = [
            child
            for entry in level
            for child in entry.get('subsitemap', ())
        ]

    for url in urls:
        compressout.write_b('<url>\n')
        write_element('loc', sitemapdata.site + url['URL'])
        if 'priority' in url:
            compressout.write_b(' <priority>{}</priority>\n'.format(
                url['priority']
            ))
        if 'file' in url:
            lastmod_changefreq(url['file'], compressout)

        for image in url.get('images', ()):
            compressout.write_b(' <i:image>\n')
            write_element('i:loc', sitemapdata.imgsite + image['URL'])
            if 'description' in image:
                # pretty() is responsible for escaping the caption.
                compressout.write_b(
                    ' <i:caption>{}</i:caption>\n'.format(
                        pretty(image['description'])
                    )
                )
            # Image license: a per-image value overrides the site default.
            if 'license' in image:
                image_license = image['license']
            else:
                image_license = sitemapdata.imglicense
            if image_license is not None:
                write_element('i:license', image_license)
            compressout.write_b(' </i:image>\n')

        for video in url.get('videos', ()):
            compressout.write_b(' <v:video>\n')
            write_element('v:title', video['title'])
            if 'content' in video:
                write_element('v:content_loc', video['content'])
            if 'player' in video:
                write_element('v:player_loc', video['player'])
            write_element('v:description', video['description'])
            write_element('v:thumbnail_loc', video['thumbnail'])
            compressout.write_b(' </v:video>\n')

        compressout.write_b('</url>\n')
    compressout.write_b('\n</urlset>\n')
def HTML():
    """Write the human-readable sitemap page through `compressout`.

    The Content-Type is application/xhtml+xml when the HTTP_ACCEPT
    environment variable mentions it and text/html otherwise.  The page
    holds a nested list built from `sitemapdata.sitemap` followed by the
    links listed in `sitemapdata.html_sitemaps`.

    Link targets are escaped for a double-quoted attribute ('&' and '"');
    link texts are passed through pretty().
    """
    def attr_escape(value):
        """Escape `value` for use inside a double-quoted attribute."""
        # '&' first, so the '&' of '&quot;' is not escaped again.
        return value.replace('&', '&amp;').replace('"', '&quot;')

    def write_sitemap(sitemap):
        """Write `sitemap` as a <ul>, recursing into 'subsitemap' lists."""
        compressout.write_b('<ul>\n')
        for url in sitemap:
            compressout.write_b('<li><a href="{}">{}</a>'.format(
                attr_escape(url['URL']),
                pretty(url['description'])
            ))
            if 'subsitemap' in url:
                # The nested list goes inside the still-open <li>.
                write_sitemap(url['subsitemap'])
            compressout.write_b('</li>\n')
        compressout.write_b('</ul>\n')

    if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
        content_type = 'application/xhtml+xml'
    else:
        content_type = 'text/html'
    compressout.write_h('Content-Type: {}; charset=UTF-8\n\n'.format(
        content_type
    ))
    compressout.write_b('''__HTML5NC__''')
    compressout.write_b('''
__TITLE__
<meta name="robots" content="noindex, follow"/>
</head>
<body>
__NAVIGATION__
<main><div id="content">
__H1__
''')
    write_sitemap(sitemapdata.sitemap)
    compressout.write_b('''
<h2>Other sitemaps</h2>
<ul>
''')
    for URL, name in sitemapdata.html_sitemaps:
        compressout.write_b('<li><a href="{}">{}</a></li>\n'.format(
            attr_escape(URL),
            pretty(name)
        ))
    compressout.write_b('''
</ul>
</div></main>
__FOOTER__
</body>
</html>
''')
def main():
    """Serve one request: the XML sitemap or the HTML sitemap page.

    Changes into `sitemapdata.basedir`, initialises `compressout`, sends
    the X-Robots-Tag header and renders XML when the QUERY_STRING
    environment variable is exactly 'xml', HTML in every other case.
    """
    os.chdir(sitemapdata.basedir)
    compressout.init()
    compressout.write_h('X-Robots-Tag: noindex, follow\n')
    wants_xml = os.getenv('QUERY_STRING') == 'xml'
    render = XML if wants_xml else HTML
    render()
    compressout.done()
# Run main() only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()