source code of /sitemap.py

Last modified	2023-02-02
Lines	261

Parent directory Download CGIread sitemap Main page

Quick links: HTML XML contact content footer lastmod_changefreq main navigation pretty title write_sitemap

```
#!/usr/bin/python2
```
```
# -*- coding: UTF-8 -*-
```
```
import compressout
```
```
import os
```
```
import time
```
```
import cgitb
```
```
cgitb.enable()
```
```
import sitemapdata
```

def lastmod_changefreq(filename, mod_compressout):
    """Write the <lastmod> and <changefreq> elements for *filename*.

    filename -- path of the file whose modification time is read with
                os.stat(); os.stat() raises OSError if that fails.
    mod_compressout -- object providing write_b(text); both elements
                       are written through it.

    <lastmod> is the modification time as a UTC date (YYYY-MM-DD).

    <changefreq> is estimated from the age of the file.  Let T be the
    time between the previous change and the next change.  As the next
    change has not yet occurred, assume that now = previous + T/2, i.e.
    right in the middle, so T is taken to be twice the age of the file.
    The most frequent keyword whose limit exceeds T is written, and
    'yearly' when no limit does.
    """
    # (keyword, upper limit for T in seconds), most frequent first.
    limits = (
        ('hourly', 21600),      # 6 hours
        ('daily', 259200),      # 3 days
        ('weekly', 1209600),    # 2 weeks
        ('monthly', 5184000),   # 60 days
    )

    last_modified = os.stat(filename).st_mtime
    mod_compressout.write_b('    <lastmod>{}</lastmod>\n'.format(
        time.strftime('%Y-%m-%d', time.gmtime(last_modified))
    ))

    age = time.time() - last_modified
    changefreq = 'yearly'       # Fallback when every limit is too short.
    for keyword, limit in limits:
        # limit/2 > age  <=>  limit > T, with T = 2 * age.
        if limit / 2.0 > age:
            changefreq = keyword
            break
    mod_compressout.write_b(
        '    <changefreq>{}</changefreq>\n'.format(changefreq)
    )
```
def pretty(text):
    """Return *text* as one line of escaped character data.

    Newlines are turned into spaces, every run of spaces is collapsed
    into a single space, the characters '&', '<' and '>' are replaced by
    their entities and surrounding whitespace is stripped.
    """
    double_space = '  '
    entities = (
        ('&', '&amp;'),     # Must come first: the others introduce '&'.
        ('<', '&lt;'),
        ('>', '&gt;'),
    )

    line = text.replace('\n', ' ')
    # Each pass halves the runs of spaces; repeat until none are left.
    while double_space in line:
        line = line.replace(double_space, ' ')
    for character, entity in entities:
        line = line.replace(character, entity)
    return line.strip()
```
```
def XML():
    """Write the sitemap as an XML document through compressout.

    Emits the Content-Type header, the <urlset> root with the image
    (prefix i) and video (prefix v) extension namespaces, and one <url>
    element for every entry of sitemapdata.sitemap, nested 'subsitemap'
    entries included.

    Keys read from each entry: 'URL' (required), and the optional
    'priority', 'file' (passed to lastmod_changefreq), 'images' and
    'videos'.

    URLs and video texts are entity-escaped before being written.
    NOTE(review): this assumes sitemapdata stores them unescaped, which
    is what HTML() already assumes for url['URL'] -- confirm for the
    image and video fields.
    """
    write = compressout.write_b

    compressout.write_h('Content-Type: application/xml; charset=UTF-8\n\n')
    write('<?xml version="1.0" encoding="UTF-8"?>\n')
    write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n')
    write('xmlns:i="http://www.google.com/schemas/sitemap-image/1.1"\n')
    write('xmlns:v="http://www.google.com/schemas/sitemap-video/1.1"\n')
    write('>\n\n')

    for url in _flatten_sitemap(sitemapdata.sitemap):
        write('<url>\n')
        write('    <loc>{}</loc>\n'.format(
            _xml_escape(sitemapdata.site + url['URL'])
        ))
        if 'priority' in url:
            write('    <priority>{}</priority>\n'.format(url['priority']))
        if 'file' in url:
            lastmod_changefreq(url['file'], compressout)
        if 'images' in url:
            for image in url['images']:
                _write_image(image)
        if 'videos' in url:
            for video in url['videos']:
                _write_video(video)
        write('</url>\n')

    write('\n</urlset>\n')


def _xml_escape(value):
    """Return *value* formatted as text with '&', '<' and '>' escaped."""
    text = '{}'.format(value)
    # '&' goes first so the entities added afterwards are left intact.
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _flatten_sitemap(entries):
    """Return a new list of *entries* plus all nested 'subsitemap' entries.

    The order is breadth-first: the given entries come first and the
    children of each entry are appended in the order the parents are
    visited.  Neither *entries* nor the entry dicts are modified, so the
    nested structure stays available to other users of sitemapdata.
    """
    flat = list(entries)
    position = 0
    # Index-based loop: the list grows while it is being walked.
    while position < len(flat):
        entry = flat[position]
        if 'subsitemap' in entry:
            flat.extend(entry['subsitemap'])
        position += 1
    return flat


def _write_image(image):
    """Write one <i:image> element for the image dict *image*."""
    write = compressout.write_b
    write('    <i:image>\n')
    write('        <i:loc>{}</i:loc>\n'.format(
        _xml_escape(sitemapdata.imgsite + image['URL'])
    ))
    if 'description' in image:
        write('        <i:caption>{}</i:caption>\n'.format(
            pretty(image['description'])
        ))
    # A per-image license takes precedence over the site-wide one;
    # None (from either source) suppresses the element.
    if 'license' in image:
        image_license = image['license']
    else:
        image_license = sitemapdata.imglicense
    if image_license is not None:
        write('        <i:license>{}</i:license>\n'.format(
            _xml_escape(image_license)
        ))
    write('    </i:image>\n')


def _write_video(video):
    """Write one <v:video> element for the video dict *video*."""
    write = compressout.write_b
    write('    <v:video>\n')
    write('        <v:title>{}</v:title>\n'.format(
        _xml_escape(video['title'])
    ))
    if 'content' in video:
        write('        <v:content_loc>{}</v:content_loc>\n'.format(
            _xml_escape(video['content'])
        ))
    if 'player' in video:
        write('        <v:player_loc>{}</v:player_loc>\n'.format(
            _xml_escape(video['player'])
        ))
    write('        <v:description>{}</v:description>\n'.format(
        _xml_escape(video['description'])
    ))
    write('        <v:thumbnail_loc>{}</v:thumbnail_loc>\n'.format(
        _xml_escape(video['thumbnail'])
    ))
    write('    </v:video>\n')


def HTML():
    """Write the sitemap as an HTML page through compressout.

    The page is served as application/xhtml+xml when the HTTP_ACCEPT
    environment variable mentions that type and as text/html otherwise.
    The body contains the entries of sitemapdata.sitemap as nested
    lists, followed by the links in sitemapdata.html_sitemaps.
    """
    xhtml = 'application/xhtml+xml'
    accepted = os.getenv('HTTP_ACCEPT', '')
    content_type = xhtml if xhtml in accepted else 'text/html'
    compressout.write_h(
        'Content-Type: {}; charset=UTF-8\n\n'.format(content_type)
    )

    def attribute(value):
        """Escape *value* for use in a double-quoted attribute."""
        return value.replace('&', '&amp;').replace('"', '&quot;')

    def write_sitemap(sitemap):
        """Write *sitemap* as a <ul>, recursing into 'subsitemap' lists."""
        compressout.write_b('<ul>\n')
        for entry in sitemap:
            link = '<li><a href="{}">{}</a>'.format(
                attribute(entry['URL']),
                pretty(entry['description'])
            )
            compressout.write_b(link)
            if 'subsitemap' in entry:
                # The nested list goes inside the parent's <li>.
                write_sitemap(entry['subsitemap'])
            compressout.write_b('</li>\n')
        compressout.write_b('</ul>\n')

    # Document head shared with the rest of the site.
    compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <meta charset="utf-8"/>
        <meta name="viewport" content="width=device-width, initial-scale=1"/>
        <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
        <link rel="icon" type="image/png" href="/favicon.png"/>

''')
    # Page specific head, navigation and the start of the main content.
    compressout.write_b('''
        <title>Sitemap</title>
        <meta name="robots" content="noindex, follow"/>
    </head>
    <body>
        

<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-&gt; My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">]</span><span class="sub active">Sitemap</span><span class="textonly" translate="no">[</span>
</p>
<hr class="textonly"/>
</div></div></nav>

        <main><div id="content">
            <h1 id="title">Sitemap</h1>
''')

    write_sitemap(sitemapdata.sitemap)

    compressout.write_b('''
            <h2>Other sitemaps</h2>
            <ul>
    ''')
    for href, label in sitemapdata.html_sitemaps:
        item = '<li><a href="{}">{}</a></li>\n'.format(
            attribute(href),
            pretty(label)
        )
        compressout.write_b(item)

    # Close the list and the main content, then write the site footer.
    compressout.write_b('''
            </ul>
        </div></main>
        

<footer><div id="footer">
    <hr class="textonly"/>
    <p>
        Website content released under the <a
        href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
        target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
        and my software usually under the <span class="a"><a target="_blank"
        rel="noopener"
        href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
        (2-clause)</a>.</span>
        <br/>
        Images may be from other sites, I should have cited useful sources
        somewhere on the page.
        <span class="notprint">Contact me if I haven't.</span>
    </p>
    <p id="contact" class="notprint">
        You can contact me at: <a href="mailto:oskar@oskog97.com"
        rel="noopener" target="_blank">oskar@oskog97.com</a>
        <span class="a">(<a href="/pgp-pub/oskar.asc"
                            >PGP public key</a>)</span>
    </p>
    <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
            CSS Stylesheet
        </a>
        <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
            target="_blank" class="notprint"><span
            class="img">Valid HTML5</span
        ></a><br/>
    </p>
</div></footer>

    </body>
</html>
    ''')
```
```
def main():
    """Handle one CGI request for the sitemap.

    Changes into sitemapdata.basedir, initialises compressout, sends the
    X-Robots-Tag header and then writes the XML sitemap when the query
    string is exactly 'xml', or the HTML sitemap for anything else
    (including a missing QUERY_STRING).
    """
    os.chdir(sitemapdata.basedir)
    compressout.init()
    compressout.write_h('X-Robots-Tag: noindex, follow\n')

    wants_xml = os.getenv('QUERY_STRING') == 'xml'
    render = XML if wants_xml else HTML
    render()

    compressout.done()
```
```
# Serve the request only when run as a script, not when imported.
if __name__ == '__main__':
    main()
```