source code of /read/index.py

Last modified	2023-02-02
Lines	1469

Parent directory Download CGIread sitemap Main page

Quick links: cat code contact content description download footer forms handle_injection_attempt if_none_match index_page is_injection_attempt isword ls main mk_description mk_navigation mk_referer_param navigation noindex ol_content redirect_spam sitemap syntax title

```
#!/usr/bin/python3
```
```
# -*- coding: utf-8 -*-
```
```
root = '/var/www'
```
```
owner = 'Oskar Skog'
```
```
my_url = '/read/'
```

canonical_url = 'https://oskog97.com/read/'

```
html403file = '/var/www/oops/403.html'
```
```
html404file = '/var/www/oops/404.html'
```

html503file = '/var/www/oops/cgi503.html'

```
import sys
```
```
sys.path.append(root)
```
```
import cgi
```
```
import os
```
```
import errno
```
```
import compressout
```
```
import base64
```
```
import re
```
```
import time
```
```
import htmlescape
```
```
import string
```
```
import spammy
```

import sitemap as mod_sitemap  # Name conflict with already existing function.

```
import cgitb
```
```
cgitb.enable()
```
```
# Length of the document root; used to turn absolute filesystem paths into
# site-relative paths with `path[rootlen:]`.
rootlen = len(root)

#html_mime = 'text/html'      # Set to XHTML later.
html_page = 'Content-Type: text/html; charset=UTF-8\n'  # Set to XHTML later.

# Load the configuration dictionary from 'read.cfg' in the working directory.
# NOTE(review): eval() executes whatever Python expression the file contains,
# so 'read.cfg' must only ever be writable by the site owner.  If the file
# holds nothing but literals, ast.literal_eval would be a safer replacement.
with open('read.cfg') as _cfg_file:
    conf = eval(_cfg_file.read())
```
```
def redirect_spam(destination):
    '''
    Answer the request with an HTTP 303 redirect.

    `destination` is the URL that unwanted (spam) clients are sent to; it
    is written verbatim into the Location header.
    '''
    header_lines = (
        'Status: 303\n',
        'Location: {}\n'.format(destination),
        '\n',       # Blank line terminates the header section.
    )
    for header_line in header_lines:
        compressout.write_h(header_line)
```
```
def status400(message):
    '''
    Send an HTTP 400 (Bad Request) page.

    `message` is placed UNESCAPED inside a <pre> element, so the caller is
    responsible for passing text that is already safe HTML.
    '''
    # Everything up to and including the main content; the single `{}`
    # placeholder receives `message`.
    page_top = '''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <meta charset="utf-8"/>
        <meta name="viewport" content="width=device-width, initial-scale=1"/>
        <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
        <link rel="icon" type="image/png" href="/favicon.png"/>
        <link rel="canonical" href="https://oskog97.com/read/"/>

        <title>400 - Bad Request</title>
    </head>
    <body>
        

<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-&gt; My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>

        <main><div id="content">
            <h1 id="title">400 - Bad Request</h1>
            <pre>{}</pre>
            <p>
                Your request can't be understood.
                Check the parameters.
            </p>
            <p><a href="/read/">Documentation for the parameters</a></p>
        </div></main>
'''
    # Static footer and closing tags.
    page_bottom = '''
        

<footer><div id="footer">
    <hr class="textonly"/>
    <p>
        Copyright © Oskar Skog<br/>
        Website content released under the <a
        href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
        target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
        and my software usually under the <span class="a"><a target="_blank"
        rel="noopener"
        href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
        (2-clause)</a>.</span>
        <br/>
        Images may be from other sites, I should have cited useful sources
        somewhere on the page.
        <span class="notprint">Contact me if I haven't.</span>
    </p>
    <p id="contact" class="notprint">
        You can contact me at: <a href="mailto:oskar@oskog97.com"
        rel="noopener" target="_blank">oskar@oskog97.com</a>
        <span class="a">(<a href="/pgp-pub/oskar.asc"
                            >PGP public key</a>)</span>
    </p>
    <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
            CSS Stylesheet
        </a>
        <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
            target="_blank" class="notprint"><span
            class="img">Valid HTML5</span
        ></a><br/>
    </p>
</div></footer>

    </body>
</html>'''
    # Headers first: status line, content type, then the terminating blank.
    for header_line in ('Status: 400\n', html_page, '\n'):
        compressout.write_h(header_line)
    compressout.write_b(page_top.format(message))
    compressout.write_b(page_bottom)
```
```
def status403():
    '''
    Send an HTTP 403 (Forbidden) response.

    The body is the contents of the static error page `html403file`.
    '''
    compressout.write_h(html_page)
    # The trailing blank line ends the header section.
    compressout.write_h('Status: 403\n\n')
    # Context manager so the file handle is closed instead of being left
    # for the garbage collector.
    with open(html403file) as error_page:
        compressout.write_b(error_page.read())

```
def status404():
    '''
    Send an HTTP 404 (Not Found) response.

    The body is the contents of the static error page `html404file`.
    '''
    compressout.write_h('Status: 404\n')
    compressout.write_h(html_page)
    compressout.write_h('\n')       # End of headers.
    # Context manager so the file handle is closed instead of being left
    # for the garbage collector.
    with open(html404file) as error_page:
        compressout.write_b(error_page.read())

```
def status503():
    '''
    Send an HTTP 503 (Service Unavailable) response.

    Call this if there is too much load on the server to do something.
    (Used by the sitemap function.)

    The body is the contents of the static error page `html503file`.
    '''
    compressout.write_h('Status: 503\n')
    compressout.write_h(html_page)
    # One factor is load avg for 1 minute, add some slop to the delay for bots.
    compressout.write_h('Retry-After: 90\n')
    compressout.write_h('\n')       # End of headers.
    # Context manager so the file handle is closed instead of being left
    # for the garbage collector.
    with open(html503file) as error_page:
        compressout.write_b(error_page.read())

```
def index_page():
    '''
    Serve the front page, https://oskog97.com/read/

    Writes an ETag built from the content type, whether the client accepts
    gzip, and the modification time of 'index.py'.  Answers 304 when the
    client's If-None-Match equals that ETag, sends headers only for HEAD
    requests, and otherwise writes the static page.
    '''
    # Static page, part 1: head, navigation and page title.
    page_head = '''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <meta charset="utf-8"/>
        <meta name="viewport" content="width=device-width, initial-scale=1"/>
        <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
        <link rel="icon" type="image/png" href="/favicon.png"/>
        <link rel="canonical" href="https://oskog97.com/read/"/>

    
        <link rel="stylesheet" type="text/css" href="/read/style.css"/>
        <meta name="description" content="Interested in the scripts I have
        on my website? Come and take a look at them."/>
        <title>Website's scripts</title>
    </head>
    <body>
        

<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-&gt; My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>

        <main><div id="content">
            <h1 id="title">Website's scripts</h1>
    '''
    # Static page, part 2: documentation and request forms.  Every `{0}`
    # is replaced with `my_url`; literal braces are written as entities.
    page_main = '''
            <p>
                Interested in the scripts I have on my website?
                Go take a look at them; start crawling the
                <a href="{0}?path=/">root directory</a> or take a look
                at the <span class="a"><a href="{0}?sitemap=html"
                >(sub)sitemap</a>.</span>
            </p>
            <div id="syntax">
                <h2>Parameter syntax</h2>
                <p>
                    Descriptions for the parameters can be found in
                    the request forms.
                </p>
                <ul>
                    <li>
                        Asterisks <q>*</q> represent a value that can be
                        (almost) anything.
                    </li>
                    <li>Square brackets <q>[]</q> represent optional.</li>
                    <li>Curly brackets <q>&#x7b;&#x7d;</q> represent mandatory.</li>
                    <li>Pipes <q>|</q> represent either or.</li>
                </ul>
                <p>There are three acceptable "sets" of parameters:</p>
                <ol>
<li><pre>{0}?sitemap=&#x7b;html|xml&#x7d;</pre></li>
<li><pre>{0}?path=*[&amp;download=yes]</pre></li>
<li><pre>{0}?path=*[&amp;referer=*[&amp;title=*]]</pre></li>
                </ol>
                <p>
                    The order of the valid parameters doesn't matter, but
                    this is the recommended/canonical order.
                </p>
            </div>
            <div id="forms">
                <h2>Request forms</h2>
                <p><strong>
                    Notice that these are three different forms.
                </strong></p>
                <form action="{0}" method="get">
                <h3>Sitemap</h3>
                <p>
                    The <code>sitemap</code> parameter can be either
                    <q><code>html</code></q>, <q><code>xml</code></q>
                    or the default <q><code>none</code></q>.
                    It can't be used together with any other parameters.
                </p>
                <p>
                    <input type="radio" name="sitemap" value="html"/>
                    Request an HTML sitemap instead of a page<br/>
                    <input type="radio" name="sitemap" value="xml"/>
                    request an XML sitemap instead of a page<br/>
                    <input type="submit"/>
                </p>
                </form>
                <form action="{0}" method="get">
                <h3>Page</h3>
                <p>
                    A page (source code of a CGI script) is selected with the
                    <code>path</code> parameter.  The value of the
                    <code>path</code> parameter is a URL relative to this
                    site, ie. an URL beginning with a single slash.
                </p>
                <p>
                    The <code>path</code> is the site-local URL to the CGI
                    script or directory you're interested in.  If you set the
                    value to <q><code>/read/index.py</code></q>, you'll get the
                    source code for this script. And if you set it to
                    <q><code>/</code></q>, you'll get a directory listing
                    of the site's root directory.
                </p>
                <p>
                    Path/URL: <input type="text" name="path" value="/"/>
                    <input type="submit"/><br/>
                    <input type="checkbox" name="download" value="yes"/>
                    Download / see it as plain text
                    
                </p>
                <p>
                    The <code>download</code> parameter can be set to either
                    <q><code>yes</code></q> or the default
                    <q><code>no</code></q>.  The download option does
                    obviously not work with directories.
                </p>
                </form>
                <form action="{0}" method="get">
                <h3>Link back to a referencing page</h3>
                <p>
                    If <code>download</code> is <q><code>no</code></q> or
                    unset and a page (not a sitemap) was requested, it is
                    possible to change the navigation to make the requested
                    page link back to a referring page.
                </p>
                <p>
                    The <code>referer</code> (yes, misspelled like the HTTP
                    Referer) parameter is the URL of the referencing page.
                    (Don't try to specify a site that isn't mine.)
                    The <code>title</code> parameter gives the back link a
                    different text than <q>Back</q>.
                </p>
                <table>
                    <tr>
                        <th><code>path</code></th>
                        <td><input type="text" name="path" value="/"/></td>
                    </tr>
                    <tr>
                        <th><code>referer</code></th>
                        <td><input type="text" name="referer"/></td>
                    </tr>
                    <tr>
                        <th><code>title</code></th>
                        <td><input type="text" name="title"/></td>
                    </tr>
                    <tr>
                        <td></td>
                        <td><input type="submit"/></td>
                    </tr>
                </table>
                </form>
            </div>
        </div></main>
    '''
    # Static page, part 3: footer and closing tags.
    page_foot = '''
        

<footer><div id="footer">
    <hr class="textonly"/>
    <p>
        Copyright © Oskar Skog<br/>
        Website content released under the <a
        href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
        target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
        and my software usually under the <span class="a"><a target="_blank"
        rel="noopener"
        href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
        (2-clause)</a>.</span>
        <br/>
        Images may be from other sites, I should have cited useful sources
        somewhere on the page.
        <span class="notprint">Contact me if I haven't.</span>
    </p>
    <p id="contact" class="notprint">
        You can contact me at: <a href="mailto:oskar@oskog97.com"
        rel="noopener" target="_blank">oskar@oskog97.com</a>
        <span class="a">(<a href="/pgp-pub/oskar.asc"
                            >PGP public key</a>)</span>
    </p>
    <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
            CSS Stylesheet
        </a>
        <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
            target="_blank" class="notprint"><span
            class="img">Valid HTML5</span
        ></a><br/>
    </p>
</div></footer>

    </body>
</html>
'''

    # Build the ETag: optional 'x' (XHTML content type), optional 'z'
    # (client accepts gzip), then the mtime of this script.
    xhtml_mark = 'x' if 'application/xhtml+xml' in html_page else ''
    gzip_mark = 'z' if 'gzip' in os.getenv('HTTP_ACCEPT_ENCODING', '') else ''
    script_mtime = os.stat('index.py').st_mtime
    ETag = '"{}{}{}"'.format(xhtml_mark, gzip_mark, script_mtime)

    compressout.write_h('Vary: If-None-Match\n')
    compressout.write_h('ETag: {}\n'.format(ETag))
    compressout.write_h(html_page)

    # Handle 304s: the client already has this exact version.
    if ETag == os.getenv('HTTP_IF_NONE_MATCH'):
        compressout.write_h('Status: 304\n\n')
        return
    compressout.write_h('\n')

    # HEAD requests get headers only.
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return

    # Write out the static page.
    compressout.write_b(page_head)
    compressout.write_b(page_main.format(my_url))
    compressout.write_b(page_foot)
```
```
def noindex(path):
    '''
    Returns True if `path` should be noindexed.

    `path` is an absolute **filesystem** path.

    The decision is made in this order:
        1. White list (`conf['doindex']`): always indexable.
        2. Black list: files generated from another existing file
           (`conf['madefrom']`) and paths matching `conf['noindex']` or
           `conf['hide']` are never indexable.
        3. Directories are indexable.
        4. Page quality for regular files; unreadable files are noindexed.
    '''
    def isword(w):
        '''True if `w` is non-empty and made only of ASCII letters, "," and ".".'''
        letters = string.ascii_letters + ',.'
        # Test each character.  (Testing the whole word against `letters`
        # only ever matched substrings of the alphabet.)  Empty fragments
        # produced by consecutive spaces are not words.
        return bool(w) and all(ch in letters for ch in w)

    # All regexes in the configuration are matched against the
    # site-relative path.
    relpath = path[rootlen:]

    # Check whitelist first.
    for regex in conf['doindex']:
        if re.match(regex, relpath) is not None:
            return False

    # Blacklist (two kinds):
    # - Generated from another file.
    # - Explicitly blacklisted in 'read.cfg'.
    for match, replace in conf['madefrom']:
        if re.match(match, relpath) is not None:
            try:
                # If the file this one is made from exists, noindex.
                os.stat(root + re.sub(match, replace, relpath))
                return True
            except (OSError, ValueError):
                # Source file missing or path unusable: not generated.
                pass
    for regex in conf['noindex'] + conf['hide']:
        if re.match(regex, relpath) is not None:
            return True

    # Directories are not subject to the quality test.
    try:
        os.listdir(path)
        return False
    except (OSError, ValueError):
        pass

    # Quality:
    #   - Text file
    #   - At least 3072 Unicode code points
    #   - At least 300 words
    #   - At least 60 lines
    #   - At least 24 lines that look like comments
    #   - Half the limits if a meta description and title is found
    #   - A third of the limits if an onpage description is found

    # Normal file.
    try:
        with open(path) as source_file:
            text = source_file.read()
        if sys.version_info[0] <= 2:
            text = text.decode('utf-8')
    except (IOError, OSError, ValueError):
        # Unreadable or not decodable as text.
        return True

    min_chars, min_words, min_lines, min_comments = 3072, 300, 60, 24
    # mk_description(path)[0] is 0, 1 or 2; dividing by 1, 2 or 3.
    quality = mk_description(path)[0] + 1
    min_chars //= quality
    min_words //= quality
    min_lines //= quality
    min_comments //= quality

    if len(text) < min_chars:
        return True
    if text.count('\n') + 1 < min_lines:
        return True

    # Lines containing '#', '<!--' or a triple single quote, or starting
    # with optional spaces followed by '* ', count as comment lines.
    is_comment = re.compile('^(.*#.*| *\\* .*|.*<!--.*|.*\'\'\'.*)$')
    n_comments = sum(
        1 for line in text.split('\n') if is_comment.match(line) is not None
    )
    if n_comments < min_comments:
        return True

    n_words = sum(1 for w in text.replace('\n', ' ').split(' ') if isword(w))
    if n_words < min_words:
        return True

    # Passed the quality tests:
    return False
```
```
def mk_navigation(referer, title):
    '''
    Build the HTML of the navigation bar and return it as a string.

    `title` is the title of the requested page.

    `referer` is a tuple of (URL, title) for the "back" link.  When the
    URL is non-empty the page is "integrated" with the referring page: the
    first row links back to it.  Otherwise the site's standard navigation
    is returned unchanged.
    '''
    # Navigation used when there is no referring page.
    standard_nav = '''

<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-&gt; My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>

'''
    back_url = referer[0]
    if not back_url:
        return standard_nav

    # Navigation that links back to the referring page.  The placeholders
    # are filled in by htmlescape.escape.
    integrated_nav = '''<!-- Navigation generated by CGIread. -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="{URL}">{title}</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">]</span><span class="sub active">{me}</span><span class="textonly" translate="no">[</span>
<span class="textonly" translate="no">[</span><a class="sub" href="{my_url}?sitemap=html">Sitemap for website's scripts</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">[</span><a class="sub" href="/read/">Website's scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/pages/policy.html">Privacy policy &amp; terms of use</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
'''
    # NOTE(review): the first item of each tuple is presumably the
    # escaping level htmlescape applies to the value -- confirm against
    # the htmlescape module.
    substitutions = {
        'URL': (2, back_url),
        'title': (1, referer[1]),
        'me': (1, title),
        'my_url': (0, my_url),
    }
    return htmlescape.escape(integrated_nav, **substitutions)
```
```
def mk_referer_param(referer):
    '''
    Build the query-string suffix that carries the back link along.

    `referer` is the (URL, title) tuple also taken by `mk_navigation`.

    The return value is meant to be appended to links from the requested
    page and is one of:
        ''                                  (no referring URL)
        '&referer=' + URL                   (title is the default 'Back')
        '&referer=' + URL + '&title=' + title
    '''
    # Guard clause: nothing to carry along without a referring URL.
    if not referer[0]:
        return ''
    # The default title is implied, so it is left out of the URL.
    title_param = '' if referer[1] == 'Back' else '&title={}'.format(referer[1])
    return '&referer={}{}'.format(referer[0], title_param)
```
```
def mk_description(path):
    '''
    Return a 4-tuple: (good, title, meta_description, onpage_description)

    `path` is the absolute filesystem path to the requested page.  The
    description is read from the file `path + '.info'`: its first line is
    the title, the following lines are the meta description, and if there
    is a line consisting of a single '.', everything after that line is
    the onpage description.

    `good` is
        0       no title and description (no readable '.info' file)
        1       title and meta description only
        2       also an onpage description

    `title` is the title of the page.  Without a '.info' file it is
    "source code of " followed by the site-relative path.

    `meta_description` is the content of the description meta tag.

    `onpage_description` is HTML content for the onpage description.
    When the '.info' file has none, it is the meta description escaped
    and wrapped in a <p> element.
    '''
    good = 0
    title = "source code of {}".format(path[rootlen:])
    meta_description = ''
    onpage_description = None

    # A missing or unreadable '.info' file just means "no description".
    content = None
    try:
        with open(path + '.info') as info_file:
            content = info_file.read().split('\n')
    except (IOError, OSError, ValueError):
        pass

    if content is not None:
        good = 1
        title = content[0]
        # A line with a lone '.' separates meta and onpage descriptions.
        try:
            sep = content.index('.')
        except ValueError:
            sep = None
        if sep is not None:
            good = 2
            meta_description = '\n'.join(content[1:sep])
            onpage_description = '\n'.join(content[sep+1:])
        else:
            meta_description = '\n'.join(content[1:])

    if onpage_description is None:
        onpage_description = htmlescape.escape('<p>{}</p>',1,meta_description)
    return good, title, meta_description, onpage_description

```
def sitemap(sitemap_type):
```
```
    '''
```
```
    Write out an XML or HTML sitemap.
```
```
    sitemap_type in ('xml', 'html')
```
```
    
```

    The XML sitemap will exclude entries from `conf['noxmlsitemap']`.

```
    '''    
```
```
    
```

    if os.getenv('REQUEST_METHOD') != 'HEAD': # NOTICE

        # Prevent over-revving the server.

        # HEAD requests are basically no-ops.

        maxload = conf['sitemap-maxload']

        if os.getloadavg()[0] > maxload['load-avg1']:

```
            status503()
```
```
            return
```
```
        try:
```
```
            access_times = list(map(
```

                float, open('read.throttlecontrol').read().strip().split(':')

```
            ))
```
```
        except:
```
```
            access_times = [0]
```

        if time.time() - access_times[-1] < maxload['throttle-time']:

```
            status503()
```
```
            return
```

        access_times.insert(0, time.time())

        access_times = access_times[:maxload['throttle-requests']]

        f = open('read.throttlecontrol', 'w')

        f.write(':'.join(list(map(str, access_times))) + '\n')

```
        f.close()
```

    # Write headers before doing anything else.

    # A HEAD request doesn't need to know the length (it's TE chunked).

```
    if sitemap_type == 'xml':
```

        compressout.write_h('Content-Type: application/xml; charset=UTF-8\n')

```
        compressout.write_h(
```

            'Link: <{my_url}?sitemap=html>'.format(my_url=canonical_url) +

```
            '; rel="canonical"' +
```
```
            '; type="text/html"\n'
```
```
        )
```

        compressout.write_h('X-Robots-Tag: noindex\n\n') # NOTE: last.

```
    elif sitemap_type == 'html':
```
```
        compressout.write_h(html_page)
```
```
        compressout.write_h('\n')
```
```
    else:
```

        assert False, "Neither 'xml' nor 'html'"

    if os.getenv('REQUEST_METHOD') == 'HEAD': # NOTICE

```
        return
```
```
    
```

    # Find the pages worth being in the sitemap.

    no_access = conf['noaccess'] + conf['hide'] + conf['topsecret']

```
    paths = []
```
```
    
```

    for basedir, dirs, files in os.walk(root, topdown=True):

```
        # Exclude hidden directories:
```
```
        remove_list = []
```

        sys.stderr.write('In {}\n'.format(basedir))

        sys.stderr.write('Dirs: {}\n'.format(repr(dirs)))

```
        for dirname in dirs:
```

            dirpath = os.path.join(basedir, dirname)[rootlen:]

```
            for regex in no_access:
```

                if re.match(regex, dirpath) is not None:

                    #dirs.remove(dirname)

                    # BUG: The for loop will skip items in the list if

                    # other items are removed while looping.

                    # This caused some real' nasty stuff like sshin to

                    # be crawled, took a whopping .65 seconds.

                    remove_list.append(dirname)

```
                    break
```

        sys.stderr.write('Removed dirs: {}\n'.format(repr(remove_list)))

```
        for dirname in remove_list:
```
```
            dirs.remove(dirname)
```
```
        
```
```
        # Iterate over files:
```
```
        for filename in files:
```

            filepath = os.path.join(basedir, filename)

```
            # No symlinks allowed.
```

            #if os.stat(filepath).st_mode == os.lstat(filepath).st_mode:

            if not os.path.islink(filepath):

```
                #try:
```

                    description = mk_description(filepath)

```
                    if description[0]:
```

                        # Only indexable content allowed.

                        if not noindex(filepath):

                            paths.append((filepath[rootlen:], description[3]))

```
                        else:
```

                            sys.stderr.write('{} is noindexed\n'.format(filepath))

```
                    else:
```

                        sys.stderr.write('{} has no description\n'.format(filepath))

                #except IOError as error:

                    #assert error.errno in (

                        #errno.EISDIR, errno.EACCES

```
                    #), error.errno
```
```
            else:
```

                sys.stderr.write('{} is link\n'.format(filepath))

```
    
```
```
    paths.sort(key=lambda x: x[0])
```
```
    
```
```
    # Print the body.
```
```
    if sitemap_type == 'xml':
```

        compressout.write_b('''<?xml version="1.0" encoding="UTF-8"?>

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

```
''')
```
```
        #
```
```
        for path, description in paths:
```

            # Loop through all the regexes:

            for regex in conf['noxmlsitemap']:

                if re.match(regex, path) is not None:

```
                    break
```
```
            else:
```

                compressout.write_b(htmlescape.escape('''<url>

    <loc>{canonical_url}?path={path}</loc>

```
    <priority>0.5</priority>
```
```
''',
```

                    canonical_url=(0, canonical_url),

```
                    path=(1, path),
```
```
                ))
```

                mod_sitemap.lastmod_changefreq(

```
                    root + path,
```
```
                    compressout,
```
```
                )
```

                compressout.write_b('</url>\n')

```
        #
```

        compressout.write_b('</urlset>\n')

```
    elif sitemap_type == 'html':
```

        compressout.write_b('''<!DOCTYPE html>

<html lang="en" xmlns="http://www.w3.org/1999/xhtml">

```
    <head>
```
```
        <meta charset="utf-8"/>
```

        <meta name="viewport" content="width=device-width, initial-scale=1"/>

        <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>

        <link rel="icon" type="image/png" href="/favicon.png"/>

```

```

        <link rel="canonical" href="{canonical_url}?sitemap=html"/>

        <link rel="alternate" href="{canonical_url}?sitemap=xml"

```
            type="application/xml"/>
```

        <meta name="robots" content="noindex, follow"/>

        <title>Sitemap for scripts' source code</title>

        <meta name="description" content="

            Sitemap of all scripts available through /read/.

```
        "/>
```
```
    </head>
```
```
    <body>
```
```
        
```
```

```

<nav><div id="navigation"><div id="nav_inner">

<p><a href="#content" class="textonly">Skip navigation</a></p>

```
<p class="row">
```

<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>

```
&gt;&gt;
```

<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>

<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>

<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>

<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>

<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-&gt; My IBM thinkpad</a><span class="textonly" translate="no">]</span>

```
</p>
```
```
<p class="row">
```

<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>

```
&gt;&gt;
```

<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>

```
</p>
```
```
<p class="row">
```

<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>

```
</p>
```
```
<hr class="textonly"/>
```
```
</div></div></nav>
```
```

```

        <main><div id="content" class="sitemap">

            <h1 id="title">Sitemap for scripts' source code</h1>

            <p><a href="{my_url}?path=/">Root directory</a></p>

```
            <dl>
```

'''.format(my_url=my_url, canonical_url=canonical_url))

```
        #
```
```
        indent = 16 * ' '
```
```
        for path, description in paths:
```

            compressout.write_b(indent + htmlescape.escape(

                '''<dt><a translate="no" href="{my_url}?path={path}">

```
                    {path}
```
```
                </a></dt>\n''',
```
```
                path=(0, path),
```

                my_url=(0, canonical_url),

```
            ))
```

            compressout.write_b(indent +

                htmlescape.escape('<dd>{}</dd>\n', 0, description)

```
            )
```
```
        #
```

        compressout.write_b('''            </dl>

```
        </div></main>
```
```
        
```
```

```
```
<footer><div id="footer">
```
```
    <hr class="textonly"/>
```
```
    <p>
```
```
        Copyright © Oskar Skog<br/>
```

        Website content released under the <a

        href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"

        target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license

        and my software usually under the <span class="a"><a target="_blank"

```
        rel="noopener"
```

        href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license

```
        (2-clause)</a>.</span>
```
```
        <br/>
```

        Images may be from other sites, I should have cited useful sources

```
        somewhere on the page.
```

        <span class="notprint">Contact me if I haven't.</span>

```
    </p>
```
```
    <p id="contact" class="notprint">
```

        You can contact me at: <a href="mailto:oskar@oskog97.com"

        rel="noopener" target="_blank">oskar@oskog97.com</a>

        <span class="a">(<a href="/pgp-pub/oskar.asc"

                            >PGP public key</a>)</span>

```
    </p>
```

    <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">

```
            CSS Stylesheet
```
```
        </a>
```

        <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"

            target="_blank" class="notprint"><span

            class="img">Valid HTML5</span

```
        ></a><br/>
```
```
    </p>
```
```
</div></footer>
```
```

```
```
    </body>
```
```
</html>
```
```
''')
```
```
    else:
```

        assert False, "Neither 'xml' nor 'html'"

```
def ls(path, referer):
    '''
    Write the HTML directory listing ("Index of ...") for `path`.

    `path` is the absolute filesystem path of a directory below `root`.
    `referer` is the `(URI, title)` tuple built in `main`; it is handed
    on to `mk_navigation` and `mk_referer_param`.

    Writes the `html_page` headers and, unless the request method is
    HEAD, the page body through `compressout`.  Entries whose
    site-relative path matches any regex in conf['noaccess'],
    conf['hide'] or conf['topsecret'] are left out of the listing.
    '''
    compressout.write_h(html_page)
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return
    compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <meta charset="utf-8"/>
        <meta name="viewport" content="width=device-width, initial-scale=1"/>
        <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
        <link rel="icon" type="image/png" href="/favicon.png"/>

''')
    # The parent-directory link is commented out in the generated markup
    # when the listed directory is the server root itself.
    compressout.write_b(htmlescape.escape('''
        <link rel="stylesheet" type="text/css" href="/read/style.css"/>
        <title>Index of {name}</title>
        <meta name="robots" content="{robots_follow}, noindex"/>
        <link rel="canonical" href="{canonical_url}?path={name}"/>
    </head>
    <body>
        {navigation}
        <main><div id="content" class="ls">
            <h1 id="title">Index of <span translate="no">{name}</span></h1>
            <p class="read-nav">
                {isroot_commentout_start}
                    <a href="{my_url}?path={parent_path}{referer_params}">
                        Parent directory
                    </a>
                {isroot_commentout_end}
                <a href="{my_url}?sitemap=html">CGIread sitemap</a>
                <a href="{my_url}">Main page</a>
            </p>
            <table id="ls">
            ''',
            name          =(1, path[rootlen:] + '/'),
            parent_path   =(2, '/'.join(path.split('/')[:-1])[rootlen:]+'/'),
            robots_follow =(2, 'no'*noindex(path)+'follow'),
            navigation    =(0, mk_navigation(
                                referer,
                                "Index of "+path[rootlen:]+'/'
                            )),
            referer_params=(2, mk_referer_param(referer)),
            my_url=(0, my_url),
            canonical_url=(0, canonical_url),
            isroot_commentout_start=(0, '<!--'*(path == root)),
            isroot_commentout_end=(0, '-->'*(path == root)),
        ))
    no_access = conf['noaccess'] + conf['hide'] + conf['topsecret']

    for x in sorted(os.listdir(path)):
        full_path = os.path.join(path, x)

        # Leave out everything the configuration says must not be shown.
        if any(
            re.match(regex, full_path[rootlen:]) is not None
            for regex in no_access
        ):
            continue

        # An entry counts as a directory only if it can actually be listed.
        # `is_dir` stays an int because it is used as a string multiplier.
        try:
            os.listdir(full_path)
            is_dir = 1
        except OSError:
            is_dir = 0

        # Each row gets a short (mobile) and a long (desktop) description.
        if is_dir:
            mobile_desc = '<span class="yeah">-&gt;</span>'
            desktop_desc = '<span class="yeah">Directory</span>'
        else:
            # A file that cannot be read as text is labelled "Binary".
            # The context manager closes the handle again; the previous
            # bare open().read() leaked one descriptor per listed file.
            try:
                with open(full_path) as handle:
                    content = handle.read()
                if sys.version_info[0] == 2:
                    content.decode('UTF-8')
                binary = False
            except (IOError, OSError, UnicodeError):
                binary = True
            if binary:
                desktop_desc = 'Binary'
                mobile_desc = ':-('
            else:
                good, title, meta_d, onpage_d = mk_description(full_path)
                if good == 2:
                    desktop_desc = htmlescape.escape(
                        '<span class="thenumberofthebeast">{}</span>',
                        1, meta_d
                    )
                    if noindex(full_path):
                        mobile_desc = '<span class="yeah">:-)</span>'
                    else:
                        mobile_desc = '<span class="thenumberofthebeast">:-D</span>'
                elif not noindex(full_path):
                    mobile_desc = '<span class="yeah">:-)</span>'
                    if compressout.debug_cookie:
                        desktop_desc = '<span class="yeah">Text; indexable</span>'
                    else:
                        desktop_desc = '<span class="yeah">Text</span>'
                else:
                    mobile_desc = ':-|'
                    if compressout.debug_cookie:
                        desktop_desc = 'Boring; unindexable'
                    else:
                        desktop_desc = 'Looks boring'

        compressout.write_b(
            htmlescape.escape(
                '''<tr><td class="mobile">{mobile_desc}</td>
                <td><a translate="no"
                    href="{site}?path={path}{referer}">{text}</a></td>
                <td class="desktop">{desktop_desc}</td></tr>
                ''',
                site=(0, my_url),
                path=(2, full_path[rootlen:] + '/'*is_dir),
                referer=(2, mk_referer_param(referer)),
                text=(1, x + '/'*is_dir),
                mobile_desc=(0, mobile_desc),
                desktop_desc=(0, desktop_desc),
            )
        )
    compressout.write_b('''            <!--</p>--></table>
        </div></main>
        

<footer><div id="footer">
    <hr class="textonly"/>
    <p>
        Copyright © Oskar Skog<br/>
        Website content released under the <a
        href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
        target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
        and my software usually under the <span class="a"><a target="_blank"
        rel="noopener"
        href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
        (2-clause)</a>.</span>
        <br/>
        Images may be from other sites, I should have cited useful sources
        somewhere on the page.
        <span class="notprint">Contact me if I haven't.</span>
    </p>
    <p id="contact" class="notprint">
        You can contact me at: <a href="mailto:oskar@oskog97.com"
        rel="noopener" target="_blank">oskar@oskog97.com</a>
        <span class="a">(<a href="/pgp-pub/oskar.asc"
                            >PGP public key</a>)</span>
    </p>
    <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
            CSS Stylesheet
        </a>
        <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
            target="_blank" class="notprint"><span
            class="img">Valid HTML5</span
        ></a><br/>
    </p>
</div></footer>

    </body>
</html>\n''')
```
```
def download(path):
    '''
    Send the file at `path` as a download instead of an HTML page.

    `path` is the absolute filesystem path of the file.  Writes an
    `X-Robots-Tag` header, a canonical `Link` header and, for files that
    can be read as text, a `text/plain` Content-Type.  The body is sent
    only when `if_none_match` asks for content and the request method is
    not HEAD.
    '''
    if noindex(path):
        compressout.write_h('X-Robots-Tag: noindex\n')
    else:
        compressout.write_h('X-Robots-Tag: index\n') # For verbosity.
    try:
        with open(path) as handle:
            content = handle.read()
        if sys.version_info[0] == 2:
            content.decode('utf-8')
        is_text = True
    except (IOError, OSError, UnicodeError):
        # On Python 3 a failed text read used to leave `content` unbound,
        # so the write at the end of this function raised NameError for
        # every non-UTF-8 file.  Fall back to the raw bytes instead.
        # NOTE(review): this assumes compressout.write_b accepts bytes on
        # Python 3 -- confirm against compressout.
        with open(path, 'rb') as handle:
            content = handle.read()
        is_text = False
    if is_text:
        compressout.write_h('Content-Type: text/plain; charset=UTF-8\n')
        compressout.write_h(htmlescape.escape(
                'Link: <{}?path={}>',
                0, canonical_url,
                2, path[rootlen:]
            ) + '; rel="canonical"; type="text/html"\n'
        )
    else:
        compressout.write_h(htmlescape.escape(
            'Link: <{}?path={}>; rel="canonical"\n',
            0, canonical_url,
            2, path[rootlen:]
        )) # No type specified.
    if if_none_match(path):
        compressout.write_h('\n')
        if os.getenv('REQUEST_METHOD') != 'HEAD':
            compressout.write_b(content)

```
def cat(path, referer):
    '''
    Write the HTML page that shows the source code of the file `path`.

    `path` is the absolute filesystem path of the file.  `referer` is
    the `(URI, title)` tuple built in `main`; it is handed on to
    `mk_navigation` and `mk_referer_param`.

    A file that cannot be read as text is sent raw, preceded only by an
    `X-Robots-Tag` header.  Otherwise the `html_page` headers are
    written and, unless the request method is HEAD, a page with JSON-LD
    metadata, the description from `mk_description`, quick links and
    one `<li>` per source line.
    '''
    def ol_content(text):
        '''
        Turn `text` into `<li>` elements, one per line.

        Returns `(lines_html, quick_links_html)`.  A line that declares
        a `def`/`class` or contains `id="..."` gets the extra fragment
        id `content_<name>`, provided the name consists only of ASCII
        letters, `_` and `-` and has not been used by an earlier line.
        '''
        out_lines = []
        ids = []
        allowed_chars = string.ascii_letters + '_-'
        for index, line in enumerate(text.split('\n')):
            # Create a "permanent" fragment for this line.
            this_id = ''
            # Find ids in Python and XHTML
            for decltype in ('def', 'class'):
                if line.strip().startswith(decltype + ' ') and '(' in line:
                    this_id = line.split(decltype, 1)[1].split('(')[0].strip()
            if 'id="' in line:
                this_id = line.split('id="')[1].split('"')[0]
            # Prevent bad ids.
            for ch in this_id:
                if ch not in allowed_chars:
                    this_id = ''
                    break
            if this_id in ids:
                this_id = ''
            # Create the fragment identifier for the line.
            if this_id:
                ids.append(this_id)
                idline = 'id="content_{}"'.format(this_id)
            else:
                idline = ''
            # Create line
            out_lines.append(htmlescape.escape(
                    '    <li id="{}"><pre translate="no" {}>{}</pre></li>\n',
                    0, index + 1,
                    0, idline,
                    1, line,
            ))
        fragment_links = []
        for fragment in sorted(ids):
            fragment_links.append(
                (
                    '<a class="quick" href="#content_{0}" translate="no"' +
                    '>{0}</a>\n'
                ).format(
                    fragment
                )
            )
        return ''.join(out_lines), ''.join(fragment_links)

    try:
        with open(path) as handle:
            content = handle.read()
        if sys.version_info[0] == 2:
            content.decode('utf-8')
    except (IOError, OSError, UnicodeError):
        # Not readable as text: send the file as it is.  On Python 3 the
        # failed read used to leave `content` unbound, so this branch
        # raised NameError; read the raw bytes instead.
        # NOTE(review): this assumes compressout.write_b accepts bytes on
        # Python 3 -- confirm against compressout.
        with open(path, 'rb') as handle:
            content = handle.read()
        if noindex(path):
            compressout.write_h('X-Robots-Tag: noindex\n')
        else:
            compressout.write_h('X-Robots-Tag: index\n')
        compressout.write_h('\n')
        # No body for HEAD, as in `download` and in the text branch below.
        if os.getenv('REQUEST_METHOD') != 'HEAD':
            compressout.write_b(content)
        return
    compressout.write_h(html_page)
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return

    ignore, title, meta_description, p_description = mk_description(path)
    # '%Y-%m-%d' yields the same text as the platform-specific '%F'.
    last_modified = time.strftime(
        '%Y-%m-%d', time.gmtime(os.stat(path).st_mtime)
    )

    lines, fragment_links = ol_content(content)
    if not fragment_links:
        fragment_links = '(none)'

    compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <meta charset="utf-8"/>
        <meta name="viewport" content="width=device-width, initial-scale=1"/>
        <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
        <link rel="icon" type="image/png" href="/favicon.png"/>

''')
    # JSON-LD metadata.  It is written in several pieces so that only the
    # pieces without literal braces go through str.format.
    compressout.write_b('''
<script type="application/ld+json">
{
    "@context":
    {
        "@vocab": "http://schema.org/"
    },
    "@type": "SoftwareSourceCode",
    "license": "https://opensource.org/licenses/BSD-2-Clause",
    "author":
    {
    ''')
    compressout.write_b('''
        "@type": "Person",
        "@id": "https://oskog97.com/",
        "name": "{0}",
        "url": "https://oskog97.com/"
    '''.format(owner))
    compressout.write_b('''
    },
    "publisher": {"@id": "https://oskog97.com/"},
    "copyrightHolder": {"@id": "https://oskog97.com/"},
    ''')
    # schema.org property names are case-sensitive: the property is
    # `dateModified`; the previous `DateModified` is not a schema.org term.
    compressout.write_b('''
    "url": "{}#code",
    "dateModified": "{}"
    '''.format(
        canonical_url + '?path=' + path[rootlen:],
        last_modified,
    ))
    compressout.write_b('''
}
</script>
    ''')
    parent_link = '/'.join(path.split('/')[:-1])[rootlen:]+'/'
    # The "Indexable" table row is commented out in the markup unless
    # compressout.debug_cookie is set.
    compressout.write_b(htmlescape.escape('''
        <link rel="stylesheet" type="text/css" href="/read/style.css"/>
        <title>{title}</title>
        <link rel="canonical" href="{canonical}"/>
        <link
            rel="alternate"
            href="{canonical}&amp;download=yes"
            type="text/plain"
        />
        <meta name="robots" content="{noindex_no}index"/>
        <meta name="description" content="{meta_description}"/>
    </head>
    <body>
        {navigation}
<main><div id="content">
    <h1 id="title" translate="no">{title}</h1>
    <div id="description">
        {content_description}
    </div>
    <table>
        <tr>
            <td>Last modified</td>
            <td><time datetime="{last_modified}">{last_modified}</time></td>
        </tr>
        <tr>
            <td>Lines</td>
            <td>{linecount}</td>
        </tr>
        {begin_debug}<tr>
            <td>Indexable</td>
            <td>{indexable}</td>
        </tr>{end_debug}
    </table>
    <p class="notprint read-nav">
        <a href="{my_url}?path={parent_dir}">Parent directory</a>
        <a href="{my_url}?path={path}&amp;download=yes" target="_blank">Download</a>
        <a href="{my_url}?sitemap=html">CGIread sitemap</a>
        <a href="{my_url}">Main page</a>
    </p>
    <p class="notprint">
        Quick links:\n{fragments}
    </p>
<ol id="code">
{content}
</ol>
</div></main>
''',
        title=(2, title),
        content=(0, lines),
        parent_dir=(2, parent_link + mk_referer_param(referer)),
        navigation=(0, mk_navigation(referer, path[rootlen:])),
        canonical=(2, canonical_url + '?path=' + path[rootlen:]),
        path=(2, path[rootlen:]),
        noindex_no=(2, 'no' * noindex(path)),
        meta_description=(2, meta_description),
        content_description=(0, p_description),
        last_modified=(2, last_modified),
        linecount=(1, content.count('\n') + 1),
        indexable=(0, {True: 'No', False: 'Yes'}[noindex(path)]),
        fragments=(0, fragment_links),
        my_url=(0, my_url),
        begin_debug=(0,['<!--',''][compressout.debug_cookie]),
        end_debug=(0,['-->',''][compressout.debug_cookie]),
    ))
    compressout.write_b('''
        

<footer><div id="footer">
    <hr class="textonly"/>
    <p>
        Website content released under the <a
        href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
        target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
        and my software usually under the <span class="a"><a target="_blank"
        rel="noopener"
        href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
        (2-clause)</a>.</span>
        <br/>
        Images may be from other sites, I should have cited useful sources
        somewhere on the page.
        <span class="notprint">Contact me if I haven't.</span>
    </p>
    <p id="contact" class="notprint">
        You can contact me at: <a href="mailto:oskar@oskog97.com"
        rel="noopener" target="_blank">oskar@oskog97.com</a>
        <span class="a">(<a href="/pgp-pub/oskar.asc"
                            >PGP public key</a>)</span>
    </p>
    <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
            CSS Stylesheet
        </a>
        <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
            target="_blank" class="notprint"><span
            class="img">Valid HTML5</span
        ></a><br/>
    </p>
</div></footer>

    </body>
</html>
''')
```
```
def if_none_match(path):
    '''
    ETag handling for `cat`, `ls` and `download`:

    Builds an ETag from the modification times of `path`, of
    `path + '.info'` (0 if that cannot be stat'ed), of `index.py` and of
    `read.cfg`, from the base64 encoded query string and from whether
    XHTML and gzip are in use, and writes it together with the
    describing `X-ETag-*` headers.

    Returns `True` if content needs to be generated.
    Outputs necessary headers and 304 statuses.
    '''
    try:
        meta_time = os.stat(path + '.info').st_mtime
    except OSError:
        meta_time = 0
    if sys.version_info[0] > 2:
        # 'surrogateescape' round-trips query bytes that os.environ could
        # not decode; a plain encode('utf-8') raises on them.  Decoding the
        # base64 result keeps the bytes repr (b'...') out of the ETag.
        raw_query = os.getenv('QUERY_STRING', '').encode(
            'utf-8', 'surrogateescape'
        )
        query_b64 = base64.b64encode(raw_query).decode('ascii')
    else:
        query_b64 = base64.b64encode(os.getenv('QUERY_STRING', ''))
    ETag = '"{}{}-{}({})-{}-({}-{})"'.format(
        'x'*('application/xhtml+xml' in html_page),
        'z'*('gzip' in os.getenv('HTTP_ACCEPT_ENCODING', '')),
        os.stat(path).st_mtime,
        meta_time,
        query_b64,
        os.stat('index.py').st_mtime,
        os.stat('read.cfg').st_mtime,
    )
    compressout.write_h('Vary: If-None-Match\n')
    compressout.write_h('ETag: {}\n'.format(ETag))
    compressout.write_h(
'''X-ETag-Synopsis: [x][z]-<f_time>(<m_time>)-<query>-(<s_time>-<c_time>)
X-ETag-Description-x: "Client accepts application/xhtml+xml"
X-ETag-Description-z: "Content-Encoding: gzip"
X-ETag-Description-f_time: "Unix last modified time for the requested file"
X-ETag-Description-m_time: "Unix last modified time for the file's metadata"
X-ETag-Description-query: "base64 encoded $QUERY_STRING"
X-ETag-Description-s_time: "Unix last modified time for '/read/index.py'"
X-ETag-Description-c_time: "Unix last modified time for '/read/read.cfg'"
''')
    # If-None-Match may carry a comma separated list of validators, weak
    # ones (W/"...") or `*`.  The ETag built above never contains a comma,
    # so splitting on commas is safe.
    for candidate in os.getenv('HTTP_IF_NONE_MATCH', '').split(','):
        candidate = candidate.strip()
        if candidate.startswith('W/'):
            candidate = candidate[2:]
        if candidate == ETag or candidate == '*':
            compressout.write_h('Status: 304\n\n')
            return False
    return True
```

def is_injection_attempt(path_param, referer_URI, referer_title):
    '''
    Various checks to see if any form of injection attempt has been
    made.  This function checks the `path`, `referer` and `title`
    parameters.

    Returns True if the request is an injection attempt.

    - XSS
    - URL injection
    - Spam injection
    - Restricted files access
    - HTTP header injection (control characters in `path`)
    '''
    # Prevent attacks.
    if '..' in path_param:
        return True
    # `handle_injection_attempt` copies `path` into a Location header, so
    # control characters (CR/LF in particular) must never get that far.
    for ch in path_param:
        if ord(ch) < 32 or ord(ch) == 127:
            return True
    for var in referer_URI, referer_title:
        for ch in var:
            if ord(ch) < 32:
                return True
            if ch in '<>&\'"':
                return True
            # NOTICE: The following will limit parameters to ASCII.
            if ord(ch) > 126:
                return True
    # Browsers treat a backslash in a URL like a slash, so '/\\host' would
    # otherwise slip past the '//' check below and link to a foreign host.
    if '\\' in referer_URI:
        return True
    # Prevent linking to Mallory.
    for start in ('http://', 'https://', '//', 'ftp://'):
        if referer_URI.startswith(start):
            hostname = referer_URI.split('//')[1].split('/')[0]
            if hostname not in conf['allowed-referer-hosts']:
                return True
            else:
                break
    else:
        # None of the known prefixes: any other scheme is refused.
        if ':' in referer_URI:
            return True
    # Prevent injected spam
    if spammy.spammy(referer_title) or len(referer_title) > 42:
        return True
    # No match.
    return False
```

def handle_injection_attempt(path_param, referer_URI, referer_title):
    '''
    Decide if the injection attempt was due to innocently following
    a malicious link or due to creating one.

    If `path_param` on its own already counts as an injection attempt,
    the client is redirected to an article about data validation.
    Otherwise it is redirected to this script with only the `path`
    parameter kept, i.e. with the `referer` and `title` parameters
    dropped.  `referer_URI` and `referer_title` are not used here.
    '''
    # Check if the URL can be sanitized.
    if is_injection_attempt(path_param, '', ''):
        destination = 'https://en.wikipedia.org/wiki/Data_validation'
    else:
        # Imported here so the block works on Python 2 and 3 alike.
        try:
            from urllib.parse import quote
        except ImportError:
            from urllib import quote
        # The value ends up in a Location header: percent-encode it so that
        # spaces, control characters and '&'/'#' can neither break the URL
        # nor inject extra header lines.
        destination = my_url + '?path=' + quote(path_param, safe='/')
    redirect_spam(destination)
```
```
def main():
    '''
    Handle one CGI request: validate it and dispatch to `index_page`,
    `sitemap`, `ls`, `cat` or `download`.

    `compressout.init` MUST be called before `main`
    and `compressout.done` after.

    Sets the global `html_page` to the headers for HTML or XHTML,
    depending on the `Accept` request header.
    '''
    # HTML vs XHTML
    global html_page
    html_page = 'Vary: Accept\n'
    if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
        html_page += 'Content-Type: application/xhtml+xml; charset=UTF-8\n'
    else:
        html_page += 'Content-Type: text/html; charset=UTF-8\n'
    # Check that the method is either GET, HEAD or OPTIONS.
    if os.getenv('REQUEST_METHOD') not in ('GET', 'HEAD'):
        if os.getenv('REQUEST_METHOD') != 'OPTIONS':
            compressout.write_h('Status: 405\n')
        compressout.write_h('Allow: GET, HEAD, OPTIONS\n')
        compressout.write_h('Content-Type: text/plain\n')
        compressout.write_h('\n')
        if os.getenv('REQUEST_METHOD') != 'OPTIONS':
            compressout.write_b('Method not allowed!\n')
        compressout.write_b('Allowed methods: GET, HEAD, OPTIONS\n')
        return
    # Get the parameters.
    params = cgi.FieldStorage()
    path = path_param = params.getfirst('path', default='')
    referer_URI = params.getfirst('referer', default='')
    referer_title = params.getfirst('title', default='Back')
    referer = (referer_URI, referer_title)
    download_flag = params.getfirst('download', default='no')
    sitemap_param = params.getfirst('sitemap', default='none')

    if not os.getenv('QUERY_STRING'):
        index_page()
        return

    # Bad request, but will match the evil patterns.
    # Keep it before the evil stopper.
    if bool(path_param) and not path_param.startswith('/'):
        status400('`path` is not relative to this site. (No leading slash.)')
        return

    # Do not allow evil requests.
    allow = True
    # Keep things within the server root.
    try:
        path = os.path.realpath(root + path)
    except (ValueError, TypeError, OSError):
        # E.g. an embedded NUL byte.  `path` keeps the raw parameter, which
        # the check below then rejects as well.
        allow = False
    if path != root and not path.startswith(root + '/'):
        allow = False
    # Stop at forbidden paths. #1/2
    for regex in conf['noaccess']:
        if re.match(regex, path[rootlen:]) is not None:
            allow = False

    # Prevent XSS, URL injection, spam injection and miscellaneous assholery.
    if is_injection_attempt(path_param, referer_URI, referer_title):
        allow = False
    if not allow:
        handle_injection_attempt(path_param, referer_URI, referer_title)
        return

    # Bad requests:
    if download_flag not in ('yes', 'no'):
        status400('`download` MUST be "yes", "no" or unset.')
        return
    if bool(path_param) and sitemap_param != 'none':
        status400('The `sitemap` parameter cannot be used with any other.')
        return
    if download_flag == 'yes' and bool(referer_URI):
        status400("`download=yes` can't be used with the `referer` parameter.")
        return
    if sitemap_param not in ('none', 'xml', 'html'):
        status400('`sitemap` MUST be "html", "xml" or unset.')
        return
    if download_flag == 'yes' and not bool(path_param):
        status400('Nothing to `download`. Use the `path` parameter.')
        return
    if bool(referer_URI) and not bool(path_param):
        status400('`referer` cannot be used without `path`')
        return
    if referer_title != 'Back' and not bool(referer_URI):
        status400('`referer` is not set.')
        return

    # From here on the request is allowed; disallowed requests returned
    # above.
    # Generate sitemap?
    if sitemap_param != 'none':
        sitemap(sitemap_param)
        return
    # Stop at forbidden paths. #2/2
    for regex in conf['topsecret']:
        if re.match(regex, path[rootlen:]) is not None:
            status404()
            return
    # Allowed to be seen.  Only the directory probe itself is guarded: an
    # OSError raised later inside `if_none_match`, `ls` or `cat` must not
    # be mistaken for "not a directory" or "not found".
    try:
        os.listdir(path)
    except OSError as e:
        if e.errno == errno.ENOENT:
            status404()
            return
        if e.errno != errno.ENOTDIR:
            # Unexpected failure (e.g. permissions): let the real error
            # propagate instead of hiding it behind a ValueError.
            raise
        is_directory = False
    else:
        is_directory = True
    if is_directory:
        if download_flag == 'no':
            if if_none_match(path):
                ls(path, referer)
        else:
            status400("Can't download a directory.")
    elif download_flag == 'no':
        if if_none_match(path):
            cat(path, referer)
    else:
        # `download` sets a few headers.
        download(path)
```
```
# Script entry point.  `main` requires `compressout.init` to be called
# before it and `compressout.done` after it (see the docstring of `main`).
if __name__ == '__main__':
    compressout.init()
    main()
    compressout.done()
```