CGI source code reader script

This is (the source of) the script that generates this very page.

Through this, you can see the source code for all the scripts on my site.

Requirements for a file's /read/ page to be indexable:
  • Always indexable if whitelisted
  • Not manually blacklisted
  • Not made from another file
  • Text file
  • At least 3072/1536/1024 Unicode code points (full limit / half with a meta description and title / a third with an onpage description)
  • At least 300/150/100 "words"
  • At least 60/30/20 lines
  • At least 24/12/8 comments
Last modified
Lines 1125
Indexable Yes

Parent directory Download CGIread sitemap Main page

Quick links: cat code content description download forms handle_injection_attempt if_none_match index_page is_injection_attempt isword ls main mk_description mk_navigation mk_referer_param navigation noindex ol_content redirect_spam sitemap syntax title

#!/usr/bin/python
# -*- coding: utf-8 -*-
# CGI script that serves the source code of the site's scripts (/read/).
# Top-level configuration constants:
root = '/var/www'
owner = 'Oskar Skog'
my_url = '/read/'
canonical_url = 'https://__HOST__/read/'
# Static bodies for the error responses emitted by status403/404/503 below.
html403file = '/var/www/oops/403.html'
html404file = '/var/www/oops/404.html'
html503file = '/var/www/oops/cgi503.html'
import sys
sys.path.append(root)  # Make site-local modules (compressout, htmlescape, ...) importable.
import cgi
import os
import errno
import compressout
import base64
import re
import time
import htmlescape
import string
import spammy
import sitemap as mod_sitemap  # Name conflict with already existing function.
import cgitb
cgitb.enable()  # Show tracebacks in the browser on uncaught exceptions.
rootlen = len(root)  # Used throughout to strip the filesystem prefix from paths.
#html_mime = 'text/html'      # Set to XHTML later.
html_page = 'Content-Type: text/html; charset=UTF-8\n'  # Set to XHTML later.
# NOTE(review): `eval` executes arbitrary code from read.cfg; acceptable only
# because the config file is local and trusted.  `ast.literal_eval` would be
# safer — confirm the config is a pure Python literal before switching.
conf = eval(open('read.cfg').read())
  29. def redirect_spam(destination):
  30.     '''`destination` is the URL to which assholes should be redirected.'''
  31.     compressout.write_h('Status: 303\n')
  32.     compressout.write_h('Location: {}\n'.format(destination))
  33.     compressout.write_h('\n')
def status400(message):
    '''
    HTTP 400; `message` goes UNESCAPED inside a <pre> element.

    NOTE(review): because `message` is not escaped, callers must never pass
    raw user input here — confirm at the call sites.
    '''
    compressout.write_h('Status: 400\n')
    compressout.write_h(html_page)
    compressout.write_h('\n')
    # __HTML5__, __NAVIGATION__ and __FOOTER__ are placeholders expanded
    # outside this function (template post-processing).
    compressout.write_b('''__HTML5__
        <title>400 - Bad Request</title>
    </head>
    <body>
        __NAVIGATION__
        <main><div id="content">
            <h1 id="title">400 - Bad Request</h1>
            <pre>{}</pre>
            <p>
                Your request can't be understood.
                Check the parameters.
            </p>
            <p><a href="/read/">Documentation for the parameters</a></p>
        </div></main>
'''.format(message))
    compressout.write_b('''
        __FOOTER__
    </body>
</html>''')
  58. def status403():
  59.     '''HTTP 403'''
  60.     compressout.write_h(html_page)
  61.     compressout.write_h('Status: 403\n\n')
  62.     compressout.write_b(open(html403file).read())
  63. def status404():
  64.     '''HTTP 404'''
  65.     compressout.write_h('Status: 404\n')
  66.     compressout.write_h(html_page)
  67.     compressout.write_h('\n')
  68.     compressout.write_b(open(html404file).read())
  69. def status503():
  70.     '''
  71.     HTTP 503
  72.     
  73.     Call this if there is too much load on the server to do something.
  74.     (Used by the sitemap function.)
  75.     '''
  76.     compressout.write_h('Status: 503\n')
  77.     compressout.write_h(html_page)
  78.     # One factor is load avg for 1 minute, add some slop to the delay for bots.
  79.     compressout.write_h('Retry-After: 90\n')
  80.     compressout.write_h('\n')
  81.     compressout.write_b(open(html503file).read())
def index_page():
    '''
    Serve the static landing page (https://oskog97.com/read/).

    Handles conditional requests: emits 304 when the client's
    If-None-Match matches the computed ETag, and stops after headers
    for HEAD requests.
    '''
    # Handle 304s.
    # ETag varies with: output MIME ('x' when html_page advertises XHTML),
    # gzip acceptance ('z'), and this script's own mtime.
    ETag = '"{}{}{}"'.format(
        'x'*('application/xhtml+xml' in html_page),
        'z'*('gzip' in os.getenv('HTTP_ACCEPT_ENCODING', '')),
        os.stat('index.py').st_mtime,
    )
    compressout.write_h('Vary: If-None-Match\n')
    compressout.write_h('ETag: {}\n'.format(ETag))
    compressout.write_h(html_page)
    if os.getenv('HTTP_IF_NONE_MATCH') == ETag:
        compressout.write_h('Status: 304\n\n')
        return
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return
    # Write out a static page.
    # __HTML5__, __TITLE__, __NAVIGATION__, __H1__ and __FOOTER__ are
    # placeholders expanded outside this function.
    compressout.write_b('''__HTML5__
    <!-- With canonical link tag. -->
        <link rel="stylesheet" type="text/css" href="/read/style.css"/>
        <meta name="description" content="Interested in the scripts I have
        on my website? Come and take a look at them."/>
        __TITLE__
    </head>
    <body>
        __NAVIGATION__
        <main><div id="content">
            __H1__
    ''')
    # {0} below is `my_url` (see the .format at the end of the literal).
    compressout.write_b('''
            <p>
                Interested in the scripts I have on my website?
                Go take a look at them; start crawling the
                <a href="{0}?path=/">root directory</a> or take a look
                at the <span class="a"><a href="{0}?sitemap=html"
                >(sub)sitemap</a>.</span>
            </p>
            <div id="syntax">
                <h2>Parameter syntax</h2>
                <p>
                    Descriptions for the parameters can be found in
                    the request forms.
                </p>
                <ul>
                    <li>
                        Asterisks <q>*</q> represent a value that can be
                        (almost) anything.
                    </li>
                    <li>Square brackets <q>[]</q> represent optional.</li>
                    <li>Curly brackets <q>&#x7b;&#x7d;</q> represent mandatory.</li>
                    <li>Pipes <q>|</q> represent either or.</li>
                </ul>
                <p>There are three acceptable "sets" of parameters:</p>
                <ol>
<li><pre>{0}?sitemap=&#x7b;html|xml&#x7d;</pre></li>
<li><pre>{0}?path=*[&amp;download=yes]</pre></li>
<li><pre>{0}?path=*[&amp;referer=*[&amp;title=*]]</pre></li>
                </ol>
                <p>
                    The order of the valid parameters doesn't matter, but
                    this is the recommended/canonical order.
                </p>
            </div>
            <div id="forms">
                <h2>Request forms</h2>
                <p><strong>
                    Notice that these are three different forms.
                </strong></p>
                <form action="{0}" method="get">
                <h3>Sitemap</h3>
                <p>
                    The <code>sitemap</code> parameter can be either
                    <q><code>html</code></q>, <q><code>xml</code></q>
                    or the default <q><code>none</code></q>.
                    It can't be used together with any other parameters.
                </p>
                <p>
                    <input type="radio" name="sitemap" value="html"/>
                    Request an HTML sitemap instead of a page<br/>
                    <input type="radio" name="sitemap" value="xml"/>
                    request an XML sitemap instead of a page<br/>
                    <input type="submit"/>
                </p>
                </form>
                <form action="{0}" method="get">
                <h3>Page</h3>
                <p>
                    A page (source code of a CGI script) is selected with the
                    <code>path</code> parameter.  The value of the
                    <code>path</code> parameter is a URL relative to this
                    site, ie. an URL beginning with a single slash.
                </p>
                <p>
                    The <code>path</code> is the site-local URL to the CGI
                    script or directory you're interested in.  If you set the
                    value to <q><code>/read/index.py</code></q>, you'll get the
                    source code for this script. And if you set it to
                    <q><code>/</code></q>, you'll get a directory listing
                    of the site's root directory.
                </p>
                <p>
                    Path/URL: <input type="text" name="path" value="/"/>
                    <input type="submit"/><br/>
                    <input type="checkbox" name="download" value="yes"/>
                    Download / see it as plain text
                    
                </p>
                <p>
                    The <code>download</code> parameter can be set to either
                    <q><code>yes</code></q> or the default
                    <q><code>no</code></q>.  The download option does
                    obviously not work with directories.
                </p>
                </form>
                <form action="{0}" method="get">
                <h3>Link back to a referencing page</h3>
                <p>
                    If <code>download</code> is <q><code>no</code></q> or
                    unset and a page (not a sitemap) was requested, it is
                    possible to change the navigation to make the requested
                    page link back to a referring page.
                </p>
                <p>
                    The <code>referer</code> (yes, misspelled like the HTTP
                    Referer) parameter is the URL of the referencing page.
                    (Don't try to specify a site that isn't mine.)
                    The <code>title</code> parameter gives the back link a
                    different text than <q>Back</q>.
                </p>
                <table>
                    <tr>
                        <th><code>path</code></th>
                        <td><input type="text" name="path" value="/"/></td>
                    </tr>
                    <tr>
                        <th><code>referer</code></th>
                        <td><input type="text" name="referer"/></td>
                    </tr>
                    <tr>
                        <th><code>title</code></th>
                        <td><input type="text" name="title"/></td>
                    </tr>
                    <tr>
                        <td></td>
                        <td><input type="submit"/></td>
                    </tr>
                </table>
                </form>
            </div>
        </div></main>
    '''.format(my_url))
    compressout.write_b('''
        __FOOTER__
    </body>
</html>
''')
  239. def noindex(path):
  240.     '''
  241.     Returns True if `path` should be noindexed.
  242.     
  243.     `path` is an absolute **filesystem** path.
  244.     '''
  245.     def isword(w):
  246.         letters = string.letters + ',.'
  247.         for ch in w:
  248.             if w not in letters:
  249.                 return False
  250.         return True
  251.     # 1. White list
  252.     # 2. Black list
  253.     # 3. Page quality (not applicable for directories)
  254.     
  255.     # Check whitelist first.
  256.     for regex in conf['doindex']:
  257.         if re.match(regex, path[rootlen:]) is not None:
  258.             return False
  259.             break
  260.     
  261.     # Blacklist (two kinds):
  262.     # - Generated from another file.
  263.     # - Explicitly blacklisted in 'read.cfg'.
  264.     for match, replace in conf['madefrom']:
  265.         if re.match(match, path[rootlen:]) is not None:
  266.             try:
  267.                 os.stat(root + re.sub(match, replace, path[rootlen:]))
  268.                 return True
  269.             except:
  270.                 pass
  271.     for regex in conf['noindex'] + conf['hide']:
  272.         if re.match(regex, path[rootlen:]) is not None:
  273.             return True
  274.     
  275.     # Quality:
  276.     #   - Text file
  277.     #   - At least 3072 Unicode code points
  278.     #   - At least 300 words
  279.     #   - At least 60 lines
  280.     #   - Half the limitations if a meta description and title is found
  281.     #   - A third of the limimitations if an onpage description is found
  282.     try:
  283.         os.listdir(path)
  284.         return False
  285.     except:
  286.         pass
  287.     # Normal file.
  288.     try:
  289.         text = open(path).read().decode('utf-8')
  290.     except:
  291.         return True
  292.     min_chars, min_words, min_lines, min_comments = 3072, 300, 60, 24
  293.     quality = mk_description(path)[0] + 1
  294.     min_chars //= quality; min_words //= quality
  295.     min_lines //= quality; min_comments //= quality
  296.     if len(text) < min_chars:
  297.         return True
  298.     if text.count('\n') + 1 < min_lines:
  299.         return True
  300.     n_comments = 0
  301.     is_comment = re.compile('^(.*#.*| *\\* .*|.*<!--.*|.*\'\'\'.*)$')
  302.     for line in text.split('\n'):
  303.         if re.match(is_comment, line) is not None:
  304.             n_comments += 1
  305.     if n_comments < min_comments:
  306.         return True
  307.     if len(filter(isword, text.replace('\n', ' ').split(' '))) < min_words:
  308.         return True
  309.     # Passed the quality tests:
  310.     return False
def mk_navigation(referer, title):
    '''
    Returns a string which is the navigation bar's HTML.
    
    `title` is the title of the requested page.
    
    `referer` is used to **optionally** ``integrate`` a page.
    `referer` is a tuple of (URL, title) for the "back" link.
    '''
    # With a referer, build a real navigation bar linking back to the
    # referring page; otherwise return the __NAVIGATION__ placeholder,
    # which is expanded elsewhere.
    if referer[0]:
        # htmlescape.escape placeholders: URL is escape-mode 2, titles
        # mode 1, my_url mode 0 (inserted verbatim).
        return htmlescape.escape('''<!-- Navigation generated by CGIread. -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="{URL}">{title}</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">]</span><span class="sub active">{me}</span><span class="textonly" translate="no">[</span>
<span class="textonly" translate="no">[</span><a class="sub" href="{my_url}?sitemap=html">Sitemap for website's scripts</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
&gt;&gt;
<span class="textonly" translate="no">[</span><a class="sub" href="/read/">Website's scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/pages/policy.html">Privacy policy &amp; terms of use</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
<!-- End of navigation. -->''',
            URL=(2, referer[0]),
            title=(1, referer[1]),
            me=(1, title),
            my_url=(0, my_url),
        )
    else:
        return '''__NAVIGATION__'''
  347. def mk_referer_param(referer):
  348.     '''Returns one of:
  349.         ''
  350.         '&referer=' + referer[0]
  351.         '&referer=' + referer[0] + '&title=' + referer[1]
  352.     to be added to links from the requested page.
  353.     
  354.     `referer` is used to **optionally** ``integrate`` a page.
  355.     See `mk_navigation`
  356.     '''
  357.     if referer[0]:
  358.         if referer[1] != 'Back':
  359.             title = '&title={}'.format(referer[1])
  360.         else:
  361.             title = ''
  362.         return '&referer={}{}'.format(referer[0], title)
  363.     else:
  364.         return ''
def mk_description(path):
    '''
    Return a 4-tuple: (good, title, meta_description, onpage_description)
    
    `path` is the absolute filesystem path to the requested page.
    
    `good` is
        0       no title and description
        1       title and meta description only
        2       also an onpage description
    
    `title` is the title of the page.
    
    `meta_description` is the content of the description meta tag.
    
    `onpage_description` is HTML content for the onpage description of the
    requested page.

    The data comes from an optional sidecar file `path + '.info'`:
    line 0 is the title, the lines up to a lone '.' are the meta
    description, and the lines after it are the onpage description.
    '''
    good = 0
    title = "source code of {}".format(path[rootlen:])
    meta_description = ''
    onpage_description = None
    # Best effort: a missing/unreadable .info file simply leaves good == 0.
    try:
        content = open(path + '.info').read().split('\n')
        good = 1
    except:
        pass
    if good:
        title = content[0]
        # A line containing only '.' separates meta from onpage description.
        try:
            sep = content.index('.')
        except ValueError:
            sep = None
        if sep is not None:
            good = 2
            meta_description = '\n'.join(content[1:sep])
            onpage_description = '\n'.join(content[sep+1:])
        else:
            meta_description = '\n'.join(content[1:])
    # Fall back to wrapping the (possibly empty) meta description in <p>.
    if onpage_description is None:
        onpage_description = htmlescape.escape('<p>{}</p>',1,meta_description)
    return good, title, meta_description, onpage_description
def sitemap(sitemap_type):
    '''
    Write out an XML or HTML sitemap.
    sitemap_type in ('xml', 'html')
    
    The XML sitemap will exclude entries from `conf['noxmlsitemap']`.

    Crawling the whole tree is expensive, so non-HEAD requests are
    throttled via the `read.throttlecontrol` timestamp file and the
    1-minute load average; over the limit, a 503 is sent instead.
    '''
    
    if os.getenv('REQUEST_METHOD') != 'HEAD': # NOTICE
        # Prevent over-revving the server.
        # HEAD requests are basically no-ops.
        maxload = conf['sitemap-maxload']
        if os.getloadavg()[0] > maxload['load-avg1']:
            status503()
            return
        # NOTE: relies on Python 2 `map` returning a list (indexed and
        # mutated below).
        access_times = map(
            float, open('read.throttlecontrol').read().strip().split(':')
        )
        # Too soon since the oldest remembered request -> throttle.
        if time.time() - access_times[-1] < maxload['throttle-time']:
            status503()
            return
        access_times.insert(0, time.time())
        access_times = access_times[:maxload['throttle-requests']]
        f = open('read.throttlecontrol', 'w')
        f.write(':'.join(map(str, access_times)) + '\n')
        f.close()
    # Write headers before doing anything else.
    # A HEAD request doesn't need to know the length (it's TE chunked).
    if sitemap_type == 'xml':
        compressout.write_h('Content-Type: application/xml; charset=UTF-8\n')
        compressout.write_h(
            'Link: <{my_url}?sitemap=html>'.format(my_url=canonical_url) +
            '; rel="canonical"' +
            '; type="text/html"\n'
        )
        compressout.write_h('X-Robots-Tag: noindex\n\n') # NOTE: last.
    elif sitemap_type == 'html':
        compressout.write_h(html_page)
        compressout.write_h('\n')
    else:
        assert False, "Neither 'xml' nor 'html'"
    if os.getenv('REQUEST_METHOD') == 'HEAD': # NOTICE
        return
    
    # Find the pages worth being in the sitemap.
    no_access = conf['noaccess'] + conf['hide'] + conf['topsecret']
    paths = []
    
    for basedir, dirs, files in os.walk(root, topdown=True):
        # Exclude hidden directories:
        remove_list = []
        for dirname in dirs:
            dirpath = os.path.join(basedir, dirname)[rootlen:]
            for regex in no_access:
                if re.match(regex, dirpath) is not None:
                    #dirs.remove(dirname)
                    # BUG: The for loop will skip items in the list if
                    # other items are removed while looping.
                    # This caused some real' nasty stuff like sshin to
                    # be crawled, took a whopping .65 seconds.
                    remove_list.append(dirname)
                    break
        # Pruning `dirs` in place makes topdown os.walk skip those trees.
        for dirname in remove_list:
            dirs.remove(dirname)
        
        # Iterate over files:
        for filename in files:
            filepath = os.path.join(basedir, filename)
            # No symlinks allowed.
            if os.stat(filepath).st_mode == os.lstat(filepath).st_mode:
                #try:
                    description = mk_description(filepath)
                    if description[0]:
                        # Only indexable content allowed.
                        if not noindex(filepath):
                            paths.append((filepath[rootlen:], description[3]))
                #except IOError as error:
                    #assert error.errno in (
                        #errno.EISDIR, errno.EACCES
                    #), error.errno
    
    paths.sort(key=lambda x: x[0])
    
    # Print the body.
    if sitemap_type == 'xml':
        compressout.write_b('''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
''')
        #
        for path, description in paths:
            # Loop through all the regexes; for/else: the else branch runs
            # only when no regex matched (no break).
            for regex in conf['noxmlsitemap']:
                if re.match(regex, path) is not None:
                    break
            else:
                compressout.write_b(htmlescape.escape('''<url>
    <loc>{canonical_url}?path={path}</loc>
    <priority>0.5</priority>
''',
                    canonical_url=(0, canonical_url),
                    path=(1, path),
                ))
                mod_sitemap.lastmod_changefreq(
                    root + path,
                    compressout,
                )
                compressout.write_b('</url>\n')
        #
        compressout.write_b('</urlset>\n')
    elif sitemap_type == 'html':
        compressout.write_b('''__HTML5NC__
        <link rel="canonical" href="{canonical_url}?sitemap=html"/>
        <link rel="alternate" href="{canonical_url}?sitemap=xml"
            type="application/xml"/>
        <meta name="robots" content="noindex, follow"/>
        <title>Sitemap for scripts' source code</title>
        <meta name="description" content="
            Sitemap of all scripts available through /read/.
        "/>
    </head>
    <body>
        __NAVIGATION__
        <main><div id="content" class="sitemap">
            <h1 id="title">Sitemap for scripts' source code</h1>
            <p><a href="{my_url}?path=/">Root directory</a></p>
            <dl>
'''.format(my_url=my_url, canonical_url=canonical_url))
        #
        indent = 16 * ' '
        for path, description in paths:
            compressout.write_b(indent + htmlescape.escape(
                '''<dt><a translate="no" href="{my_url}?path={path}">
                    {path}
                </a></dt>\n''',
                path=(0, path),
                my_url=(0, canonical_url),
            ))
            compressout.write_b(indent +
                htmlescape.escape('<dd>{}</dd>\n', 0, description)
            )
        #
        compressout.write_b('''            </dl>
        </div></main>
        __FOOTER__
    </body>
</html>
''')
    else:
        assert False, "Neither 'xml' nor 'html'"
def ls(path, referer):
    '''
    Write out a directory listing page for `path` (absolute filesystem
    path).  `referer` is the (URL, title) back-link tuple; see
    `mk_navigation` / `mk_referer_param`.
    '''
    compressout.write_h(html_page)
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return
    compressout.write_b('''__HTML5NC__''')
    # Page head and listing header; the root directory gets its
    # "parent directory" link commented out via isroot_commentout_*.
    compressout.write_b(htmlescape.escape('''
        <link rel="stylesheet" type="text/css" href="/read/style.css"/>
        <title>Index of {name}</title>
        <meta name="robots" content="{robots_follow}, noindex"/>
        <link rel="canonical" href="{canonical_url}?path={name}"/>
    </head>
    <body>
        {navigation}
        <main><div id="content" class="ls">
            <h1 id="title">Index of <span translate="no">{name}</span></h1>
            <p class="read-nav">
                {isroot_commentout_start}
                    <a href="{my_url}?path={parent_path}{referer_params}">
                        Parent directory
                    </a>
                {isroot_commentout_end}
                <a href="{my_url}?sitemap=html">CGIread sitemap</a>
                <a href="{my_url}">Main page</a>
            </p>
            <table id="ls">
            ''',
            name          =(1, path[rootlen:] + '/'),
            parent_path   =(2, '/'.join(path.split('/')[:-1])[rootlen:]+'/'),
            robots_follow =(2, 'no'*noindex(path)+'follow'),
            navigation    =(0, mk_navigation(
                                referer,
                                "Index of "+path[rootlen:]+'/'
                            )),
            referer_params=(2, mk_referer_param(referer)),
            my_url=(0, my_url),
            canonical_url=(0, canonical_url),
            isroot_commentout_start=(0, '<!--'*(path == root)),
            isroot_commentout_end=(0, '-->'*(path == root)),
        ))
    no_access = conf['noaccess'] + conf['hide'] + conf['topsecret']
    
    for x in sorted(os.listdir(path)):
        full_path = os.path.join(path, x)
        
        # Skip entries matching any forbidden-path regex.
        forbidden = False
        for regex in no_access:
            if re.match(regex, full_path[rootlen:]) is not None:
                forbidden = True
                break
        if forbidden:
            continue
        
        #url = cgi.escape(full_path, quote=True)
        # listdir succeeding is used as the "is a directory" test.
        try:
            os.listdir(full_path)
            is_dir = 1
        except:
            is_dir = 0
        # mobile_desc: short marker shown on small screens;
        # desktop_desc: fuller description for wide screens.
        if is_dir:
           mobile_desc = '<span class="yeah">-&gt;</span>'
           desktop_desc = '<span class="yeah">Directory</span>'
        else:
            content = open(full_path).read()
            # UTF-8 decodability decides text vs. binary.
            try:
                content.decode('UTF-8')
                binary = False
            except:
                binary = True
            if binary:
                desktop_desc = 'Binary'
                mobile_desc = ':-('
            else:
                good, title, meta_d, onpage_d = mk_description(full_path)
                if good == 2:
                    desktop_desc = htmlescape.escape(
                        '<span class="thenumberofthebeast">{}</span>',
                        1, meta_d
                    )
                    if noindex(full_path):
                        mobile_desc = '<span class="yeah">:-)</span>'
                    else:
                        mobile_desc = '<span class="thenumberofthebeast">:-D</span>'
                elif not noindex(full_path):
                    mobile_desc = '<span class="yeah">:-)</span>'
                    desktop_desc = '<span class="yeah">Text; indexable</span>'
                else:
                    mobile_desc = ':-|'
                    desktop_desc = 'Boring; unindexable'
                    
        compressout.write_b(
            htmlescape.escape(
                '''<tr><td class="mobile">{mobile_desc}</td>
                <td><a translate="no"
                    href="{site}?path={path}{referer}">{text}</a></td>
                <td class="desktop">{desktop_desc}</td></tr>
                ''',
                site=(0, my_url),
                path=(2, full_path[rootlen:] + '/'*is_dir),
                referer=(2, mk_referer_param(referer)),
                text=(1, x + '/'*is_dir),
                mobile_desc=(0, mobile_desc),
                desktop_desc=(0, desktop_desc),
            )
        )
    compressout.write_b('''            <!--</p>--></table>
        </div></main>
        __FOOTER__
    </body>
</html>\n''')
  670. def download(path):
  671.     if noindex(path):
  672.         compressout.write_h('X-Robots-Tag: noindex\n')
  673.     else:
  674.         compressout.write_h('X-Robots-Tag: index\n') # For verbosity.
  675.     content = open(path).read()
  676.     try:
  677.         content.decode('utf-8')
  678.         compressout.write_h('Content-Type: text/plain; charset=UTF-8\n')
  679.         compressout.write_h(htmlescape.escape(
  680.                 'Link: <{}?path={}>',
  681.                 0, canonical_url,
  682.                 2, path[rootlen:]
  683.             ) + '; rel="canonical"; type="text/html"\n'
  684.         )
  685.     except:
  686.         compressout.write_h(htmlescape.escape(
  687.             'Link: <{}?path={}>; rel="canonical"\n',
  688.             0, canonical_url,
  689.             2, path[rootlen:]
  690.         )) # No type specified.
  691.     if if_none_match(path):
  692.         compressout.write_h('\n')
  693.         if os.getenv('REQUEST_METHOD') != 'HEAD':
  694.             compressout.write_b(content)
  695. def cat(path, referer):
  696.     '''
  697.     '''
    def ol_content(text):
        '''
        Turn `text` into numbered <li><pre> lines plus quick links.

        Returns (out_lines, fragment_links): the joined <li> HTML for an
        ordered list, and the joined quick-link anchors for every
        fragment id found.
        '''
        out_lines = []
        ids = []    # Fragment ids already taken (first occurrence wins).
        allowed_chars = string.letters + '_-'
        for index, line in enumerate(text.split('\n')):
            # Create a "permanent" fragment for this line.
            this_id = ''
            # Find ids in Python (def/class names) and XHTML (id="...").
            for decltype in ('def', 'class'):
                if line.strip().startswith(decltype + ' ') and '(' in line:
                    this_id = line.split(decltype, 1)[1].split('(')[0].strip()
            if 'id="' in line:
                this_id = line.split('id="')[1].split('"')[0]
            # Prevent bad ids (anything outside letters, '_' and '-').
            for ch in this_id:
                if ch not in allowed_chars:
                    this_id = ''
                    break
            # Duplicate ids are dropped so fragments stay unique.
            if this_id in ids:
                this_id = ''
            # Create the fragment identifier for the line.
            if this_id:
                ids.append(this_id)
                idline = 'id="content_{}"'.format(this_id)
            else:
                idline = ''
            # Create line: li id is the 1-based line number.
            out_lines.append(htmlescape.escape(
                    '    <li id="{}"><pre translate="no" {}>{}</pre></li>\n',
                    0, index + 1,
                    0, idline,
                    1, line,
            ))
        # Alphabetical quick links to all found fragments.
        fragment_links = []
        for fragment in sorted(ids):
            fragment_links.append(
                (
                    '<a class="quick" href="#content_{0}" translate="no"' +
                    '>{0}</a>\n'
                ).format(
                    fragment
                )
            )
        return ''.join(out_lines), ''.join(fragment_links)
  742.     
  743.     content = open(path).read()
  744.     try:
  745.         content.decode('utf-8')
  746.     except:
  747.         if noindex(path):
  748.             compressout.write_h('X-Robots-Tag: noindex\n')
  749.         else:
  750.             compressout.write_h('X-Robots-Tag: index\n')
  751.         compressout.write_h('\n')
  752.         compressout.write_b(content)
  753.         return
  754.     compressout.write_h(html_page)
  755.     compressout.write_h('\n')
  756.     if os.getenv('REQUEST_METHOD') == 'HEAD':
  757.         return
  758.     
  759.     ignore, title, meta_description, p_description = mk_description(path)
  760.     last_modified = time.strftime('%F', time.gmtime(os.stat(path).st_mtime))
  761.     
  762.     lines, fragment_links = ol_content(content)
  763.     if not fragment_links:
  764.         fragment_links = '(none)'
  765.     
  766.     compressout.write_b('''__HTML5NC__''')
  767.     compressout.write_b('''
  768. <script type="application/ld+json">
  769. {
  770.     "@context":
  771.     {
  772.         "@vocab": "http://schema.org/"
  773.     },
  774.     "@type": "SoftwareSourceCode",
  775.     "license": "https://opensource.org/licenses/BSD-2-Clause",
  776.     "author":
  777.     {
  778.     ''')
  779.     compressout.write_b('''
  780.         "@type": "Person",
  781.         "@id": "__SITE__/",
  782.         "name": "{0}",
  783.         "url": "__SITE__/"
  784.     '''.format(owner))
  785.     compressout.write_b('''
  786.     },
  787.     "publisher": {"@id": "__SITE__/"},
  788.     "copyrightHolder": {"@id": "__SITE__/"},
  789.     ''')
  790.     compressout.write_b('''
  791.     "url": "{}#code",
  792.     "DateModified": "{}"
  793.     '''.format(
  794.         canonical_url + '?path=' + path[rootlen:],
  795.         last_modified,
  796.     ))
  797.     compressout.write_b('''
  798. }
  799. </script>
  800.     ''')
  801.     parent_link = '/'.join(path.split('/')[:-1])[rootlen:]+'/'
  802.     compressout.write_b(htmlescape.escape('''
  803.         <link rel="stylesheet" type="text/css" href="/read/style.css"/>
  804.         <title>{title}</title>
  805.         <link rel="canonical" href="{canonical}"/>
  806.         <link
  807.             rel="alternate"
  808.             href="{canonical}&amp;download=yes"
  809.             type="text/plain"
  810.         />
  811.         <meta name="robots" content="{noindex_no}index"/>
  812.         <meta name="description" content="{meta_description}"/>
  813.     </head>
  814.     <body>
  815.         {navigation}
  816. <main><div id="content">
  817.     <h1 id="title" translate="no">{title}</h1>
  818.     <div id="description">
  819.         {content_description}
  820.     </div>
  821.     <table>
  822.         <tr>
  823.             <td>Last modified</td>
  824.             <td><time datetime="{last_modified}">{last_modified}</time></td>
  825.         </tr>
  826.         <tr>
  827.             <td>Lines</td>
  828.             <td>{linecount}</td>
  829.         </tr>
  830.         <tr>
  831.             <td>Indexable</td>
  832.             <td>{indexable}</td>
  833.         </tr>
  834.     </table>
  835.     <p class="notprint read-nav">
  836.         <a href="{my_url}?path={parent_dir}">Parent directory</a>
  837.         <a href="{my_url}?path={path}&amp;download=yes" target="_blank">Download</a>
  838.         <a href="{my_url}?sitemap=html">CGIread sitemap</a>
  839.         <a href="{my_url}">Main page</a>
  840.     </p>
  841.     <p class="notprint">
  842.         Quick links:\n{fragments}
  843.     </p>
  844. <ol id="code">
  845. {content}
  846. </ol>
  847. </div></main>
  848. ''',
  849.         title=(2, title),
  850.         content=(0, lines),
  851.         parent_dir=(2, parent_link + mk_referer_param(referer)),
  852.         navigation=(0, mk_navigation(referer, path[rootlen:])),
  853.         canonical=(2, canonical_url + '?path=' + path[rootlen:]),
  854.         path=(2, path[rootlen:]),
  855.         noindex_no=(2, 'no' * noindex(path)),
  856.         meta_description=(2, meta_description),
  857.         content_description=(0, p_description),
  858.         last_modified=(2, last_modified),
  859.         linecount=(1, content.count('\n') + 1),
  860.         indexable=(0, {True: 'No', False: 'Yes'}[noindex(path)]),
  861.         fragments=(0, fragment_links),
  862.         my_url=(0, my_url),
  863.     ))
  864.     compressout.write_b('''
  865.         __FOOTER__
  866.     </body>
  867. </html>
  868. ''')
  869. def if_none_match(path):
  870.     '''
  871.     ETag handling for `cat`, `ls` and `download`:
  872.     
  873.     
  874.     Returns `True` if content needs to be generated.
  875.     Outputs necessary headers and 304 statuses.
  876.     '''
  877.     try:
  878.         meta_time = os.stat(path + '.info').st_mtime
  879.     except:
  880.         meta_time = 0
  881.     ETag = '"{}{}-{}({})-{}-({}-{})"'.format(
  882.         'x'*('application/xhtml+xml' in html_page),
  883.         'z'*('gzip' in os.getenv('HTTP_ACCEPT_ENCODING', '')),
  884.         os.stat(path).st_mtime,
  885.         meta_time,
  886.         base64.b64encode(os.getenv('QUERY_STRING', '')),
  887.         os.stat('index.py').st_mtime,
  888.         os.stat('read.cfg').st_mtime,
  889.     )
  890.     compressout.write_h('Vary: If-None-Match\n')
  891.     compressout.write_h('ETag: {}\n'.format(ETag))
  892.     compressout.write_h(
  893. '''X-ETag-Synopsis: [x][z]-<f_time>(<m_time>)-<query>-(<s_time>-<c_time>)
  894. X-ETag-Description-x: "Client accepts application/xhtml+xml"
  895. X-ETag-Description-z: "Content-Encoding: gzip"
  896. X-ETag-Description-f_time: "Unix last modified time for the requested file"
  897. X-ETag-Description-m_time: "Unix last modified time for the file's metadata"
  898. X-ETag-Description-query: "base64 encoded $QUERY_STRING"
  899. X-ETag-Description-s_time: "Unix last modified time for '/read/index.py'"
  900. X-ETag-Description-c_time: "Unix last modified time for '/read/read.cfg'"
  901. ''')
  902.     if os.getenv('HTTP_IF_NONE_MATCH', '') == ETag:
  903.         compressout.write_h('Status: 304\n\n')
  904.         return False
  905.     else:
  906.         return True
  907. def is_injection_attempt(path_param, referer_URI, referer_title):
  908.     '''
  909.     Various checks to see if any form of injection attempt has been
  910.     made.  This function checks the `path`, `referer` and `title`
  911.     parameters.
  912.     
  913.     Returns True if the request is an injection attempt.
  914.     
  915.     - XSS
  916.     - URL injection
  917.     - Spam injection
  918.     - Restricted files access
  919.     '''
  920.     # If the path parameter contains an XSS attempt, it can't be corrected
  921.     evil = False
  922.     # Prevent attacks.
  923.     if '..' in path_param:
  924.         return True
  925.     for var in referer_URI, referer_title:
  926.         for ch in var:
  927.             if ord(ch) < 32:
  928.                 return True
  929.             if ch in '<>&\'"':
  930.                 return True
  931.             # NOTICE: The following will limit parameters to ASCII.
  932.             if ord(ch) > 126:
  933.                 return True
  934.     # Prevent linking to Mallory.
  935.     for start in ('http://', 'https://', '//', 'ftp://'):
  936.         if referer_URI.startswith(start):
  937.             hostname = referer_URI.split('//')[1].split('/')[0]
  938.             if hostname not in conf['allowed-referer-hosts']:
  939.                 return True
  940.             else:
  941.                 break
  942.     else:
  943.         if ':' in referer_URI:
  944.             return True
  945.     # Prevent injected spam
  946.     if spammy.spammy(referer_title) or len(referer_title) > 42:
  947.         return True
  948.     # No match.
  949.     return False
  950. def handle_injection_attempt(path_param, referer_URI, referer_title):
  951.     '''
  952.     Decide if the injection attempt was due to innocently following
  953.     a malicious link or due to creating one.
  954.     '''
  955.     # Check if the URL can be sanitized.
  956.     if is_injection_attempt(path_param, '', ''):
  957.         destination = 'https://en.wikipedia.org/wiki/Data_validation'
  958.     else:
  959.         destination = my_url + '?path=' + path_param
  960.     redirect_spam(destination)
  961. def main():
  962.     '''
  963.     `compressout.init` MUST be called before `main`
  964.     and `compressout.done` after.
  965.     '''
  966.     # HTML vs XHTML
  967.     global html_page
  968.     html_page = 'Vary: Accept\n'
  969.     if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
  970.         html_page += 'Content-Type: application/xhtml+xml; charset=UTF-8\n'
  971.     else:
  972.         html_page += 'Content-Type: text/html; charset=UTF-8\n'
  973.     # Check that the method is either GET, HEAD or OPTIONS.
  974.     if os.getenv('REQUEST_METHOD') not in ('GET', 'HEAD'):
  975.         if os.getenv('REQUEST_METHOD') != 'OPTIONS':
  976.             compressout.write_h('Status: 405\n')
  977.         compressout.write_h('Allow: GET, HEAD, OPTIONS\n')
  978.         compressout.write_h('Content-Type: text/plain\n')
  979.         compressout.write_h('\n')
  980.         if os.getenv('REQUEST_METHOD') != 'OPTIONS':
  981.             compressout.write_b('Method not allowed!\n')
  982.         compressout.write_b('Allowed methods: GET, HEAD, OPTIONS\n')
  983.         return
  984.     # Get the parameters.
  985.     params = cgi.FieldStorage()
  986.     path = path_param = params.getfirst('path', default='')
  987.     referer_URI = params.getfirst('referer', default='')
  988.     referer_title = params.getfirst('title', default='Back')
  989.     referer = (referer_URI, referer_title)
  990.     download_flag = params.getfirst('download', default='no')
  991.     sitemap_param = params.getfirst('sitemap', default='none')
  992.     
  993.     if not os.getenv('QUERY_STRING'):
  994.         index_page()
  995.         return
  996.         
  997.     # Bad request, but will match the evil patterns.
  998.     # Keep it before the evil stopper.
  999.     if bool(path_param) and not path_param.startswith('/'):
  1000.         status400('`path` is not relative to this site. (No leading slash.)')
  1001.         return
  1002.     
  1003.     # Do not allow evil requests.
  1004.     allow = True
  1005.     # Keep things within the server root.
  1006.     try:
  1007.         path = os.path.realpath(root + path)
  1008.     except:
  1009.         allow = False
  1010.     if path != root and not path.startswith(root + '/'):
  1011.         allow = False
  1012.     # Stop at forbidden paths. #1/2
  1013.     for regex in conf['noaccess']:
  1014.         if re.match(regex, path[rootlen:]) is not None:
  1015.             allow = False
  1016.     
  1017.     # Prevent XSS, URL injection, spam injection and miscellaneous assholery.
  1018.     if is_injection_attempt(path_param, referer_URI, referer_title):
  1019.         allow = False
  1020.     if not allow:
  1021.         handle_injection_attempt(path_param, referer_URI, referer_title)
  1022.         return
  1023.     
  1024.     # Bad requests:
  1025.     if download_flag not in ('yes', 'no'):
  1026.         status400('`download` MUST be "yes", "no" or unset.')
  1027.         return
  1028.     if bool(path_param) and sitemap_param != 'none':
  1029.         status400('The `sitemap` parameter cannot be used with any other.')
  1030.         return
  1031.     if download_flag == 'yes' and bool(referer_URI):
  1032.         status400("`download=yes` can't be used with the `referer` parameter.")
  1033.         return
  1034.     if sitemap_param not in ('none', 'xml', 'html'):
  1035.         status400('`sitemap` MUST be "html", "xml" or unset.')
  1036.         return
  1037.     if download_flag == 'yes' and not bool(path_param):
  1038.         status400('Nothing to `download`. Use the `path` parameter.')
  1039.         return
  1040.     if bool(referer_URI) and not bool(path_param):
  1041.         status400('`referer` cannot be used without `path`')
  1042.         return
  1043.     if referer_title != 'Back' and not bool(referer_URI):
  1044.         status400('`referer` is not set.')
  1045.         return
  1046.     
  1047.     if allow:
  1048.     # Generate sitemap?
  1049.         if sitemap_param != 'none':
  1050.             sitemap(sitemap_param)
  1051.         else:
  1052.             # Stop at forbidden paths. #2/2
  1053.             for regex in conf['topsecret']:
  1054.                 if re.match(regex, path[rootlen:]) is not None:
  1055.                     status404()
  1056.                     break
  1057.             else:
  1058.                 # Allowed to be seen.
  1059.                 try:
  1060.                     os.listdir(path)
  1061.                     if download_flag == 'no':
  1062.                         if if_none_match(path):
  1063.                             ls(path, referer)
  1064.                     else:
  1065.                         status400("Can't download a directory.")
  1066.                 except OSError as e:
  1067.                     if e.errno == errno.ENOTDIR:
  1068.                         if download_flag == 'no':
  1069.                             if if_none_match(path):
  1070.                                 cat(path, referer)
  1071.                         else:
  1072.                             # `download` sets a few headers.
  1073.                             download(path)
  1074.                     elif e.errno == errno.ENOENT:
  1075.                         status404()
  1076.                     else:
  1077.                         raise ValueError(
  1078.                             'errno must be either ENOTDIR or ENOENT'
  1079.                         )
if __name__ == '__main__':
    # `init` sets up the output buffering that `write_h`/`write_b`
    # rely on, and `done` flushes it to the client, so `main` MUST be
    # bracketed by this exact pair (see `main`'s docstring).
    compressout.init()
    main()
    compressout.done()