source code of /read/index.py
Last modified | |
Lines | 1469 |
Parent directory Download CGIread sitemap Main page
Quick links: cat code contact content description download footer forms handle_injection_attempt if_none_match index_page is_injection_attempt isword ls main mk_description mk_navigation mk_referer_param navigation noindex ol_content redirect_spam sitemap syntax title
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Filesystem directory that site-local paths are resolved against.  It is
# added to sys.path below, and its length (`rootlen`) is used to strip this
# prefix from absolute filesystem paths.
root = '/var/www'
# Name of the site owner.
owner = 'Oskar Skog'
# Site-relative URL of this script; prefixed to links in generated pages.
my_url = '/read/'
# Absolute URL of this script; used for canonical links and sitemap entries.
canonical_url = 'https://oskog97.com/read/'
# Pre-rendered error documents written out verbatim by status403(),
# status404() and status503() respectively.
html403file = '/var/www/oops/403.html'
html404file = '/var/www/oops/404.html'
html503file = '/var/www/oops/cgi503.html'
import sys
sys.path.append(root)
import cgi
import os
import errno
import compressout
import base64
import re
import time
import htmlescape
import string
import spammy
import sitemap as mod_sitemap # Name conflict with already existing function.
import cgitb
cgitb.enable()
# Number of leading characters to drop from an absolute filesystem path to
# obtain the site-local path (e.g. `path[rootlen:]`).
rootlen = len(root)
#html_mime = 'text/html' # Set to XHTML later.
# Complete Content-Type header line (including the trailing newline) that
# every HTML response writes.
html_page = 'Content-Type: text/html; charset=UTF-8\n' # Set to XHTML later.
# Configuration is a Python expression stored in 'read.cfg' (relative to the
# current working directory).
# NOTE(review): eval() executes whatever 'read.cfg' contains, so that file
# must only ever be writable by the site owner.  If the file holds nothing
# but literals, ast.literal_eval() would be a safer drop-in - confirm
# against the actual config before switching.
with open('read.cfg') as _conf_file:
    conf = eval(_conf_file.read())
def redirect_spam(destination):
    '''
    Answer the request with a 303 redirect.

    `destination` is the URL that the unwanted client is sent to.  Only
    headers are written; there is no response body.
    '''
    header_lines = (
        'Status: 303\n',
        'Location: {}\n'.format(destination),
        '\n',   # Blank line terminates the header block.
    )
    for header_line in header_lines:
        compressout.write_h(header_line)
def status400(message):
    '''
    Respond with HTTP 400 and a static "Bad Request" page.

    `message` is inserted UNESCAPED into a <pre> element, so the caller
    is responsible for HTML-escaping it.
    '''
    for header_line in ('Status: 400\n', html_page, '\n'):
        compressout.write_h(header_line)
    # Everything up to and including the main content; the only
    # placeholder is the `{}` inside the <pre> element.
    page_top = '''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
<link rel="icon" type="image/png" href="/favicon.png"/>
<link rel="canonical" href="https://oskog97.com/read/"/>
<!-- End html5 macro. -->
<title>400 - Bad Request</title>
</head>
<body>
<!-- BEGIN autogenerated navigation -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-> My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
<!-- END autogenerated navigation -->
<main><div id="content">
<h1 id="title">400 - Bad Request</h1>
<pre>{}</pre>
<p>
Your request can't be understood.
Check the parameters.
</p>
<p><a href="/read/">Documentation for the parameters</a></p>
</div></main>
'''
    # Static footer; written separately and never passed through format().
    page_footer = '''
<!-- INCLUDED FOOTER -->
<footer><div id="footer">
<hr class="textonly"/>
<p>
Copyright © Oskar Skog<br/>
Website content released under the <a
href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
and my software usually under the <span class="a"><a target="_blank"
rel="noopener"
href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
(2-clause)</a>.</span>
<br/>
Images may be from other sites, I should have cited useful sources
somewhere on the page.
<span class="notprint">Contact me if I haven't.</span>
</p>
<p id="contact" class="notprint">
You can contact me at: <a href="mailto:oskar@oskog97.com"
rel="noopener" target="_blank">oskar@oskog97.com</a>
<span class="a">(<a href="/pgp-pub/oskar.asc"
>PGP public key</a>)</span>
</p>
<p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
CSS Stylesheet
</a>
<a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
target="_blank" class="notprint"><span
class="img">Valid HTML5</span
></a><br/>
</p>
</div></footer>
<!-- END OF INCLUDED FOOTER -->
</body>
</html>'''
    compressout.write_b(page_top.format(message))
    compressout.write_b(page_footer)
def status403():
    '''
    Respond with HTTP 403.

    Writes the Content-Type and Status headers and then the contents of
    `html403file` as the body.
    '''
    compressout.write_h(html_page)
    # The doubled newline also terminates the header block.
    compressout.write_h('Status: 403\n\n')
    # Context manager so the error document is closed deterministically.
    with open(html403file) as error_page:
        compressout.write_b(error_page.read())
def status404():
    '''
    Respond with HTTP 404.

    Writes the Status and Content-Type headers and then the contents of
    `html404file` as the body.
    '''
    compressout.write_h('Status: 404\n')
    compressout.write_h(html_page)
    compressout.write_h('\n')
    # Context manager so the error document is closed deterministically.
    with open(html404file) as error_page:
        compressout.write_b(error_page.read())
def status503():
    '''
    Respond with HTTP 503.

    Call this if there is too much load on the server to do something.
    (Used by the sitemap function.)

    Writes the Status, Content-Type and Retry-After headers and then the
    contents of `html503file` as the body.
    '''
    compressout.write_h('Status: 503\n')
    compressout.write_h(html_page)
    # One factor is load avg for 1 minute, add some slop to the delay for bots.
    compressout.write_h('Retry-After: 90\n')
    compressout.write_h('\n')
    # Context manager so the error document is closed deterministically.
    with open(html503file) as error_page:
        compressout.write_b(error_page.read())
def index_page():
    '''
    Write the static main page, https://oskog97.com/read/

    Handles conditional requests: the ETag is derived from the response
    content type, whether the client accepts gzip, and the modification
    time of 'index.py'.  A matching If-None-Match yields a 304 without a
    body; HEAD requests get the headers only.
    '''
    # Handle 304s.
    etag = '"{}{}{}"'.format(
        'x'*('application/xhtml+xml' in html_page),
        'z'*('gzip' in os.getenv('HTTP_ACCEPT_ENCODING', '')),
        os.stat('index.py').st_mtime,
    )
    compressout.write_h('Vary: If-None-Match\n')
    compressout.write_h('ETag: {}\n'.format(etag))
    compressout.write_h(html_page)
    if os.getenv('HTTP_IF_NONE_MATCH') == etag:
        compressout.write_h('Status: 304\n\n')
        return
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return
    # Write out a static page.
    compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
<link rel="icon" type="image/png" href="/favicon.png"/>
<link rel="canonical" href="https://oskog97.com/read/"/>
<!-- End html5 macro. -->
<!-- With canonical link tag. -->
<link rel="stylesheet" type="text/css" href="/read/style.css"/>
<meta name="description" content="Interested in the scripts I have
on my website? Come and take a look at them."/>
<title>Website's scripts</title>
</head>
<body>
<!-- BEGIN autogenerated navigation -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-> My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
<!-- END autogenerated navigation -->
<main><div id="content">
<h1 id="title">Website's scripts</h1>
''')
    # This chunk goes through str.format(): `{0}` is `my_url`.  Braces
    # that must appear literally in the page are doubled (`{{`/`}}`);
    # unescaped they make format() raise instead of rendering the page.
    compressout.write_b('''
<p>
Interested in the scripts I have on my website?
Go take a look at them; start crawling the
<a href="{0}?path=/">root directory</a> or take a look
at the <span class="a"><a href="{0}?sitemap=html"
>(sub)sitemap</a>.</span>
</p>
<div id="syntax">
<h2>Parameter syntax</h2>
<p>
Descriptions for the parameters can be found in
the request forms.
</p>
<ul>
<li>
Asterisks <q>*</q> represent a value that can be
(almost) anything.
</li>
<li>Square brackets <q>[]</q> represent optional.</li>
<li>Curly brackets <q>{{}}</q> represent mandatory.</li>
<li>Pipes <q>|</q> represent either or.</li>
</ul>
<p>There are three acceptable "sets" of parameters:</p>
<ol>
<li><pre>{0}?sitemap={{html|xml}}</pre></li>
<li><pre>{0}?path=*[&download=yes]</pre></li>
<li><pre>{0}?path=*[&referer=*[&title=*]]</pre></li>
</ol>
<p>
The order of the valid parameters doesn't matter, but
this is the recommended/canonical order.
</p>
</div>
<div id="forms">
<h2>Request forms</h2>
<p><strong>
Notice that these are three different forms.
</strong></p>
<form action="{0}" method="get">
<h3>Sitemap</h3>
<p>
The <code>sitemap</code> parameter can be either
<q><code>html</code></q>, <q><code>xml</code></q>
or the default <q><code>none</code></q>.
It can't be used together with any other parameters.
</p>
<p>
<input type="radio" name="sitemap" value="html"/>
Request an HTML sitemap instead of a page<br/>
<input type="radio" name="sitemap" value="xml"/>
request an XML sitemap instead of a page<br/>
<input type="submit"/>
</p>
</form>
<form action="{0}" method="get">
<h3>Page</h3>
<p>
A page (source code of a CGI script) is selected with the
<code>path</code> parameter. The value of the
<code>path</code> parameter is a URL relative to this
site, ie. an URL beginning with a single slash.
</p>
<p>
The <code>path</code> is the site-local URL to the CGI
script or directory you're interested in. If you set the
value to <q><code>/read/index.py</code></q>, you'll get the
source code for this script. And if you set it to
<q><code>/</code></q>, you'll get a directory listing
of the site's root directory.
</p>
<p>
Path/URL: <input type="text" name="path" value="/"/>
<input type="submit"/><br/>
<input type="checkbox" name="download" value="yes"/>
Download / see it as plain text
</p>
<p>
The <code>download</code> parameter can be set to either
<q><code>yes</code></q> or the default
<q><code>no</code></q>. The download option does
obviously not work with directories.
</p>
</form>
<form action="{0}" method="get">
<h3>Link back to a referencing page</h3>
<p>
If <code>download</code> is <q><code>no</code></q> or
unset and a page (not a sitemap) was requested, it is
possible to change the navigation to make the requested
page link back to a referring page.
</p>
<p>
The <code>referer</code> (yes, misspelled like the HTTP
Referer) parameter is the URL of the referencing page.
(Don't try to specify a site that isn't mine.)
The <code>title</code> parameter gives the back link a
different text than <q>Back</q>.
</p>
<table>
<tr>
<th><code>path</code></th>
<td><input type="text" name="path" value="/"/></td>
</tr>
<tr>
<th><code>referer</code></th>
<td><input type="text" name="referer"/></td>
</tr>
<tr>
<th><code>title</code></th>
<td><input type="text" name="title"/></td>
</tr>
<tr>
<td></td>
<td><input type="submit"/></td>
</tr>
</table>
</form>
</div>
</div></main>
'''.format(my_url))
    compressout.write_b('''
<!-- INCLUDED FOOTER -->
<footer><div id="footer">
<hr class="textonly"/>
<p>
Copyright © Oskar Skog<br/>
Website content released under the <a
href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
and my software usually under the <span class="a"><a target="_blank"
rel="noopener"
href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
(2-clause)</a>.</span>
<br/>
Images may be from other sites, I should have cited useful sources
somewhere on the page.
<span class="notprint">Contact me if I haven't.</span>
</p>
<p id="contact" class="notprint">
You can contact me at: <a href="mailto:oskar@oskog97.com"
rel="noopener" target="_blank">oskar@oskog97.com</a>
<span class="a">(<a href="/pgp-pub/oskar.asc"
>PGP public key</a>)</span>
</p>
<p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
CSS Stylesheet
</a>
<a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
target="_blank" class="notprint"><span
class="img">Valid HTML5</span
></a><br/>
</p>
</div></footer>
<!-- END OF INCLUDED FOOTER -->
</body>
</html>
''')
def noindex(path):
    '''
    Returns True if `path` should be noindexed.

    `path` is an absolute **filesystem** path.

    The decision is made in this order:
    1. White list: any regex in conf['doindex'] matches -> indexable.
    2. Black list: the file is generated from another existing file
       (conf['madefrom']), or a regex in conf['noindex'] or conf['hide']
       matches -> noindex.
    3. Directories that can be listed are indexable.
    4. Page quality for normal files (see the thresholds below);
       unreadable files are noindexed.
    '''
    def isword(w):
        '''True if `w` only consists of ASCII letters, commas and periods.'''
        letters = string.ascii_letters + ',.'
        # Every character must be checked on its own.  Testing the whole
        # token with `w not in letters` is a substring test against the
        # alphabet and rejects almost every real word.
        return all(ch in letters for ch in w)

    site_path = path[rootlen:]

    # Check whitelist first.
    for regex in conf['doindex']:
        if re.match(regex, site_path) is not None:
            return False

    # Blacklist (two kinds):
    # - Generated from another file.
    # - Explicitly blacklisted in 'read.cfg'.
    for match, replace in conf['madefrom']:
        if re.match(match, site_path) is not None:
            # Only noindex if the file it would be made from exists.
            if os.path.exists(root + re.sub(match, replace, site_path)):
                return True
    for regex in conf['noindex'] + conf['hide']:
        if re.match(regex, site_path) is not None:
            return True

    # Quality:
    # - Text file
    # - At least 3072 Unicode code points
    # - At least 300 words
    # - At least 60 lines
    # - At least 24 lines that look like comments
    # - Half the limitations if a meta description and title is found
    # - A third of the limitations if an onpage description is found

    # Directories are not subject to the quality tests.
    try:
        os.listdir(path)
        return False
    except (OSError, ValueError):
        pass

    # Normal file.
    try:
        with open(path) as source_file:
            text = source_file.read()
        if sys.version_info[0] <= 2:
            text = text.decode('utf-8')
    except (IOError, OSError, ValueError):
        # Unreadable or not decodable as text.
        return True

    min_chars, min_words, min_lines, min_comments = 3072, 300, 60, 24
    # mk_description()[0] is 0, 1 or 2, so the divisor is 1, 2 or 3.
    quality = mk_description(path)[0] + 1
    min_chars //= quality
    min_words //= quality
    min_lines //= quality
    min_comments //= quality

    if len(text) < min_chars:
        return True
    if text.count('\n') + 1 < min_lines:
        return True

    # A line counts as a comment if it contains '#', '<!--' or three
    # single quotes, or starts with optional spaces followed by '* '.
    is_comment = re.compile('^(.*#.*| *\\* .*|.*<!--.*|.*\'\'\'.*)$')
    n_comments = sum(
        1 for line in text.split('\n') if is_comment.match(line) is not None
    )
    if n_comments < min_comments:
        return True

    # NOTE(review): runs of spaces produce empty tokens, which isword()
    # accepts, so indentation inflates the word count - confirm whether
    # that is intended before tightening it.
    tokens = text.replace('\n', ' ').split(' ')
    if sum(1 for token in tokens if isword(token)) < min_words:
        return True

    # Passed the quality tests:
    return False
def mk_navigation(referer, title):
    '''
    Build the HTML of the navigation bar and return it as a string.

    `title` is the title of the requested page.
    `referer` is a tuple of (URL, title) for the "back" link.  When the
    URL is empty (falsy) the site's standard navigation is returned;
    otherwise the page is "integrated" with the referring page by
    linking back to it.
    '''
    if not referer[0]:
        # No referring page: the standard, fully static navigation.
        return '''
<!-- BEGIN autogenerated navigation -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-> My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
<!-- END autogenerated navigation -->
'''
    # Integrated navigation: first row links back to the referring page.
    integrated_template = '''<!-- Navigation generated by CGIread. -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="{URL}">{title}</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">]</span><span class="sub active">{me}</span><span class="textonly" translate="no">[</span>
<span class="textonly" translate="no">[</span><a class="sub" href="{my_url}?sitemap=html">Sitemap for website's scripts</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">[</span><a class="sub" href="/read/">Website's scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/pages/policy.html">Privacy policy & terms of use</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
<!-- End of navigation. -->'''
    back_url = referer[0]
    back_title = referer[1]
    # Each value is paired with the escaping level htmlescape should use.
    return htmlescape.escape(
        integrated_template,
        URL=(2, back_url),
        title=(1, back_title),
        me=(1, title),
        my_url=(0, my_url),
    )
def mk_referer_param(referer):
    '''
    Build the query-string suffix that carries the "back" link along.

    `referer` is the same (URL, title) tuple that `mk_navigation` takes.
    Returns one of:
        ''                                        (no referer URL)
        '&referer=' + URL                         (title is 'Back')
        '&referer=' + URL + '&title=' + title
    The result is meant to be appended to links from the requested page.
    '''
    if not referer[0]:
        return ''
    # 'Back' is the default link text, so it is not passed along.
    if referer[1] == 'Back':
        title_param = ''
    else:
        title_param = '&title={}'.format(referer[1])
    return '&referer={}{}'.format(referer[0], title_param)
def mk_description(path):
    '''
    Return a 4-tuple: (good, title, meta_description, onpage_description)

    `path` is the absolute filesystem path to the requested page.  The
    description is read from the file `path + '.info'`:
    - the first line is the title,
    - the following lines are the meta description,
    - an optional line consisting of a single '.' separates the meta
      description from the onpage description.

    `good` is
        0   no title and description (no readable '.info' file)
        1   title and meta description only
        2   also an onpage description
    `title` is the title of the page; it defaults to
    "source code of <site-local path>".
    `meta_description` is the content of the description meta tag.
    `onpage_description` is HTML content for the onpage description; if
    there is none, it is the meta description wrapped in a <p> element.
    '''
    title = "source code of {}".format(path[rootlen:])
    meta_description = ''
    onpage_description = None

    # A missing or unreadable '.info' file simply means "no description".
    try:
        with open(path + '.info') as info_file:
            content = info_file.read().split('\n')
    except (IOError, OSError, ValueError):
        content = None

    good = 0
    if content is not None:
        good = 1
        title = content[0]
        try:
            sep = content.index('.')
        except ValueError:
            sep = None
        if sep is not None:
            good = 2
            meta_description = '\n'.join(content[1:sep])
            onpage_description = '\n'.join(content[sep+1:])
        else:
            meta_description = '\n'.join(content[1:])

    if onpage_description is None:
        onpage_description = htmlescape.escape('<p>{}</p>',1,meta_description)
    return good, title, meta_description, onpage_description
def sitemap(sitemap_type):
    '''
    Write out an XML or HTML sitemap.

    sitemap_type in ('xml', 'html'); anything else raises AssertionError.

    The XML sitemap will exclude entries from `conf['noxmlsitemap']`.

    Non-HEAD requests are throttled: a 503 is sent when the 1 minute
    load average exceeds conf['sitemap-maxload']['load-avg1'] or when
    the oldest remembered request in 'read.throttlecontrol' is younger
    than conf['sitemap-maxload']['throttle-time'] seconds.
    '''
    if os.getenv('REQUEST_METHOD') != 'HEAD': # NOTICE
        # Prevent over-revving the server.
        # HEAD requests are basically no-ops.
        maxload = conf['sitemap-maxload']
        if os.getloadavg()[0] > maxload['load-avg1']:
            status503()
            return
        # The throttle file holds ':'-separated timestamps, newest first.
        try:
            with open('read.throttlecontrol') as throttle_file:
                access_times = [
                    float(stamp)
                    for stamp in throttle_file.read().strip().split(':')
                ]
        except (IOError, OSError, ValueError):
            # Missing, unreadable or malformed file: start over.
            access_times = [0]
        # NOTE(review): until the history holds 'throttle-requests'
        # entries, the oldest entry is simply the previous accepted
        # request, so requests are spaced 'throttle-time' apart during
        # warm-up - confirm this is intended.
        if time.time() - access_times[-1] < maxload['throttle-time']:
            status503()
            return
        access_times.insert(0, time.time())
        access_times = access_times[:maxload['throttle-requests']]
        with open('read.throttlecontrol', 'w') as throttle_file:
            throttle_file.write(':'.join(map(str, access_times)) + '\n')

    # Write headers before doing anything else.
    # A HEAD request doesn't need to know the length (it's TE chunked).
    if sitemap_type == 'xml':
        compressout.write_h('Content-Type: application/xml; charset=UTF-8\n')
        compressout.write_h(
            'Link: <{my_url}?sitemap=html>'.format(my_url=canonical_url) +
            '; rel="canonical"' +
            '; type="text/html"\n'
        )
        compressout.write_h('X-Robots-Tag: noindex\n\n') # NOTE: last.
    elif sitemap_type == 'html':
        compressout.write_h(html_page)
        compressout.write_h('\n')
    else:
        # Raised explicitly so the check survives `python -O`.
        raise AssertionError("Neither 'xml' nor 'html'")
    if os.getenv('REQUEST_METHOD') == 'HEAD': # NOTICE
        return

    # Find the pages worth being in the sitemap.
    no_access = conf['noaccess'] + conf['hide'] + conf['topsecret']
    paths = []
    for basedir, dirs, files in os.walk(root, topdown=True):
        # Exclude hidden directories:
        sys.stderr.write('In {}\n'.format(basedir))
        sys.stderr.write('Dirs: {}\n'.format(repr(dirs)))
        remove_list = []
        for dirname in dirs:
            dirpath = os.path.join(basedir, dirname)[rootlen:]
            if any(re.match(regex, dirpath) is not None
                   for regex in no_access):
                remove_list.append(dirname)
        sys.stderr.write('Removed dirs: {}\n'.format(repr(remove_list)))
        # Prune in place so os.walk() does not descend into them.  The
        # list is rebuilt in one step: removing items from `dirs` while
        # iterating over it skips entries and lets hidden directories
        # be crawled.
        dirs[:] = [
            dirname for dirname in dirs if dirname not in remove_list
        ]
        # Iterate over files:
        for filename in files:
            filepath = os.path.join(basedir, filename)
            # No symlinks allowed.
            if os.path.islink(filepath):
                sys.stderr.write('{} is link\n'.format(filepath))
                continue
            description = mk_description(filepath)
            if not description[0]:
                sys.stderr.write('{} has no description\n'.format(filepath))
                continue
            # Only indexable content allowed.
            if noindex(filepath):
                sys.stderr.write('{} is noindexed\n'.format(filepath))
                continue
            paths.append((filepath[rootlen:], description[3]))
    paths.sort(key=lambda x: x[0])

    # Print the body.
    if sitemap_type == 'xml':
        compressout.write_b('''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
''')
        #
        for path, description in paths:
            # Skip entries matching any of the exclusion regexes.
            if any(re.match(regex, path) is not None
                   for regex in conf['noxmlsitemap']):
                continue
            compressout.write_b(htmlescape.escape('''<url>
<loc>{canonical_url}?path={path}</loc>
<priority>0.5</priority>
''',
                canonical_url=(0, canonical_url),
                path=(1, path),
            ))
            mod_sitemap.lastmod_changefreq(
                root + path,
                compressout,
            )
            compressout.write_b('</url>\n')
        #
        compressout.write_b('</urlset>\n')
    elif sitemap_type == 'html':
        compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
<link rel="icon" type="image/png" href="/favicon.png"/>
<!-- End html5nc macro. -->
<link rel="canonical" href="{canonical_url}?sitemap=html"/>
<link rel="alternate" href="{canonical_url}?sitemap=xml"
type="application/xml"/>
<meta name="robots" content="noindex, follow"/>
<title>Sitemap for scripts' source code</title>
<meta name="description" content="
Sitemap of all scripts available through /read/.
"/>
</head>
<body>
<!-- BEGIN autogenerated navigation -->
<nav><div id="navigation"><div id="nav_inner">
<p><a href="#content" class="textonly">Skip navigation</a></p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
<span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-> My IBM thinkpad</a><span class="textonly" translate="no">]</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="head" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
>>
<span class="textonly" translate="no">]</span><span class="sub active">Website's scripts</span><span class="textonly" translate="no">[</span>
</p>
<p class="row">
<span class="textonly" translate="no">[</span><a class="sub" href="/sitemap.py">Sitemap</a><span class="textonly" translate="no">]</span>
</p>
<hr class="textonly"/>
</div></div></nav>
<!-- END autogenerated navigation -->
<main><div id="content" class="sitemap">
<h1 id="title">Sitemap for scripts' source code</h1>
<p><a href="{my_url}?path=/">Root directory</a></p>
<dl>
'''.format(my_url=my_url, canonical_url=canonical_url))
        #
        indent = 16 * ' '
        for path, description in paths:
            # NOTE(review): `path` is inserted at escaping level 0 here
            # while the XML branch uses level 1 - presumably level 0
            # means "no escaping"; verify against htmlescape.
            compressout.write_b(indent + htmlescape.escape(
                '''<dt><a translate="no" href="{my_url}?path={path}">
{path}
</a></dt>\n''',
                path=(0, path),
                my_url=(0, canonical_url),
            ))
            compressout.write_b(indent +
                htmlescape.escape('<dd>{}</dd>\n', 0, description)
            )
        #
        compressout.write_b(''' </dl>
</div></main>
<!-- INCLUDED FOOTER -->
<footer><div id="footer">
<hr class="textonly"/>
<p>
Copyright © Oskar Skog<br/>
Website content released under the <a
href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
and my software usually under the <span class="a"><a target="_blank"
rel="noopener"
href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
(2-clause)</a>.</span>
<br/>
Images may be from other sites, I should have cited useful sources
somewhere on the page.
<span class="notprint">Contact me if I haven't.</span>
</p>
<p id="contact" class="notprint">
You can contact me at: <a href="mailto:oskar@oskog97.com"
rel="noopener" target="_blank">oskar@oskog97.com</a>
<span class="a">(<a href="/pgp-pub/oskar.asc"
>PGP public key</a>)</span>
</p>
<p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
CSS Stylesheet
</a>
<a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
target="_blank" class="notprint"><span
class="img">Valid HTML5</span
></a><br/>
</p>
</div></footer>
<!-- END OF INCLUDED FOOTER -->
</body>
</html>
''')
    else:
        raise AssertionError("Neither 'xml' nor 'html'")
def ls(path, referer):
    '''
    Write out an HTML directory listing.

    `path` is the absolute filesystem path of the directory to list.
    `referer` is the (URL, title) tuple for the "back" link; see
    `mk_navigation` and `mk_referer_param`.

    Entries matching a regex in conf['noaccess'], conf['hide'] or
    conf['topsecret'] are left out.  Each remaining entry gets a row
    with a short (mobile) and a long (desktop) description.
    HEAD requests only get the headers.
    '''
    compressout.write_h(html_page)
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return
    compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
<link rel="icon" type="image/png" href="/favicon.png"/>
<!-- End html5nc macro. -->
''')
    # The "Parent directory" link is commented out in the generated HTML
    # when the site root itself is listed.
    is_root = (path == root)
    compressout.write_b(htmlescape.escape('''
<link rel="stylesheet" type="text/css" href="/read/style.css"/>
<title>Index of {name}</title>
<meta name="robots" content="{robots_follow}, noindex"/>
<link rel="canonical" href="{canonical_url}?path={name}"/>
</head>
<body>
{navigation}
<main><div id="content" class="ls">
<h1 id="title">Index of <span translate="no">{name}</span></h1>
<p class="read-nav">
{isroot_commentout_start}
<a href="{my_url}?path={parent_path}{referer_params}">
Parent directory
</a>
{isroot_commentout_end}
<a href="{my_url}?sitemap=html">CGIread sitemap</a>
<a href="{my_url}">Main page</a>
</p>
<table id="ls">
''',
        name          =(1, path[rootlen:] + '/'),
        parent_path   =(2, '/'.join(path.split('/')[:-1])[rootlen:]+'/'),
        robots_follow =(2, 'no'*noindex(path)+'follow'),
        navigation    =(0, mk_navigation(
            referer,
            "Index of "+path[rootlen:]+'/'
        )),
        referer_params=(2, mk_referer_param(referer)),
        my_url=(0, my_url),
        canonical_url=(0, canonical_url),
        isroot_commentout_start=(0, '<!--'*is_root),
        isroot_commentout_end=(0, '-->'*is_root),
    ))

    no_access = conf['noaccess'] + conf['hide'] + conf['topsecret']
    for x in sorted(os.listdir(path)):
        full_path = os.path.join(path, x)
        site_path = full_path[rootlen:]
        if any(re.match(regex, site_path) is not None
               for regex in no_access):
            continue
        # An entry is a directory if it can be listed.
        try:
            os.listdir(full_path)
            is_dir = 1
        except (OSError, ValueError):
            is_dir = 0
        # mobile_desc
        # desktop_desc
        if is_dir:
            mobile_desc = '<span class="yeah">-></span>'
            desktop_desc = '<span class="yeah">Directory</span>'
        else:
            # A file is "binary" if it can't be read as text.
            # NOTE(review): on Python 3 the decoding happens in open()
            # with the platform's default encoding; the original code
            # carried the remark "This fails on Python 3 !!!" - confirm
            # the encoding the CGI environment provides.
            try:
                with open(full_path) as candidate:
                    content = candidate.read()
                if sys.version_info[0] == 2:
                    content.decode('UTF-8')
                binary = False
            except (IOError, OSError, ValueError):
                binary = True
            if binary:
                desktop_desc = 'Binary'
                mobile_desc = ':-('
            else:
                good, title, meta_d, onpage_d = mk_description(full_path)
                if good == 2:
                    desktop_desc = htmlescape.escape(
                        '<span class="thenumberofthebeast">{}</span>',
                        1, meta_d
                    )
                    if noindex(full_path):
                        mobile_desc = '<span class="yeah">:-)</span>'
                    else:
                        mobile_desc = '<span class="thenumberofthebeast">:-D</span>'
                elif not noindex(full_path):
                    mobile_desc = '<span class="yeah">:-)</span>'
                    if compressout.debug_cookie:
                        desktop_desc = '<span class="yeah">Text; indexable</span>'
                    else:
                        desktop_desc = '<span class="yeah">Text</span>'
                else:
                    mobile_desc = ':-|'
                    if compressout.debug_cookie:
                        desktop_desc = 'Boring; unindexable'
                    else:
                        desktop_desc = 'Looks boring'
        compressout.write_b(
            htmlescape.escape(
                '''<tr><td class="mobile">{mobile_desc}</td>
<td><a translate="no"
href="{site}?path={path}{referer}">{text}</a></td>
<td class="desktop">{desktop_desc}</td></tr>
''',
                site=(0, my_url),
                path=(2, site_path + '/'*is_dir),
                referer=(2, mk_referer_param(referer)),
                text=(1, x + '/'*is_dir),
                mobile_desc=(0, mobile_desc),
                desktop_desc=(0, desktop_desc),
            )
        )
    compressout.write_b(''' <!--</p>--></table>
</div></main>
<!-- INCLUDED FOOTER -->
<footer><div id="footer">
<hr class="textonly"/>
<p>
Copyright © Oskar Skog<br/>
Website content released under the <a
href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
and my software usually under the <span class="a"><a target="_blank"
rel="noopener"
href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
(2-clause)</a>.</span>
<br/>
Images may be from other sites, I should have cited useful sources
somewhere on the page.
<span class="notprint">Contact me if I haven't.</span>
</p>
<p id="contact" class="notprint">
You can contact me at: <a href="mailto:oskar@oskog97.com"
rel="noopener" target="_blank">oskar@oskog97.com</a>
<span class="a">(<a href="/pgp-pub/oskar.asc"
>PGP public key</a>)</span>
</p>
<p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
CSS Stylesheet
</a>
<a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
target="_blank" class="notprint"><span
class="img">Valid HTML5</span
></a><br/>
</p>
</div></footer>
<!-- END OF INCLUDED FOOTER -->
</body>
</html>\n''')
def download(path):
    '''
    Send the file at `path` as a raw download.

    `path` is the absolute filesystem path (it starts with `root`).

    Headers written, in order: `X-Robots-Tag`, for text files a
    `text/plain` Content-Type, a canonical `Link` header and finally
    whatever `if_none_match` emits.  The body is only written when
    `if_none_match` says content is needed and the request is not HEAD.
    '''
    if noindex(path):
        compressout.write_h('X-Robots-Tag: noindex\n')
    else:
        compressout.write_h('X-Robots-Tag: index\n')    # For verbosity.
    # Probe whether the file is UTF-8 text.  `content` stays `None` when
    # the read itself failed (always the case for non-UTF-8 files on
    # Python 3), so the body can be fetched as raw bytes further down
    # instead of crashing on an unbound name.
    content = None
    try:
        with open(path) as source:
            content = source.read()
        if sys.version_info[0] == 2:
            # Python 2 reads bytes; decoding is only a validity check.
            content.decode('utf-8')
        is_text = True
    except (IOError, OSError, UnicodeError):
        is_text = False
    if is_text:
        compressout.write_h('Content-Type: text/plain; charset=UTF-8\n')
        compressout.write_h(htmlescape.escape(
            'Link: <{}?path={}>',
            0, canonical_url,
            2, path[rootlen:]
            ) + '; rel="canonical"; type="text/html"\n'
        )
    else:
        compressout.write_h(htmlescape.escape(
            'Link: <{}?path={}>; rel="canonical"\n',
            0, canonical_url,
            2, path[rootlen:]
        ))  # No type specified.
    if if_none_match(path):
        compressout.write_h('\n')
        if os.getenv('REQUEST_METHOD') != 'HEAD':
            if content is None:
                # NOTE(review): on Python 3 this hands `bytes` to
                # `compressout.write_b` -- confirm that it accepts them.
                with open(path, 'rb') as source:
                    content = source.read()
            compressout.write_b(content)
def cat(path, referer):
    '''
    Send the file at `path` as an HTML page with numbered source lines.

    `path` is the absolute filesystem path (it starts with `root`).
    `referer` is passed on to `mk_navigation` and `mk_referer_param`;
    `main` supplies it as a `(URI, title)` tuple.

    Files that cannot be read as UTF-8 text are sent as-is, preceded
    only by an `X-Robots-Tag` header.  For HEAD requests on text files
    only the headers are written.
    '''
    def ol_content(text):
        '''
        Turn `text` into `<li>` elements, one per line.

        Returns `(items, quick_links)`: the concatenated `<li>` markup
        and the concatenated `<a>` links, sorted by name, to every line
        that was given a `content_<name>` fragment identifier.
        '''
        out_lines = []
        ids = []
        allowed_chars = string.ascii_letters + '_-'
        for index, line in enumerate(text.split('\n')):
            # Create a "permanent" fragment for this line.
            this_id = ''
            # Find ids in Python and XHTML
            for decltype in ('def', 'class'):
                if line.strip().startswith(decltype + ' ') and '(' in line:
                    this_id = line.split(decltype, 1)[1].split('(')[0].strip()
            if 'id="' in line:
                this_id = line.split('id="')[1].split('"')[0]
            # Prevent bad ids.
            for ch in this_id:
                if ch not in allowed_chars:
                    this_id = ''
                    break
            # Only the first line claiming a name gets the fragment.
            if this_id in ids:
                this_id = ''
            # Create the fragment identifier for the line.
            if this_id:
                ids.append(this_id)
                idline = 'id="content_{}"'.format(this_id)
            else:
                idline = ''
            # Create line
            out_lines.append(htmlescape.escape(
                ' <li id="{}"><pre translate="no" {}>{}</pre></li>\n',
                0, index + 1,
                0, idline,
                1, line,
            ))
        fragment_links = []
        for fragment in sorted(ids):
            fragment_links.append(
                (
                    '<a class="quick" href="#content_{0}" translate="no"' +
                    '>{0}</a>\n'
                ).format(
                    fragment
                )
            )
        return ''.join(out_lines), ''.join(fragment_links)

    # `content` stays `None` when the read itself failed (always the case
    # for non-UTF-8 files on Python 3), so the fallback below can fetch
    # the raw bytes instead of crashing on an unbound name.
    content = None
    try:
        with open(path) as source:
            content = source.read()
        if sys.version_info[0] == 2:
            # Python 2 reads bytes; decoding is only a validity check.
            content.decode('utf-8')
    except (IOError, OSError, UnicodeError):
        if content is None:
            # NOTE(review): on Python 3 this hands `bytes` to
            # `compressout.write_b` -- confirm that it accepts them.
            with open(path, 'rb') as source:
                content = source.read()
        if noindex(path):
            compressout.write_h('X-Robots-Tag: noindex\n')
        else:
            compressout.write_h('X-Robots-Tag: index\n')
        compressout.write_h('\n')
        compressout.write_b(content)
        return
    compressout.write_h(html_page)
    compressout.write_h('\n')
    if os.getenv('REQUEST_METHOD') == 'HEAD':
        return
    ignore, title, meta_description, p_description = mk_description(path)
    # '%Y-%m-%d' is the portable spelling of the C library's '%F'.
    last_modified = time.strftime(
        '%Y-%m-%d', time.gmtime(os.stat(path).st_mtime)
    )
    lines, fragment_links = ol_content(content)
    if not fragment_links:
        fragment_links = '(none)'
    compressout.write_b('''<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
<link rel="icon" type="image/png" href="/favicon.png"/>
<!-- End html5nc macro. -->
''')
    # The JSON-LD block is written in pieces so that `str.format` is only
    # applied to pieces without literal braces.
    compressout.write_b('''
<script type="application/ld+json">
{
"@context":
{
"@vocab": "http://schema.org/"
},
"@type": "SoftwareSourceCode",
"license": "https://opensource.org/licenses/BSD-2-Clause",
"author":
{
''')
    compressout.write_b('''
"@type": "Person",
"@id": "https://oskog97.com/",
"name": "{0}",
"url": "https://oskog97.com/"
'''.format(owner))
    compressout.write_b('''
},
"publisher": {"@id": "https://oskog97.com/"},
"copyrightHolder": {"@id": "https://oskog97.com/"},
''')
    # schema.org property names are case-sensitive: `dateModified`.
    compressout.write_b('''
"url": "{}#code",
"dateModified": "{}"
'''.format(
        canonical_url + '?path=' + path[rootlen:],
        last_modified,
    ))
    compressout.write_b('''
}
</script>
''')
    parent_link = '/'.join(path.split('/')[:-1])[rootlen:]+'/'
    compressout.write_b(htmlescape.escape('''
<link rel="stylesheet" type="text/css" href="/read/style.css"/>
<title>{title}</title>
<link rel="canonical" href="{canonical}"/>
<link
rel="alternate"
href="{canonical}&download=yes"
type="text/plain"
/>
<meta name="robots" content="{noindex_no}index"/>
<meta name="description" content="{meta_description}"/>
</head>
<body>
{navigation}
<main><div id="content">
<h1 id="title" translate="no">{title}</h1>
<div id="description">
{content_description}
</div>
<table>
<tr>
<td>Last modified</td>
<td><time datetime="{last_modified}">{last_modified}</time></td>
</tr>
<tr>
<td>Lines</td>
<td>{linecount}</td>
</tr>
{begin_debug}<tr>
<td>Indexable</td>
<td>{indexable}</td>
</tr>{end_debug}
</table>
<p class="notprint read-nav">
<a href="{my_url}?path={parent_dir}">Parent directory</a>
<a href="{my_url}?path={path}&download=yes" target="_blank">Download</a>
<a href="{my_url}?sitemap=html">CGIread sitemap</a>
<a href="{my_url}">Main page</a>
</p>
<p class="notprint">
Quick links:\n{fragments}
</p>
<ol id="code">
{content}
</ol>
</div></main>
''',
        title=(2, title),
        content=(0, lines),
        parent_dir=(2, parent_link + mk_referer_param(referer)),
        navigation=(0, mk_navigation(referer, path[rootlen:])),
        canonical=(2, canonical_url + '?path=' + path[rootlen:]),
        path=(2, path[rootlen:]),
        noindex_no=(2, 'no' * noindex(path)),
        meta_description=(2, meta_description),
        content_description=(0, p_description),
        last_modified=(2, last_modified),
        linecount=(1, content.count('\n') + 1),
        indexable=(0, {True: 'No', False: 'Yes'}[noindex(path)]),
        fragments=(0, fragment_links),
        my_url=(0, my_url),
        # Without the debug cookie the "Indexable" row is commented out.
        begin_debug=(0,['<!--',''][compressout.debug_cookie]),
        end_debug=(0,['-->',''][compressout.debug_cookie]),
    ))
    compressout.write_b('''
<!-- INCLUDED FOOTER -->
<footer><div id="footer">
<hr class="textonly"/>
<p>
Copyright © Oskar Skog<br/>
Website content released under the <a
href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
and my software usually under the <span class="a"><a target="_blank"
rel="noopener"
href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
(2-clause)</a>.</span>
<br/>
Images may be from other sites, I should have cited useful sources
somewhere on the page.
<span class="notprint">Contact me if I haven't.</span>
</p>
<p id="contact" class="notprint">
You can contact me at: <a href="mailto:oskar@oskog97.com"
rel="noopener" target="_blank">oskar@oskog97.com</a>
<span class="a">(<a href="/pgp-pub/oskar.asc"
>PGP public key</a>)</span>
</p>
<p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
CSS Stylesheet
</a>
<a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
target="_blank" class="notprint"><span
class="img">Valid HTML5</span
></a><br/>
</p>
</div></footer>
<!-- END OF INCLUDED FOOTER -->
</body>
</html>
''')
def if_none_match(path):
    '''
    ETag handling for `cat`, `ls` and `download`:
    Returns `True` if content needs to be generated.
    Outputs necessary headers and 304 statuses.

    The ETag is built from the modification times of `path`, of
    `path + '.info'` (0 if it can't be stat'ed), of `index.py` and of
    `read.cfg`, plus the base64 encoded query string and flags for
    XHTML and gzip.  The `X-ETag-*` headers describe that layout.

    Returns `False` after writing `Status: 304` when one of the entity
    tags in the request's `If-None-Match` header equals the ETag.
    '''
    try:
        meta_time = os.stat(path + '.info').st_mtime
    except OSError:
        # No readable metadata file next to `path`.
        meta_time = 0
    query_string = os.getenv('QUERY_STRING', '')
    if sys.version_info[0] > 2:
        # `b64encode` needs bytes on Python 3.
        query_string = query_string.encode('utf-8')
    # Decode the result so that Python 3 doesn't put the `b'...'` repr
    # of a bytes object into the ETag.
    encoded_query = base64.b64encode(query_string).decode('ascii')
    ETag = '"{}{}-{}({})-{}-({}-{})"'.format(
        'x'*('application/xhtml+xml' in html_page),
        'z'*('gzip' in os.getenv('HTTP_ACCEPT_ENCODING', '')),
        os.stat(path).st_mtime,
        meta_time,
        encoded_query,
        os.stat('index.py').st_mtime,
        os.stat('read.cfg').st_mtime,
    )
    compressout.write_h('Vary: If-None-Match\n')
    compressout.write_h('ETag: {}\n'.format(ETag))
    compressout.write_h(
'''X-ETag-Synopsis: [x][z]-<f_time>(<m_time>)-<query>-(<s_time>-<c_time>)
X-ETag-Description-x: "Client accepts application/xhtml+xml"
X-ETag-Description-z: "Content-Encoding: gzip"
X-ETag-Description-f_time: "Unix last modified time for the requested file"
X-ETag-Description-m_time: "Unix last modified time for the file's metadata"
X-ETag-Description-query: "base64 encoded $QUERY_STRING"
X-ETag-Description-s_time: "Unix last modified time for '/read/index.py'"
X-ETag-Description-c_time: "Unix last modified time for '/read/read.cfg'"
''')
    # `If-None-Match` may carry a comma separated list of entity tags and
    # is compared weakly, i.e. a leading `W/` is ignored.  The ETag built
    # above never contains a comma, so a plain split is enough to find it.
    candidates = []
    for candidate in os.getenv('HTTP_IF_NONE_MATCH', '').split(','):
        candidate = candidate.strip()
        if candidate.startswith('W/'):
            candidate = candidate[2:]
        candidates.append(candidate)
    if ETag in candidates:
        compressout.write_h('Status: 304\n\n')
        return False
    return True
def is_injection_attempt(path_param, referer_URI, referer_title):
    '''
    Various checks to see if any form of injection attempt has been
    made. This function checks the `path`, `referer` and `title`
    parameters.

    Returns True if the request is an injection attempt.

    - XSS
    - URL injection
    - Spam injection
    - Restricted files access

    `path_param` is only checked for `..`.  `referer_URI` and
    `referer_title` must consist of printable ASCII without any of
    `<>&'"`.  `referer_URI` must additionally be free of backslashes and
    either be a local reference without `:` or point to one of
    `conf['allowed-referer-hosts']`.  `referer_title` must be at most
    42 characters long and must not be flagged by `spammy.spammy`.
    '''
    # Prevent attacks.
    if '..' in path_param:
        return True
    for var in referer_URI, referer_title:
        for ch in var:
            if ord(ch) < 32:
                return True
            if ch in '<>&\'"':
                return True
            # NOTICE: The following will limit parameters to ASCII.
            if ord(ch) > 126:
                return True
    # Browsers treat a backslash like a slash in http(s) URLs, so
    # `/\host` or `\\host` would slip past the `//` check below and
    # still lead off-site.
    if '\\' in referer_URI:
        return True
    # Prevent linking to Mallory.
    for start in ('http://', 'https://', '//', 'ftp://'):
        if referer_URI.startswith(start):
            hostname = referer_URI.split('//')[1].split('/')[0]
            if hostname not in conf['allowed-referer-hosts']:
                return True
            break
    else:
        # Not one of the known absolute forms: any colon would mean
        # some other scheme (`javascript:`, `data:`, ...).
        if ':' in referer_URI:
            return True
    # Prevent injected spam
    if spammy.spammy(referer_title) or len(referer_title) > 42:
        return True
    # No match.
    return False
def handle_injection_attempt(path_param, referer_URI, referer_title):
    '''
    Decide if the injection attempt was due to innocently following
    a malicious link or due to creating one.

    If `path_param` on its own passes `is_injection_attempt`, the
    client is redirected to this script with only the `path` parameter
    kept.  Otherwise it is redirected to an article on data validation.

    `referer_URI` and `referer_title` are accepted for the caller's
    convenience but are not used.
    '''
    try:
        from urllib.parse import quote      # Python 3
    except ImportError:
        from urllib import quote            # Python 2
    # Check if the URL can be sanitized.
    if is_injection_attempt(path_param, '', ''):
        destination = 'https://en.wikipedia.org/wiki/Data_validation'
    else:
        # `path_param` is raw user input that ends up in a `Location`
        # header: percent-encode it so that newlines can't inject
        # headers and `&`/`#` can't cut the path short.
        destination = my_url + '?path=' + quote(path_param, safe='/')
    redirect_spam(destination)
def main():
    '''
    Handle one CGI request.

    `compressout.init` MUST be called before `main`
    and `compressout.done` after.

    Order of processing:
    1. Pick HTML or XHTML (sets the global `html_page`).
    2. Refuse methods other than GET, HEAD and OPTIONS.
    3. Without a query string, show the index page.
    4. Redirect away requests that leave `root`, match
       `conf['noaccess']` or look like injection attempts.
    5. Answer inconsistent parameter combinations with a 400.
    6. Dispatch to `sitemap`, `ls`, `cat` or `download`; paths matching
       `conf['topsecret']` and missing paths get a 404.
    '''
    # HTML vs XHTML
    global html_page
    html_page = 'Vary: Accept\n'
    if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
        html_page += 'Content-Type: application/xhtml+xml; charset=UTF-8\n'
    else:
        html_page += 'Content-Type: text/html; charset=UTF-8\n'
    # Check that the method is either GET, HEAD or OPTIONS.
    method = os.getenv('REQUEST_METHOD')
    if method not in ('GET', 'HEAD'):
        if method != 'OPTIONS':
            compressout.write_h('Status: 405\n')
        compressout.write_h('Allow: GET, HEAD, OPTIONS\n')
        compressout.write_h('Content-Type: text/plain\n')
        compressout.write_h('\n')
        if method != 'OPTIONS':
            compressout.write_b('Method not allowed!\n')
        compressout.write_b('Allowed methods: GET, HEAD, OPTIONS\n')
        return
    # Get the parameters.
    params = cgi.FieldStorage()
    path = path_param = params.getfirst('path', default='')
    referer_URI = params.getfirst('referer', default='')
    referer_title = params.getfirst('title', default='Back')
    referer = (referer_URI, referer_title)
    download_flag = params.getfirst('download', default='no')
    sitemap_param = params.getfirst('sitemap', default='none')
    if not os.getenv('QUERY_STRING'):
        index_page()
        return
    # Bad request, but will match the evil patterns.
    # Keep it before the evil stopper.
    if bool(path_param) and not path_param.startswith('/'):
        status400('`path` is not relative to this site. (No leading slash.)')
        return
    # Do not allow evil requests.
    allow = True
    # Keep things within the server root.
    try:
        path = os.path.realpath(root + path)
    except:
        allow = False
    if path != root and not path.startswith(root + '/'):
        allow = False
    # Stop at forbidden paths. #1/2
    for regex in conf['noaccess']:
        if re.match(regex, path[rootlen:]) is not None:
            allow = False
    # Prevent XSS, URL injection, spam injection and miscellaneous assholery.
    if is_injection_attempt(path_param, referer_URI, referer_title):
        allow = False
    if not allow:
        handle_injection_attempt(path_param, referer_URI, referer_title)
        return
    # Bad requests:
    if download_flag not in ('yes', 'no'):
        status400('`download` MUST be "yes", "no" or unset.')
        return
    if bool(path_param) and sitemap_param != 'none':
        status400('The `sitemap` parameter cannot be used with any other.')
        return
    if download_flag == 'yes' and bool(referer_URI):
        status400("`download=yes` can't be used with the `referer` parameter.")
        return
    if sitemap_param not in ('none', 'xml', 'html'):
        status400('`sitemap` MUST be "html", "xml" or unset.')
        return
    if download_flag == 'yes' and not bool(path_param):
        status400('Nothing to `download`. Use the `path` parameter.')
        return
    if bool(referer_URI) and not bool(path_param):
        status400('`referer` cannot be used without `path`')
        return
    if referer_title != 'Back' and not bool(referer_URI):
        status400('`referer` is not set.')
        return
    # Generate sitemap?
    if sitemap_param != 'none':
        sitemap(sitemap_param)
        return
    # Stop at forbidden paths. #2/2
    for regex in conf['topsecret']:
        if re.match(regex, path[rootlen:]) is not None:
            status404()
            return
    # Allowed to be seen.
    # Probe whether `path` is a directory.  Only the probe sits inside
    # the `try`, so that an OSError raised while generating the page is
    # not mistaken for the result of the probe.
    try:
        os.listdir(path)
    except OSError as e:
        if e.errno == errno.ENOTDIR:
            if download_flag == 'no':
                if if_none_match(path):
                    cat(path, referer)
            else:
                # `download` sets a few headers.
                download(path)
        elif e.errno == errno.ENOENT:
            status404()
        else:
            raise ValueError(
                'errno must be either ENOTDIR or ENOENT'
            )
        return
    if download_flag == 'no':
        if if_none_match(path):
            ls(path, referer)
    else:
        status400("Can't download a directory.")
# Script entry point: `main` requires `compressout.init` to have been
# called before it and `compressout.done` after it (see its docstring).
if __name__ == '__main__':
    compressout.init()
    main()
    compressout.done()