Fetchwilma (defunct)

This script allowed me to

  • stay logged in,
  • receive messages and
  • check my homework, sorted by deadline rather than by the time I got it,
on some piece of crap called Wilma.

Unfortunately, it no longer works because the idiots at Starsoft kept making pointless cosmetic changes. One of them was just too much for me. (Starsoft has since been bought by Visma.)

Last modified
Lines 440
Indexable Yes

Parent directory Download CGIread sitemap Main page

Quick links: GET POST configure de_html defuck download fetch login main

  1. #!/usr/bin/python
  2. #Copyright (c) 2014-2015, Oskar Skog <oskar.skog.finland@gmail.com>
  3. #All rights reserved.
  4. #
  5. #Redistribution and use in source and binary forms, with or without
  6. #modification, are permitted provided that the following conditions are met:
  7. #
  8. #1.  Redistributions of source code must retain the above copyright notice,
  9. #    this list of conditions and the following disclaimer.
  10. #
  11. #2.  Redistributions in binary form must reproduce the above copyright notice,
  12. #    this list of conditions and the following disclaimer in the documentation
  13. #    and/or other materials provided with the distribution.
  14. #
  15. #THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. #AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. #IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. #ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  19. #LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20. #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21. #SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22. #INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. #CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24. #ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. #POSSIBILITY OF SUCH DAMAGE.
  26. import sys
  27. import time
  28. import os
  29. import string
  30. import pprint
  31. if sys.version_info[0] == 2:
  32.     import httplib as httpmod
  33. else:
  34.     import http.client as httpmod
  35. def download(method, url, body=None):
  36.     '''status, headers, obody = download(method, url, ibody)
  37.     status is an integer
  38.     headers is a dictionary.
  39.     Cookies are automagically handled.
  40.     body is a string.
  41.     
  42.     This function is used internally by GET and POST.
  43.     '''
  44.     # Wilma returns lots of 'Connection: close', so I just make new
  45.     # connections all the time.
  46.     global cookies
  47.     
  48.     #print(body)
  49.     
  50.     #connection = httpmod.HTTPSConnection(conf['Host'], conf['port'])
  51.     headers = {}
  52.     for header in ('User-Agent', 'Evil', 'DNT'):
  53.         if conf[header] is not None:
  54.             headers[header] = conf[header]
  55.     
  56.     if cookies:
  57.         tmp = []
  58.         for key in cookies:
  59.             tmp.append('{}={}'.format(key, cookies[key]))
  60.         headers['Cookie'] = '; '.join(tmp)
  61.     
  62.     headers['Connection'] = 'keep-alive'
  63.     if method == 'POST':
  64.         headers['Content-Type'] = 'application/x-www-form-urlencoded'
  65.     
  66.     connection.request(method, url, body, headers)
  67.     response = connection.getresponse()
  68.     #connection.close()
  69.     
  70.     response_headers = {}
  71.     tuple_headers = response.getheaders()
  72.     for key, value in tuple_headers:
  73.         if key == 'Set-Cookie':
  74.             cookie_key, cookie_value = value.split(';')[0].split('=')
  75.             cookies[cookie_key] = cookie_value
  76.         else:
  77.             response_headers[key] = value
  78.     
  79.     #return response.status, response_headers, response.read().decode('iso-8859-1')
  80.     return response.status, response_headers, response.read().decode('utf-8')
  81. def GET(url):
  82.     '''status, headers, body = GET(url)
  83.     status is an integer
  84.     headers is a dictionary.
  85.     Cookies are automagically handled.
  86.     '''
  87.     a, b, c = download('GET', url)
  88.     return a, b, c
  89. def POST(url, form):
  90.     '''status, headers, body = POST(url, form)
  91.     status is an integer
  92.     form is a dictionary.
  93.     headers is a dictionary.
  94.     Cookies are automagically handled.
  95.     '''
  96.     body = []
  97.     tmp = ''
  98.     for key in form:
  99.         tmp, value = key+'=', form[key]
  100.         for ch in value:
  101.             if ch == ' ':
  102.                 tmp += '+'
  103.             elif ch in (string.ascii_letters + string.digits + '.-_'):
  104.                 tmp += ch
  105.             else:
  106.                 tmp += '%' + hex(ord(ch))[2:]
  107.         body.append(tmp)
  108.     a, b, c = download('POST', url, '&'.join(body))
  109.     return a, b, c
  110.     # TypeError: 'NoneType' object is not iterable
  111.     # Why? I forgot the return statement.
  112. def configure():
  113.     global conf
  114.     assert len(sys.argv) > 1
  115.     content = {}
  116.     f = open(sys.argv[1])
  117.     for line in f:
  118.         # If '#' is not found, the newline will be cut.
  119.         line = line[:line.find('#')]
  120.         if ':' in line:
  121.             key, value = map(lambda x: x.strip(), line.strip().split(':', 1))
  122.             content[key] = value
  123.     f.close()
  124.     # Fetch simple data.
  125.     strings = [
  126.         'User-Agent',
  127.         'Host',
  128.         'mesg_file',
  129.         'add_homework',
  130.         'mesg_dir',
  131.         'IDfile',
  132.         'logfile',
  133.         #'username',
  134.         #'password',
  135.         #'cookie',
  136.     ]
  137.     flags = [
  138.         'DNT',
  139.         'Evil',
  140.     ]
  141.     lists = [
  142.         'time',
  143.     ]
  144.     ints = [
  145.         'port',
  146.         'refresh',
  147.         'failsleep',
  148.         'tries',
  149.     ]
  150.     for s in strings:
  151.         if s in content:
  152.             conf[s] = content[s]
  153.     for f in flags:
  154.         if f in content:
  155.             if content[f] in ('True', 'true', 'Yes', 'yes', 'y', 'Y'):
  156.                 conf[f] = '1'
  157.             elif content[f] in ('False', 'false', 'No', 'no', 'n', 'N'):
  158.                 conf[f] = '0'
  159.             else:
  160.                 conf[f] = str(int(content[f]))
  161.     for l in lists:
  162.         if l in content:
  163.             conf[l] = list(filter(lambda x: x != '',
  164.                         map(lambda s: s.split(' '),
  165.                             content[l].split('\t'))))
  166.     for i in ints:
  167.         if i in content:
  168.             conf[i] = int(content[i])
  169.     # Fetch the class table.
  170.     classnum = 0
  171.     classes = []
  172.     class_ids = []
  173.     while str(classnum) in content:
  174.         classes.append(list(filter(lambda x: x != '',
  175.                         map(lambda s: s.split(' '),
  176.                             content[str(classnum)].split('\t')))))
  177.         classnum += 1
  178.     for y in classes:
  179.         for x in y:
  180.             if x not in class_ids:
  181.                 class_ids.append(x)
  182.     conf['classes'] = zip(*classes)
  183.     conf['class_ids'] = class_ids
  184.     
  185.     conf['mesg_dir'] = conf['mesg_dir'].rstrip('/') + '/'
  186. def login():
  187.     '''
  188.     DOES NOT WORK!
  189.     StarSoft OY
  190.     *   There are browsers that doesn't understand redirections
  191.     *   There are browsers that doesn't understand cookies
  192.     *   All browsers support JavaScript
  193.     '''
  194.     status, headers, body = GET('/')
  195.     assert status == 200
  196.     right_line = filter(lambda x: 'SESSIONID' in x, body.split('\n'))[0]
  197.     session_id = right_line.split('value="')[1].split('"')[0]
  198.     form_data = {
  199.         'Login': conf['username'],
  200.         'Password': conf['password'],
  201.         'submit': 'Logga in',           # ###
  202.         'SESSIONID': session_id,
  203.     }
  204.     print(session_id)
  205.     status, headers, body = POST('/login', form_data)
  206.     print(status)
  207.     print(headers)
  208.     print(body)
  209.     assert status == 303 and headers['Location'].endswith('/?checkcookie')
  210.     status, headers, body = GET('/?checkcookie')
  211.     assert status == 302
  212.     status, headers, body = GET('/')
  213.     assert status == 200
  214.     # Session ID is stored in a cookie.
  215. def de_html(line):
  216.     tmp = ''
  217.     write = True
  218.     for c in line:
  219.         if write and c != '<':
  220.             tmp += c
  221.         elif write and c == '<':
  222.             write = False
  223.         elif not write and c != '>':
  224.             pass
  225.         elif not write and c == '>':
  226.             write = True
  227.         else:
  228.             assert False, 'This is impossible'
  229.     return tmp
  230. def defuck(text):
  231.     '''
  232.     * M$ line endings, yuck!
  233.     * Empty lines...
  234.     * HiroshimaScript, yuck!
  235.     * Stylesheets...
  236.     * Links, dead!
  237.     '''
  238.     out = []
  239.     master_read = False
  240.     read = True
  241.     title=None
  242.     for line in text.replace(u'\r\n', u'\n').split(u'\n'):
  243.         if '<title>' in line:
  244.             title = line
  245.         if line == '<!-- Sivukohtainen alue loppuu -->':
  246.             out.append('</body></html>\n')
  247.             break
  248.         if line == '<!-- Sivukohtainen alue alkaa -->':
  249.             master_read = True
  250.             out.append('<html><head>')
  251.             if line is not None:
  252.                 out.append(title)
  253.             out.append('</head><body>')
  254.             continue
  255.         if master_read and line != '':
  256.             if not ('<script ' in line and '</script>' in line):
  257.                 if '<script ' in line:
  258.                     read = False
  259.                 if '</script>' in line:
  260.                     read = True
  261.                     continue
  262.                 if read and '<div ' not in line and '</div>' not in line:
  263.                     if '<input ' not in line:
  264.                         if '<a href="/messages/' not in line:
  265.                             out.append(line)
  266.     return '\n'.join(out)
  267. def fetch():
  268.     '''
  269.     '''
  270.     status, headers, body = GET('/')
  271.     assert status == 200
  272.     # grep 'Hemuppgifter'
  273.     # <><>ENA2.2<><><>Hemuppgifter: foo<><><>
  274.     # <><><>ENA2.2<><><>Hemuppgifter: foo<><><><><><><><><><><><><><>
  275.     # s.split('>')[2].split('<')[0]
  276.     # s.split('>')[5].split('<')[0].split(': ', 1)[1]
  277.     # grep meddelande foo | grep -v Skriv
  278.     for line in body.split('\n'):
  279.         if u'Hemuppgifter' in line:
  280.             #colon_sep = line.split(':')
  281.             #value = colon_sep[3].split('<')[0]
  282.             #key = colon_sep[0].split('>')[2].split('<')[0]
  283.             try:
  284.                 key = line.split('>')[2].split('<')[0]
  285.                 value = line.split('>')[5].split('<')[0].split(': ', 1)[1]
  286.             except:
  287.                 key = line.split('>')[3].split('<')[0]
  288.                 value = line.split('>')[6].split('<')[0].split(': ', 1)[1]
  289.             key = key.split(' ')[0] # "MAA8.2 gMAA8.2" all of a sudden
  290.             add_homework = key not in homework
  291.             if not add_homework:
  292.                 add_homework = homework[key] != value
  293.             
  294.             if add_homework:
  295.                 # TODO
  296.                 homework[key] = value
  297.                 #print(u'{}: {}'.format(key, value))
  298.                 os.spawnv(os.P_WAIT,
  299.                     conf['add_homework'],
  300.                     ('', key.encode('utf-8'),
  301.                     value.encode('utf-8')))
  302.     status, headers, body = GET('/messages')
  303.     assert status == 200
  304.     mesg_dict = eval(open(conf['mesg_file']).read())
  305.     for line in body.split('\n'):
  306.         if '/messages/' in line:
  307.             if '/messages/compose/' not in line:
  308.                 if 'href="' in line:
  309.                     url = line.split('href="')[1].split('"')[0]
  310.                 else:
  311.                     url = ''
  312.                 #print(msg_line)
  313.                 if 'data-sortvalue="' in line:
  314.                     msg = line.split('data-sortvalue="')[1].split('"')[0]
  315.                 else:
  316.                     msg = ''
  317.                 #msg = de_html(msg_line)
  318.                 if any([str(x) in url for x in range(10)]):
  319.                     #print(u'{}: {}'.format(msg, url))
  320.                     key = url.split('/')[2]
  321.                     if key not in mesg_dict:
  322.                         status, headers, body = GET(url)
  323.                         f = open(conf['mesg_dir'] + key + '.html', 'w')
  324.                         if status == 200:
  325.                             f.write(defuck(body).encode('utf-8'))
  326.                         else:
  327.                             f.write(u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  328.     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  329. <html xmlns="http://www.w3.org/1999/xhtml" lang="sv" xml:lang="sv">
  330.     <head>
  331.         <title>ERROR {0} writing {1}</title>
  332.         <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
  333.         <meta name="robots" content="noindex,nofollow"/>
  334.     </head>
  335.     <body>
  336.         <h1>ERROR {0} writing {1}</h1>
  337.         <pre>
  338. {2}
  339.         </pre>
  340.     </body>
  341. </html>
  342. '''.format(status, key, pprint.pformat(headers)).encode('utf-8'))
  343.                         f.close()
  344.                         mesg_dict[key] = msg
  345.     f = open(conf['mesg_file'], 'w')
  346.     f.write(pprint.pformat(mesg_dict))
  347.     f.close()
  348.     
  349.     f = open(conf['mesg_dir'] + 'index.html', 'w')
  350.     f.write('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  351.     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  352. <html xmlns="http://www.w3.org/1999/xhtml" lang="sv" xml:lang="sv">
  353.     <head>
  354.         <title>Wilma meddelanden</title>
  355.         <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
  356.         <meta name="robots" content="noindex,nofollow"/>
  357.     </head>
  358.     <body>
  359.         <h1>Wilma meddelanden</h1>
  360. ''')
  361.     for key in sorted([k for k in mesg_dict],
  362.                         reverse=True, key=lambda x: int(x)):
  363.         f.write(u'''        <a href="{}">{}</a><br/>
  364. '''.format(key + '.html', mesg_dict[key]).encode('utf-8'))
  365.     f.write('''    </body>
  366. </html>
  367. ''')
  368. def main():
  369.     '''
  370.     '''
  371.     global connection
  372.     configure()
  373.     sys.stderr = open(conf['logfile'], 'a')
  374.     #sys.stderr.encoding = 'utf-8'
  375.     #if len(sys.argv) > 2 and conf['cookie'] is not None:
  376.         #cookies['Wilma2SID'] = conf['cookie']
  377.     #else:
  378.         #connection = httpmod.HTTPSConnection(conf['Host'], conf['port'])
  379.         #login()
  380.         #connection.close()
  381.     #cookies['Wilma2SID'] = sys.argv[2]
  382.     cookies['Wilma2SID'] = open(conf['IDfile']).read().strip()
  383.     while True:
  384.         for i in range(conf['tries']):
  385.             try:
  386.                 connection = httpmod.HTTPSConnection(conf['Host'], conf['port'])
  387.             except:
  388.                 sys.stderr.write('Something went wrong\n')
  389.                 sys.stderr.flush()
  390.                 time.sleep(conf['failsleep'])
  391.                 continue
  392.             break
  393.         fetch()
  394.         connection.close()
  395.         time.sleep(conf['refresh'])
  396. # Configuration vars.
  397. conf = {
  398.     'User-Agent':   None, # HTTP User-Agent
  399.     'Host':         None, # str
  400.     'port':         None, # int
  401.     'mesg_file':    None, # File to write message index to.
  402.     'mesg_dir':     None, # Where to download messages to.
  403.     'add_homework': None, # Command to add new homework, %time %class %n %s
  404.     'class_ids':    None, # []
  405.     'classes':      None, # [[]]
  406.     'Evil':         None, # HTTP evil bit
  407.     'DNT':          None, # HTTP Do-Not-Track
  408.     'time':         None, # [Time for class at index]
  409.     'refresh':      None, # 
  410.     'logfile':      None, # 
  411.     'IDfile':       None, # Store session ID in a file
  412.     'tries':        None, # Try this many requests...
  413.     'failsleep':    None, # ...with this much time in between.
  414.     
  415.     #'cookie':       None, # Login problems...
  416. }
  417. homework = {}
  418. #connection = None
  419. cookies = {}    # Contains the session ID.
  420. if __name__ == '__main__':
  421.     main()