#!/usr/bin/env python
#
# Tilmann Haak <spam@thaak.de>
# 2011-12-04
# 2011-12-16 Allan Wegan <allanwegan@allanwegan.de>:
#   - Retries on network (or wiki) error.
# 2012-12-25 Allan Wegan <allanwegan@allanwegan.de>:
#   - Updated to Python 3.
#   - Now retrieves pages using less restrictive interface.
#   - Replaces "/" in file names with " - " instead of "%2f".
#
# - Download all pages from larpwiki.de
# - Write each page into a text file, e.g. "LarpWiki.txt"

import sys
import xmlrpc.client
import time
import re
import os.path
import urllib.parse
import urllib.request

# Network retry policy shared by all wiki requests:
netRetryM = 0x7FFFFFFF # How often to retry each request before giving up.
netRetryD = 1.0 # Seconds to wait initially before retrying a failed request.

# Back-off schedule: each retry waits 1.5 times longer than the previous one.
# (A named def instead of an assigned lambda, per PEP 8.)
def netRetryDFun(oldDelay):
  return oldDelay * 1.5

# Retries a given function at most retriesMax times after failing:
def retryOnError(fun, retriesMax, retryDelay, retryDelayFun):
  """Calls fun() and returns its result, retrying after failures.

  fun: nullary callable to invoke.
  retriesMax: how many retries are allowed before the exception propagates.
  retryDelay: seconds to sleep before the first retry.
  retryDelayFun: maps the previous delay to the next one (back-off schedule).
  """
  while True:
    try:
      return fun()
    # Only ordinary errors are retried; a bare "except:" would also swallow
    # KeyboardInterrupt/SystemExit and make the script un-interruptible.
    except Exception:
      if retriesMax < 1: raise
      print("Error:", sys.exc_info())
      print("Waiting %f seconds before retrying (Retries left: %i)..." % (
        retryDelay, retriesMax
      ))
      time.sleep(retryDelay)
      retriesMax -= 1
      retryDelay = retryDelayFun(retryDelay)

# Stores text in a file:
def writeFile(path, content):
  """Writes content to the file at path as UTF-8 text.

  The context manager closes the handle even if write() raises; the explicit
  encoding matches the UTF-8 decode used when the page was fetched, instead
  of depending on the platform's locale default.
  """
  with open(path, 'w', encoding='utf-8') as fd:
    fd.write(content)

# Get sorted page index:
wiki = xmlrpc.client.ServerProxy("http://www.larpwiki.de/?action=xmlrpc2")

def _fetchPageIndex():
  # Ask the wiki for every page name; sort for a deterministic order.
  return sorted(wiki.getAllPages())

pagenames = retryOnError(_fetchPageIndex, netRetryM, netRetryD, netRetryDFun)

# Store pages in current dir:
slashRegExp = re.compile(r"/") # "/" is illegal inside file names.
total = len(pagenames) # hoisted: invariant across the loop
for count, pagename in enumerate(pagenames, start=1):
  # Map the wiki page name to a safe local file name.
  filename = slashRegExp.sub(' - ', pagename) + '.txt'
  if os.path.isfile(filename):
    print(r'%i/%i "%s" already exists.' % (count, total, filename))
  else:
    print(r'%i/%i fetching "%s".' % (count, total, filename))
    url = r"https://larpwiki.de/" + urllib.parse.quote(pagename) + r"?action=raw"
    # The response is a context manager in Python 3; closing it here fixes
    # the per-page socket leak of the original code.
    with retryOnError(
      lambda: urllib.request.urlopen(url),
      netRetryM, netRetryD, netRetryDFun
    ) as response:
      text = response.read().decode("utf-8", "replace")
    writeFile(filename, text)
    time.sleep(0.1) # don't hammer on the wiki!
