Edit Diskussion History Attachments

attachment:lw2txtV3.py of LarpWiki2Text

Attachment 'lw2txtV3.py'

Download

   1 #!/usr/bin/env python
   2 #
   3 # Tilmann Haak <spam@thaak.de>
   4 # 2011-12-04
   5 # 2011-12-16 Allan Wegan <allanwegan@allanwegan.de>:
   6 #   - Retries on network (or wiki) error.
   7 # 2012-12-25 Allan Wegan <allanwegan@allanwegan.de>:
   8 #   - Updated to Python 3.
   9 #   - Now retrieves pages using less restrictive interface.
  10 #   - Replaces "/" in file names with " - " instead of "%2f".
  11 #
  12 # - Download all pages from larpwiki.de
  13 # - Write each page into a text file, e.g. "LarpWiki.txt"
  14 
  15 import sys
  16 import xmlrpc.client
  17 import time
  18 import re
  19 import os.path
  20 import urllib.parse
  21 import urllib.request
  22 
  23 netRetryM = 0x7FFFFFFF # How often to retry each request before giving up.
  24 netRetryD = 1.0 # Seconds to wait initially before retrying a failed request.
  25 netRetryDFun = lambda oldDelay: oldDelay * 1.5
  26 
  27 # Retries a given function at most retriesMax times after failing:
  28 def retryOnError(fun, retriesMax, retryDelay, retryDelayFun):
  29   while True:
  30     try:
  31       return fun()
  32     except:
  33       if retriesMax < 1: raise
  34       print("Error:", sys.exc_info())
  35       print("Waiting %f seconds before retrying (Retries left: %i)..." % (
  36         retryDelay, retriesMax
  37       ))
  38       time.sleep(retryDelay)
  39       retriesMax -= 1
  40       retryDelay = retryDelayFun(retryDelay)
  41       continue
  42 
  43 # Stores text in a file:
  44 def writeFile(path, content):
  45   fd = open(path, 'w')
  46   fd.write(content)
  47   fd.close()
  48 
  49 # Get sorted page index:
  50 wiki = xmlrpc.client.ServerProxy("http://www.larpwiki.de/?action=xmlrpc2")
  51 pagenames = retryOnError(
  52   lambda: sorted(wiki.getAllPages()), netRetryM, netRetryD, netRetryDFun
  53 )
  54 
  55 # Store pages in current dir:
  56 count = 0
  57 slashRegExp = re.compile(r"/")
  58 for pagename in pagenames:
  59   count += 1
  60   filename = slashRegExp.sub(' - ', pagename) + '.txt'
  61   if (os.path.isfile(filename)):
  62     print(r'%i/%i "%s" already exists.' % (count, len(pagenames), filename))
  63   else:
  64     print(r'%i/%i fetching "%s".' % (count, len(pagenames), filename))
  65     url = r"https://larpwiki.de/" + urllib.parse.quote(pagename) + r"?action=raw"
  66     response = retryOnError(
  67       lambda: urllib.request.urlopen(url), 
  68       netRetryM, netRetryD, netRetryDFun
  69     )
  70     text = response.read().decode("utf-8", "replace")
  71     writeFile(filename, text)
  72     time.sleep(0.1) # don't hammer on the wiki!

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name
Type: Foobar

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2011-12-15 23:52:05, 0.7 KB) [[attachment:lw2txt.py]]
  • [get | view] (2011-12-17 00:25:17, 1.7 KB) [[attachment:lw2txtV2.py]]
  • [get | view] (2012-12-25 18:49:40, 2.2 KB) [[attachment:lw2txtV3.py]]
  • [get | view] (2017-12-29 11:26:10, 8.1 KB) [[attachment:lw2txtV4.py]]
 All files | Selected Files: delete move to page copy to page