# Attachment 'lw2txtV2.py'
# (Captured from the wiki attachment page; its "Download" and
# "Toggle line numbers" controls are not part of the script.)
#!/usr/bin/env python
#
# Tilmann Haak <spam@thaak.de>
# 2011-12-04
# 2011-12-16 Allan Wegan <allanwegan@allanwegan.de>:
# - Retries on network (or wiki) error.
#
# - Download all pages from larpwiki.de
# - Write each page into a text file, e.g. "LarpWiki.txt"
10
11 import sys
12 import xmlrpclib
13 import time
14 import string
15
# Retry policy applied to every request sent to the wiki.
netRetryM = 23   # Number of retries allowed per request before giving up.
netRetryD = 0.5  # Seconds to wait before the first retry of a failed request.


def netRetryDFun(oldDelay):
    """Return the delay for the next retry: the previous delay times 1.1."""
    return oldDelay * 1.1
19
# Retries a given function at most retriesMax times after failing:
def retryOnError(fun, retriesMax, retryDelay, retryDelayFun):
    """Call ``fun()`` and return its result, retrying after failures.

    fun           -- zero-argument callable to invoke.
    retriesMax    -- number of retries allowed after a failed attempt, so
                     ``fun`` is called at most ``retriesMax + 1`` times.
    retryDelay    -- seconds to sleep before the first retry.
    retryDelayFun -- called with the delay that was just used; its return
                     value becomes the delay before the following retry.

    Once no retries are left, the exception of the last attempt is
    re-raised unchanged. Exceptions that do not derive from ``Exception``
    (``KeyboardInterrupt``, ``SystemExit``) are never retried, so the user
    can still abort a long run with Ctrl-C.
    """
    while True:
        try:
            return fun()
        except Exception:
            if retriesMax < 1:
                raise
            # Single-argument print() emits the same text under the Python 2
            # print statement and the Python 3 print function.
            print("Error: %s" % (sys.exc_info(),))
            print("Waiting %f seconds before retrying (Retries left: %i)..." % (
                retryDelay, retriesMax
            ))
            time.sleep(retryDelay)
            retriesMax -= 1
            retryDelay = retryDelayFun(retryDelay)
35
# stores a text in a file:
def writeFile(path, content):
    """Write ``content`` to the file at ``path``, truncating an existing file.

    path    -- path of the file to create or overwrite (opened in mode 'w').
    content -- data handed unchanged to the file object's ``write()``.

    Returns None. Errors raised by ``open()`` or ``write()`` propagate to
    the caller.
    """
    # The with-block closes the handle even if write() raises, so a failed
    # write cannot leak the open file.
    with open(path, 'w') as fd:
        fd.write(content)
41
# Wiki accessor (XML-RPC proxy for the larpwiki.de endpoint):
wiki = xmlrpclib.ServerProxy("http://www.larpwiki.de/?action=xmlrpc2")

# get all pages on the wiki (sorted):
pagenames = retryOnError(
    lambda: sorted(wiki.getAllPages()), netRetryM, netRetryD, netRetryDFun
)

# dump pages to current dir, one UTF-8 encoded text file per page:
total = len(pagenames)
for count, pagename in enumerate(pagenames, 1):
    # '/' in a page name would be read as a directory separator, so it is
    # escaped before the name is used as a file name.
    filename = pagename.replace('/', '%2F') + '.txt'
    print("%i/%i %s" % (count, total, filename))
    # Only the download is retried. A failing local write is reported at
    # once instead of re-fetching the same page over and over.
    text = retryOnError(
        lambda: wiki.getPage(pagename), netRetryM, netRetryD, netRetryDFun
    )
    writeFile(filename, text.encode('utf-8'))
    time.sleep(0.05)  # don't hammer on the wiki!