1 #!/usr/bin/perl -w
2
3 #
4 # Copyright 2005 John Carter and The Apache Software Foundation
5 # Copyright 2010 Tilmann Haak
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18 #
19
20 ###############################################################################
21 #
22 # This script converts usemod markup to moinmoin markup
23 #
24 # Usage:
25 #
26 # ie:
27 # ./um2mm.pl < WikiPageToBePorted.txt > PortedWikiPage.txt
28 #
29 #
30 # http://www.therefromhere.org/software/wikiport/
31 #
32 ###############################################################################
33
34 use strict;
35
36 package UseModtoMoinMoinPort;
37
38 #use WWW::Mechanize;
39 use encoding 'utf8';
40
41 # globals
42
# Page bookkeeping lists.  Only @BrokenMarkupPages is written to by the
# code visible in this file (see PortPage); the others are declared but
# not referenced here.
my @UpdatedPages      = ();
my @UnchangedPages    = ();
my @CreatedPages      = ();
my @BrokenMarkupPages = ();
my @UnCreatedPages    = ();

# Per-page problem reports, keyed by line number, holding the converted
# text of the offending line.  They are filled in by UseModtoMoinMoinLine
# and rendered by CreateBrokenMarkupReport.
my %ImageInTitle    = ();
my %LinkInTitle     = ();
my %WikiNameInTitle = ();
my %EntityInTitle   = ();
my %EntityInLink    = ();

#my $Mech = WWW::Mechanize->new();

#
# Read the whole wiki page from stdin (or from the files named on the
# command line), convert it, and write the result to stdout.
#
my $page_source = '';
while (my $input_line = <>) {
    $page_source .= $input_line;
}

my $ported_page = PortPage($page_source);

print $ported_page;
67
sub PortPage
{
    #
    # Convert one complete UseMod wiki page to MoinMoin markup.
    #
    # Argument: the raw UseMod page text as a single scalar.
    # Returns:  the converted page as a scalar, with the broken-markup
    #           report (if any) appended and a final "\n" added.
    # Dies:     when the page text contains no lines at all.
    #

    # Reset the global error report hashes, so that a report only ever
    # describes the page being converted by this call.
    %ImageInTitle    = ();
    %LinkInTitle     = ();
    %WikiNameInTitle = ();
    %EntityInTitle   = ();
    %EntityInLink    = ();

    my $wikiText = shift;

    use Encode qw( decode FB_CROAK );
    #$wikiText = decode('iso-8859-1', $wikiText, FB_CROAK);
    use HTML::Entities qw( decode_entities );
    decode_entities($wikiText);    # void context: decodes $wikiText in place

    my @wikiTextList = split(/\n/, $wikiText);
    die "No wiki text to convert (input was empty)" unless @wikiTextList;

    # $line aliases the array element, so assigning to it rewrites the
    # list in place.  Line numbers passed to the converter start at 0.
    my $lineNum = 0;
    foreach my $line (@wikiTextList)
    {
        $line = UseModtoMoinMoinLine($line, $lineNum);
        $lineNum++;
    }

    $wikiText = join("\n", @wikiTextList);

    my $brokenMarkupReport = CreateBrokenMarkupReport();

    if ($brokenMarkupReport ne "")
    {
        # The page name is not known to this stdin-driven script, so the
        # literal placeholder 'wikiName' is recorded.
        push(@BrokenMarkupPages, 'wikiName');
    }

    $wikiText = $wikiText . $brokenMarkupReport . "\n"; # Add a final \n to match MoinMoin raw.
    return $wikiText;
}
111
sub UseModtoMoinMoinLine
{
    #
    # Convert a single line of UseMod markup to MoinMoin markup.
    #
    # Arguments: the line text, and its line number within the page
    #            (used only as the key for the problem reports).
    # Returns:   the converted line.  Definition-list lines and titles
    #            containing an image may come back with an embedded "\n".
    # Side effects: records suspicious lines in the file-level hashes
    #            %ImageInTitle, %LinkInTitle, %WikiNameInTitle,
    #            %EntityInTitle and %EntityInLink.
    #
    # The order of the substitutions matters (e.g. blank-line handling
    # must precede "preformatted", which must precede the list rules that
    # themselves emit leading spaces).
    #
    my $line    = shift;
    my $lineNum = shift;

    # Detect titles - special handling needed, because UseMod allows
    # images and links in titles, while MoinMoin doesn't.
    # The trailing \r is optional so that titles are recognised in both
    # CRLF and LF input.

    my $isTitle = 0;
    if ($line =~ /^\=.*\=\r?$/)
    {
        #= Title =
        $isTitle = 1;
    }
    elsif ($line =~ /^ .*:/)
    {
        # Subtitle:
        # NOTE(review): this treats any space-led line containing a colon
        # as a title - confirm this is the intended pattern.
        $isTitle = 1;
    }

    #
    # Basic Formatting
    #

    # toc
    $line =~ s/[<]toc[>]/<<TableOfContents(9)>>/i;

    # headings: drop an optional leading "#" (numbered heading), keep the
    # heading text ($3) and make the closing marker match the opening one.
    $line =~ s/^([=]+)(\s[#])?\s([^=]+)\s[=]+$/$1 $3 $1/g;

    # line breaks
    $line =~ s/[<]br[>]/<<BR>>/gi;

    # Single-bracketed links to double-bracketed.
    # NOTE(review): because of the \b anchors this only fires when the
    # brackets are directly surrounded by word characters; left unchanged
    # because loosening it would interact with the anchor rule below.
    $line =~ s/\b(\[[^\[][^\]]+[\]])\b/[$1]/g;

    # hash signs in headings
    $line =~ s/([=][=])+ [#] /$1 /g;

    # images
    #$line =~ s/\b(http\:\\\\.*\.(jpg|gif|png))\b/{{$1}}/Ui;

    $line =~ s/<\/?b>/\'\'\'/g;       # bold
    $line =~ s/<\/?strong>/\'\'\'/g;  # strong=bold

    $line =~ s/<\/?i>/\'\'/g;         # italic
    $line =~ s/<\/?em>/\'\'/g;        # em=italic
    $line =~ s/<\/?u>/_/g;            # underline
    $line =~ s/<\/?sup>/^/g;          # superscript
    $line =~ s/<\/?sub>/,,/g;         # subscript (opening and closing tag)
    $line =~ s/<tt>/\{\{\{ /g;        # inline_preformatted_start
    $line =~ s/<\/tt>/ \}\}\}/g;      # inline_preformatted_end

    # Need to replace <nowiki> tags with ! infront of each word
    # The following only affects single word cases
    $line =~ s/<nowiki>(\w+)\s*<\/nowiki>/!$1/g; # strip_wiki_formatting

    $line =~ s/\\$//g;                # end_of_line_continuation_removal

    $line =~ s/^\s*$//g;              # blank line (do this before "preformatted")

    $line =~ s/^ (.*)$/\{\{\{ $1 \}\}\}/g; # preformatted
    $line =~ s/<pre>/\{\{\{ /g;       # preformatted2_start
    $line =~ s/<\/pre>/ \}\}\}/g;     # preformatted2_end

    # lists
    #
    # The number of leading markers is the nesting depth; it is carried
    # over as one space of indentation per level so that nested lists
    # stay nested.  A depth-one item is indented by a single space.
    $line =~ s/^(\*+)/(' ' x length($1)) . '* '/e;   # bullet_list, any depth
    $line =~ s/^(#+) /(' ' x length($1)) . '1. '/e;  # number_list, any depth

    # definition lists.
    #
    # UseMod:
    # ;;;SomeTerm: SomeDefinition
    #
    # MoinMoin definition lists aren't much use to us, since they don't
    # allow links in the definition title.  So definition lists are
    # replaced by a bulleted term followed, on a new line, by the
    # definition as an indented paragraph.
    #
    # An optional leading "[...][...]" pair (previously used to add
    # anchor points) is matched and discarded.
    $line =~ s/^(;+)\s*(?:\[.*?\]\[.*?\])?([^:]*):(.+)$/(' ' x length($1)) . '* ' . $2 . ":\n" . (' ' x length($1)) . $3/e;

    # indenting: one space per leading colon
    $line =~ s/^(:+)([^:].*)$/(' ' x length($1)) . $2/e;

    # UseMod [#BladiBlah], MoinMoin [[Anchor(BladiBlah)]]
    $line =~ s/\[\#([a-zA-Z0-9 _]+)\]/\[\[Anchor\($1\)\]\]/g; # anchors

    # Several earlier link-conversion rules ("odd links", fancy_links_0
    # to fancy_links_2) were disabled by the original authors and are not
    # applied here.

    # UseMod allows DDASDSaDASLeas as wiki name, Moin is more strict
    # (watch for fancy_links_2 by looking for a ' ' prefix)
    $line =~ s/ ([A-Z][A-Z]+[a-z0-9]+[A-Z]+[A-Za-z0-9]*)/ ["$1"]/g; # fancy_links_4
    $line =~ s/ ([A-Z]+[a-z0-9]+[A-Z][A-Z]+[A-Za-z0-9]*)/ ["$1"]/g; # fancy_links_5

    # UseMod forces links using ""link"", Moin uses ''''''link''''''
    $line =~ s/""/''''''/g;

    #
    # Replace html entities with literals
    #

    $line =~ s/&nbsp;/ /g;            # non-breaking space entity -> " "
    $line =~ s/(?:&ndash;|–)/-/g;     # en dash (entity or literal) -> "-"
    $line =~ s/&bull;/•/g;            # bullet entity -> literal bullet

    $line =~ s/&#[xX]([A-Fa-f0-9]+);/"\&\#". hex($1) . ";"/eg; # convert any hex entities to decimal

    $line =~ s/\&\#([0-9]+)\;/chr($1)/eg; # convert numerical entities to literals

    #
    # Links
    #

    if ($isTitle)
    {
        #
        # Report images & links in titles
        #

        if ($line =~ /http\:.+\.(gif|png|jpg|jpeg) /)
        {
            # Move the image URL onto its own line ahead of the title.
            $line =~ s/(.*?)(http\:.+\.)(gif|png|jpg|jpeg)(.*?)/$2$3\n$1 $4/g;

            $ImageInTitle{$lineNum} = $line;
        }

        if ($line =~ /\[.*\]/)
        {
            $LinkInTitle{$lineNum} = $line;
        }

        if ($line =~ /[A-Z]+[a-z0-9]+[A-Z]+[a-z0-9]/)
        {
            $WikiNameInTitle{$lineNum} = $line;
        }

        if ($line =~ /\&\#([0-9]+)\;/)
        {
            $EntityInTitle{$lineNum} = $line;
        }
    }

    if ($line =~ /\[.*\&\#([0-9]+)\;.*\]/)
    {
        $EntityInLink{$lineNum} = $line;
    }

    return $line;
}
317
sub CreateBrokenMarkupReport
{
    #
    # Build the report that is appended to the bottom of a converted
    # MoinMoin page which needs manual work from an editor.
    #
    # Reads the file-level hashes %ImageInTitle, %LinkInTitle,
    # %WikiNameInTitle, %EntityInTitle and %EntityInLink (line number =>
    # converted line text).
    #
    # Returns the report text, or "" when none of the hashes has entries.
    #

    # One entry per report section: heading text and the hash it lists.
    my @sections = (
        [ "ImageInTitle``s Moved on these lines:\n", \%ImageInTitle    ],
        [ "LinkInTitle``s on these lines:\n",        \%LinkInTitle     ],
        [ "WikiNameInTitle``s on these lines:\n",    \%WikiNameInTitle ],
        [ "EntityInTitle``s on these lines:\n",      \%EntityInTitle   ],
        [ "EntityInLink``s on these lines:\n",       \%EntityInLink    ],
    );

    # Only sections that actually recorded something are printed.
    my @flagged = grep { scalar keys %{ $_->[1] } } @sections;
    return "" unless @flagged;

    my $report = "";

    $report .= "## Delete this section once the page has been fixed\n";
    $report .= "----\n";
    $report .= "\/!\\ The Markup on This Page Needs Fixing\n\n";
    $report .= "This wiki page has been ported by the WikiMigrationBot, and this link to the WikiMigrationBotReport flags that this page contains wiki markup that needs fixing.\n\n";

    foreach my $section (@flagged)
    {
        my ($heading, $textForLine) = @{$section};

        $report .= $heading;

        # Keys are line numbers, so sort numerically (9 before 10).
        foreach my $lineNum (sort { $a <=> $b } keys %{$textForLine})
        {
            my $lineText = $textForLine->{$lineNum};
            $lineText =~ s/\s$//g; # strip one trailing whitespace character (newline / CR)
            $report .= " * $lineNum \{\{\{ $lineText \}\}\}\n";
        }
        $report .= "\n";
    }

    $report .= "----\n";
    $report .= "## End of section to be deleted\n";

    return $report;
}
, as shown below in the list of files. Do
link, since this is subject to change and can break easily.