Edit Diskussion History Attachments

attachment:um2mm.pl of LarpWiki/Migration

Attachment 'um2mm.pl'

Download

   1 #!/usr/bin/perl -w
   2 
   3 #
   4 # Copyright 2005 John Carter and The Apache Software Foundation
   5 # Copyright 2010 Tilmann Haak
   6 #
   7 # Licensed under the Apache License, Version 2.0 (the "License");
   8 # you may not use this file except in compliance with the License.
   9 # You may obtain a copy of the License at
  10 #
  11 #    http://www.apache.org/licenses/LICENSE-2.0
  12 #
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS,
  15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16 # See the License for the specific language governing permissions and
  17 # limitations under the License.
  18 #
  19 
  20 ###############################################################################
  21 #
  22 # This script converts usemod markup to moinmoin markup
  23 #
  24 # Usage:
  25 # 
  26 # e.g.:
  27 # ./um2mm.pl < WikiPageToBePorted.txt > PortedWikiPage.txt
  28 #
  29 #
  30 # http://www.therefromhere.org/software/wikiport/
  31 # 
  32 ###############################################################################
  33 
  use strict;
  use warnings;

  package UseModtoMoinMoinPort;

  #use WWW::Mechanize;

  # This script used to load the `encoding` pragma ('utf8'). That pragma has
  # been deprecated since Perl 5.18 and is no longer supported from Perl 5.26
  # on, so the script could not start on a current Perl. The two pragmas
  # below provide what it was used for here:
  #   - utf8: string literals in this source file are UTF-8;
  #   - open: STDIN/STDOUT/STDERR get a UTF-8 encoding layer.
  use utf8;
  use open qw( :std :encoding(UTF-8) );

  # globals

  # Page bookkeeping lists. In the code of this file only @BrokenMarkupPages
  # is written to (by PortPage).
  my @UpdatedPages = ();
  my @UnchangedPages = ();
  my @CreatedPages = ();
  my @BrokenMarkupPages = ();
  my @UnCreatedPages = ();

  # Problem reports for the page currently being converted: line number =>
  # converted line text. Filled by UseModtoMoinMoinLine, reset by PortPage,
  # read by CreateBrokenMarkupReport.
  my %ImageInTitle = ();
  my %LinkInTitle = ();
  my %WikiNameInTitle = ();
  my %EntityInTitle = ();
  my %EntityInLink = ();

  #my $Mech = WWW::Mechanize->new();

  #
  # read wiki text from stdin (or from the files named on the command line)
  my $page_source = join('', <>);

  my $ported_page = PortPage($page_source);

  print $ported_page;
  67 
  sub PortPage
  {
      #
      # Convert one UseMod wiki page to MoinMoin markup.
      #
      # Parameter: the raw UseMod page text as one scalar.
      # Returns:   the converted page as a scalar: the converted lines,
      #            followed by the broken-markup report (empty when nothing
      #            was flagged) and a final newline.
      # Dies when the page text contains no lines at all.
      #
      # Side effects: resets the five file-level report hashes, and pushes
      # onto @BrokenMarkupPages when a report was generated.
      #

      # Reset the global error report hashes:
      %ImageInTitle = ();
      %LinkInTitle = ();
      %WikiNameInTitle = ();
      %EntityInTitle = ();
      %EntityInLink = ();

      my $wikiText = shift;

      use Encode qw( decode FB_CROAK );
      #$wikiText = decode('iso-8859-1', $wikiText, FB_CROAK);
      use HTML::Entities qw( decode_entities );
      decode_entities($wikiText);     # void context: decodes $wikiText in place

      # Note: split() drops trailing empty fields, so blank lines at the end
      # of the page are not carried over.
      my @wikiTextList = split(/\n/, $wikiText);
      die "No wiki text to convert (the input page is empty)" unless @wikiTextList;

      # Convert line by line. The (zero-based) index is handed down so that
      # flagged lines can be listed in the broken-markup report.
      foreach my $lineNum (0 .. $#wikiTextList)
      {
          $wikiTextList[$lineNum] = UseModtoMoinMoinLine($wikiTextList[$lineNum], $lineNum);
      }

      $wikiText = join ("\n", @wikiTextList);

      my $brokenMarkupReport = CreateBrokenMarkupReport();

      if ($brokenMarkupReport ne "")
      {
          # NOTE(review): this records the literal string 'wikiName'; no page
          # name is available in this sub -- confirm what should be stored.
          push (@BrokenMarkupPages, 'wikiName');
      }

      # Add a final \n to match MoinMoin raw.
      return $wikiText . $brokenMarkupReport . "\n";
  }
 111 
 sub UseModtoMoinMoinLine
 {
     #
     # Convert a single line of UseMod markup to MoinMoin markup.
     #
     # Parameters:
     #   $line    - one line of UseMod wiki text (no trailing newline)
     #   $lineNum - number of that line; only used as the key under which
     #              problems are recorded in the report hashes
     # Returns the converted line. The result can contain embedded "\n"
     # (definition lists, images moved out of titles).
     #
     # Side effects: may add entries to %ImageInTitle, %LinkInTitle,
     # %WikiNameInTitle, %EntityInTitle and %EntityInLink.
     #
     # The substitutions below depend on their order (e.g. blank-line
     # handling must precede "preformatted", which must precede the list
     # rules that introduce leading spaces).
     #
     my $line = shift;
     my $lineNum = shift;

     # Detect titles - special handling needed, because UseMod allows
     # images and links in titles, while MoinMoin doesn't.

     my $isTitle = 0;
     if ($line =~ /^\=.*\=\r?$/)
     {
         #= Title =
         # The trailing \r is optional: lines read from a plain LF file
         # carry none, and requiring it meant titles were never detected.
         $isTitle = 1;
     }
     elsif ($line =~ /^ .*:/)
     {
         # Subtitle:
         $isTitle = 1;
     }

     #
     # Basic Formatting
     #

     # toc
     $line =~ s/[<]toc[>]/<<TableOfContents(9)>>/i;

     # headings: normalise "== # Title ==" / "== Title ==" to "== Title ==".
     # The optional " #" marker is matched but not captured, so $2 is the
     # title text (substituting the marker group here used to wipe the title).
     $line =~ s/^([=]+)(?:\s[#])?\s([^=]+)\s[=]+$/$1 $2 $1/g;

     # line breaks
     $line =~ s/[<]br[>]/<<BR>>/gi;

     # Turn single-bracketed links into double-bracketed ones.
     # NOTE(review): the \b on both sides only matches when a word character
     # directly touches the brackets -- confirm this is the intended scope.
     $line =~ s/\b(\[[^\[][^\]]+[\]])\b/[$1]/g;

     # hash signs in headings
     $line =~ s/([=][=])+ [#] /$1 /g;

     # images
     #$line =~ s/\b(http\:\\\\.*\.(jpg|gif|png))\b/{{$1}}/Ui;

     # MoinMoin is more strict than UseMod about title formatting
     #$line =~ s/^\=\=\=([^=]+)\=+(\s)+$/\=\=\=$1\=\=\=\r/;
     #$line =~ s/^\=\=([^=]+)\=+(\s)+$/\=\=$1\=\=\r/;
     #$line =~ s/^\=([^=]+)\=+(\s)+$/\=$1\=\r/;

     $line =~ s/<\/?b>/\'\'\'/g;             # bold
     $line =~ s/<\/?strong>/\'\'\'/g;        # strong=bold

     $line =~ s/<\/?i>/\'\'/g;               # italic
     $line =~ s/<\/?em>/\'\'/g;              # em=italic
     $line =~ s/<\/?u>/_/g;                  # underline
     $line =~ s/<\/?sup>/^/g;                # superscript
     $line =~ s/<\/?sub>/,,/g;               # subscript (opening and closing tag)
     $line =~ s/<tt>/\{\{\{ /g;              # inline_preformatted_start
     $line =~ s/<\/tt>/ \}\}\}/g;            # inline_preformatted_end

     # Need to replace <nowiki> tags with ! infront of each word
     # The following only affects single word cases
     $line =~ s/<nowiki>(\w+)\s*<\/nowiki>/!$1/g;    # strip_wiki_formatting

     $line =~ s/\\$//g;                      # end_of_line_continuation_removal

     $line =~ s/^\s*$//g;    # blank line (do this before "preformatted")

     $line =~ s/^ (.*)$/\{\{\{ $1 \}\}\}/g;  # preformatted
     $line =~ s/<pre>/\{\{\{ /g;             # preformatted2_start
     $line =~ s/<\/pre>/ \}\}\}/g;           # preformatted2_end

     # lists
     $line =~ s/^\*\*\*/   \* /;             # bullet_list_three_level
     $line =~ s/^\*\*/  \* /;                # bullet_list_two_level
     $line =~ s/^\*/ \* /;                   # bullet_list_one_level

     $line =~ s/^### /   1. /;               # number_list_three_levels
     $line =~ s/^## /  1. /;                 # number_list_two_levels (two spaces, like the bullets)
     $line =~ s/^# / 1. /;                   # number_list_one_level

     # definition lists.
     #
     # UseMod:
     # ;;;SomeTerm: SomeDefinition
     # MoinMoin:
     #    SomeTerm: SomeDefintion
     #
     # However, MoinMoin definition list's aren't much use to us, since
     # they don't allow links in the definition title.
     #
     # So we replace definition lists with ul bulleted lists,
     # and indented paragraphs.
     #
     # This replacement also looks for the following pattern at the start
     # of the line: [.*][.*]
     #
     # This is discarded, it was previously used to add anchor points.
     #
     # Proper definition lists (disabled):
     #$line =~ s/^;;;(.+):(.+)$/   $1:: $2/; # definition_three_levels
     #$line =~ s/^;;(.+):(.+)$/  $1:: $2/;   # definition_two_levels
     #$line =~ s/^;(.+):(.+)$/ $1:: $2/;     # definition_one_levels

     $line =~ s/^;;;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/   * $2:\n     $3/;    # definition_three_levels
     $line =~ s/^;;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/  * $2:\n    $3/;       # definition_two_levels
     $line =~ s/^;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/ * $2:\n   $3/;  # definition_one_level

     $line =~ s/^:::([^:].*)$/   $1/;        # indenting_three_levels
     $line =~ s/^::([^:].*)$/  $1/;          # indenting_two_levels
     $line =~ s/^:([^:].*)$/ $1/;            # indenting_one_level

     # UseMod [#BladiBlah], MoinMoin [[Anchor(BladiBlah)]]

     $line =~ s/\[\#([a-zA-Z0-9 _]+)\]/\[\[Anchor\($1\)\]\]/g; # anchors

     # UseMod [[One]], MoinMoin ["One"].  Force a link to single word wikipage
     #$line =~ s/\[\[([A-Z]+[a-z0-9]+)\]\]/\[\"$1\"\]/g; # odd links

     # UseMod [/BladiBlah johoho], MoinMoin [wiki:/BlaDiBlah fancy link]
     #$line =~ s/(?:^| )\[(\/[a-zA-Z0-9]+) ([^\]]+)\]/[:$1: $2]/g; # fancy_links_0

     # UseMod [BladiBlah johoho], MoinMoin [wiki:/BlaDiBlah fancy link]
     #$line =~ s/(?:^| )\[([A-Z]+[a-z0-9]+[A-Z]+[a-zA-Z0-9]+) ([^\]]+)\]/[:$1: $2]/g; # fancy_links_0

     # UseMod [Bla di _da johoho], MoinMoin ["Bla di _da johoho"]
     # (but not [wiki: ], and watch for [[ by a ' ' prefix
     #$line =~ s/(?:^| )\[([^w\]\[][a-zA-Z0-9 _]+)\]/["$1"]/g; # fancy_links_0_2

     # UseMod [[BlaDiBlah | fancy link]], MoinMoin [:BlaDiBlah: fancy link]
     #$line =~ s/\[\[(\/?[a-zA-Z0-9]+) *\| *([^\]]+)\]\]/[:$1:$2]/g; # fancy_links_1

     # Usemod [[bladlaslsla]], MoinMoin [" "]
     #$line =~ s/\[\[([a-zA-Z0-9 _]+)\]\]/["$1"]/g; # fancy_links_2 # Not needed? JohnC

     # Usemod [[blah]], MoinMoin ["blah"]
     #$line =~ s/\[\[([a-zA-Z0-9 _]+)\]\]/\[\"$1\"\]/g;

     # this was too strict...
     #$line =~ s/\[\[([A-Z][a-z]+[A-Z][a-zA-Z]+) *\| ([^\]]+)\]\]/[wiki:$1 $2]/g; # fancy_links_1

     # UseMod allows DDASDSaDASLeas as wiki name, Moin is more strict
     # (watch for fancy_links_2 by looking for a ' ' prefix)
     $line =~ s/ ([A-Z][A-Z]+[a-z0-9]+[A-Z]+[A-Za-z0-9]*)/ ["$1"]/g; # fancy_links_4
     $line =~ s/ ([A-Z]+[a-z0-9]+[A-Z][A-Z]+[A-Za-z0-9]*)/ ["$1"]/g; # fancy_links_5

     # UseMod forces links using ""link"", Moin uses ''''''link''''''
     $line =~ s/""/''''''/g; # fancy_links_5

     #
     # Replace html entities with literals
     #

     $line =~ s/&nbsp;/ /g; # " "
     $line =~ s/&ndash;/-/g; # "-"
     # bullet (U+2022), written as an escape so the result does not depend
     # on how this source file's encoding is declared
     $line =~ s/&bull;/\x{2022}/g;

     $line =~ s/&#[xX]([A-Fa-f0-9]+);/"\&\#". hex($1) . ";"/eg; # convert any hex entities to decimal

     $line =~ s/\&\#([0-9]+)\;/chr($1)/eg; # convert numerical entities to literals

     #
     # Links
     #

     if ($isTitle)
     {
         #
         # Report images & links in titles
         #

         if ($line =~ /http\:.+\.(gif|png|jpg|jpeg) /)
         {
             # Move the image URL onto its own line in front of the title.
             $line =~ s/(.*?)(http\:.+\.)(gif|png|jpg|jpeg)(.*?)/$2$3\n$1 $4/g;

             $ImageInTitle{$lineNum} = $line;
         }

         if ($line =~ /\[.*\]/)
         {
             $LinkInTitle{$lineNum} = $line;
         }

         if ($line =~ /[A-Z]+[a-z0-9]+[A-Z]+[a-z0-9]/)
         {
             $WikiNameInTitle{$lineNum} = $line;
         }

         # Decimal entities were already replaced above, so this (and the
         # link check below) only fires for an entity that newly appears as
         # a result of that replacement.
         if ($line =~ /\&\#([0-9]+)\;/)
         {
             $EntityInTitle{$lineNum} = $line;
         }
     }

     if ($line =~ /\[.*\&\#([0-9]+)\;.*\]/)
     {
         $EntityInLink{$lineNum} = $line;
     }

     return $line;
 }
 317 
 sub CreateBrokenMarkupReport
 {
     #
     # Build the report that is appended to the bottom of each converted
     # MoinMoin page that needs manual work from an editor.
     #
     # Takes no parameters; reads the file-level hashes %ImageInTitle,
     # %LinkInTitle, %WikiNameInTitle, %EntityInTitle and %EntityInLink
     # (line number => line text).
     #
     # Returns the report as a scalar, or "" when none of the hashes has
     # an entry.
     #

     # One entry per report section: heading text, then the hash feeding it.
     my @sections = (
         [ "ImageInTitle``s Moved on these lines:\n", \%ImageInTitle ],
         [ "LinkInTitle``s on these lines:\n",        \%LinkInTitle ],
         [ "WikiNameInTitle``s on these lines:\n",    \%WikiNameInTitle ],
         [ "EntityInTitle``s on these lines:\n",      \%EntityInTitle ],
         [ "EntityInLink``s on these lines:\n",       \%EntityInLink ],
     );

     # Nothing was flagged: no report at all.
     return "" unless grep { scalar keys %{ $_->[1] } } @sections;

     my $report = "";
     $report .= "## Delete this section once the page has been fixed\n";
     $report .= "----\n";
     $report .= "\/!\\ The Markup on This Page Needs Fixing\n\n";
     $report .= "This wiki page has been ported by the WikiMigrationBot, and this link to the WikiMigrationBotReport flags that this page contains wiki markup that needs fixing.\n\n";

     foreach my $section (@sections)
     {
         my ($heading, $lineFor) = @$section;

         # Line numbers must be ordered numerically; a plain sort() compares
         # them as strings and would list line 10 before line 2.
         my @lineNums = sort { $a <=> $b } keys %$lineFor;
         next unless @lineNums;

         $report .= $heading;
         foreach my $lineNum (@lineNums)
         {
             my $lineText = $lineFor->{$lineNum};
             $lineText =~ s/\s$//g;  # strip trailing whitespace (e.g. the newline)
             $report .= " * $lineNum \{\{\{ $lineText \}\}\}\n";
         }
         $report .= "\n";
     }

     $report .= "----\n";
     $report .= "## End of section to be deleted\n";

     return $report;
 }

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name
Type: Foobar

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2011-12-03 19:34:24, 0.6 KB) [[attachment:convert.sh]]
  • [get | view] (2011-12-03 19:36:39, 0.4 KB) [[attachment:moin-upload.py]]
  • [get | view] (2011-12-03 19:33:37, 14.0 KB) [[attachment:um2mm.pl]]
 All files | Selected Files: delete move to page copy to page