<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#!/usr/bin/perl -w

#
# Copyright 2005 John Carter and The Apache Software Foundation
# Copyright 2010 Tilmann Haak
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

###############################################################################
#
# This script converts usemod markup to moinmoin markup
#
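# Usage: the UseMod page is read from standard input and the converted
# MoinMoin page is written to standard output, e.g.:
#
# ./um2mm.pl &lt; WikiPageToBePorted.txt &gt; PortedWikiPage.txt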
#
#
# http://www.therefromhere.org/software/wikiport/
# 
###############################################################################

use strict;
use warnings;

package UseModtoMoinMoinPort;

#use WWW::Mechanize;

# This source file contains a literal UTF-8 character (the bullet used as
# the replacement for the bull entity), so the source encoding is declared
# with "use utf8".  The "use encoding 'utf8'" pragma that used to stand
# here is deprecated and refuses to load on Perl 5.26 and later; besides
# declaring the source encoding it put an encoding layer on STDIN and
# STDOUT, which is now done explicitly.
use utf8;
binmode(STDIN,  ':encoding(UTF-8)') or die "Cannot set UTF-8 layer on STDIN: $!";
binmode(STDOUT, ':encoding(UTF-8)') or die "Cannot set UTF-8 layer on STDOUT: $!";

# globals

# Page bookkeeping lists.  Of these, the code in this file only ever
# writes to @BrokenMarkupPages (in PortPage).
my @UpdatedPages = ();
my @UnchangedPages = ();
my @CreatedPages = ();
my @BrokenMarkupPages = ();
my @UnCreatedPages = ();

# Error report hashes, keyed by line number with the flagged line text as
# value.  They are reset by PortPage, filled by UseModtoMoinMoinLine and
# read by CreateBrokenMarkupReport.
my %ImageInTitle = ();
my %LinkInTitle = ();
my %WikiNameInTitle = ();
my %EntityInTitle = ();
my %EntityInLink = ();

#my $Mech = WWW::Mechanize-&gt;new();

#
# Read the complete wiki text: from the files named on the command line,
# or from stdin when none are given.
my $page_source = '';
while (my $chunk = &lt;&gt;) {
    $page_source .= $chunk;
}

my $ported_page = PortPage($page_source);

print $ported_page;

sub PortPage
{
       #
       # Convert one UseMod wiki page to MoinMoin markup.
       #
       # Argument: the complete UseMod page source as a single scalar.
       #
       # Returns the converted page as a scalar.  When any line was flagged
       # for manual attention, the report built by CreateBrokenMarkupReport
       # is appended.  The result always ends with a newline.
       #
       # Dies when the input yields no lines at all, which is the case for
       # an empty page and for a page made only of newlines.
       #

       # Reset the global error report hashes:
       %ImageInTitle = ();
       %LinkInTitle = ();
       %WikiNameInTitle = ();
       %EntityInTitle = ();
       %EntityInLink = ();

       my $wikiText = shift;

       use Encode qw( decode FB_CROAK );
       #$wikiText = decode('iso-8859-1', $wikiText, FB_CROAK);
       use HTML::Entities qw( decode_entities );
       # Called in void context, so $wikiText itself is decoded in place.
       decode_entities($wikiText);

       # split drops trailing empty fields, so trailing blank lines of the
       # page do not survive this step.
       my @wikiTextList = split(/\n/, $wikiText);
       die "No wiki text to convert: the input is empty or contains only blank lines\n"
               unless @wikiTextList;

       # Line numbers handed to the converter (and shown in the report)
       # start at 0.
       my $lineNum = 0;
       foreach my $line (@wikiTextList)
       {
               # $line aliases the array element, so the assignment
               # replaces the original line with the converted one.
               $line = UseModtoMoinMoinLine($line, $lineNum);
               $lineNum++;
       }

       $wikiText = join ("\n", @wikiTextList);

       my $brokenMarkupReport = CreateBrokenMarkupReport();

       if ($brokenMarkupReport ne "")
       {
               # The page name is not available here; a fixed placeholder
               # string is recorded instead.
               push (@BrokenMarkupPages, 'wikiName');
       }

       $wikiText = $wikiText . $brokenMarkupReport . "\n";     # Add a final \n to match MoinMoin raw.
       return $wikiText;
}

sub UseModtoMoinMoinLine
{
       #
       # Convert a single line of UseMod markup to MoinMoin markup.
       #
       # Arguments:
       #   $line    - one line of the page (PortPage passes lines that were
       #              split on newline, so there is no trailing newline)
       #   $lineNum - number of that line; used as the key when the line is
       #              recorded in one of the global report hashes
       #
       # Returns the converted line.  A definition list line comes back as
       # two lines joined by an embedded newline.
       #
       # Side effects: the line is stored in %ImageInTitle, %LinkInTitle,
       # %WikiNameInTitle, %EntityInTitle or %EntityInLink when it matches
       # one of the patterns that need manual attention.
       #
       # The substitutions depend on their order (for example blank lines
       # are emptied before the leading-space "preformatted" rule runs), so
       # keep the sequence intact when adding rules.
       #

       my $line = shift;
       my $lineNum = shift;

       # Detect titles - special handling needed, because UseMod allows
       # images and links in titles, while MoinMoin doesn't.
       # The carriage return is optional so that titles are recognised in
       # input with either kind of line ending.

       my $isTitle = 0;
       if ($line =~ /^\=.*\=\r?$/)
       {
               #= Title =
               $isTitle = 1;
       }
       elsif ($line =~ /^ .*:/)
       {
               # Subtitle:
               $isTitle = 1;
       }

       #
       # Basic Formatting
       #

       # toc
       $line =~ s/[&lt;]toc[&gt;]/&lt;&lt;TableOfContents(9)&gt;&gt;/i;

       # headings: drop an optional "#" numbering marker after the opening
       # equals signs and close the heading with the same number of equals
       # signs it was opened with.  $3 is the heading text; $2 is only the
       # optional marker and must not be written back.
       $line =~ s/^([=]+)(\s[#])?\s([^=]+)\s[=]+$/$1 $3 $1/g;

       # line breaks
       $line =~ s/[&lt;]br[&gt;]/&lt;&lt;BR&gt;&gt;/gi;

       # Turn single-bracketed links into double-bracketed ones.
       # NOTE(review): the \b anchors only match when a word character
       # directly precedes "[" and directly follows "]", so a link that is
       # surrounded by spaces is left alone - confirm that this is intended.
       $line =~ s/\b(\[[^\[][^\]]+[\]])\b/[$1]/g;

       # hash marks in headings
       $line =~ s/([=][=])+ [#] /$1 /g;

       # images
       #$line =~ s/\b(http\:\\\\.*\.(jpg|gif|png))\b/{{$1}}/Ui;

       # MoinMoin is more strict than UseMod about title formatting
       #$line =~ s/^\=\=\=([^=]+)\=+(\s)+$/\=\=\=$1\=\=\=\r/;
       #$line =~ s/^\=\=([^=]+)\=+(\s)+$/\=\=$1\=\=\r/;
       #$line =~ s/^\=([^=]+)\=+(\s)+$/\=$1\=\r/;

       $line =~ s/&lt;\/?b&gt;/\'\'\'/g;             # bold
       $line =~ s/&lt;\/?strong&gt;/\'\'\'/g;        # strong=bold

       $line =~ s/&lt;\/?i&gt;/\'\'/g;               # italic
       $line =~ s/&lt;\/?em&gt;/\'\'/g;              # em=italic
       $line =~ s/&lt;\/?u&gt;/_/g;                  # underline
       $line =~ s/&lt;\/?sup&gt;/^/g;                # superscript
       $line =~ s/&lt;\/?sub&gt;/,,/g;               # subscript (opening and closing tag)
       $line =~ s/&lt;tt&gt;/\{\{\{ /g;              # inline_preformatted_start
       $line =~ s/&lt;\/tt&gt;/ \}\}\}/g;            # inline_preformatted_end

       # Need to replace &lt;nowiki&gt; tags with ! infront of each word
       # The following only affects single word cases
       $line =~ s/&lt;nowiki&gt;(\w+)\s*&lt;\/nowiki&gt;/!$1/g;    # strip_wiki_formatting

       $line =~ s/\\$//g;                      # end_of_line_continuation_removal

       $line =~ s/^\s*$//g;    # blank line (do this before "preformatted")

       $line =~ s/^ (.*)$/\{\{\{ $1 \}\}\}/g;  # preformatted
       $line =~ s/&lt;pre&gt;/\{\{\{ /g;             # preformatted2_start
       $line =~ s/&lt;\/pre&gt;/ \}\}\}/g;           # preformatted2_end

       # lists: the nesting level is expressed by the number of leading
       # spaces (three, two, one), identically for bullets and numbers.
       $line =~ s/^\*\*\*/   \* /;             # bullet_list_three_level
       $line =~ s/^\*\*/  \* /;                # bullet_list_two_level
       $line =~ s/^\*/ \* /;                   # bullet_list_one_level

       $line =~ s/^### /   1. /;               # number_list_three_levels
       $line =~ s/^## /  1. /;                 # number_list_two_levels
       $line =~ s/^# / 1. /;                   # number_list_one_level

       # definition lists.
       #
       # UseMod:
       # ;;;SomeTerm: SomeDefinition
       # MoinMoin:
       #    SomeTerm: SomeDefintion
       #
       # However, MoinMoin definition list's aren't much use to us, since
       # they don't allow links in the definition title.
       #
       # So we replace definition lists with ul bulleted lists,
       # and indented paragraphs.
       #
       # This replacement also looks for the following pattern at the start
       # of the line: [.*][.*]
       #
       # This is discarded, it was previously used to add anchor points.
       #
       # Proper definition lists would be written like this instead:
       #$line =~ s/^;;;(.+):(.+)$/   $1:: $2/; # definition_three_levels
       #$line =~ s/^;;(.+):(.+)$/  $1:: $2/;   # definition_two_levels
       #$line =~ s/^;(.+):(.+)$/ $1:: $2/;     # definition_one_levels

       $line =~ s/^;;;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/   * $2:\n     $3/;    # definition_three_levels
       $line =~ s/^;;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/  * $2:\n    $3/;       # definition_two_levels
       $line =~ s/^;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/ * $2:\n   $3/;  # definition_one_level

       $line =~ s/^:::([^:].*)$/   $1/;                # indenting_three_levels
       $line =~ s/^::([^:].*)$/  $1/;          # indenting_two_levels
       $line =~ s/^:([^:].*)$/ $1/;            # indenting_one_level

       # UseMod [#BladiBlah], MoinMoin [[Anchor(BladiBlah)]]

       $line =~ s/\[\#([a-zA-Z0-9 _]+)\]/\[\[Anchor\($1\)\]\]/g; # anchors

       # UseMod [[One]], MoinMoin ["One"].  Force a link to single word wikipage
       #$line =~ s/\[\[([A-Z]+[a-z0-9]+)\]\]/\[\"$1\"\]/g; # odd links

       # UseMod [/BladiBlah johoho], MoinMoin [wiki:/BlaDiBlah fancy link]
       #$line =~ s/(?:^| )\[(\/[a-zA-Z0-9]+) ([^\]]+)\]/[:$1: $2]/g; # fancy_links_0

       # UseMod [BladiBlah johoho], MoinMoin [wiki:/BlaDiBlah fancy link]
       #$line =~ s/(?:^| )\[([A-Z]+[a-z0-9]+[A-Z]+[a-zA-Z0-9]+) ([^\]]+)\]/[:$1: $2]/g; # fancy_links_0

       # UseMod [Bla di _da johoho], MoinMoin ["Bla di _da johoho"]
       # (but not [wiki: ], and watch for [[ by a ' ' prefix
       #$line =~ s/(?:^| )\[([^w\]\[][a-zA-Z0-9 _]+)\]/["$1"]/g; # fancy_links_0_2

       # UseMod [[BlaDiBlah | fancy link]], MoinMoin [:BlaDiBlah: fancy link]
       #$line =~ s/\[\[(\/?[a-zA-Z0-9]+) *\| *([^\]]+)\]\]/[:$1:$2]/g; # fancy_links_1

       # Usemod [[bladlaslsla]], MoinMoin [" "]
       #$line =~ s/\[\[([a-zA-Z0-9 _]+)\]\]/["$1"]/g; # fancy_links_2 # Not needed? JohnC

       # Usemod [[blah]], MoinMoin ["blah"]
       #$line =~ s/\[\[([a-zA-Z0-9 _]+)\]\]/\[\"$1\"\]/g;

       # this was too strict...
       #$line =~ s/\[\[([A-Z][a-z]+[A-Z][a-zA-Z]+) *\| ([^\]]+)\]\]/[wiki:$1 $2]/g; # fancy_links_1

       # UseMod allows DDASDSaDASLeas as wiki name, Moin is more strict
       # (watch for fancy_links_2 by looking for a ' ' prefix)
       $line =~ s/ ([A-Z][A-Z]+[a-z0-9]+[A-Z]+[A-Za-z0-9]*)/ ["$1"]/g; # fancy_links_4
       $line =~ s/ ([A-Z]+[a-z0-9]+[A-Z][A-Z]+[A-Za-z0-9]*)/ ["$1"]/g; # fancy_links_5

       # UseMod forces links using ""link"", Moin uses ''''''link''''''
       $line =~ s/""/''''''/g; # fancy_links_5

       #
       # Replace html entities with literals
       #

       $line =~ s/&amp;nbsp;/ /g; # " "
       $line =~ s/&amp;ndash;/-/g; # "-"
       $line =~ s/&amp;bull;/•/g; #bullet

       $line =~ s/&amp;#[xX]([A-Fa-f0-9]+);/"\&amp;\#". hex($1) . ";"/eg; # convert any hex entities to decimal

       $line =~ s/\&amp;\#([0-9]+)\;/chr($1)/eg; # convert numerical entities to literals

       #
       # Links
       #

       if ($isTitle)
       {
               #
               # Report images and links in titles
               #

               if ($line =~ /http\:.+\.(gif|png|jpg|jpeg) /)
               {
                       # Move the image URL onto a line of its own in
                       # front of the remaining title text.
                       $line =~ s/(.*?)(http\:.+\.)(gif|png|jpg|jpeg)(.*?)/$2$3\n$1 $4/g;

                       $ImageInTitle{$lineNum} = $line;
               }

               if ($line =~ /\[.*\]/)
               {
                       $LinkInTitle{$lineNum} = $line;
               }

               if ($line =~ /[A-Z]+[a-z0-9]+[A-Z]+[a-z0-9]/)
               {
                       $WikiNameInTitle{$lineNum} = $line;
               }

               if ($line =~ /\&amp;\#([0-9]+)\;/)
               {
                       $EntityInTitle{$lineNum} = $line;
               }
       }

       if ($line =~ /\[.*\&amp;\#([0-9]+)\;.*\]/)
       {
               $EntityInLink{$lineNum} = $line;
       }

       return $line;
}

sub CreateBrokenMarkupReport
{
       #
       # Build the report that is appended to the bottom of a converted
       # MoinMoin page whose markup needs manual work from an editor.
       #
       # Takes no arguments.  Reads the global hashes %ImageInTitle,
       # %LinkInTitle, %WikiNameInTitle, %EntityInTitle and %EntityInLink,
       # each keyed by line number with the flagged line text as value.
       #
       # Returns the report as a scalar, or the empty string when none of
       # the hashes has an entry.
       #

       # One entry per report section, in output order:
       # section heading, then the hash holding the flagged lines.
       my @sections = (
               [ "ImageInTitle``s Moved on these lines:\n", \%ImageInTitle    ],
               [ "LinkInTitle``s on these lines:\n",        \%LinkInTitle     ],
               [ "WikiNameInTitle``s on these lines:\n",    \%WikiNameInTitle ],
               [ "EntityInTitle``s on these lines:\n",      \%EntityInTitle   ],
               [ "EntityInLink``s on these lines:\n",       \%EntityInLink    ],
       );

       my $details = "";
       foreach my $section (@sections)
       {
               my ($heading, $flagged) = @$section;
               next unless %$flagged;          # nothing flagged in this category

               $details .= $heading;

               # Sort numerically so that line 10 is listed after line 9
               # rather than after line 1.  The keys are integer line
               # numbers, so their difference has the sign sort expects.
               foreach my $lineNum (sort { $a - $b } keys %$flagged)
               {
                       my $lineText = $$flagged{$lineNum};
                       $lineText =~ s/\s$//g;  # strip trailing whitespace such as the newline
                       $details .= " * $lineNum \{\{\{ $lineText \}\}\}\n";
               }
               $details .= "\n";
       }

       # No category had an entry: the page needs no report at all.
       return "" if $details eq "";

       my $report = "## Delete this section once the page has been fixed\n";
       $report .= "----\n";
       $report .= "\/!\\ The Markup on This Page Needs Fixing\n\n";
       $report .= "This wiki page has been ported by the WikiMigrationBot, and this link to the WikiMigrationBotReport flags that this page contains wiki markup that needs fixing.\n\n";
       $report .= $details;
       $report .= "----\n";
       $report .= "## End of section to be deleted\n";

       return $report;
}


</pre></body></html>