Processing MediaWiki XML with STX/Add namespaces
From Meta, a Wikimedia project coordination wiki
This filter does just one thing: it adds an attribute holding the namespace id to the title of each page. If the dump defines the namespace
<namespace key="42">Foo</namespace>
then
<title>Foo:Bar</title>
will be transformed to
<title namespace="42">Foo:Bar</title>
Tested with Joost (alpha build of 2005-05-21).
STX script
<stx:transform version="1.0"
xmlns:stx="http://stx.sourceforge.net/2002/ns"
xmlns:m="http://www.mediawiki.org/xml/export-0.3/"
output-method="xml"
>
<!-- Fallback rule (priority -1): any node or attribute that no other template
     claims is copied to the output, together with its attributes and children. -->
<stx:template match="@* | node()" priority="-1">
  <stx:copy>
    <stx:process-attributes/>
    <stx:process-children/>
  </stx:copy>
</stx:template>
<!-- Buffer that collects a copy of every <namespace> declaration read so far;
     it is consulted later when a page title is processed. -->
<stx:buffer name="namespaces"/>

<!-- Each <namespace> element is written twice: once to the regular output and
     once into the "namespaces" buffer. The buffer is appended to, not reset
     (clear="no"). -->
<stx:template match="m:namespace">
  <!-- regular output: element, its attributes and its text -->
  <stx:copy>
    <stx:process-attributes/>
    <stx:value-of select="."/>
  </stx:copy>
  <!-- the same element again, redirected into the buffer -->
  <stx:result-buffer name="namespaces" clear="no">
    <stx:copy>
      <stx:process-attributes/>
      <stx:value-of select="."/>
    </stx:copy>
  </stx:result-buffer>
</stx:template>
<!-- State shared between the m:title template and the lookup group below:
     $prefix          text before the first ':' of the current title
     $page-namespace  key of the matching namespace; 0 while nothing matched -->
<stx:variable name="page-namespace"/>
<stx:variable name="prefix"/>

<!-- Lookup group, applied to the "namespaces" buffer: compares $prefix with
     the text of each buffered <namespace> and stores the @key of the first
     one that is equal in $page-namespace. -->
<stx:group name="match-namespace-prefix">
  <stx:template match="m:namespace">
    <!-- "$page-namespace = 0" skips further comparisons once a key was found -->
    <stx:if test="$page-namespace = 0 and $prefix = string(.)">
      <stx:assign name="page-namespace" select="@key"/>
    </stx:if>
  </stx:template>
  <!-- A lookup must not write to the result stream: swallow any text node
       instead of emitting a literal "_" placeholder in front of the title. -->
  <stx:template match="text()" priority="1"/>
</stx:group>
<!-- Annotate every <title> with a "namespace" attribute. Its value is the key
     of the buffered namespace whose name equals the text before the first ':'
     of the title, or 0 when the title has no prefix or no namespace matches. -->
<stx:template match="m:title">
  <!-- reset the lookup state for this title -->
  <stx:assign name="prefix" select="substring-before(., ':')"/>
  <stx:assign name="page-namespace" select="0"/>
  <!-- a title without ':' yields an empty prefix, so the lookup is skipped -->
  <stx:if test="$prefix != ''">
    <stx:process-buffer name="namespaces" group="match-namespace-prefix"/>
  </stx:if>
  <!-- emit the title: new attribute first, then original attributes and text -->
  <stx:copy>
    <stx:attribute name="namespace" select="$page-namespace"/>
    <stx:process-attributes/>
    <stx:process-children/>
  </stx:copy>
</stx:template>
</stx:transform>