[XSL-LIST Mailing List Archive Home] [By Thread] [By Date]

[xsl] Issue with nested grouping


Subject: [xsl] Issue with nested grouping
From: Geert Bormans <geert@xxxxxxxxxxxxxxxxxxx>
Date: Sun, 02 Dec 2007 12:49:44 +0100

Hi all,

I have been looking at this for too long now,
and I am still not sure why this doesn't work

I have an XML (stripped down version of a word XML document)
That requires (nested) grouping, based on the text content of some paragraphs.

<?xml version="1.0" encoding="UTF-8"?>
<w:wordDocument xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
    xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint" >
    <w:body>
        <wx:sect>
            <wx:sub-section>
                    <w:p>
                        <w:r><w:t>Text before first pgroup</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[start pgroup id="43210023"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>First P in pgroup</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>Second P in pgroup.</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[start pgroup id="54320024"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>P in nested pgroup</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[end pgroup id="54320024"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>First P after nested group</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[end pgroup id="43210023"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>In between</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>Other In between</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[start pgroup id="43210025"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[start pgroup id="54320026"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>Isolated nested pgroup</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[end pgroup id="54320026"</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>[end pgroup id="43210025"</w:t></w:r>
                    </w:p>
            </wx:sub-section>
        </wx:sect>
    </w:body>
</w:wordDocument>

groups have IDs and based on these IDs I decide what the pseudo end tag of a group is
The result should be something along these lines


<?xml version="1.0" encoding="UTF-8"?>
<w:wordDocument xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
    xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint" >
    <w:body>
        <wx:sect>
            <wx:sub-section>
                    <w:p>
                        <w:r><w:t>Text before first pgroup</w:t></w:r>
                    </w:p>
                    <pgroup id="43210023">
                    <w:p>
                        <w:r><w:t>First P in pgroup</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>Second P in pgroup.</w:t></w:r>
                    </w:p>
                    <pgroup id="54320024">
                    <w:p>
                        <w:r><w:t>P in nested pgroup</w:t></w:r>
                    </w:p>
                    </pgroup>
                    <w:p>
                        <w:r><w:t>First P after nested group</w:t></w:r>
                    </w:p>
                    </pgroup>
                    <w:p>
                        <w:r><w:t>In between</w:t></w:r>
                    </w:p>
                    <w:p>
                        <w:r><w:t>Other In between</w:t></w:r>
                    </w:p>
                    <pgroup id="43210025">
                    <pgroup id="54320026">
                    <w:p>
                        <w:r><w:t>Isolated nested pgroup</w:t></w:r>
                    </w:p>
                    </pgroup>
                    </pgroup>
            </wx:sub-section>
        </wx:sect>
    </w:body>
</w:wordDocument>

The following doesn't work

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<xsl:stylesheet  version="2.0"
xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint"
xmlns:int="urn:internal:resulttree"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
>

<xsl:output method="xml" version="1.0" encoding="UTF-8" omit-xml-declaration="no" indent="no" />

<xsl:variable name="re-start">^\[start\s+pgroup\s+id="([^"]+)".*$</xsl:variable>

<xsl:template match="node()">
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:apply-templates select="node()"></xsl:apply-templates>
</xsl:copy>
</xsl:template>

<xsl:template match="*[w:p/w:r[matches(w:t,$re-start)]]" >
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:call-template name="processGroups">
<xsl:with-param name="thisNodes" select="node()"/>
</xsl:call-template>
</xsl:copy>
</xsl:template>

<xsl:template name="processGroups">
<xsl:param name="thisNodes"/>
<xsl:for-each-group select="$thisNodes" group-starting-with="w:p[w:r[matches(w:t,$re-start)]][1]" >
<xsl:choose>
<xsl:when test="w:r[matches(w:t,$re-start)]">
<xsl:variable name="grid">
<xsl:value-of select="replace(w:r/w:t[matches(text(),$re-start)], $re-start, '$1')"></xsl:value-of>
</xsl:variable>
<xsl:variable name="endStr">
<xsl:text>[end pgroup id="</xsl:text>
<xsl:value-of select="$grid"></xsl:value-of>
<xsl:text>"</xsl:text>
</xsl:variable>
<!-- pgroup ends at a [end with the same ID -->
<xsl:for-each-group select="current-group()" group-ending-with="w:p[w:r[contains(w:t,$endStr)]]" >
<xsl:choose>
<!-- the first positioned current-group() is the pgroup -->
<xsl:when test="position() = 1">
<xsl:element name="int:pgroup">
<xsl:attribute name="int:id"><xsl:value-of select="$grid"/></xsl:attribute>
<!-- looking for nested pgroups -->
<xsl:choose>
<xsl:when test="(current-group() except .)/w:r[matches(w:t,$re-start)]">
<xsl:call-template name="processGroups">
<xsl:with-param name="thisNodes" select="current-group() except ."></xsl:with-param>
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:apply-templates select="current-group()"/>
</xsl:otherwise>
</xsl:choose>
</xsl:element>
</xsl:when>
<!-- the remainder of this context might still have a pgroup -->
<xsl:otherwise>
<xsl:choose>
<xsl:when test="current-group()/w:r[matches(w:t,$re-start)]">
<xsl:call-template name="processGroups">
<xsl:with-param name="thisNodes" select="current-group()"></xsl:with-param>
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:apply-templates select="current-group()"/>
</xsl:otherwise>
</xsl:choose>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each-group>
</xsl:when>
<!-- processes the piece before the first p-group, inside a context that has a pgroup-->
<xsl:otherwise>
<xsl:apply-templates select="current-group()"/>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each-group>
</xsl:template>
</xsl:stylesheet>


I don't understand why the current-group() that I pass back to the named template
does not behave as nodeset of its members
Or maybe I am just overlooking something


Anyone an idea on this?

Thanks

Geert


Current Thread
Keywords
xml