[XSL-LIST Mailing List Archive Home]
[By Thread]
[By Date]
Re: [xsl] Stylesheet Optimization -- How to Make It Faster
Subject: Re: [xsl] Stylesheet Optimization -- How to Make It Faster
From: Jeff Sese <jsese@xxxxxxxxxxxx>
Date: Tue, 28 Nov 2006 20:55:44 +0800
|
Sorry for the messy sample files... my mail client removed the tabs.
I'm using Saxon 8.8J.
I already used keys as you suggested; however, I did not notice a
change in the processing time, but I'll test more files just to be sure.
Here's my new XSL:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:ati="http://www.asiatype.com/xslt-functions"
exclude-result-prefixes="xs ati">
<!-- Serialize the result tree as XML 1.0 in UTF-8. -->
<xsl:output encoding="UTF-8" version="1.0" method="xml"/>
<!-- Index every <expanded> element by the string value of the <abbrev>
     sibling(s) preceding it, so that an expansion can be looked up by
     its abbreviation. -->
<xsl:key name="abbrev"
         match="expanded"
         use="preceding-sibling::abbrev"/>
<!-- All <abbrev> elements found under /root/publisher in
     publishers_data.xml. Declared as element()+, so an empty result is
     a type error. -->
<xsl:variable name="abbreviations"
              select="document('publishers_data.xml')/root/publisher/abbrev"
              as="element()+"/>
<!-- Entry point: pass the children of the document node on to the
     template rules of the default mode. -->
<xsl:template match="/">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!--
  Tags publisher abbreviations that occur in running text.

  Applies to every text node that has an <ab> ancestor and is not inside
  a <note> carrying all of @id, @n and @lang.

  The text is tested once against each entry of $abbreviations, using the
  escaped abbreviation delimited by start/end of string or a non-word
  character. If at least one abbreviation is found, the text is passed to
  ati:replace-with-nodes together with one
  <abbr type="title" expand="..."> element per hit; the expand attribute
  holds the string value of the <expanded> element(s) indexed under that
  abbreviation by the 'abbrev' key. If nothing is found, processing falls
  through to the next matching rule (the built-in text rule when no other
  rule applies), so the text is emitted unchanged.

  The abbreviation scan is done here, once, rather than in the match
  pattern and then again in the body, which halves the regex work for
  every text node that contains an abbreviation.
-->
<xsl:template match="text()[ancestor::ab and not(ancestor::note[@id and @n and @lang])]">
  <xsl:variable name="str" as="xs:string" select="."/>
  <!-- Abbreviations present in this text node (atomized to strings). -->
  <xsl:variable name="search-str" as="xs:string*"
                select="$abbreviations[matches($str, concat('(^|\W)(', ati:escape(.), ')($|\W)'))]"/>
  <xsl:choose>
    <xsl:when test="exists($search-str)">
      <xsl:variable name="replace" as="element()*">
        <xsl:for-each select="$search-str">
          <xsl:variable name="abbr" as="xs:string" select="."/>
          <!-- The context item here is a string, so the document to
               search is named explicitly. A single three-argument key()
               call replaces $abbreviations/key(...), which repeated the
               same lookup once for every abbreviation node. -->
          <abbr type="title"
                expand="{key('abbrev', $abbr, root($abbreviations[1]))}">
            <xsl:value-of select="$abbr"/>
          </abbr>
        </xsl:for-each>
      </xsl:variable>
      <xsl:sequence select="ati:replace-with-nodes($str, $search-str, $replace)"/>
    </xsl:when>
    <xsl:otherwise>
      <!-- No abbreviation in this text node: defer to the next rule. -->
      <xsl:next-match/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  Identity rule for attributes, elements, comments and processing
  instructions, declared for every mode. Makes a shallow copy of the
  matched node and then applies templates to its attributes and child
  nodes.
  NOTE(review): the inner xsl:apply-templates names no mode, so the
  recursion continues in the default mode even when this rule was
  reached in another mode. Confirm that this is intended.
-->
<xsl:template mode="#all"
              match="@* | * | comment() | processing-instruction()">
  <xsl:copy>
    <xsl:apply-templates select="@*, node()"/>
  </xsl:copy>
</xsl:template>
<!--
  ati:replace-with-nodes($input, $words-to-replace, $replacement)

  Splits $input on occurrences of the given words and returns the pieces
  as a sequence of nodes: text that matches no word comes back as a text
  node, and each occurrence of the word at position N of
  $words-to-replace is replaced by the node at position N of
  $replacement.

  Parameters:
    $input             the string to process
    $words-to-replace  literal words to look for; each is escaped with
                       ati:escape before being put into the regex
    $replacement       nodes positionally paired with $words-to-replace

  The words are joined into one regex of the form (w1)|(w2)|..., so the
  number of the capture group that matched identifies the word.

  When $words-to-replace is empty the input is returned as a single text
  node. Without this guard the generated regex would be the empty
  string, which matches a zero-length string and is therefore a dynamic
  error in xsl:analyze-string.

  NOTE(review): the words are matched here without the word-boundary
  test, and the alternatives are listed in the order supplied. If one
  word can be a prefix of another, confirm that the resulting choice of
  match is the one wanted.
-->
<xsl:function name="ati:replace-with-nodes" as="node()+">
  <xsl:param name="input" as="xs:string"/>
  <xsl:param name="words-to-replace" as="xs:string*"/>
  <xsl:param name="replacement" as="node()*"/>
  <xsl:choose>
    <xsl:when test="empty($words-to-replace)">
      <!-- Nothing to search for: hand the input back untouched. -->
      <xsl:value-of select="$input"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:variable name="regex" as="xs:string"
                    select="string-join(for $w in $words-to-replace
                                        return concat('(', ati:escape($w), ')'), '|')"/>
      <xsl:analyze-string select="$input" regex="{$regex}">
        <xsl:matching-substring>
          <!-- Index of the capture group (and thus the word) that
               produced this match: the one whose captured substring is
               non-empty. -->
          <xsl:variable name="i" as="xs:integer"
                        select="(1 to count($words-to-replace))[regex-group(.)]"/>
          <xsl:sequence select="$replacement[$i]"/>
        </xsl:matching-substring>
        <xsl:non-matching-substring>
          <xsl:value-of select="."/>
        </xsl:non-matching-substring>
      </xsl:analyze-string>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!--
  ati:escape($s)

  Returns $s with a backslash inserted in front of every character that
  is special in a regular expression: \ | . - ^ ? * + ( ) { } [ ] $
  The result can be embedded in a regex so that it matches $s literally.

  The return type is declared, as it is for the other function in this
  stylesheet, so callers and the processor can rely on getting exactly
  one xs:string.
-->
<xsl:function name="ati:escape" as="xs:string">
  <xsl:param name="s" as="xs:string"/>
  <xsl:sequence select="replace($s, '[\\\|\.\-\^\?\*\+\(\)\{\}\[\]\$]', '\\$0')"/>
</xsl:function>
</xsl:stylesheet>
Here's a short version of publishers_data.xml:
<root>
<publisher>
<abbrev>Inschriften von Priene</abbrev>
<expanded>Inschriften von Priene</expanded> </publisher>
<publisher> <abbrev>P. Mil. Congr. XVIII</abbrev>
<expanded>Papiri documentari dell'Università Cattolica di
Milano</expanded> </publisher> <publisher> <abbrev>P. Jud.
Des. Misc.</abbrev> <expanded>Discoveries in the Judean
Desert XXXVIII</expanded> </publisher>
<!-- more publishers here -->
</root>
Here's a snippet of the source XML:
<!-- preceding::node() of ab -->
<ab lang="grk" n="1">
<foreign lang="grk">N N3N-N3N?N=N5 N:N1Oa=0 ON?a=:O NN1ON5a=7N?O</foreign>
<note place="margin">a c</note> <lb n="5"/> <foreign
lang="grk">OOa=9N=N?OO ON?a?& N<N5Oa=0 NN1N<N2a=;ON7N= N2N1ON9N;N5a=;ON1N=ON?O, a=ON5 N:N1a=6
NN9N?N=a=;ON9N?O a<&N= a= NN9N;a=5ON9N?O</foreign> <lb/>(III), <foreign
lang="grk">a<Oa=6 Oa?O N>B/N5B/ a=N;ON<ON9a=1N4N?O</foreign> (520/16)<foreign
lang="grk">N a<1OON?ON9N?N3Oa=1ON?O. a?>NOa=9N4N?ON?O N4a=2 a= a?>NN;N9-</foreign>
<note place="margin">v</note> <lb/> <foreign
lang="grk">N:N1ON=N1ON5a=:O a= ON-N;N7ON1N9 ON?a=;ON?O, N=N5a==ON5ON?O a=$N=. N:N1a=6 a<&N=
a<N:N?OOOa=4O N OO ON1N3a=9ON?O</foreign> <note id="n7" n="7" lang="ger">
<foreign lang="grk">a=$N=N N3N-N3N?N=N5 N3a=0O N<N5Oa>= N1a=Oa=9N=</foreign>
A</note> <lb/> <foreign lang="grk">a= a?>NN:N1ON1a?N?O. OOa?6ON?O N4a=2
a<1OON?Oa=7N1N= ON5N6a?6O a<N>a=5N=N5N3N:N5, OON3N3ON1Oa=4N= N4a=2 N&N5ON5N:a=;N4N7O</foreign>
<note id="n8b9" n="8b9" lang="ger"> <foreign
lang="grk">OOa?6ON?ObN=N?N8N5a=;N5ON1N9</foreign> wiederholt s. <foreign
lang="grk">a=6OON?Oa?ON1N9</foreign>, s. <foreign
lang="grk">OON3N3ON1ON5a?O</foreign>.</note>
<lb/>(I 3). <foreign lang="grk">Oa=0 N3a=0O a>=NN:N?OON9N;a=1N?O</foreign>
(<link type="boj" targets="a002" n="BOJTEXT002_T_7">2 T
7</link>) <foreign lang="grk">N=N?N8N5a=;N5ON1N9.</foreign> <note
id="n9" n="9" lang="ger"> <foreign
lang="grk">a>=NN:N?OON9N;a=1N?O</foreign> Vossius <foreign
lang="grk">a>=NN3N7ON9N;a=1N?O</foreign> Suid</note> </ab>
<!-- following::node() of ab -->
All ab nodes appear at the same level (same depth) throughout.
Any suggestions are welcome.
Thanks,
--
Jeff
|