[XSL-LIST Mailing List Archive Home] [By Thread] [By Date]

[xsl] Recognising Unicode in a CSV to XML transform


Subject: [xsl] Recognising Unicode in a CSV to XML transform
From: Marney Cotterill <marney@xxxxxxxxxxxxxxxxxxxx>
Date: Mon, 26 May 2008 12:57:40 +1000

Hi,

I have been trying to solve this problem and have hit a number of brick
walls. I'm using the following stylesheet to transform a CSV file to XML
courtesy of Andrew Welch.

My problem is when I feed the transform a CSV with any characters that sit
outside the Basic Latin set, I get the "Cannot locate :" messege found in
the <xsl:otherwise> statement.

Is it possible to somehow integrate the following into the regex of the
initial analyze string so these characters do not break the transform?

\p{InLatin-1_Supplement} (U+0080..U+00FF)
\p{InGeneral_Punctuation} (U+2000..U+206F)

I am really clutching at straws here!

Thanks
Marney

<?xml version="1.0"?>
<!--
    A CSV to XML transform
    Version 2
    Andrew Welch
    http://andrewjwelch.com
    
    Modify or supply the $pathToCSV parameter and run the transform
    using "main" as the initial template.
    
    For bug reports or modification requests contact me at
andrew.j.welch@xxxxxxxxx
-->

<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xmlns:msxsl="urn:schemas-microsoft-com:xslt"
    xmlns:fn="fn"
    exclude-result-prefixes="xs fn msxsl">

  <xsl:output indent="yes" encoding="UTF-8"/>

  <xsl:param name="pathToCSV" select="'file:///c:/Documents and
Settings/webcopy.csv'"/>

  <xsl:function name="fn:getTokens" as="xs:string+">
    <xsl:param name="str" as="xs:string"/>
    <xsl:analyze-string select="concat($str, ',')"
regex='(("[^"]*")+|[^,]*),'>
      <xsl:matching-substring>
        <xsl:sequence select='replace(regex-group(1), "^""|""$|("")""",
"$1")'/>
      </xsl:matching-substring>
    </xsl:analyze-string>
  </xsl:function>

  <xsl:template match="/" name="main">
      <xsl:choose>
        <xsl:when test="unparsed-text-available($pathToCSV)">
          <xsl:variable name="csv" select="unparsed-text($pathToCSV)"/>
          <xsl:variable name="lines" select="tokenize($csv, '&#xD;&#xA;')"
as="xs:string+"/>
          <xsl:variable name="elemNames" select="fn:getTokens($lines[1])"
as="xs:string+"/>
          <root>
            <xsl:for-each select="$lines[position() > 1]">
              <row>
                <xsl:variable name="lineItems" select="fn:getTokens(.)"
as="xs:string+"/>
                <xsl:for-each select="$elemNames">
                  <xsl:variable name="pos" select="position()"/>
                  <xsl:element name="{.}">
                    <xsl:value-of select="$lineItems[$pos]"/>
                  </xsl:element>
                </xsl:for-each>
              </row>
            </xsl:for-each>
          </root>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>Cannot locate : </xsl:text>
          <xsl:value-of select="$pathToCSV"/>
        </xsl:otherwise>
      </xsl:choose>
  </xsl:template>

</xsl:stylesheet>
Marney Cotterill
graphic designer
                   
cracker//brandware

6 Bourke Street
Queens Park 
NSW 2022
Telephone 02 9387 2001
Facsimile 02 9387 2006
marney@xxxxxxxxxxxxxxxxxxxx
www.crackerbrandware.com


Current Thread
Keywords
xml