How to read Excel XML (C#)

17,860

Solution 1

OK I've finally found solution from here: http://www.codeproject.com/Articles/32370/Import-Excel-File-to-DataSet#xx

Below sample code little adopted to my needs.

public static class XMLtoDataTable {
  private static ColumnType getDefaultType() {
    return new ColumnType(typeof(String));
}

        struct ColumnType {
            public Type type;
            private string name;
            public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
            public object ParseString(string input) {
                if (String.IsNullOrEmpty(input))
                    return DBNull.Value;
                switch (type.ToString()) {
                    case "system.datetime":
                        return DateTime.Parse(input);
                    case "system.decimal":
                        return decimal.Parse(input);
                    case "system.boolean":
                        return bool.Parse(input);
                    default:
                        return input;
                }
            }
        }


    private static ColumnType getType(XmlNode data) {
    string type = null;
    if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
        type = "";
    else
        type = data.Attributes["ss:Type"].Value;

    switch (type) {
        case "DateTime":
            return new ColumnType(typeof(DateTime));
        case "Boolean":
            return new ColumnType(typeof(Boolean));
        case "Number":
            return new ColumnType(typeof(Decimal));
        case "":
            decimal test2;
            if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2)) {
                return new ColumnType(typeof(Decimal));
            } else {
                return new ColumnType(typeof(String));
            }
        default://"String"
            return new ColumnType(typeof(String));
    }
}

    public static DataSet ImportExcelXML (string fileName, bool hasHeaders, bool autoDetectColumnType) {
        StreamReader sr = new StreamReader( fileName);
        Stream st = (Stream) sr.BaseStream;
        return ImportExcelXML( st, hasHeaders, autoDetectColumnType);
    }

    private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType) {
        XmlDocument doc = new XmlDocument();
        doc.Load(new XmlTextReader(inputFileStream));
        XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);

        nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
        nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
        nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");

        DataSet ds = new DataSet();

        foreach (XmlNode node in 
          doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)) {
            DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
            ds.Tables.Add(dt);
            XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
            if (rows.Count > 0) {

                //*************************
                //Add Columns To Table from header row
                //*************************
                List<ColumnType> columns = new List<ColumnType>();
                int startIndex = 0;
                if (hasHeaders) {
                    foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr)) {
                        columns.Add(new ColumnType(typeof(string)));//default to text
                        dt.Columns.Add(data.InnerText, typeof(string));
                    }
                    startIndex++;
                }
                //*************************
                //Update Data-Types of columns if Auto-Detecting
                //*************************
                if (autoDetectColumnType && rows.Count > 0) {
                    XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
                    int actualCellIndex = 0;
                    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
                        XmlNode cell = cells[cellIndex];
                        if (cell.Attributes["ss:Index"] != null)
                            actualCellIndex = 
                              int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                        ColumnType autoDetectType = 
                          getType(cell.SelectSingleNode("ss:Data", nsmgr));

                        if (actualCellIndex >= dt.Columns.Count) {
                            dt.Columns.Add("Column" + 
                              actualCellIndex.ToString(), autoDetectType.type);
                            columns.Add(autoDetectType);
                        } else {
                            dt.Columns[actualCellIndex].DataType = autoDetectType.type;
                            columns[actualCellIndex] = autoDetectType;
                        }

                        actualCellIndex++;
                    }
                }
                //*************************
                //Load Data
                //*************************
                for (int i = startIndex; i < rows.Count; i++) {
                    DataRow row = dt.NewRow();
                    XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
                    int actualCellIndex = 0;
                    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
                        XmlNode cell = cells[cellIndex];
                        if (cell.Attributes["ss:Index"] != null)
                            actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                        XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);

                        if (actualCellIndex >= dt.Columns.Count) {
                            for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++) {
                                dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string));columns.Add(getDefaultType());
                            } // ii
                            ColumnType autoDetectType = 
                               getType(cell.SelectSingleNode("ss:Data", nsmgr));
                            dt.Columns.Add("Column" + actualCellIndex.ToString(), 
                                           typeof(string));
                            columns.Add(autoDetectType);
                        }
                        if (data != null)
                            row[actualCellIndex] = data.InnerText;

                        actualCellIndex++;
                    }

                    dt.Rows.Add(row);
                }
            }
        }
        return ds;
    }


}

Solution 2

Even without coulmn type its working fine too. thanks !!!

have sample for without coulmn type:

    private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders)
{
    XmlDocument doc = new XmlDocument();
    doc.Load(new XmlTextReader(inputFileStream));
    XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);

    nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
    nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
    nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");

    DataSet ds = new DataSet();

    foreach (XmlNode node in
      doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
    {
        DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
        ds.Tables.Add(dt);
        XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
        if (rows.Count > 0)
        {

            //*************************
            //Add Columns To Table from header row
            //*************************
            List<ColumnType> columns = new List<ColumnType>();
            int startIndex = 0;
            if (hasHeaders)
            {
                foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
                {
                    columns.Add(new ColumnType(typeof(string)));//default to text
                    dt.Columns.Add(data.InnerText, typeof(string));
                }
                startIndex++;
            }
            //*************************
            //Update Data-Types of columns if Auto-Detecting
            //*************************
            if (rows.Count > 0)
            {
                XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
                int actualCellIndex = 0;
                for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
                {
                    XmlNode cell = cells[cellIndex];
                    if (cell.Attributes["ss:Index"] != null)
                        actualCellIndex =
                          int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                    //ColumnType autoDetectType =
                    //  getType(cell.SelectSingleNode("ss:Data", nsmgr));

                    if (actualCellIndex >= dt.Columns.Count)
                    {
                        //dt.Columns.Add("Column" +
                        //  actualCellIndex.ToString(), autoDetectType.type);
                        dt.Columns.Add("Column" + actualCellIndex.ToString());
                        //columns.Add(autoDetectType);
                    }
                    else
                    {
                        //dt.Columns[actualCellIndex].DataType = autoDetectType.type;
                        //columns[actualCellIndex] = autoDetectType;
                    }

                    actualCellIndex++;
                }
            }
            //*************************
            //Load Data
            //*************************
            for (int i = startIndex; i < rows.Count; i++)
            {
                DataRow row = dt.NewRow();
                XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
                int actualCellIndex = 0;
                for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
                {
                    XmlNode cell = cells[cellIndex];
                    if (cell.Attributes["ss:Index"] != null)
                        actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                    XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);

                    if (actualCellIndex >= dt.Columns.Count)
                    {
                        for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
                        {
                            //dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
                            dt.Columns.Add("Column" + actualCellIndex.ToString());
                            //columns.Add(getDefaultType());
                        } // ii
                        //ColumnType autoDetectType = getType(cell.SelectSingleNode("ss:Data", nsmgr));
                        dt.Columns.Add("Column" + actualCellIndex.ToString(),
                                       typeof(string));
                        // columns.Add(autoDetectType);
                    }
                    if (data != null)
                        row[actualCellIndex] = data.InnerText;

                    actualCellIndex++;
                }

                dt.Rows.Add(row);
            }
        }
    }
    return ds;
}

Solution 3

Try the official API first (Microsoft Open XML SDK).

Solution 4

You can try with this code

    var textReader = new XmlTextReader("...\\YourFile.xml");
    // Read until end of file
    while (textReader.Read())
    {
        XmlNodeType nType = textReader.NodeType;
        // If node type us a declaration
        if (nType == XmlNodeType.XmlDeclaration)
        {
            Console.WriteLine("Declaration:" + textReader.Name.ToString());
        }
        // if node type is a comment
        if (nType == XmlNodeType.Comment)
        {
            Console.WriteLine("Comment:" + textReader.Name.ToString());
        }
        // if node type us an attribute
        if (nType == XmlNodeType.Attribute)
        {
            Console.WriteLine("Attribute:" + textReader.Name.ToString());
        }
        // if node type is an element
        if (nType == XmlNodeType.Element)
        {
            Console.WriteLine("Element:" + textReader.Name.ToString());
        }
        // if node type is an entity\
        if (nType == XmlNodeType.Entity)
        {
            Console.WriteLine("Entity:" + textReader.Name.ToString());
        }
        // if node type is a Process Instruction
        if (nType == XmlNodeType.Entity)
        {
            Console.WriteLine("Entity:" + textReader.Name.ToString());
        }
        // if node type a document
        if (nType == XmlNodeType.DocumentType)
        {
            Console.WriteLine("Document:" + textReader.Name.ToString());
        }
        // if node type is white space
        if (nType == XmlNodeType.Whitespace)
        {
            Console.WriteLine("WhiteSpace:" + textReader.Name.ToString());
        }
    }

Solution 5

public static class XMLtoDataTable
{
    private static ColumnType getDefaultType()
    {
        return new ColumnType(typeof(String));
    }

    struct ColumnType
    {
        public Type type;
        private string name;
        public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
        public object ParseString(string input)
        {
            if (String.IsNullOrEmpty(input))
                return DBNull.Value;
            switch (type.ToString())
            {
                case "system.datetime":
                    return DateTime.Parse(input);
                case "system.decimal":
                    return decimal.Parse(input);
                case "system.boolean":
                    return bool.Parse(input);
                default:
                    return input;
            }
        }
    }


    private static ColumnType getType(XmlNode data)
    {
        string type = null;
        if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
            type = "";
        else
            type = data.Attributes["ss:Type"].Value;

        switch (type)
        {
            case "DateTime":
                return new ColumnType(typeof(DateTime));
            case "Boolean":
                return new ColumnType(typeof(Boolean));
            case "Number":
                return new ColumnType(typeof(Decimal));
            case "":
                decimal test2;
                if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2))
                {
                    return new ColumnType(typeof(Decimal));
                }
                else
                {
                    return new ColumnType(typeof(String));
                }
            default://"String"
                return new ColumnType(typeof(String));
        }
    }

    public static DataSet ImportExcelXML(string fileName, bool hasHeaders, bool autoDetectColumnType)
    {
        StreamReader sr = new StreamReader(fileName);
        Stream st = (Stream)sr.BaseStream;
        return ImportExcelXML(st, hasHeaders, autoDetectColumnType);
    }

    private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType)
    {
        XmlDocument doc = new XmlDocument();
        doc.Load(new XmlTextReader(inputFileStream));
        XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);

        nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
        nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
        nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");

        DataSet ds = new DataSet();

        foreach (XmlNode node in
          doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
        {
            DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
            ds.Tables.Add(dt);
            XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
            if (rows.Count > 0)
            {

                //*************************
                //Add Columns To Table from header row
                //*************************
                List<ColumnType> columns = new List<ColumnType>();
                int startIndex = 0;
                if (hasHeaders)
                {
                    foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
                    {
                        columns.Add(new ColumnType(typeof(string)));//default to text
                        dt.Columns.Add(data.InnerText, typeof(string));
                    }
                    startIndex++;
                }
                //*************************
                //Update Data-Types of columns if Auto-Detecting
                //*************************
                if (autoDetectColumnType && rows.Count > 0)
                {
                    XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
                    int actualCellIndex = 0;
                    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
                    {
                        XmlNode cell = cells[cellIndex];
                        if (cell.Attributes["ss:Index"] != null)
                            actualCellIndex =
                              int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                        ColumnType autoDetectType =
                          getType(cell.SelectSingleNode("ss:Data", nsmgr));

                        if (actualCellIndex >= dt.Columns.Count)
                        {
                            dt.Columns.Add("Column" +
                              actualCellIndex.ToString(), autoDetectType.type);
                            columns.Add(autoDetectType);
                        }
                        else
                        {
                            dt.Columns[actualCellIndex].DataType = autoDetectType.type;
                            columns[actualCellIndex] = autoDetectType;
                        }

                        actualCellIndex++;
                    }
                }
                //*************************
                //Load Data
                //*************************
                for (int i = startIndex; i < rows.Count; i++)
                {
                    DataRow row = dt.NewRow();
                    XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
                    int actualCellIndex = 0;
                    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
                    {
                        XmlNode cell = cells[cellIndex];
                        if (cell.Attributes["ss:Index"] != null)
                            actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                        XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);

                        if (actualCellIndex >= dt.Columns.Count)
                        {
                            for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
                            {
                                dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
                            } // ii
                            ColumnType autoDetectType =
                               getType(cell.SelectSingleNode("ss:Data", nsmgr));
                            dt.Columns.Add("Column" + actualCellIndex.ToString(),
                                           typeof(string));
                            columns.Add(autoDetectType);
                        }
                        if (data != null)
                            row[actualCellIndex] = data.InnerText;

                        actualCellIndex++;
                    }

                    dt.Rows.Add(row);
                }
            }
        }
        return ds;
    }


}
Share:
17,860
Maciej
Author by

Maciej

C# / MS SQL 2005 developer

Updated on July 18, 2022

Comments

  • Maciej
    Maciej almost 2 years

    I wonder if there is an easy way to read Excel 2010 XML?
    This XML has a different structure than I used to read.

    Especially ss:index attribute (ss:Index**="7") makes things a bit more complicated

    EDIT: To explain better:

    • I have file with XML extension which can be easily opened in Excel
    • I'm looking a way to read this sheet programically (eg copy to DataTable)
    • Noticed this is not common XML I used to work
    • XML defines fields on the begining, than use ROW, CELL and DATA tags
    • What surprised me is when there are eg 3 fields (cells) but 2nd field doesn't have value this field is 'skipped' in XML, but 3rd field has some additional 'index' attribute eg: ss:Index**="3" (which indicates that even it is on 2nd position right index should be "3"

    fragment of example XML

          <Row ss:AutoFitHeight="0">
            <Cell><Data ss:Type="String">Johny</Data></Cell>
            <Cell ss:Index="3"><Data ss:Type="String">NY</Data></Cell>
    
  • Maciej
    Maciej over 11 years
    Isnt for reading .XLSX? I'm looking way to read files with XML extension. Tried this: msdn.microsoft.com/en-us/library/hh298534.aspx but said "File contains corrupted data."
  • Christian Hayter
    Christian Hayter over 11 years
    Yes it is for reading OOXML containers. However, you have to do extra work to extract the raw XML and process it, so why not avoid all that and just process the container directly?
  • Maciej
    Maciej over 11 years
    How to get this: "ss:Index" atribute value ?
  • Aghilas Yakoub
    Aghilas Yakoub over 11 years
    with Value property on this block if (nType == XmlNodeType.Attribute) {var result = textReader.Value}
  • Maciej
    Maciej over 11 years
    Not really... When I did that: while (textReader.Read()) Console.WriteLine( textReader.NodeType +" -> "+textReader.Value ); Any attribute was listed
  • Umo
    Umo almost 9 years
    Just wanted to say thanks! This works flawless with xml generated by CarlosAq XML Excel Writer Library after i changed the doubles (ss:) to singles (s:). Saved me hours of work! Thanks for sharing!
  • David Rogers
    David Rogers about 7 years
    This is for all intents and purposes an excellent solution, however be careful of duplicate column names, this code does not account for them and needs to be modified in case they are encountered.
  • Stephen Turner
    Stephen Turner over 4 years
    This API doesn't work for files saved from Excel as "XML Spreadsheet 2003 (.xml)", it's for "Strict Open XML Spreadsheet (.xlsx)" files.