How to read Excel XML (C#)
17,860
Solution 1
OK I've finally found solution from here: http://www.codeproject.com/Articles/32370/Import-Excel-File-to-DataSet#xx
Below sample code little adopted to my needs.
public static class XMLtoDataTable {
private static ColumnType getDefaultType() {
return new ColumnType(typeof(String));
}
struct ColumnType {
public Type type;
private string name;
public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
public object ParseString(string input) {
if (String.IsNullOrEmpty(input))
return DBNull.Value;
switch (type.ToString()) {
case "system.datetime":
return DateTime.Parse(input);
case "system.decimal":
return decimal.Parse(input);
case "system.boolean":
return bool.Parse(input);
default:
return input;
}
}
}
private static ColumnType getType(XmlNode data) {
string type = null;
if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
type = "";
else
type = data.Attributes["ss:Type"].Value;
switch (type) {
case "DateTime":
return new ColumnType(typeof(DateTime));
case "Boolean":
return new ColumnType(typeof(Boolean));
case "Number":
return new ColumnType(typeof(Decimal));
case "":
decimal test2;
if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2)) {
return new ColumnType(typeof(Decimal));
} else {
return new ColumnType(typeof(String));
}
default://"String"
return new ColumnType(typeof(String));
}
}
public static DataSet ImportExcelXML (string fileName, bool hasHeaders, bool autoDetectColumnType) {
StreamReader sr = new StreamReader( fileName);
Stream st = (Stream) sr.BaseStream;
return ImportExcelXML( st, hasHeaders, autoDetectColumnType);
}
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType) {
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)) {
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0) {
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders) {
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr)) {
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (autoDetectColumnType && rows.Count > 0) {
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count) {
dt.Columns.Add("Column" +
actualCellIndex.ToString(), autoDetectType.type);
columns.Add(autoDetectType);
} else {
dt.Columns[actualCellIndex].DataType = autoDetectType.type;
columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++) {
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count) {
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++) {
dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string));columns.Add(getDefaultType());
} // ii
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
}
Solution 2
Even without coulmn type its working fine too. thanks !!!
have sample for without coulmn type:
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders)
{
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
{
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0)
{
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders)
{
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
{
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (rows.Count > 0)
{
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
//ColumnType autoDetectType =
// getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count)
{
//dt.Columns.Add("Column" +
// actualCellIndex.ToString(), autoDetectType.type);
dt.Columns.Add("Column" + actualCellIndex.ToString());
//columns.Add(autoDetectType);
}
else
{
//dt.Columns[actualCellIndex].DataType = autoDetectType.type;
//columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++)
{
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count)
{
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
{
//dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
dt.Columns.Add("Column" + actualCellIndex.ToString());
//columns.Add(getDefaultType());
} // ii
//ColumnType autoDetectType = getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
// columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
Solution 3
Try the official API first (Microsoft Open XML SDK).
Solution 4
You can try with this code
var textReader = new XmlTextReader("...\\YourFile.xml");
// Read until end of file
while (textReader.Read())
{
XmlNodeType nType = textReader.NodeType;
// If node type us a declaration
if (nType == XmlNodeType.XmlDeclaration)
{
Console.WriteLine("Declaration:" + textReader.Name.ToString());
}
// if node type is a comment
if (nType == XmlNodeType.Comment)
{
Console.WriteLine("Comment:" + textReader.Name.ToString());
}
// if node type us an attribute
if (nType == XmlNodeType.Attribute)
{
Console.WriteLine("Attribute:" + textReader.Name.ToString());
}
// if node type is an element
if (nType == XmlNodeType.Element)
{
Console.WriteLine("Element:" + textReader.Name.ToString());
}
// if node type is an entity\
if (nType == XmlNodeType.Entity)
{
Console.WriteLine("Entity:" + textReader.Name.ToString());
}
// if node type is a Process Instruction
if (nType == XmlNodeType.Entity)
{
Console.WriteLine("Entity:" + textReader.Name.ToString());
}
// if node type a document
if (nType == XmlNodeType.DocumentType)
{
Console.WriteLine("Document:" + textReader.Name.ToString());
}
// if node type is white space
if (nType == XmlNodeType.Whitespace)
{
Console.WriteLine("WhiteSpace:" + textReader.Name.ToString());
}
}
Solution 5
public static class XMLtoDataTable
{
private static ColumnType getDefaultType()
{
return new ColumnType(typeof(String));
}
struct ColumnType
{
public Type type;
private string name;
public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
public object ParseString(string input)
{
if (String.IsNullOrEmpty(input))
return DBNull.Value;
switch (type.ToString())
{
case "system.datetime":
return DateTime.Parse(input);
case "system.decimal":
return decimal.Parse(input);
case "system.boolean":
return bool.Parse(input);
default:
return input;
}
}
}
private static ColumnType getType(XmlNode data)
{
string type = null;
if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
type = "";
else
type = data.Attributes["ss:Type"].Value;
switch (type)
{
case "DateTime":
return new ColumnType(typeof(DateTime));
case "Boolean":
return new ColumnType(typeof(Boolean));
case "Number":
return new ColumnType(typeof(Decimal));
case "":
decimal test2;
if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2))
{
return new ColumnType(typeof(Decimal));
}
else
{
return new ColumnType(typeof(String));
}
default://"String"
return new ColumnType(typeof(String));
}
}
public static DataSet ImportExcelXML(string fileName, bool hasHeaders, bool autoDetectColumnType)
{
StreamReader sr = new StreamReader(fileName);
Stream st = (Stream)sr.BaseStream;
return ImportExcelXML(st, hasHeaders, autoDetectColumnType);
}
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType)
{
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
{
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0)
{
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders)
{
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
{
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (autoDetectColumnType && rows.Count > 0)
{
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count)
{
dt.Columns.Add("Column" +
actualCellIndex.ToString(), autoDetectType.type);
columns.Add(autoDetectType);
}
else
{
dt.Columns[actualCellIndex].DataType = autoDetectType.type;
columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++)
{
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count)
{
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
{
dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
} // ii
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
}
Comments
-
Maciej almost 2 years
I wonder if there is an easy way to read Excel 2010 XML?
This XML has a different structure than I used to read.Especially ss:index attribute (ss:Index**="7") makes things a bit more complicated
EDIT: To explain better:
- I have file with XML extension which can be easily opened in Excel
- I'm looking a way to read this sheet programically (eg copy to DataTable)
- Noticed this is not common XML I used to work
- XML defines fields on the begining, than use ROW, CELL and DATA tags
- What surprised me is when there are eg 3 fields (cells) but 2nd field doesn't have value this field is 'skipped' in XML, but 3rd field has some additional 'index' attribute eg: ss:Index**="3" (which indicates that even it is on 2nd position right index should be "3"
fragment of example XML
<Row ss:AutoFitHeight="0"> <Cell><Data ss:Type="String">Johny</Data></Cell> <Cell ss:Index="3"><Data ss:Type="String">NY</Data></Cell>
-
Maciej over 11 yearsIsnt for reading .XLSX? I'm looking way to read files with XML extension. Tried this: msdn.microsoft.com/en-us/library/hh298534.aspx but said "File contains corrupted data."
-
Christian Hayter over 11 yearsYes it is for reading OOXML containers. However, you have to do extra work to extract the raw XML and process it, so why not avoid all that and just process the container directly?
-
Maciej over 11 yearsHow to get this: "ss:Index" atribute value ?
-
Aghilas Yakoub over 11 yearswith Value property on this block if (nType == XmlNodeType.Attribute) {var result = textReader.Value}
-
Maciej over 11 yearsNot really... When I did that: while (textReader.Read()) Console.WriteLine( textReader.NodeType +" -> "+textReader.Value ); Any attribute was listed
-
Umo almost 9 yearsJust wanted to say thanks! This works flawless with xml generated by CarlosAq XML Excel Writer Library after i changed the doubles (ss:) to singles (s:). Saved me hours of work! Thanks for sharing!
-
David Rogers about 7 yearsThis is for all intents and purposes an excellent solution, however be careful of duplicate column names, this code does not account for them and needs to be modified in case they are encountered.
-
Stephen Turner over 4 yearsThis API doesn't work for files saved from Excel as "XML Spreadsheet 2003 (.xml)", it's for "Strict Open XML Spreadsheet (.xlsx)" files.