How to use a dynamic CSV delimiter with FileHelpers?
Solution 1
I just realized there is a DelimitedFileEngine
which solves your problem another way.
You can just go
// The record class supplies the field layout; the delimiter declared on it is replaced below.
var recordType = typeof(BaseCustomer);
var engine = new DelimitedFileEngine(recordType);
// Runtime override of the delimiter declared by the [DelimitedRecord] attribute.
engine.Options.Delimiter = ",";
It seems that BaseCustomer
still needs to be decorated with a [DelimitedRecord]
attribute (otherwise an exception is raised), but its delimiter is overridden by whatever is supplied to engine.Options.Delimiter.
The following example imports a comma delimited record using a format which is marked as bar delimited.
/// <summary>
/// Sample record made of four string fields. The attribute declares "|" as the
/// delimiter; the accompanying Main example overrides it on the engine with ",".
/// </summary>
[DelimitedRecord("|")]
public class Format1
{
// NOTE(review): fields are presumably mapped to columns in declaration order — confirm against the FileHelpers docs before reordering.
public string Field1;
public string Field2;
public string Field3;
public string Field4;
}
/// <summary>
/// Imports a comma-separated record through a record class that is declared
/// as bar-delimited, by overriding the delimiter on the engine, and checks
/// the four parsed field values.
/// </summary>
static void Main(string[] args)
{
    var engine = new DelimitedFileEngine(typeof(Format1));

    // change the delimiter
    engine.Options.Delimiter = ",";

    // import a comma separated record
    object[] records = engine.ReadString(@"a,b,c,d");

    foreach (object record in records)
    {
        // Skip anything that is not a Format1 (this also covers null entries).
        Format1 row = record as Format1;
        if (row == null)
        {
            continue;
        }

        // process it (for example, check the values)
        Assert.AreEqual("a", row.Field1);
        Assert.AreEqual("b", row.Field2);
        Assert.AreEqual("c", row.Field3);
        Assert.AreEqual("d", row.Field4);
    }
}
Solution 2
No, that's not possible.
But you can use the FileHelper DelimitedClassBuilder
to build a dynamic file parser where you can set the delimiter at runtime:
// Describe the record class at runtime instead of with a compile-time attribute.
// The second constructor argument is the column separator, so it can be any
// string chosen at runtime (the first is presumably the generated class name — confirm).
DelimitedClassBuilder dcb = new DelimitedClassBuilder("Name",
"Here goes your col separator");
// You have to build your field definitions by hand now:
// each AddField call takes a field name and a field type.
dcb.AddField("FieldName", typeof(decimal));
...
// Build the engine from the record class generated by the builder.
DelimitedFileEngine fileEngine = new DelimitedFileEngine(dcb.CreateRecordClass());
// Read the file. The records are held as dynamic because the record class
// is only created at runtime and has no compile-time type to cast to.
dynamic[] data = fileEngine.ReadFile(filePath);
Solution 3
You can use runtime classes. You have two choices. Either compile your class from a string,
for instance:
// The class definition
// The class definition, built as source text so the delimiter can be chosen at runtime.
// The delimiter is emitted as a verbatim string literal (@"...") with embedded
// double quotes doubled, so delimiters containing a backslash or a quote still
// yield valid C# source.
public string mClass =
@"
[DelimitedRecord(@""" + delimiter.Replace("\"", "\"\"") + @""")]
public class BaseCustomer
{
public int CustId;
public string Name;
public decimal Balance;
[FileHelpers.FieldConverter(FileHelpers.ConverterKind.Date, ""ddMMyyyy"")]
public DateTime AddedDate;
}
";
// Compile the source text into a record type and build an engine for it.
Type t = ClassBuilder.ClassFromString(mClass);
FileHelperEngine engine = new FileHelperEngine(t);
// Read the file into a DataTable. (The original statement declared no variable
// name and therefore did not compile.)
DataTable table = engine.ReadFileAsDT("test.txt");
Or alternatively, you can use the DelimitedClassBuilder class.
// Build the record class field by field, with the delimiter supplied at runtime.
// The layout mirrors the string-compiled BaseCustomer class:
// CustId, Name, Balance, AddedDate (date format "ddMMyyyy").
DelimitedClassBuilder cb = new DelimitedClassBuilder("BaseCustomer", delimiter);
cb.AddField("CustId", typeof(int));
cb.LastField.TrimMode = TrimMode.Both;
cb.LastField.FieldNullValue = 0;
// The Name column was missing, which shifted every following column by one.
cb.AddField("Name", typeof(string));
cb.AddField("Balance", typeof(Decimal));
cb.AddField("AddedDate", typeof(DateTime));
// Same converter as the [FieldConverter(ConverterKind.Date, "ddMMyyyy")] attribute.
cb.LastField.Converter.Kind = ConverterKind.Date;
cb.LastField.Converter.Arg1 = "ddMMyyyy";
engine = new FileHelperEngine(cb.CreateRecordClass());
DataTable dt = engine.ReadFileAsDT("test.txt");
Solution 4
It is possible. But only by moving the serialization type into a separate assembly.
Like this:
using System;
using System.Collections.Generic;
using System.Windows.Forms;
namespace FlaechenupdateScript
{
static class Program
{
// http://www.codeproject.com/KB/cs/runtimecompiling.aspx
/// <summary>
/// Compiles C# source text into an in-memory assembly. The compilation
/// references SerializationTypes.dll and FileHelpers.dll, both looked up in
/// the directory of the executing assembly.
/// </summary>
/// <param name="code">The C# source text to compile.</param>
/// <returns>The compiled assembly.</returns>
/// <exception cref="System.InvalidOperationException">
/// The compiler reported errors; the message lists every reported diagnostic.
/// </exception>
private static System.Reflection.Assembly BuildAssembly(string code)
{
    string basePath = System.IO.Path.GetDirectoryName(
        System.Reflection.Assembly.GetExecutingAssembly().Location);

    System.CodeDom.Compiler.CompilerParameters compilerParams =
        new System.CodeDom.Compiler.CompilerParameters();
    compilerParams.ReferencedAssemblies.Add(System.IO.Path.Combine(basePath, "SerializationTypes.dll"));
    compilerParams.ReferencedAssemblies.Add(System.IO.Path.Combine(basePath, "FileHelpers.dll"));
    compilerParams.GenerateExecutable = false;
    compilerParams.GenerateInMemory = true;

    // Compile through the provider itself: CreateCompiler()/ICodeCompiler are
    // obsolete, and the provider is disposable, so release it when done.
    using (Microsoft.CSharp.CSharpCodeProvider provider = new Microsoft.CSharp.CSharpCodeProvider())
    {
        System.CodeDom.Compiler.CompilerResults results =
            provider.CompileAssemblyFromSource(compilerParams, code);

        if (results.Errors.HasErrors)
        {
            System.Text.StringBuilder errors = new System.Text.StringBuilder("Compiler Errors :\r\n");
            foreach (System.CodeDom.Compiler.CompilerError error in results.Errors)
            {
                errors.AppendFormat("Line {0},{1}\t: {2}\n",
                    error.Line, error.Column, error.ErrorText);
            }

            // More specific than a bare Exception, still caught by catch (Exception).
            throw new System.InvalidOperationException(errors.ToString());
        }

        return results.CompiledAssembly;
    }
} // End Function BuildAssembly
/// <summary>
/// Compiles, at runtime, an empty subclass of <paramref name="tt"/> that is
/// decorated with [FileHelpers.DelimitedRecord] for the given delimiter (plus
/// IgnoreFirst and IgnoreEmptyLines), and returns the generated type.
/// </summary>
/// <param name="tt">The base record type the generated class derives from.</param>
/// <param name="strDelimiter">The field delimiter to put into the attribute.</param>
/// <returns>The runtime-compiled type, declared in namespace CrapLord under a unique name.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="tt"/> or <paramref name="strDelimiter"/> is null.
/// </exception>
public static Type GetClassType(Type tt, string strDelimiter)
{
    if (tt == null)
    {
        throw new ArgumentNullException("tt");
    }

    if (strDelimiter == null)
    {
        throw new ArgumentNullException("strDelimiter");
    }

    // Reflection reports nested types as "Outer+Inner"; C# source needs "Outer.Inner".
    string strFullTypeName = tt.FullName.Replace('+', '.');

    // "N" format has no dashes; the leading underscore makes it a valid identifier.
    string strTypeUniqueName = "_" + System.Guid.NewGuid().ToString("N");

    // The delimiter is emitted as a verbatim literal (@"...") with quotes doubled,
    // so a backslash or a double quote in it cannot break the generated source.
    string source = @"
namespace CrapLord
{
[FileHelpers.IgnoreFirst]
[FileHelpers.IgnoreEmptyLines]
[FileHelpers.DelimitedRecord(@""" + strDelimiter.Replace("\"", "\"\"") + @""")]
public class " + strTypeUniqueName + @" : " + strFullTypeName + @"
{
}
}
";

    System.Reflection.Assembly a = BuildAssembly(source);

    // Look the type up directly instead of instantiating a record just to call
    // GetType() on it; throwOnError gives a clear failure instead of a null reference.
    return a.GetType("CrapLord." + strTypeUniqueName, true);
}
/// <summary>
/// The main entry point for the application. Builds a comma-delimited record
/// type at runtime, reads the data file with it and prints every customer.
/// </summary>
[STAThread]
static void Main()
{
    // Runtime-compiled subclass of Customer carrying the "," delimiter attribute.
    Type recordType = GetClassType(typeof(Tools.Serialization.CSV.Customer), ",");
    var engine = new FileHelpers.FileHelperEngine(recordType);

    string dataFile = "path/to/datafile";
    var customers = (Tools.Serialization.CSV.Customer[])engine.ReadFile(dataFile);

    for (int i = 0; i < customers.Length; i++)
    {
        Tools.Serialization.CSV.Customer customer = customers[i];

        Console.WriteLine();
        Console.WriteLine("Customer: " + customer.CustId + " - " + customer.Name);
        Console.WriteLine("Added Date: " + customer.AddedDate.ToString("d-M-yyyy"));
        Console.WriteLine("Balance: " + customer.Balance);
        Console.WriteLine();
        Console.WriteLine("-----------------------------");
    }

    Console.WriteLine(Environment.NewLine);
    Console.WriteLine(" --- Press any key to continue --- ");
    Console.ReadKey();
}
}
}
SerializationTypes Assembly:
using System;
using System.Collections.Generic;
using System.Text;
namespace Tools.Serialization.CSV
{
//------------------------
// RECORD CLASS (Example, change at your will)
// TIP: Remember to use the wizard to generate this class
/// <summary>
/// Base record type. It carries no [DelimitedRecord] attribute itself:
/// Program.GetClassType compiles a subclass at runtime that adds the attribute
/// with the requested delimiter.
/// </summary>
public class Customer
{
// NOTE(review): fields are presumably mapped to columns in declaration order — confirm before reordering.
public int CustId;
public string Name;
public decimal Balance;
// Date column, converted with the "ddMMyyyy" format.
[FileHelpers.FieldConverter(FileHelpers.ConverterKind.Date, "ddMMyyyy")]
public DateTime AddedDate;
}
}
Solution 5
Maybe you want to use the TextFieldParser from Microsoft.VisualBasic.FileIO Namespace:
// Read a "|"-delimited text file row by row without FileHelpers.
using (var reader = new Microsoft.VisualBasic.FileIO.TextFieldParser(filename))
{
    reader.Delimiters = new[] { "|" };
    // Quote characters are treated as ordinary field content.
    reader.HasFieldsEnclosedInQuotes = false;

    while (!reader.EndOfData)
    {
        // One array element per field of the current row.
        string[] fields = reader.ReadFields();
        //Do what you need
    }
}
Stefan Steiger
I'm an avid HTTP-header-reader, github-user and a few more minor things like BusinessIntelligence & Web Software Developer Technologies I work with: Microsoft Reporting- & Analysis Service (2005-2016), ASP.NET, ASP.NET MVC, .NET Core, ADO.NET, JSON, XML, SOAP, Thrift ActiveDirectory, OAuth, MS Federated Login XHTML5, JavaScript (jQuery must die), ReverseAJAX/WebSockets, WebGL, CSS3 C#, .NET/mono, plain old C, and occasional C++ or Java and a little Bash-Scripts, Python and PHP5 I have a rather broad experience with the following relational SQL databases T-SQL PL/PGsql including CLR / extended stored procedures/functions Occasionally, I also work with MySQL/MariaDB Firebird/Interbase Oracle 10g+ SqLite Access I develop Enterprise Web-Applications (.NET 2.0 & 4.5) and interface to systems like LDAP/AD (ActiveDirectory) WebServices (including WCF, SOAP and Thrift) MS Federated Login OAuth DropBox XML & JSON data-stores DWG/SVG imaging for architecture In my spare-time, I'm a Linux-Server-Enthusiast (I have my own Web & DNS server) and reverse-engineer with interest in IDS Systems (IntrusionDetection), WireShark, IDA Pro Advanced, GDB, libPCAP. - Studied Theoretical Physics at the Swiss Federal Institute of Technology (ETHZ).
Updated on June 28, 2022
Comments
-
Stefan Steiger almost 2 years
Question: I need to read a CSV file. I use the FileHelpers library to achieve this.
The problem is I need a dynamic delimiter (user defined), meaning anything can be delimiter (Comma, semicolon, tab, newline, but also anything else).
The problem is, FileHelpers defines the delimiter in an attribute, which means at compile-time. This makes it impossible to do it dynamically.
What I can do is declare a new class, which inherits from one base class, and set the delimiter on this new class.
/// <summary>BaseCustomer record bound to the "," delimiter.</summary>
[FileHelpers.DelimitedRecord(",")]
public class CommaCustomer : BaseCustomer
{
}
That way I only have to make changes in the base class for every new delimiter. The problem is that I can't (and don't want to) create a child class for every possible delimiter.
This is the code I have so far:
using System;
using System.Data;
using System.IO;
//using FileHelpers;
//using FileHelpers.RunTime;

namespace Examples
{
    /// <summary>
    /// Sample program: reads a semicolon-delimited customer file and prints each record.
    /// </summary>
    class MainClass
    {
        /// <summary>
        /// Entry point. The delimiter is fixed at compile time by the record
        /// class passed to the engine (SemicolonCustomer).
        /// </summary>
        [STAThread]
        static void Main()
        {
            FileHelpers.FileHelperEngine engine = new FileHelpers.FileHelperEngine(typeof(SemicolonCustomer));

            // To read use:
            string str = @"D:\Username\Desktop\FileHelpers_Examples_CSharp_VbNet\Data\SemicolonCustomers.txt";
            //str = @"D:\Username\Desktop\FileHelpers_Examples_CSharp_VbNet\Data\CustomersDelimited.txt";
            SemicolonCustomer[] custs = (SemicolonCustomer[])engine.ReadFile(str);
            //Customer[] custs = (Customer[]) engine.ReadFile("yourfile.txt");

            foreach (SemicolonCustomer cli in custs)
            {
                Console.WriteLine();
                Console.WriteLine("Customer: " + cli.CustId.ToString() + " - " + cli.Name);
                Console.WriteLine("Added Date: " + cli.AddedDate.ToString("d-M-yyyy"));
                Console.WriteLine("Balance: " + cli.Balance.ToString());
                Console.WriteLine();
                Console.WriteLine("-----------------------------");
            } // Next cli

            Console.ReadKey();
            Console.WriteLine("Writing data to a delimited file...");
            Console.WriteLine();

            // To write use:
            //engine.WriteFile("myyourfile.txt", custs);

            //If you are using .NET 2.0 or greater is
            //better if you use the Generics version:
            // FileHelperEngine engine = new FileHelperEngine<Customer>();
            // To read use (no casts =)
            // Customer[] custs = engine.ReadFile("yourfile.txt");
            // To write use:
            // engine.WriteFile("yourfile.txt", custs);
        } // End Sub Main
    } // End Class MainClass

    //------------------------
    // RECORD CLASS (Example, change at your will)
    // TIP: Remember to use the wizard to generate this class
    /// <summary>Field layout shared by all delimiter-specific customer records.</summary>
    public class BaseCustomer
    {
        public int CustId;
        public string Name;
        public decimal Balance;

        // Date column, converted with the "ddMMyyyy" format.
        [FileHelpers.FieldConverter(FileHelpers.ConverterKind.Date, "ddMMyyyy")]
        public DateTime AddedDate;
    }

    /// <summary>BaseCustomer record bound to the ";" delimiter.</summary>
    [FileHelpers.DelimitedRecord(";")]
    public class SemicolonCustomer : BaseCustomer
    {
    }

    /// <summary>BaseCustomer record bound to the "," delimiter.</summary>
    [FileHelpers.DelimitedRecord(",")]
    public class CommaCustomer : BaseCustomer
    {
    }
}
Is it somehow possible at runtime to compile a child class
[FileHelpers.DelimitedRecord(\"" + delimiter + "\")] public class AnyDelimiterCustomer : BaseCustomer { }
And then reference this runtime compiled class in code ?
-
Stefan Steiger over 12 years
Just got further. Now it has problems with thousand separators. And it doesn't work well with properties.
-
shamp00 over 12 years
You seem to be making it way too complicated. Just pass your
string xx
to ClassBuilder.ClassFromString(xx).
-
Stefan Steiger over 12 years
@shamp00: No, I don't. Putting a return FileHelpers.RunTime.ClassBuilder.ClassFromString(xx); in my GetClassType function will throw a "Namespace not found exception". FYI, I do it like this so that I don't have to define the class as a string. I want to define it as an actual class, which is much faster...
-
shamp00 over 12 years
I'm not sure I understand you. The speed improvement would only be with respect to not having to convert the string into a class (once per class). It would have no effect on the performance of the import. Also, you can put
using
statements in the string you provide to solve any namespace problems. Or alternatively, use the DelimitedClassBuilder.
-
Stefan Steiger over 12 years
@shamp00: No, the speed improvement is in not having to convert the class into a string when writing the program. Runtime performance is irrelevant (in this case).
-
Nigiri almost 11 years
This is what I was looking for!! Nice!!