How to compare the cell values of two data tables

11,310

Solution 1

According you the new requirements you might try this.

It first gets the primary key name - the Unique column, then the other columns names, creates a new DataTable with new column names, fills the values and creates an expression to compare the them:

void Main()
{
    // create some test data

    var dt1 = new DataTable();
    dt1.Columns.Add("emp_num", typeof(int));
    dt1.Columns.Add("salary", typeof(int));
    dt1.Columns.Add("ov", typeof(double));
    dt1.Columns[0].Unique = true;

    dt1.Rows.Add(455, 3000, 67.891);
    dt1.Rows.Add(677, 5000, 89.112);
    dt1.Rows.Add(778, 6000, 12.672);

    var dt2 = new DataTable();
    dt2.Columns.Add("emp_num", typeof(int));
    dt2.Columns.Add("salary", typeof(int));
    dt2.Columns.Add("ov", typeof(double));
    dt2.Columns[0].Unique = true;

    dt2.Rows.Add(455, 3000, 67.891);
    dt2.Rows.Add(677, 5000, 50.113);
    dt2.Rows.Add(778, 5500, 12.672);
    dt2.Rows.Add(779, 5500, 12.672);

    var result = CompareDataTables(dt1, dt2);
    result.Dump("Result");

}

CompareDataTables method:

static DataTable CompareDataTables(DataTable dt1, DataTable dt2)
{
    var keyName = dt1.Columns.Cast<DataColumn>().Single (x => x.Unique).ColumnName;
    var dt1Cols = dt1.Columns.Cast<DataColumn>().Where (x => !x.Unique).Select (x =>x.ColumnName );
    var dt2Cols = dt1.Columns.Cast<DataColumn>().Where (x => !x.Unique).Select (x =>x.ColumnName );

    // get keys from both data tables
    var keys = new HashSet<int>(dt1.AsEnumerable().Select (x => (int)x[keyName]));
    keys.UnionWith(dt2.AsEnumerable().Select (x => (int)x[keyName]));

    keys.Dump("keys");

    // create a new data table that will hold the results
    var result = new DataTable();
    result.Columns.Add(keyName, typeof(int));
    result.Columns[0].Unique = true;

    // initialize data and comparison columns
    foreach (var name in dt1Cols)
    {
        result.Columns.Add(name + "_off", dt1.Columns[name].DataType);
        result.Columns.Add(name + "_on", dt1.Columns[name].DataType);
        result.Columns.Add(name + "_same", typeof(bool), name + "_off = " + name + "_on");
    } 


    foreach (var key in keys)
    {
        // get a row from each data table with the current key
        var rowOff = dt1.Select(keyName + " = " + key).FirstOrDefault();
        var rowOn = dt2.Select(keyName + " = " + key).FirstOrDefault();

        // create a new row            
        var newRow = result.NewRow();

        // fill the new row with off data
        if (rowOff != null)
        {
            newRow[keyName] = rowOff[keyName];
            foreach (var name in dt1Cols)
            {
                newRow[name + "_off"] = rowOff[name];
            }
        }

        // fill the new row with on data
        if (rowOn != null)
        {
            foreach (var name in dt1Cols)
            {
                newRow[name + "_on"] = rowOn[name];
            }
            newRow[keyName] = rowOn[keyName];
        }

        // add the row to the result data table
        result.Rows.Add(newRow);        
    }

    return result;
}

Summary2

It's not bullet proof. It'd be a good idea to check if the data tables have the same structure.

Solution 2

Here's an implementation in a fairly generic method that compares two DataTables and returns another DataTable with the differences shown.

  • Dynamic 'key' column (only single column, not multiple).
  • Doesn't display rows that have the same data.
  • Handles NULLs in data.
  • Columns not in both tables.
  • T=object comparisons.

Populating the DataTables...


    /// Build data and test the underlying method.
    public void Main()
    {
        Dictionary columns = new Dictionary();
        columns.Add("emp_num", typeof(int));
        columns.Add("salary", typeof(int));
        columns.Add("ov", typeof(double));

        DataTable left = new DataTable();
        foreach(KeyValuePair column in columns)
        {
            left.Columns.Add(column.Key, column.Value);
        }
        left.Rows.Add(455, 3000, 67.891);
        left.Rows.Add(677, 5000, 89.112);
        left.Rows.Add(778, 6000, 12.672);
        left.Rows.Add(9001, 5500, 12.672);
        left.Rows.Add(4, null, 9.2);
        //left.Dump("Left");

        DataTable right = new DataTable();
        right.Columns.Add("outlier", typeof(string));
        foreach (KeyValuePair column in columns)
        {
            right.Columns.Add(column.Key, column.Value);
        }
        right.Columns.Add("float", typeof(float));
        right.Rows.Add(0, 455, 3000, 67.891, 5);
        right.Rows.Add(1, 677, 5000, 50.113, 5);
        right.Rows.Add(2, 778, 5500, 12.672, 6);
        right.Rows.Add(2, 9000, 5500, 12.672, 6);
        right.Rows.Add(3, 4, 10, 9.2, 7);
        //right.Dump("Right");


        // Compare.
        DataTable results = Compare(left, right, "emp_num");
        //results.Dump("Results"); // Fancy table output via LINQPad.

        // Get the comparison columns for display.
        List comparedColumns = new List();
        foreach (DataColumn column in results.Columns)
        {
            comparedColumns.Add(column.ColumnName);
        }

        // Display the comparison rows.
        Console.WriteLine(string.Join(", ", comparedColumns));
        foreach(DataRow row in results.Rows)
        {
            Console.WriteLine(string.Join(", ", row.ItemArray));
        }
    }

Fancy table output from LINQPad of the input.

Generic Method: DataTable Compare(DataTable, DataTable)


    /// Compares the values of each row in the provided DataTables and returns any rows that have a difference based on a provided 'key' column.
    /// the 'pre' data.
    /// the 'post' data.
    /// Name of the column to use for matching rows.
    /// New DataTable populated with difference rows only.
    public DataTable Compare(DataTable left, DataTable right, string keyColumn)
    {
        const string Pre = "_Pre";
        const string Post = "_Post";

        DataColumn leftKey = left.Columns.Contains(keyColumn) ? left.Columns[keyColumn] : null;
        DataColumn rightKey = right.Columns.Contains(keyColumn) ? right.Columns[keyColumn] : null;

        if (leftKey == null || rightKey == null)
        {
            return null;
        }

        // Get the matching columns between the two tables for doing comparisons.
        List comparisonColumns = new List();
        DataTable results = new DataTable();
        // Adding the key column to the front for sake of ease of viewing.
        results.Columns.Add(new DataColumn(leftKey.ColumnName, leftKey.DataType));
        foreach (DataColumn column in left.Columns)
        {
            if(column == leftKey)
            {
                continue;
            }

            // Remove any columns that are not present in the compare table.
            foreach (DataColumn compareColumn in right.Columns)
            {
                if (column.ColumnName == compareColumn.ColumnName && column.DataType == compareColumn.DataType)
                {
                    comparisonColumns.Add(column.ColumnName);
                    results.Columns.Add(new DataColumn(column.ColumnName + Pre, column.DataType));
                    results.Columns.Add(new DataColumn(column.ColumnName + Post, column.DataType));
                    break;
                }
            }
        }

        foreach (DataRow leftRow in left.Rows)
        {
            object key = leftRow.Field(leftKey);
            string filterExpression = string.Format("{0} = {1}", keyColumn, key);
            DataRow rightRow = right.Select(filterExpression).SingleOrDefault();
            // Need a row for a comparison to be valid.
            if (rightRow == null)
            {
                continue;
            }

            List comparison = new List();
            comparison.Add(key);
            bool isDiff = false;
            foreach (string comparisonColumn in comparisonColumns)
            {
                object pre = leftRow.ItemArray[left.Columns.IndexOf(comparisonColumn)];
                comparison.Add(pre);
                object post = rightRow.ItemArray[right.Columns.IndexOf(comparisonColumn)];
                comparison.Add(post);

                // Only need the row if the values differ in at least one column.
                isDiff |= (pre == null && post != null) || (pre != null && post == null) || (!pre.Equals(post));
            }
            if (isDiff)
            {
                results.Rows.Add(comparison.ToArray());
            }
        }

        return results;
    }

Output:

Fancy table output from LINQPad of the output.


    emp_num, salary_Pre, salary_Post, ov_Pre, ov_Post
    677, 5000, 5000, 89.112, 50.113
    778, 6000, 5500, 12.672, 12.672
    4, , 10, 9.2, 9.2

Solution 3

Here is one way to achieve your task:

 var dt1 = new DataTable();
        dt1.Columns.Add("emp_num", typeof(int));
        dt1.Columns.Add("salary", typeof(int));
        dt1.Columns.Add("ov", typeof(double));
        dt1.Columns[0].Unique = true;

        dt1.Rows.Add(455, 3000, 67.891);
        dt1.Rows.Add(677, 6000, 50.113);
        dt1.Rows.Add(778, 5500, 12.650);
        dt1.Rows.Add(779, 5500, 12.672);

        var dt2 = new DataTable();
        dt2.Columns.Add("emp_num", typeof(int));
        dt2.Columns.Add("salary", typeof(int));
        dt2.Columns.Add("ov", typeof(double));
        dt2.Columns[0].Unique = true;

        dt2.Rows.Add(455, 3000, 67.891);
        dt2.Rows.Add(677, 5000, 50.113);
        dt2.Rows.Add(778, 5500, 12.672);
        dt2.Rows.Add(779, 5500, 12.672);

        var dtListValues1 = new List<List<string>>();



        for (int j = 0; j < dt2.Rows.Count; j++)
        {
            var list = new List<string>();
            for (var i = 0; i < dt2.Columns.Count; i++)
            {                

                    list.Add(dt2.Rows[j][i].ToString());
                list.Add("===");

                    list.Add(dt1.Rows[j][i].ToString());
                list.Add("||");
                if(dt2.Rows[j][i].ToString() == dt1.Rows[j][i].ToString())
                {
                    list.Add("true");
                }
                else
                {
                    list.Add("false");
                }

            }
            dtListValues1.Add(list);
        }

        var rowsWithDifferentCells = dtListValues1.Where(x => x.Contains("false"));


        foreach (var item in dtListValues1)
        {
            Console.WriteLine("Row-->> "+ string.Join(",",item));
        }
        Console.WriteLine("----------------------------------------");
        foreach (var item in rowsWithDifferentCells)
        {
            Console.WriteLine("Row with different cell-->> "+string.Join(",", item));
        }

I put all test data in a List<List<string>>(), in the process I also made check if the values are the same. Then I put a filter on false, to show me only the rows which contain cells with different values.

You just need to put this code in a ConsoleApp. and test it.

For sure is not the best approach,but is a quick way to check your data.

Solution 4

You can get all keys first, then create a new Summary objects, put the data there and let it do the comparison work. Finally you can do whatever you want with it:

void Main()
{
    var dt1 = new DataTable();
    dt1.Columns.Add("emp_num", typeof(int));
    dt1.Columns.Add("salary", typeof(int));
    dt1.Columns.Add("ov", typeof(double));
    dt1.Rows.Add(455, 3000, 67.891);
    dt1.Rows.Add(677, 5000, 89.112);
    dt1.Rows.Add(778, 6000, 12.672);

    var dt2 = new DataTable();
    dt2.Columns.Add("emp_num", typeof(int));
    dt2.Columns.Add("salary", typeof(int));
    dt2.Columns.Add("ov", typeof(double));
    dt2.Rows.Add(455, 3000, 67.891);
    dt2.Rows.Add(677, 5000, 50.113);
    dt2.Rows.Add(778, 5500, 12.672);
    dt2.Rows.Add(779, 5500, 12.672);

    var keys = new HashSet<int>(dt1.AsEnumerable().Select (x => (int)x["emp_num"]));
    keys.UnionWith(dt2.AsEnumerable().Select (x => (int)x["emp_num"]));

    keys.Dump("emp_num (keys)");

    var results = keys.Select (emp_num => 
    {
        var rowOff = dt1.Select("emp_num = " + emp_num).FirstOrDefault();
        var rowOn = dt2.Select("emp_num = " + emp_num).FirstOrDefault();
        return new Summary(emp_num, rowOff, rowOn);
    });

    results.Dump("Summary");
}

Summary helper class:

class Summary
{
    public Summary(int emp_num, DataRow rowOff, DataRow rowOn)
    {
        this.emp_num = emp_num;

        if (rowOff != null)
        {
            salary_off = (int)rowOff["salary"];
            ov_off = (double)rowOff["ov"];
        }

        if (rowOn != null)
        {
            salary_on = (int)rowOn["salary"];
            ov_on = (double)rowOn["ov"];
        }
    }
    public int emp_num;

    public int salary_off ;
    public int salary_on;
    public bool salarySame { get { return salary_off == salary_on; }  }

    public double ov_off ;
    public double ov_on;
    public bool ovSame { get { return ov_off == ov_on; } }

}

Summary

Solution 5

If this is SQL related, then you could use the except SQL command:

SELECT * FROM TB_Online EXCEPT SELECT * FROM TB_Offline

You can take a look here, under "Compare Tables Using the EXCEPT Clause"

Share:
11,310

Related videos on Youtube

Anyname Donotcare
Author by

Anyname Donotcare

Updated on June 04, 2022

Comments

  • Anyname Donotcare
    Anyname Donotcare almost 2 years

    If i have two data tables with same structure,the same primary key and the same number of columns .

    How to compare their content and detect the cells which are not the same in the two data tables ?

    ex:

    TB_Offline

    emp_num(key)  salary      ov    
    
     455           3000      67.891   
     677           5000      89.112    
     778           6000      12.672   
    

    TB_Online

    emp_num(key)  salary      ov  
    
     455           3000      67.891 
     677           5000      50.113 
     778           5500      12.672   
    

    I want to get result like this(or some structure to show the differences) :

    emp_num(key)| salary_off |salary_on|s1   | ov_off  |  ov_on  |s2         
    
       677      |   5000     |    5000 | 1   |  89.112 |  50.113 | 0    
       778      |   6000     |    5500 | 0   |  12.672 |  12.672 | 1 
    

    NOTE:

    455 doesn't exist in the result because it was the exact in all columns among the two datatables.
    
  • Anyname Donotcare
    Anyname Donotcare about 8 years
    +1 for the answer ,thanks a lot but this method is limited ,i expect generic method to do the comparison .
  • t3chb0t
    t3chb0t about 8 years
    @AnynameDonotcare What do you mean by generic method? There is nothing about it in you question ;-)
  • Anyname Donotcare
    Anyname Donotcare about 8 years
    I appreciate your effort a lot :) , i mean in the constructor u use specific fields (int)rowOff["salary"] ,(double)rowOff["ov"] and with specific types . I want to use this method with any two datatables with any structure .
  • Anyname Donotcare
    Anyname Donotcare about 8 years
    sorry , your answer relating to the sql not the .net
  • Anyname Donotcare
    Anyname Donotcare about 8 years
    Could U clarify what's Dump method ? Is this a .net method ?
  • t3chb0t
    t3chb0t about 8 years
    Oh, sorry. It's copy paste from LINQPad and Dump is its method to dump objects. It created the grids on the picture for easier debugging.