Sometimes you need a simple way to compare a source XML file to a target XML file without the benefit of a schema. While solutions using XSLT, and even Microsoft’s XmlDiff, exist, the simplest way out seems to be the DataSet’s ReadXml() method and a couple of loops. The code below assumes that every record in both files carries an Id element that uniquely identifies it:
/// <summary>
/// Compares the first table inferred from two XML files, matching rows on their "Id" column,
/// and returns a human-readable log of the differences.
/// </summary>
/// <param name="sourceFileName">Path of the XML file treated as the baseline.</param>
/// <param name="targetFileName">Path of the XML file compared against the baseline.</param>
/// <returns>
/// One entry per source row examined, one entry per differing column value, one entry per
/// source row missing from the target, and one entry per target row missing from the source.
/// If either file cannot be loaded or keyed on "Id", the log contains a single "Error: ..."
/// entry instead. Columns that exist only in the target are not compared.
/// </returns>
public static List<string> Compare(string sourceFileName, string targetFileName)
{
    const string keyColumnName = "Id";
    List<string> changeLog = new List<string>();

    using (DataSet dsSource = new DataSet("Source"))
    using (DataSet dsTarget = new DataSet("Target"))
    {
        DataTable sourceTable;
        DataTable targetTable;
        try
        {
            sourceTable = LoadKeyedTable(dsSource, sourceFileName, keyColumnName);
            targetTable = LoadKeyedTable(dsTarget, targetFileName, keyColumnName);
        }
        catch (Exception ex)
        {
            // Loading can fail in many ways (missing file, malformed XML, duplicate or
            // missing key values). Without both tables there is nothing to compare, so
            // report the problem and stop instead of failing later with a misleading error.
            Console.WriteLine("Error: " + ex.Message);
            changeLog.Add("Error: " + ex.Message);
            return changeLog;
        }

        // Loop through the rows in the source file and see if each exists in the target.
        foreach (DataRow sourceRow in sourceTable.Rows)
        {
            string id = sourceRow[keyColumnName].ToString();
            changeLog.Add("Comaring Source Row " + id);

            DataRow targetRow = targetTable.Rows.Find(id);
            if (targetRow == null)
            {
                changeLog.Add("the row cannot be found in the target");
                continue;
            }

            // Compare each column in the source row to the same column in the target row.
            foreach (DataColumn sourceColumn in sourceTable.Columns)
            {
                string columnName = sourceColumn.ColumnName;

                // Without a schema the two files may infer different column sets;
                // indexing a row by an unknown column name would throw.
                if (!targetTable.Columns.Contains(columnName))
                {
                    changeLog.Add(columnName + " does not exist in the target");
                    continue;
                }

                string sourceValue = sourceRow[columnName].ToString();
                string targetValue = targetRow[columnName].ToString();
                if (sourceValue != targetValue)
                {
                    changeLog.Add(columnName + " has changed from: "
                        + sourceValue + " to " + targetValue);
                }
            }
        }

        // Determine if there are any rows in target that don't exist in source.
        foreach (DataRow targetRow in targetTable.Rows)
        {
            if (sourceTable.Rows.Find(targetRow[keyColumnName].ToString()) == null)
            {
                changeLog.Add("A row was found in target that doesn't exist in source");
            }
        }
    }

    return changeLog;
}

/// <summary>
/// Reads an XML file into the given data set and makes the named column the primary key
/// of the first table, so rows can be looked up with <c>Rows.Find</c>.
/// </summary>
/// <param name="dataSet">The data set that receives the file's contents.</param>
/// <param name="fileName">Path of the XML file to read.</param>
/// <param name="keyColumnName">Name of the column that identifies each row.</param>
/// <returns>The first table of the data set, with its primary key set.</returns>
/// <exception cref="InvalidOperationException">
/// The file produced no tables, or its first table has no column named <paramref name="keyColumnName"/>.
/// </exception>
private static DataTable LoadKeyedTable(DataSet dataSet, string fileName, string keyColumnName)
{
    dataSet.ReadXml(fileName);

    if (dataSet.Tables.Count == 0)
    {
        throw new InvalidOperationException("'" + fileName + "' does not contain any rows to compare.");
    }

    DataTable table = dataSet.Tables[0];
    DataColumn keyColumn = table.Columns[keyColumnName];
    if (keyColumn == null)
    {
        // Fail here with a clear message rather than later, when the rows are read by key.
        throw new InvalidOperationException(
            "'" + fileName + "' has no '" + keyColumnName + "' column to match rows on.");
    }

    table.PrimaryKey = new DataColumn[] { keyColumn };
    return table;
}