domingo, 22 de agosto de 2010

XML DIFF - Show differences in XML

This little routine displays the differences between two XML files by analyzing their contents and returning a list of differences. There is a limitation with respect to the structure of the XML being compared.
Depending on the content and structure of your XML, you may need to make some modifications to the implementation. When the XML contains collections, the comparison is done element by element, so if an element is out of order the result may not be what you expect. Ideally, the XML should be typed (following an XML Schema).


Class TechParseCounter:
/**
* Desenvolvido por Ricardo Alberto Harari em 05/06/2005 - 20:43 - GMT-3:00
*
* Este codigo pode ser usado livremente, inclusive para fins comerciais
* desde que mantenha referencia ao autor original e não altere o fully qualified name desta classe
*
* Technique T.I. Ltda
* www.technique.com.br
*
* @author Ricardo A. Harari
*
*/

package com.technique.xmlUtil;

/**
 * Small mutable counter: it starts at a caller-supplied value and can only
 * be advanced one step at a time.
 */
public class TechParseCounter {

    /** Current value of the counter. */
    private int count;

    /**
     * Creates a counter holding the given starting value.
     *
     * @param i the initial count
     */
    public TechParseCounter(int i) {
        this.count = i;
    }

    /** Advances the counter by exactly one. */
    public void add() {
        this.count += 1;
    }

    /**
     * Reads the counter.
     *
     * @return the current count
     */
    public int getCounter() {
        return this.count;
    }
}



CLASS TechParseContentItem
/**
* Desenvolvido por Ricardo Alberto Harari em 05/06/2005 - 20:43 - GMT-3:00
*
* Este codigo pode ser usado livremente, inclusive para fins comerciais
* desde que mantenha referencia ao autor original e não altere o fully qualified name desta classe
*
* Technique T.I. Ltda
* www.technique.com.br
*
* @author Ricardo A. Harari
*
*/

package com.technique.xmlUtil;

/**
 * One difference between two XML documents: the kind of change (insert,
 * update or delete), the key and attribute name it applies to, and the
 * value before and after the change.
 */
public class TechParseContentItem {

    /** Display names, indexed by action code. */
    static final String[] ACTION_NAMES = new String[] {
        "insert",
        "update",
        "delete"
    };

    /** Action code: the value exists only in the new document. */
    public static final int ACTION_INSERT = 0;
    /** Action code: the value exists in both documents but differs. */
    public static final int ACTION_UPDATE = 1;
    /** Action code: the value exists only in the old document. */
    public static final int ACTION_DELETE = 2;

    /** One of {@link #ACTION_INSERT}, {@link #ACTION_UPDATE}, {@link #ACTION_DELETE}. */
    public int action;

    /** Key identifying the changed value. */
    public String key;
    /** Name of the element or attribute that changed. */
    public String attributeName;
    /** Value before the change; may be null (e.g. for an insert). */
    public String oldValue;
    /** Value after the change; may be null (e.g. for a delete). */
    public String newValue;

    /**
     * Creates a difference record.
     *
     * @param action        one of the ACTION_* codes
     * @param key           key identifying the changed value
     * @param attributeName name of the element or attribute that changed
     * @param oldValue      previous value, or null when there was none
     * @param newValue      new value, or null when there is none
     */
    public TechParseContentItem(int action, String key, String attributeName, String oldValue, String newValue) {
        this.action = action;
        this.key = key;
        this.attributeName = attributeName;
        this.oldValue = oldValue;
        this.newValue = newValue;
    }

    /**
     * Returns the display name of this item's action.
     *
     * @return "insert", "update" or "delete"
     * @throws ArrayIndexOutOfBoundsException if {@link #action} is not one of the ACTION_* codes
     */
    public String actionName() {
        return ACTION_NAMES[action];
    }

    /**
     * Renders every field on a single line. Never throws: any action code
     * other than insert or update is labelled "delete".
     *
     * @return a one-line description of this difference
     */
    public String toString() {
        // Take the labels from ACTION_NAMES so they cannot drift from actionName().
        String act = (action == ACTION_INSERT || action == ACTION_UPDATE)
                ? ACTION_NAMES[action]
                : ACTION_NAMES[ACTION_DELETE];
        return "action=[" + act + "], key=[" + key + "], attributeName=[" + attributeName + "], oldValue=[" + oldValue + "], newValue=[" + newValue + "]";
    }

}


CLASS TechParseDiff
/**
* Desenvolvido por Ricardo Alberto Harari em 05/06/2005 - 20:43 - GMT-3:00
*
* Este codigo pode ser usado livremente, inclusive para fins comerciais
* desde que mantenha referencia ao autor original e não altere o fully qualified name desta classe
*
* Technique T.I. Ltda
* www.technique.com.br
*
* @author Ricardo A. Harari
*
*/

package com.technique.xmlUtil;

/**
* Technique T.I. Ltda
* www.technique.com.br
*
*
*/

import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.Hashtable;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * SAX handler that flattens an XML document into a table of
 * "path key -> text" entries and, through {@link #compare}, reports the
 * entries that differ between two documents.
 *
 * While parsing, element text is stored under
 * {@code path + "[n]" + separator + elementName} and attribute values under
 * {@code path + "[n]" + separator + attributeQName}, where {@code n} is the
 * number of times that path has been opened so far in the document.
 */
public class TechParseDiff extends DefaultHandler
{
    /*
     * Field separator used while building keys; formatKey() later swaps it
     * for field_separator2. Presumably chosen to be unlikely to appear in
     * element names.
     */
    static String field_separator = "@5@_@<>@~5";
    static String field_separator2 = "/";

    /* Occurrence counter (TechParseCounter) per element path. */
    private Hashtable ocorrencias = null;
    /* Names of the currently open elements, indexed by depth (slot 0 unused). */
    private String[] levelBuffer = new String[100];
    /* Separator-joined path of the currently open elements. */
    private String currentLevel = null;
    private int level = 0;
    /* Flattened content during parsing; the differences after compare(). */
    private Hashtable xmlcontent = null;
    /* True between a start tag and the next end tag; gates characters(). */
    private boolean started = false;
    private StringBuffer currentCharBuffer = null;

    /**
     * Returns the table produced by the last {@link #compare} call.
     *
     * @return key -> TechParseContentItem table, or null if nothing has been
     *         compared yet or the buffers were released
     */
    public Hashtable retrieveDifference() {
        return xmlcontent;
    }

    public TechParseDiff() {
        super();
        level = 0;
    }

    /** Resets all parsing state so a new document can be read. */
    private void clearBuffer() {
        currentCharBuffer = new StringBuffer();
        started = false;
        xmlcontent = new Hashtable();
        level = 0;
        currentLevel = "";
        ocorrencias = new Hashtable();
    }

    /** Drops the references to all working tables, including the last result. */
    protected void relaseBuffer() {
        ocorrencias = null;
        xmlcontent = null;
        currentCharBuffer = null;
        currentLevel = null;
    }

    /** Turns an internal key into its display form by swapping the separators. */
    private String formatKey(String key) {
        return key.replaceAll(field_separator, field_separator2);
    }

    /** Returns the part of the key that follows the last separator. */
    private String formatAttributeName(String key) {
        int i = key.lastIndexOf(field_separator) + field_separator.length();
        return key.substring(i, key.length());
    }

    /**
     * Compares two XML documents and stores the differences found in the
     * table returned by {@link #retrieveDifference()}. Each value in that
     * table is a TechParseContentItem.
     *
     * @param oldXml stream of the old XML
     * @param newXml stream of the new XML
     * @throws Exception wrapping any parsing or I/O failure; the original
     *         exception is preserved as the cause
     */
    public void compare(InputStream oldXml, InputStream newXml) throws Exception {
        try {
            clearBuffer();
            Hashtable difference = new Hashtable();
            XMLReader xr = XMLReaderFactory.createXMLReader();
            InputSource is = new InputSource(oldXml);
            xr.setContentHandler(this);
            xr.setErrorHandler(this);
            xr.parse(is);
            Hashtable ht1 = xmlcontent;

            this.clearBuffer();
            is = new InputSource(newXml);
            xr.setContentHandler(this);
            xr.setErrorHandler(this);
            xr.parse(is);

            Hashtable ht2 = xmlcontent;
            Enumeration enum2 = ht2.keys();
            // Pass 1: every key of the new document is an insert, an update or unchanged.
            while (enum2.hasMoreElements()) {
                Object key2 = enum2.nextElement();
                Object o2 = ht2.get(key2);
                Object o1 = ht1.get(key2);
                if (o1 == null) {
                    difference.put(key2, new TechParseContentItem(TechParseContentItem.ACTION_INSERT,
                            formatKey(key2.toString()),
                            formatAttributeName(key2.toString()),
                            null,
                            o2.toString()));
                } else {
                    if (!o1.toString().equals(o2.toString())) {
                        difference.put(key2, new TechParseContentItem(TechParseContentItem.ACTION_UPDATE,
                                formatKey(key2.toString()),
                                formatAttributeName(key2.toString()),
                                o1.toString(),
                                o2.toString()));
                    }
                    // Matched keys are removed so that only deletions remain in ht1.
                    ht1.remove(key2);
                }
            }
            // Pass 2: whatever is left in the old table does not exist in the new document.
            enum2 = ht1.keys();
            while (enum2.hasMoreElements()) {
                Object key1 = enum2.nextElement();
                Object o1 = ht1.get(key1);
                difference.put(key1, new TechParseContentItem(TechParseContentItem.ACTION_DELETE,
                        formatKey(key1.toString()),
                        formatAttributeName(key1.toString()),
                        o1.toString(),
                        null));
            }
            this.clearBuffer();
            xmlcontent = difference;
        } catch (Exception e) {
            throw new Exception("Nao foi possivel gravar os dados de Log. Motivo:" + e.getMessage(), e);
        }
    }

    /** SAX callback: closes the current element and stores any buffered text. */
    public void endElement (String uri, String name, String qName) {
        if ("".equals (uri)) {
            removeLevel(qName);
        } else {
            removeLevel("{" + uri + "}" + name);
        }
        started = false;
    }

    /** Pushes an element onto the current path and counts this occurrence of the path. */
    private void addLevel(String name) {
        // Grow the depth stack on demand; a fixed 100-slot array overflowed
        // on documents nested 100 levels deep.
        if (level + 1 >= levelBuffer.length) {
            String[] grown = new String[levelBuffer.length * 2];
            System.arraycopy(levelBuffer, 0, grown, 0, levelBuffer.length);
            levelBuffer = grown;
        }
        levelBuffer[++level] = name;
        currentLevel += field_separator + name;
        Object o = ocorrencias.get(currentLevel);
        if (o == null) {
            ocorrencias.put(currentLevel, new TechParseCounter(1));
        } else {
            ((TechParseCounter) o).add();
        }
    }

    /** Stores the buffered text (if any) for the current element and pops it from the path. */
    private void removeLevel(String name) {
        if (currentCharBuffer.length() > 0) {
            TechParseCounter ocorr = (TechParseCounter) ocorrencias.get(currentLevel);
            String s = ocorr.getCounter() == 0 ? "" : "[" + ocorr.getCounter() + "]";
            xmlcontent.put(currentLevel
                    + s
                    + field_separator + name,
                    currentCharBuffer.toString().trim());
            currentCharBuffer = new StringBuffer();
        }
        // Strip "<separator><element name>" from the end of the path.
        String levelName = levelBuffer[level];
        int j = currentLevel.length() - levelName.length() - field_separator.length();
        currentLevel = j < 1 ? "" : currentLevel.substring(0, j);
        levelBuffer[level--] = null;
    }

    /** SAX callback: opens an element and records each of its attributes. */
    public void startElement (String uri, String name, String qName, Attributes atts) {
        if ("".equals (uri)) {
            addLevel(qName);
        } else {
            addLevel("{" + uri + "}" + name);
        }
        int i = atts.getLength();
        TechParseCounter ocorr = (TechParseCounter) ocorrencias.get(currentLevel);
        String s = ocorr.getCounter() == 0 ? "" : "[" + ocorr.getCounter() + "]";
        for (int j = 0; j < i; j++) {
            if (atts.getValue(j) != null) {
                xmlcontent.put(currentLevel + s + field_separator + atts.getQName(j), atts.getValue(j));
            }
        }
        started = true;
    }

    /**
     * SAX callback: buffers element text. Backslash, double quote, carriage
     * return, line feed and tab are dropped; every other character is kept.
     * Text that arrives after an end tag (before the next start tag) is ignored.
     */
    public void characters (char ch[], int start, int length) {
        if (!started) return;
        for (int i = start; i < start + length; i++) {
            switch (ch[i]) {
                // One label per character: the former single label OR-ed the
                // five characters into one constant (0x7F), so none of them
                // was ever matched.
                case '\\':
                case '"':
                case '\r':
                case '\n':
                case '\t':
                    break;
                default:
                    currentCharBuffer.append(ch[i]);
                    break;
            }
        }
    }

    /** SAX callback: nothing to do at document start. */
    public void startDocument () {
        //start
    }

    /** SAX callback: nothing to do at document end. */
    public void endDocument () {
        //end
    }

}



Sample Usage:
/**
* Technique TI Ltda - Project: techEngine
* @author Ricardo A. Harari
* com.technique.xmlUtil
*
* xml diff sample usage
*/

package com.technique.xmlUtil;

import java.io.ByteArrayInputStream;
import java.util.Enumeration;
import java.util.Hashtable;

public class TechDiffSample {

    /**
     * Compares two small in-memory XML documents and prints one line per
     * difference found.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String xml1 = "<document><stockoption>PETR4</stockoption><date>05/06/2005</date><value>1.20</value><stockoption>NET4</stockoption><date>04/06/2005</date><value>1.20</value></document>";
        String xml2 = "<document><stockoption>PETR4</stockoption><date>05/06/2005</date><value>1.22</value><comment>ipsenlorem</comment><stockoption>NET4</stockoption><date>05/06/2005</date><value>1.20</value></document>";
        TechParseDiff xmldiff = new TechParseDiff();
        try {
            // Encode explicitly as UTF-8 instead of the platform default: the
            // documents carry no encoding declaration, so the parser reads
            // them as UTF-8.
            ByteArrayInputStream bais1 = new ByteArrayInputStream(xml1.getBytes("UTF-8"));
            ByteArrayInputStream bais2 = new ByteArrayInputStream(xml2.getBytes("UTF-8"));
            xmldiff.compare(bais1, bais2);
            Hashtable ht = xmldiff.retrieveDifference();
            Enumeration en = ht.elements();
            while (en.hasMoreElements()) {
                TechParseContentItem item = (TechParseContentItem) en.nextElement();
                System.out.println(item.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}


The result shows the differences between the two XML documents, indicating the inserts, updates and deletes. If you have collections in the XML, you will also get the position ([1], [2], [3], ...) of the element that has changed.

action=[insert], key=[/document/comment[1]/comment], attributeName=[comment], oldValue=[null], newValue=[ipsenlorem]
action=[update], key=[/document/value[1]/value], attributeName=[value], oldValue=[1.20], newValue=[1.22]

action=[update], key=[/document/date[2]/date], attributeName=[date], oldValue=[04/06/2005], newValue=[05/06/2005]

This routine uses the SAX parser, so it can be used to compare huge files. With a little modification you can record the results of the comparisons in a database or files instead of storing in a hashtable.
This routine is part of an old framework I developed, the TechEngine.


Have fun!