Menu

Shuffle XML Elements with XPath and VTD-XML

This is a simple app that shuffles elements in an XML file. It uses XPath to address individual elements, then re-arranges and re-combines the fragments. Those fragments are identified by their offsets and lengths, both of which are obtained by calling VTDNav's getElementFragment().

Why not use SAX or StAX?

Simply put, the lack of XPath support makes it very tedious, almost impossible, to re-arrange XML element fragments.

Why not use DOM?

Aside from performance and memory usage, the redundant, wasteful de-serialization/serialization contributes nothing but overhead to the task.

The Code

Input XML (old.xml):

   <root>
     <a> text </a>
     <b> text </b>
     <c> text </c>
     <a> text </a>
     <b> text </b>
     <c> text </c>
     <a> text </a>
     <b> text </b>
     <c> text </c>
   </root>

Output XML (new.xml):

   <root>
     <a> text </a>
     <a> text </a>
     <a> text </a>
     <b> text </b>
     <b> text </b>
     <b> text </b>
     <c> text </c>
     <c> text </c>
     <c> text </c>
   </root>

Java Code:

import com.ximpleware.*;
import java.io.*;
/**
 * Regroups the children of the root element of old.xml by tag name and
 * writes the result to new.xml: every /root/a fragment first, then every
 * /root/b fragment, then every /root/c fragment.
 *
 * Fragments are copied as raw bytes straight out of the parsed document
 * buffer, so no element is ever de-serialized or re-serialized.
 */
public class shuffle {
    /** XPath expressions, in the order their matches are written out. */
    private static final String[] XPATHS = {"/root/a", "/root/b", "/root/c"};

    /**
     * Entry point. Reads old.xml from the working directory and writes
     * new.xml next to it. If old.xml cannot be parsed, a message is
     * printed to stderr and no output file is created.
     *
     * @param args ignored
     * @throws Exception on XPath, navigation or I/O failure
     */
    public static void main(String[] args) throws Exception {
        // Compile every XPath before touching any file.
        AutoPilot[] pilots = new AutoPilot[XPATHS.length];
        for (int i = 0; i < XPATHS.length; i++) {
            pilots[i] = new AutoPilot();
            pilots[i].selectXPath(XPATHS[i]);
        }

        VTDGen vg = new VTDGen();
        if (!vg.parseFile("old.xml", false)) {
            System.err.println("shuffle: could not parse old.xml");
            return;
        }

        VTDNav vn = vg.getNav();
        for (int i = 0; i < pilots.length; i++) {
            pilots[i].bind(vn);
        }
        // Raw bytes of the source document; fragments are slices of this array.
        byte[] xml = vn.getXML().getBytes();

        FileOutputStream fos = new FileOutputStream("new.xml");
        try {
            fos.write("<root>".getBytes());
            for (int i = 0; i < pilots.length; i++) {
                writeFragments(pilots[i], vn, xml, fos);
            }
            fos.write('\n');
            fos.write("</root>".getBytes());
        } finally {
            // Always release the file handle, even if evaluation fails midway.
            fos.close();
        }
    }

    /**
     * Writes every element matched by ap to out, each preceded by a newline,
     * then resets ap so it can be evaluated again.
     *
     * @param ap  AutoPilot with an XPath selected and bound to vn
     * @param vn  navigator that ap moves while evaluating
     * @param xml byte content of the document behind vn
     * @param out destination stream
     */
    private static void writeFragments(AutoPilot ap, VTDNav vn, byte[] xml, OutputStream out)
            throws XPathEvalException, NavException, IOException {
        while (ap.evalXPath() != -1) {
            // getElementFragment() packs two ints into one long:
            // the lower 32 bits are the offset, the upper 32 bits the length.
            long fragment = vn.getElementFragment();
            int offset = (int) fragment;
            int len = (int) (fragment >> 32);
            out.write('\n');
            out.write(xml, offset, len);
        }
        ap.resetXPath();
    }
}

C# code:

using System;
using com.ximpleware;

namespace shuffle
{
    /// <summary>
    /// Regroups the children of the root element of old.xml by tag name and
    /// writes the result to new.xml: every /root/a fragment first, then every
    /// /root/b fragment, then every /root/c fragment. Fragments are copied as
    /// raw bytes out of the parsed document buffer.
    /// </summary>
    public class shuffle
    {
        /// <summary>XPath expressions, in the order their matches are written out.</summary>
        private static readonly string[] XPaths = { "/root/a", "/root/b", "/root/c" };

        /// <summary>
        /// Entry point. Reads old.xml from the working directory and writes
        /// new.xml. If old.xml cannot be parsed, a message is printed to
        /// stderr and no output file is created.
        /// </summary>
        /// <param name="args">Ignored.</param>
        public static void Main(String[] args)
        {
            // Compile every XPath before touching any file.
            AutoPilot[] pilots = new AutoPilot[XPaths.Length];
            for (int i = 0; i < XPaths.Length; i++)
            {
                pilots[i] = new AutoPilot();
                pilots[i].selectXPath(XPaths[i]);
            }

            VTDGen vg = new VTDGen();
            if (!vg.parseFile("old.xml", false))
            {
                Console.Error.WriteLine("shuffle: could not parse old.xml");
                return;
            }

            VTDNav vn = vg.getNav();
            foreach (AutoPilot ap in pilots)
            {
                ap.bind(vn);
            }

            // Fully qualified so the block compiles with the file's existing
            // using directives (System and com.ximpleware only).
            System.Text.Encoding utf8 = System.Text.Encoding.UTF8;
            byte[] openTag = utf8.GetBytes("<root>");
            byte[] closeTag = utf8.GetBytes("</root>");
            byte[] newline = utf8.GetBytes("\n");

            // Raw bytes of the source document; fragments are slices of this array.
            byte[] xml = vn.getXML().getBytes();

            // FileMode.Create truncates an existing new.xml. OpenOrCreate would
            // leave stale bytes behind whenever the new content is shorter.
            using (System.IO.FileStream fos =
                       new System.IO.FileStream("new.xml", System.IO.FileMode.Create))
            {
                fos.Write(openTag, 0, openTag.Length);
                foreach (AutoPilot ap in pilots)
                {
                    WriteFragments(ap, vn, xml, fos, newline);
                }
                fos.Write(newline, 0, newline.Length);
                fos.Write(closeTag, 0, closeTag.Length);
            }
        }

        /// <summary>
        /// Writes every element matched by <paramref name="ap"/> to
        /// <paramref name="output"/>, each preceded by
        /// <paramref name="separator"/>, then resets the AutoPilot.
        /// </summary>
        /// <param name="ap">AutoPilot with an XPath selected and bound to <paramref name="vn"/>.</param>
        /// <param name="vn">Navigator that <paramref name="ap"/> moves while evaluating.</param>
        /// <param name="xml">Byte content of the document behind <paramref name="vn"/>.</param>
        /// <param name="output">Destination stream.</param>
        /// <param name="separator">Bytes written before each fragment.</param>
        private static void WriteFragments(AutoPilot ap, VTDNav vn, byte[] xml,
                                           System.IO.Stream output, byte[] separator)
        {
            while (ap.evalXPath() != -1)
            {
                // getElementFragment() packs two ints into one long:
                // the lower 32 bits are the offset, the upper 32 bits the length.
                long fragment = vn.getElementFragment();
                int offset = (int)fragment;
                int len = (int)(fragment >> 32);
                output.Write(separator, 0, separator.Length);
                output.Write(xml, offset, len);
            }
            ap.resetXPath();
        }
    }
}


link

Posted by SourceForge Robot 2010-05-25

Log in to post a comment.