Menu

Merge files by summing values in columns

Java forum
Duy Dinh
2013-10-04
2013-10-04
  • Duy Dinh

    Duy Dinh - 2013-10-04
    /**
     * Merges several tab-separated files into one table by summing their values
     * cell by cell, then prints the table and writes it to {@code output}.
     * <p>
     * All files are read fully into memory. The dimensions of the result table
     * (number of rows and columns) are taken from the first file only, so the
     * other files are expected to have no more lines, and no more columns per
     * line, than the first one. For every data line, the columns before
     * {@code startColumnID} are copied as they are (the value from the last
     * file processed wins) and the columns from {@code startColumnID} onwards
     * are added to the running sum. Every column of a data line, including the
     * copied ones, is passed to {@code ArrayProcessing.convertDouble}.
     * <p>
     * The method returns without doing anything when {@code files} is
     * {@code null} or empty.
     *
     * @param files
     *            input files
     * @param output
     *            output file
     * @param header
     *            {@code true} if the first line of every file is a header line;
     *            the header of the first file is then reused for the output and
     *            the first line of every file is skipped when summing
     * @param startColumnID
     *            index of the first column whose values are summed
     * @throws IllegalArgumentException
     *             if the first file contains no lines, because the table
     *             dimensions cannot be derived from it
     */
    public static void combineResults(List<File> files, File output,
            boolean header, int startColumnID) {
        if (files == null || files.isEmpty()) {
            return;
        }

        // lines of every input file, in the same order as the files
        ArrayList<ArrayList<String>> lineSetList = new ArrayList<ArrayList<String>>();
        for (File file : files) {
            lineSetList.add(FileManager.ReadLines(file.getAbsolutePath()));
        }

        // the first file defines the shape of the result table; fail with a
        // clear message instead of an IndexOutOfBoundsException when it is empty
        ArrayList<String> firstFileLines = lineSetList.get(0);
        if (firstFileLines.isEmpty()) {
            throw new IllegalArgumentException("First input file is empty: "
                    + files.get(0).getAbsolutePath());
        }
        String firstLine = firstFileLines.get(0);
        int columnCount = firstLine.split("[\t]").length;
        int rowCount = firstFileLines.size();

        String headerString = header ? firstLine : null;
        ArrayList<Double[]> sumValues = ArrayProcessing
                .initializeDoubleArrayList(rowCount, columnCount);

        // with a header, line 0 of every file is skipped, so row 0 of
        // sumValues stays untouched and only keeps the row indexes aligned
        int firstDataLine = header ? 1 : 0;

        for (ArrayList<String> lines : lineSetList) {
            for (int lineID = firstDataLine; lineID < lines.size(); lineID++) {
                String[] strValues = lines.get(lineID).split("[\t]");
                Double[] dblValues = ArrayProcessing.convertDouble(strValues);

                // columns before startColumnID hold the same value in every
                // file, so they are copied instead of summed
                Double[] sumRow = sumValues.get(lineID);
                for (int columnID = 0; columnID < startColumnID; columnID++) {
                    sumRow[columnID] = dblValues[columnID];
                }

                // accumulate the remaining columns of this line
                ArrayProcessing.updateArray(sumValues, lineID, startColumnID,
                        dblValues);
            }
        }

        String merged = ArrayProcessing.toString(sumValues, headerString);
        System.out.println(merged);
        FileManager.WriteToFile(merged, output.getAbsolutePath());
    }
    
     

    Last edit: Duy Dinh 2013-10-04
  • Duy Dinh

    Duy Dinh - 2013-10-04
    /**
     * Parses every element of a string array as a double.
     * 
     * @param fields
     *            textual values, one number per element
     * @return a new array of the same length holding the parsed values in the
     *         same order
     */
    public static Double[] convertDouble(String[] fields) {
        Double[] parsed = new Double[fields.length];
        int position = 0;
        for (String field : fields) {
            parsed[position] = Double.parseDouble(field);
            position++;
        }
        return parsed;
    }
    
    /**
     * Creates an array of the given length with every element set to 0.0.
     * 
     * @param columnCount
     *            length of the array to create
     * @return a new array holding {@code columnCount} zeros
     */
    public static Double[] initializeDoubleArray(int columnCount) {
        Double[] zeros = new Double[columnCount];
        int next = 0;
        while (next < zeros.length) {
            zeros[next] = 0.0;
            next++;
        }
        return zeros;
    }
    
    /**
     * Adds the values of {@code sum} to one row of {@code list}, in place,
     * for every column index from {@code startColumnID} up to the end of
     * {@code sum}.
     * 
     * @param list
     *            table whose row is updated
     * @param lineID
     *            index of the row to update
     * @param startColumnID
     *            first column index to update; columns before it are left
     *            untouched
     * @param sum
     *            values to add, indexed by column
     */
    public static void updateArray(ArrayList<Double[]> list, int lineID,
            int startColumnID, Double[] sum) {
        int column = startColumnID;
        while (column < sum.length) {
            // the row is looked up per column so that nothing is fetched when
            // there is no column to update
            Double[] row = list.get(lineID);
            row[column] = row[column] + sum[column];
            column++;
        }
    }
    
    /**
     * Builds a table of {@code rowCount} rows, each row being a separate array
     * created by {@link #initializeDoubleArray(int)} with {@code columnCount}
     * elements.
     * 
     * @param rowCount
     *            number of rows to create; also used as the initial capacity
     *            of the returned list
     * @param columnCount
     *            number of elements in each row
     * @return the new list of rows
     */
    public static ArrayList<Double[]> initializeDoubleArrayList(int rowCount,
            int columnCount) {
        ArrayList<Double[]> rows = new ArrayList<Double[]>(rowCount);
        int remaining = rowCount;
        while (remaining > 0) {
            rows.add(initializeDoubleArray(columnCount));
            remaining--;
        }
        return rows;
    }
    
    /**
     * Formats a table as text: one line per row, values separated by tabs,
     * every line terminated by {@code "\n"}.
     * <p>
     * When {@code headerString} is not {@code null} it is written as the first
     * line and row 0 of {@code values} is skipped, i.e. row 0 is treated as the
     * placeholder for the header line. When it is {@code null} all rows are
     * written. A row without elements is written as an empty line.
     * 
     * @param values
     *            rows of the table
     * @param headerString
     *            header line to write first, or {@code null} for no header
     * @return the formatted table
     */
    public static String toString(ArrayList<Double[]> values,
            String headerString) {
        StringBuilder sb = new StringBuilder();
        int firstRow = 0;
        if (headerString != null) {
            sb.append(headerString).append("\n");
            // row 0 stands for the header line, so data starts at row 1
            firstRow = 1;
        }
        for (int i = firstRow; i < values.size(); i++) {
            Double[] row = values.get(i);
            // separator goes before every element but the first, which also
            // copes with rows that have no elements at all
            for (int j = 0; j < row.length; j++) {
                if (j > 0) {
                    sb.append("\t");
                }
                sb.append(row[j]);
            }
            sb.append("\n");
        }
        return sb.toString();
    }
    
    /**
     * Formats a table as text: one line per row, values separated by tabs,
     * every line terminated by {@code "\n"}. A row without elements is written
     * as an empty line.
     * 
     * @param values
     *            rows of the table
     * @return the formatted table; empty when {@code values} has no rows
     */
    public static String toString(ArrayList<Double[]> values) {
        StringBuilder sb = new StringBuilder();
        for (Double[] row : values) {
            // separator goes before every element but the first, which also
            // copes with rows that have no elements at all
            for (int j = 0; j < row.length; j++) {
                if (j > 0) {
                    sb.append("\t");
                }
                sb.append(row[j]);
            }
            sb.append("\n");
        }
        return sb.toString();
    }
    
     

    Last edit: Duy Dinh 2013-10-04

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.