% ctssTable
% Timo Lassmann
% March 5th 2014
ctssTable extracts the raw ctss counts from a tome database and prints them out in a table.
$ ctssTable <database name> -sample <sample1>,<sample2>,<sample...> -out <output.txt>
The output is a tab separated file listing:
/*
tomeTools - a collection of programs to work with CAGE data.
Author: Timo Lassmann <timolassmann@gmail.com>
Copyright (C) 2014 RIKEN
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include <getopt.h>
#include <string.h>
#include<sys/stat.h>
#if HAVE_CONFIG_H
#include <config.h>
#endif
#include "tomedb.h"
#define OPT_SAMPLE 1
double* get_sample_data_from_matrix(struct crf* crf,double* column,int* use_sample , int tracks,int strand,int x);
void usage();
int main (int argc, char * argv[])
{
int i,c,f,g;
char* tmp = 0;
int count;
int num_samples = 0;
char** sample_names = 0;
int help = 0;
int quiet = 0;
char* outfile = 0;
while (1){
static struct option long_options[] ={
{"sample",required_argument,0,OPT_SAMPLE},
{"out",required_argument,0, 'o'},
{"quiet",0,0,'q'},
{"help",0,0,'h'},
{0, 0, 0, 0}
};
int option_index = 0;
c = getopt_long_only (argc, argv,"o:qh:",long_options, &option_index);
if (c == -1){
break;
}
switch(c) {
case 0:
break;
case OPT_SAMPLE:
tmp = optarg;
count = byg_count(",", tmp);
//fprintf(stderr,"%d\n",count);
num_samples = count+1;
sample_names = malloc(sizeof(char*) * (count+1));
for(i = 0; i < count+1;i++){
sample_names[i] = malloc(sizeof(char)* 1000);
}
f = 0;
g = 0;
for(i = 0;i < strlen(tmp);i++){
if(tmp[i] != ','){
if(tmp[i] != ' '){
sample_names[f][g] = tmp[i];
}
g++;
}else{
sample_names[f][g] = 0;
f++;
g = 0;
}
}
sample_names[f][g] = 0;
break;
case 'q':
quiet = 1;
break;
case 'h':
help = 1;
break;
case 'o':
outfile = optarg;
break;
case '?':
exit(1);
break;
default:
fprintf(stderr,"default\n\n\n\n");
abort ();
}
}
char* db_file = 0;
if(help){
usage();
exit(EXIT_SUCCESS);
}
db_file = argv[optind++];
if(!db_file){
fprintf(stderr,"Error: no database file...\n");
exit(EXIT_FAILURE);
}
if(num_samples == 0){
fprintf(stderr,"Error: no samples specified..\n");
exit(EXIT_FAILURE);
}
TOMEDB* db = 0;
db = main_read_db(db,db_file);
struct tome_data* td = init_tome_data(100,db->num_tracks );
td->us = init_use_samples(db,sample_names,0, num_samples ,0);
FILE* out = 0;
int blob_length = 0;
unsigned char *blob_data = 0;
struct crf * crf = 0;
const char *zSql = "SELECT * FROM tome_data;";
sqlite3_stmt *pStmt;
sqlite3 *sqlite_db;
int rc;
blob_length = 0;
blob_data = 0;
rc = sqlite3_open(db_file, &sqlite_db);
char* chr = 0;
unsigned int pos = 0;
unsigned int offset = 0;
double* column = malloc(sizeof(double)* (db->num_tracks+1));
if(outfile){
if ((out = fopen(outfile, "w")) == NULL){
fprintf(stderr,"can't open output\n");
exit(EXIT_FAILURE);
}
}else{
out = stdout;
}
fprintf(out,"Chromosome\tPosition\tStrand");
for(i = 0; i < db->num_tracks;i++){
if(td->us->use_sample[i]){
fprintf(out,"\t%s",td->us->sample_names[i]);
}
}
fprintf(out,"\n");
rc = sqlite3_prepare(sqlite_db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
fprintf(stderr,"could not establish database connection.\n");
exit(EXIT_FAILURE);
}
while ( sqlite3_step(pStmt) == SQLITE_ROW){
rc = sqlite3_column_int(pStmt,0);
blob_length = sqlite3_column_bytes(pStmt, 1);
blob_data = (unsigned char *)malloc(blob_length);
memcpy(blob_data, sqlite3_column_blob(pStmt, 1), blob_length);
blob_data = unrle(blob_data,&blob_length);
crf = make_crf_from_blob(crf, blob_data,blob_length,db->num_tracks);
free(blob_data);
crf->genome_pos = rc;
for(i= 0; i < TOMEDB_DB_BLOCK_SIZE;i++){
column = get_sample_data_from_matrix(crf,column, td->us->use_sample , db->num_tracks,0,i);
if(column[num_samples] != 0.0){
pos = (crf->genome_pos << 15u) + i;
chr = get_chr_name(db, pos );
offset = get_chr_offset (db->seq_info,chr);
fprintf(out, "%s\t%d\t+\t",chr, pos - offset);
for(c = 0;c < num_samples-1;c++){
fprintf(out,"%d\t",(int)column[c]);
column[c] = 0.0;
}
fprintf(out,"%d\n",(int)column[num_samples-1]);
column[num_samples-1] = 0.0;
}
}
free_crf(crf);
crf = 0;
}
sqlite3_finalize(pStmt);
rc = sqlite3_prepare(sqlite_db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
fprintf(stderr,"could not establish database connection.\n");
exit(EXIT_FAILURE);
}
while ( sqlite3_step(pStmt) == SQLITE_ROW){
rc = sqlite3_column_int(pStmt,0);
blob_length = sqlite3_column_bytes(pStmt, 1);
blob_data = (unsigned char *)malloc(blob_length);
memcpy(blob_data, sqlite3_column_blob(pStmt, 1), blob_length);
blob_data = unrle(blob_data,&blob_length);
crf = make_crf_from_blob(crf, blob_data,blob_length,db->num_tracks);
free(blob_data);
crf->genome_pos = rc;
for(i= 0; i < TOMEDB_DB_BLOCK_SIZE;i++){
column = get_sample_data_from_matrix(crf,column, td->us->use_sample , db->num_tracks,1,i);
if(column[num_samples] != 0.0){
pos = (crf->genome_pos << 15u) + i;
chr = get_chr_name(db, pos );
offset = get_chr_offset (db->seq_info,chr);
fprintf(out, "%s\t%d\t-\t",chr, pos - offset);
for(c = 0;c < num_samples-1;c++){
fprintf(out,"%d\t",(int)column[c]);
column[c] = 0.0;
}
fprintf(out,"%d\n",(int)column[num_samples-1]);
column[num_samples-1] = 0.0;
}
}
free_crf(crf);
crf = 0;
}
sqlite3_finalize(pStmt);
//fclose(file);
sqlite3_close(sqlite_db);
free_tome_data(td);
free_db(db);
if(outfile){
fclose(out);
}
for(i = 0; i < num_samples;i++){
free(sample_names[i]);
}
free(sample_names);
return EXIT_SUCCESS;
}
double* get_sample_data_from_matrix(struct crf* crf,double* column,int* use_sample , int tracks,int strand,int x)
{
int i,j;
int f = 0;
double sum = 0;
if(!strand){
f = 0;
for(i = 0; i < tracks;i++){
if(use_sample[i]){
column[f] = 0;
if(i < crf->num_plus_tracks){
for(j = crf->plus_row_ptr[i]; j < crf->plus_row_ptr[i+1]; j++){
if(crf->plus_col_index[j] == x ){
column[f] = crf->plus_values[j];
sum +=crf->plus_values[j];
}
}
}
f++;
}
}
}else{
f = 0;
for(i = 0; i < tracks;i++){
if(use_sample[i]){
column[f] = 0;
if(i < crf->num_minus_tracks){
for(j = crf->minus_row_ptr[i]; j < crf->minus_row_ptr[i+1]; j++){
if(crf->minus_col_index[j] == x){
column[f] = crf->minus_values[j];
sum +=crf->minus_values[j];
}
}
}
f++;
}
}
}
column[f] = sum;
return column;
}
void usage()
{
fprintf(stdout, "\n%s %s, Copyright (C) 2014 Riken <%s>\n",PACKAGE_NAME, PACKAGE_VERSION,PACKAGE_BUGREPORT);
fprintf(stdout, "\n");
fprintf(stdout, "Usage: ctssTable [options] <db> -sample <sample1>,<sample2>,<sample...> \n\n");
fprintf(stdout, "Options:\n");
fprintf(stdout, " -o STR output file prefix.\n");
fprintf(stdout, "\n");
}