[PATCH] Miscellaneous minor operf fixes, plus fix broken --separate-cpu option

This patch has been pushed up to the perf-events branch already, but review comments
are welcome.

Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
---
 libperf_events/operf_counter.cpp |   19 +++++++++++--------
 libperf_events/operf_counter.h   |    8 +++++---
 libperf_events/operf_utils.cpp   |   27 +++++++++++----------------
 libperf_events/operf_utils.h     |    5 ++---
 pe_profiling/operf.cpp           |   30 +++++++++++++++---------------
 5 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/libperf_events/operf_counter.cpp b/libperf_events/operf_counter.cpp
index 06e57b7..1ceb012 100644
--- a/libperf_events/operf_counter.cpp
+++ b/libperf_events/operf_counter.cpp
@@ -22,9 +22,6 @@
 #include <errno.h>
 #include <string.h>
 #include <iostream>
-#ifdef HAVE_LIBPFM
-#include <perfmon/pfmlib.h>
-#endif
 #include <stdlib.h>
 #include "op_events.h"
 #include "operf_counter.h"
@@ -55,13 +52,16 @@ static const char *__op_magic = "OPFILE";
 
 }  // end anonymous namespace
 
-operf_counter::operf_counter(operf_event_t evt,  bool enable_on_exec, bool do_cg)
+operf_counter::operf_counter(operf_event_t evt,  bool enable_on_exec, bool do_cg,
+                             bool separate_cpu)
 {
 	memset(&attr, 0, sizeof(attr));
 	attr.size = sizeof(attr);
 	attr.sample_type = OP_BASIC_SAMPLE_FORMAT;
 	if (do_cg)
 		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (separate_cpu)
+		attr.sample_type |= PERF_SAMPLE_CPU;
 	attr.type = PERF_TYPE_RAW;
 	attr.config = evt.evt_code;
 	attr.sample_period = evt.count;
@@ -132,7 +132,8 @@ operf_record::~operf_record()
 }
 
 operf_record::operf_record(string outfile, bool sys_wide, pid_t the_pid, bool pid_running,
-                           vector<operf_event_t> & events, vmlinux_info_t vi, bool do_cg)
+                           vector<operf_event_t> & events, vmlinux_info_t vi, bool do_cg,
+                           bool separate_by_cpu)
 {
 	int flags = O_CREAT|O_RDWR|O_TRUNC;
 	struct sigaction sa;
@@ -144,6 +145,7 @@ operf_record::operf_record(string outfile, bool sys_wide, pid_t the_pid, bool pi
 	pid_started = pid_running;
 	system_wide = sys_wide;
 	callgraph = do_cg;
+	separate_cpu = separate_by_cpu;
 	total_bytes_recorded = 0;
 	poll_count = 0;
 	evts = events;
@@ -343,7 +345,7 @@ void operf_record::setup()
 			evts[event].counter = event;
 			perfCounters[cpu].push_back(operf_counter(evts[event],
 			                                          (!pid_started && !system_wide),
-			                                          callgraph));
+			                                          callgraph, separate_cpu));
 			if ((rc = perfCounters[cpu][event].perf_event_open(pid, real_cpu, event, this)) < 0) {
 				err_msg = "Internal Error.  Perf event setup failed.";
 				goto error;
@@ -521,6 +523,7 @@ int operf_read::convertPerfData(void)
 	cverb << vdebug << "Converting operf.data to oprofile sample data format" << endl;
 	cverb << vdebug << "data size is " << hex << info.file_data_size << "; data offset is " << info.file_data_offset << endl;
 	cverb << vdebug << "head is " << hex << info.head << endl;
+	cverb << vdebug << "sample type is " << hex <<  opHeader.h_attrs[0].attr.sample_type << endl;
 	first_time_processing = true;
 	while (1) {
 		streamsize rec_size = 0;
@@ -529,11 +532,11 @@ int operf_read::convertPerfData(void)
 			break;
 		}
 		rec_size = event->header.size;
-		op_write_event(event);
+		op_write_event(event, opHeader.h_attrs[0].attr.sample_type);
 		num_bytes += rec_size;
 	}
 	first_time_processing = false;
-	op_reprocess_unresolved_events();
+	op_reprocess_unresolved_events(opHeader.h_attrs[0].attr.sample_type);
 
 	map<pid_t, operf_process_info *>::iterator it = process_map.begin();
 	while (it != process_map.end())
diff --git a/libperf_events/operf_counter.h b/libperf_events/operf_counter.h
index 3012ccd..32599d4 100644
--- a/libperf_events/operf_counter.h
+++ b/libperf_events/operf_counter.h
@@ -36,7 +36,7 @@
 class operf_record;
 
 #define OP_BASIC_SAMPLE_FORMAT (PERF_SAMPLE_ID | PERF_SAMPLE_IP \
-    | PERF_SAMPLE_TID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID)
+    | PERF_SAMPLE_TID)
 
 static inline int
 op_perf_event_open(struct perf_event_attr * attr,
@@ -53,7 +53,8 @@ op_perf_event_open(struct perf_event_attr * attr,
 
 class operf_counter {
 public:
-	operf_counter(operf_event_t evt, bool enable_on_exec, bool callgraph);
+	operf_counter(operf_event_t evt, bool enable_on_exec, bool callgraph,
+	              bool separate_by_cpu);
 	~operf_counter();
 	int perf_event_open(pid_t ppid, int cpu, unsigned counter, operf_record * pr);
 	const struct perf_event_attr * the_attr(void) const { return &attr; }
@@ -77,7 +78,7 @@ public:
 	 */
 	operf_record(std::string outfile, bool sys_wide, pid_t the_pid, bool pid_running,
 	             std::vector<operf_event_t> & evts, OP_perf_utils::vmlinux_info_t vi,
-	             bool callgraph);
+	             bool callgraph, bool separate_by_cpu);
 	~operf_record();
 	void recordPerfData(void);
 	int out_fd(void) const { return outputFile; }
@@ -99,6 +100,7 @@ private:
 	bool pid_started;
 	bool system_wide;
 	bool callgraph;
+	bool separate_cpu;
 	std::vector< std::vector<operf_counter> > perfCounters;
 	int total_bytes_recorded;
 	int poll_count;
diff --git a/libperf_events/operf_utils.cpp b/libperf_events/operf_utils.cpp
index e19e49f..ef372d5 100644
--- a/libperf_events/operf_utils.cpp
+++ b/libperf_events/operf_utils.cpp
@@ -333,6 +333,7 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data)
 		trans.end_addr = op_mmap->end_addr;
 		trans.tgid = data->pid;
 		trans.tid = data->tid;
+		trans.cpu = data->cpu;
 		if (trans.in_kernel)
 			trans.pc = data->ip;
 		else
@@ -393,21 +394,20 @@ static void __handle_callchain(u64 * array, struct sample_data * data)
 	}
 }
 
-static void __handle_sample_event(event_t * event)
+static void __handle_sample_event(event_t * event, u64 sample_type)
 {
 	struct sample_data data;
 	bool early_match = false;
-	const u64 type = OP_BASIC_SAMPLE_FORMAT;
 	const struct operf_mmap * op_mmap = NULL;
 
 	u64 *array = event->sample.array;
 
-	if (type & PERF_SAMPLE_IP) {
+	if (sample_type & PERF_SAMPLE_IP) {
 		data.ip = event->ip.ip;
 		array++;
 	}
 
-	if (type & PERF_SAMPLE_TID) {
+	if (sample_type & PERF_SAMPLE_TID) {
 		u_int32_t *p = (u_int32_t *)array;
 		data.pid = p[0];
 		data.tid = p[1];
@@ -415,17 +415,12 @@ static void __handle_sample_event(event_t * event)
 	}
 
 	data.id = ~0ULL;
-	if (type & PERF_SAMPLE_ID) {
+	if (sample_type & PERF_SAMPLE_ID) {
 		data.id = *array;
 		array++;
 	}
 
-	if (type & PERF_SAMPLE_STREAM_ID) {
-		data.stream_id = *array;
-		array++;
-	}
-
-	if (type & PERF_SAMPLE_CPU) {
+	if (sample_type & PERF_SAMPLE_CPU) {
 		u_int32_t *p = (u_int32_t *)array;
 		data.cpu = *p;
 		array++;
@@ -502,7 +497,7 @@ static void __handle_sample_event(event_t * event)
 			operf_sfile_log_sample(&trans);
 
 			update_trans_last(&trans);
-			if (operf_options::callgraph)
+			if (sample_type & PERF_SAMPLE_CALLCHAIN)
 				__handle_callchain(array, &data);
 			goto out;
 		}
@@ -531,7 +526,7 @@ out:
  * those samples are discarded and we increment the appropriate "lost sample" stat.
  * TODO:   What's the name of the stat we increment?
  */
-int OP_perf_utils::op_write_event(event_t * event)
+int OP_perf_utils::op_write_event(event_t * event, u64 sample_type)
 {
 #if 0
 	if (event->header.type < PERF_RECORD_MAX) {
@@ -541,7 +536,7 @@ int OP_perf_utils::op_write_event(event_t * event)
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		__handle_sample_event(event);
+		__handle_sample_event(event, sample_type);
 		return 0;
 	case PERF_RECORD_MMAP:
 		__handle_mmap_event(event);
@@ -563,7 +558,7 @@ int OP_perf_utils::op_write_event(event_t * event)
 	}
 }
 
-void OP_perf_utils::op_reprocess_unresolved_events(void)
+void OP_perf_utils::op_reprocess_unresolved_events(u64 sample_type)
 {
 	list<event_t *>::const_iterator it = unresolved_events.begin();
 	for (; it != unresolved_events.end(); it++) {
@@ -571,7 +566,7 @@ void OP_perf_utils::op_reprocess_unresolved_events(void)
 		// This is just a sanity check, since all events in this list
 		// are unresolved sample events.
 		if (evt->header.type == PERF_RECORD_SAMPLE) {
-			__handle_sample_event(evt);
+			__handle_sample_event(evt, sample_type);
 			free(evt);
 		}
 	}
diff --git a/libperf_events/operf_utils.h b/libperf_events/operf_utils.h
index a306d77..8a6e457 100644
--- a/libperf_events/operf_utils.h
+++ b/libperf_events/operf_utils.h
@@ -28,7 +28,6 @@ namespace operf_options {
 extern bool system_wide;
 extern bool reset;
 extern int pid;
-extern bool callgraph;
 extern int mmap_pages_mult;
 extern std::string session_dir;
 extern bool separate_cpu;
@@ -66,13 +65,13 @@ void op_perfrecord_sigusr1_handler(int sig __attribute__((unused)),
 		void *u_context __attribute__((unused)));
 int op_record_process_info(bool system_wide, pid_t pid, operf_record * pr, int output_fd);
 int op_write_output(int output, void *buf, size_t size);
-int op_write_event(event_t * event);
+int op_write_event(event_t * event, u64 sample_type);
 int op_read_from_stream(std::ifstream & is, char * buf, std::streamsize sz);
 int op_mmap_trace_file(struct mmap_info & info);
 event_t * op_get_perf_event(struct mmap_info & info);
 int op_get_next_online_cpu(DIR * dir, struct dirent *entry);
 bool op_convert_event_vals(std::vector<operf_event_t> * evt_vec);
-void op_reprocess_unresolved_events(void);
+void op_reprocess_unresolved_events(u64 sample_type);
 }
 
 // The rmb() macros were borrowed from perf.h in the kernel tree
diff --git a/pe_profiling/operf.cpp b/pe_profiling/operf.cpp
index add4984..f605381 100644
--- a/pe_profiling/operf.cpp
+++ b/pe_profiling/operf.cpp
@@ -119,6 +119,14 @@ popt::option options_array[] = {
 };
 }
 
+static void __print_usage_and_exit(const char * extra_msg)
+{
+	if (extra_msg)
+		cerr << extra_msg << endl;
+	cerr << "usage: operf [ options ] [ --system-wide | --pid <pid> | [ command [ args ] ] ]" << endl;
+	exit(EXIT_FAILURE);
+}
+
 
 static void op_sig_stop(int val __attribute__((unused)))
 {
@@ -155,9 +163,9 @@ void run_app(void)
 {
 	char * app_fname = rindex(app_name, '/') + 1;
 	if (!app_fname) {
-		cerr << "Error trying to parse app name " <<  app_name << endl;
-		cerr << "usage: operf [options] --pid=<PID> | appname [args]" << endl;
-		exit(EXIT_FAILURE);
+		string msg = "Error trying to parse app name ";
+		msg += app_name;
+		__print_usage_and_exit(msg.c_str());
 	}
 
 	vector<string> exec_args_str;
@@ -256,7 +264,8 @@ int start_profiling_app(void)
 			vi.end = kernel_end;
 			operf_record operfRecord(outputfile, operf_options::system_wide, app_PID,
 			                         (operf_options::pid == app_PID), events, vi,
-			                         operf_options::callgraph);
+			                         operf_options::callgraph,
+			                         operf_options::separate_cpu);
 			if (operfRecord.get_valid() == false) {
 				/* If valid is false, it means that one of the "known" errors has
 				 * occurred:
@@ -272,7 +281,7 @@ int start_profiling_app(void)
 				of.open(outputfile.c_str(), ios_base::trunc);
 				of.close();
 				cerr << "operf record init failed" << endl;
-				cerr << "usage: operf [options] --pid=<PID> | appname [args]" << endl;
+				cerr << "usage: operf [ options ] [ --system-wide | --pid <pid> | [ command [ args ] ] ]" << endl;
 				// Exit with SUCCESS to avoid the unnecessary "operf-record process ended
 				// abnormally" message
 				_exit(EXIT_SUCCESS);
@@ -608,15 +617,6 @@ int validate_app_name(void)
 	out: return rc;
 }
 
-static void __print_usage_and_exit(char * extra_msg)
-{
-	if (extra_msg)
-		cerr << extra_msg << endl;
-	cerr << "usage: operf [options] --pid=<PID> | appname [args]" << endl;
-	exit(EXIT_FAILURE);
-
-}
-
 static u32 _get_event_code(char name[])
 {
 	FILE * fp;
@@ -925,7 +925,7 @@ static void process_args(int argc, char const ** argv)
 			}
 		}
 		if (validate_app_name() < 0) {
-			exit(1);
+			__print_usage_and_exit(NULL);
 		}
 	} else if (operf_options::pid) {
 		if (operf_options::system_wide)
-- 
1.6.2.rc2