[PATCH] Re-process operf samples dropped due to no mapping or no process info

The perf_events kernel subsystem often starts recording sample
events before recording corresponding COMM and MMAP events.
So in the first pass through the recorded events, it's not
unusual to fail to match up a sample with its process info or
the binary mapping (executable or library) for which the sample
was taken.  With this patch, we save such samples; then, after
the first pass through all recorded events is complete, we
re-process those saved samples to try again to match them up with
their corresponding process info and mapping.

This patch has already been pushed to the perf-events branch, but
review comments are welcome.

Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
---
 libperf_events/operf_counter.cpp |    6 ++++-
 libperf_events/operf_utils.cpp   |   47 +++++++++++++++++++++++++++++++------
 libperf_events/operf_utils.h     |    1 +
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/libperf_events/operf_counter.cpp b/libperf_events/operf_counter.cpp
index 8d4cc85..06e57b7 100644
--- a/libperf_events/operf_counter.cpp
+++ b/libperf_events/operf_counter.cpp
@@ -43,6 +43,8 @@ int sample_reads;
 unsigned int pagesize;
 verbose vperf("perf_events");
 
+extern bool first_time_processing;
+
 namespace {
 
 vector<string> event_names;
@@ -519,6 +521,7 @@ int operf_read::convertPerfData(void)
 	cverb << vdebug << "Converting operf.data to oprofile sample data format" << endl;
 	cverb << vdebug << "data size is " << hex << info.file_data_size << "; data offset is " << info.file_data_offset << endl;
 	cverb << vdebug << "head is " << hex << info.head << endl;
+	first_time_processing = true;
 	while (1) {
 		streamsize rec_size = 0;
 		event_t * event = op_get_perf_event(info);
@@ -529,7 +532,8 @@ int operf_read::convertPerfData(void)
 		op_write_event(event);
 		num_bytes += rec_size;
 	}
-
+	first_time_processing = false;
+	op_reprocess_unresolved_events();
 
 	map<pid_t, operf_process_info *>::iterator it = process_map.begin();
 	while (it != process_map.end())
diff --git a/libperf_events/operf_utils.cpp b/libperf_events/operf_utils.cpp
index e0bb523..e19e49f 100644
--- a/libperf_events/operf_utils.cpp
+++ b/libperf_events/operf_utils.cpp
@@ -33,6 +33,7 @@
 #include "operf_kernel.h"
 #include "operf_sfile.h"
 #include "op_fileio.h"
+#include "op_libiberty.h"
 
 
 extern verbose vmisc;
@@ -49,8 +50,9 @@ map<pid_t, operf_process_info *> process_map;
 multimap<string, struct operf_mmap *> all_images_map;
 map<u64, struct operf_mmap *> kernel_modules;
 struct operf_mmap * kernel_mmap;
+bool first_time_processing;
 
-
+static list<event_t *> unresolved_events;
 static struct operf_transient trans;
 static bool sfile_init_done;
 
@@ -286,7 +288,8 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data)
 			 * cause a lot of thrashing about.  But at the very least,
 			 * we need to log the lost sample.
 			*/
-			cverb << vmisc << "Discarding sample -- process info unavailable" << endl;
+			if ((cverb << vmisc) && !first_time_processing)
+				cerr << "Dropping sample -- process info unavailable" << endl;
 			goto out;
 		}
 	}
@@ -313,9 +316,12 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data)
 					(kernel_mmap->end_addr == 0ULL))
 				op_mmap = kernel_mmap;
 		}
-		if (!op_mmap)
-			cverb << vperf << "Discarding sample: no mapping found for kernel sample addr "
-			      << hex << data->ip << endl;
+		if (!op_mmap) {
+			/* This can happen if a kernel module is loaded after profiling
+			 * starts, and then we get samples for that kernel module.
+			 * TODO:  Fix this.
+			 */
+		}
 	} else {
 		op_mmap = proc->find_mapping_for_sample(data->ip);
 	}
@@ -339,7 +345,12 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data)
 		/* Ditto the comment above -- i.e., we need to look into caching these
 		 * discarded samples and trying to process them later.
 		 */
-		cverb << vmisc << "Discarding sample where no mapping was found" << endl;
+		if ((cverb << vmisc) && !first_time_processing) {
+			string domain = trans.in_kernel ? "kernel" : "userspace";
+			cerr << "Discarding " << domain
+			     << " sample where no mapping was found. (pc=0x"
+			     << hex << data->ip <<")" << endl;
+		}
 		retval = NULL;
 	}
 out:
@@ -419,9 +430,9 @@ static void __handle_sample_event(event_t * event)
 		data.cpu = *p;
 		array++;
 	}
-	if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
+	if (event->header.misc == PERF_RECORD_MISC_KERNEL) {
 		trans.in_kernel = 1;
-	} else if (event->header.misc & PERF_RECORD_MISC_USER) {
+	} else if (event->header.misc == PERF_RECORD_MISC_USER) {
 		trans.in_kernel = 0;
 	} else {
 		// TODO: For now, we'll discard hypervisor and guest kernel/
@@ -493,8 +504,14 @@ static void __handle_sample_event(event_t * event)
 			update_trans_last(&trans);
 			if (operf_options::callgraph)
 				__handle_callchain(array, &data);
+			goto out;
 		}
 	}
+	if (first_time_processing) {
+		event_t * ev = (event_t *)xmalloc(event->header.size);
+		memcpy(ev, event, event->header.size);
+		unresolved_events.push_back(ev);
+	}
 
 out:
 	return;
@@ -546,6 +563,20 @@ int OP_perf_utils::op_write_event(event_t * event)
 	}
 }
 
+void OP_perf_utils::op_reprocess_unresolved_events(void)
+{
+	list<event_t *>::const_iterator it = unresolved_events.begin();
+	for (; it != unresolved_events.end(); it++) {
+		event_t * evt = (*it);
+		// This is just a sanity check, since all events in this list
+		// are unresolved sample events.
+		if (evt->header.type == PERF_RECORD_SAMPLE) {
+			__handle_sample_event(evt);
+			free(evt);
+		}
+	}
+}
+
 
 void OP_perf_utils::op_perfrecord_sigusr1_handler(int sig __attribute__((unused)),
 		siginfo_t * siginfo __attribute__((unused)),
diff --git a/libperf_events/operf_utils.h b/libperf_events/operf_utils.h
index dea26ec..a306d77 100644
--- a/libperf_events/operf_utils.h
+++ b/libperf_events/operf_utils.h
@@ -72,6 +72,7 @@ int op_mmap_trace_file(struct mmap_info & info);
 event_t * op_get_perf_event(struct mmap_info & info);
 int op_get_next_online_cpu(DIR * dir, struct dirent *entry);
 bool op_convert_event_vals(std::vector<operf_event_t> * evt_vec);
+void op_reprocess_unresolved_events(void);
 }
 
 // The rmb() macros were borrowed from perf.h in the kernel tree
-- 
1.6.2.rc2