From: Maynard J. <may...@us...> - 2012-05-22 22:06:15
|
[PATCH] Change operf to create oprofile sample files during profiling instead of afterwards Previously, the 'operf-record' process read the perf_events sample data from the kernel and wrote it to a temporary output file (operf.data). Then, once the profiling session was completed, a conversion was performed on that data to put it into oprofile format, stored in the usual sample files in <session-dir>/samples/current. I noticed that when operf was run in system-wide mode, the temporary operf.data file was very large, and the conversion step was taking inordinately long (from several seconds on a laptop to over a minute on a large, busy server system. I've had a TODO comment in the operf.cpp code to investigate performing the conversion step in-flight (while profiling) versus waiting for the profiling to end. So I changed the operf-record process from writing the perf_events sample data to a file to writing to a pipe. Then I created a new operf-read process to read from the other end of that pipe and perform the conversion to oprofile format. Doing the conversion during the profiling run allowed me to identify the hot spots in the opreport output. I found several areas (especially involving the inefficiency of the operf_sfile hash algorithm) where improvements were needed. The conversion step now takes about 15% of the time it used to! A wonderful result! This patch has been committed to the perf-events branch, but review comments are welcome. Signed-off-by: Maynard Johnson <may...@us...> --- libperf_events/operf_counter.cpp | 214 ++++++++++++++++++---------------- libperf_events/operf_counter.h | 12 +- libperf_events/operf_event.h | 11 -- libperf_events/operf_sfile.cpp | 74 ++++++------ libperf_events/operf_sfile.h | 2 + libperf_events/operf_utils.cpp | 237 +++++++++++++------------------------- libperf_events/operf_utils.h | 12 +-- pe_profiling/operf.cpp | 168 ++++++++++++++++++++------- 8 files changed, 366 insertions(+), 364 deletions(-) diff --git a/libperf_events/operf_counter.cpp b/libperf_events/operf_counter.cpp index 1ceb012..5d1f016 100644 --- a/libperf_events/operf_counter.cpp +++ b/libperf_events/operf_counter.cpp @@ -36,6 +36,7 @@ using namespace OP_perf_utils; volatile bool quit; +volatile bool read_quit; int sample_reads; unsigned int pagesize; verbose vperf("perf_events"); @@ -121,17 +122,14 @@ int operf_counter::perf_event_open(pid_t ppid, int cpu, unsigned event, operf_re operf_record::~operf_record() { cverb << vperf << "operf_record::~operf_record()" << endl; - opHeader.data_size = total_bytes_recorded; - if (total_bytes_recorded) - write_op_header_info(); delete[] poll_data; - close(outputFile); + close(output_fd); samples_array.clear(); evts.clear(); perfCounters.clear(); } -operf_record::operf_record(string outfile, bool sys_wide, pid_t the_pid, bool pid_running, +operf_record::operf_record(int out_fd, bool sys_wide, pid_t the_pid, bool pid_running, vector<operf_event_t> & events, vmlinux_info_t vi, bool do_cg, bool separate_by_cpu) { @@ -154,14 +152,8 @@ operf_record::operf_record(string outfile, bool sys_wide, pid_t the_pid, bool pi if (system_wide && (pid != -1 || pid_started)) return; // object is not valid - opHeader.data_size = 0; - outputFile = open(outfile.c_str(), flags, S_IRUSR|S_IWUSR); - if (outputFile < 0) { - string errmsg = "Internal error: Could not create output file. errno is "; - errmsg += strerror(errno); - throw runtime_error(errmsg); - } - cverb << vperf << "operf_record ctor: successfully opened output file " << outfile << endl; + output_fd = out_fd; + cverb << vperf << "operf_record ctor using output fd " << output_fd << endl; memset(&sa, 0, sizeof(struct sigaction)); sa.sa_sigaction = op_perfrecord_sigusr1_handler; @@ -198,37 +190,25 @@ void operf_record::write_op_header_info() struct OP_file_header f_header; struct op_file_attr f_attr; - lseek(outputFile, sizeof(f_header), SEEK_SET); - - for (unsigned i = 0; i < evts.size(); i++) { - opHeader.h_attrs[i].id_offset = lseek(outputFile, 0, SEEK_CUR); - add_to_total(op_write_output(outputFile, &opHeader.h_attrs[i].ids[0], - opHeader.h_attrs[i].ids.size() * sizeof(u64))); - } + f_header.magic = OP_MAGIC; + f_header.size = sizeof(f_header); + f_header.attr_size = sizeof(f_attr); + f_header.attrs.size = evts.size() * sizeof(f_attr); + f_header.data.size = 0; - opHeader.attr_offset = lseek(outputFile, 0, SEEK_CUR); + add_to_total(op_write_output(output_fd, &f_header, sizeof(f_header))); for (unsigned i = 0; i < evts.size(); i++) { struct op_header_evt_info attr = opHeader.h_attrs[i]; f_attr.attr = attr.attr; - f_attr.ids.offset = attr.id_offset; - f_attr.ids.size =attr.ids.size() * sizeof(u64); - add_to_total(op_write_output(outputFile, &f_attr, sizeof(f_attr))); + f_attr.ids.size = attr.ids.size() * sizeof(u64); + add_to_total(op_write_output(output_fd, &f_attr, sizeof(f_attr))); } - opHeader.data_offset = lseek(outputFile, 0, SEEK_CUR); - - f_header.magic = OP_MAGIC; - f_header.size = sizeof(f_header); - f_header.attr_size = sizeof(f_attr); - f_header.attrs.offset = opHeader.attr_offset; - f_header.attrs.size = evts.size() * sizeof(f_attr); - f_header.data.offset = opHeader.data_offset; - f_header.data.size = opHeader.data_size; - - lseek(outputFile, 0, SEEK_SET); - add_to_total(op_write_output(outputFile, &f_header, sizeof(f_header))); - lseek(outputFile, opHeader.data_offset + opHeader.data_size, SEEK_SET); + for (unsigned i = 0; i < evts.size(); i++) { + add_to_total(op_write_output(output_fd, &opHeader.h_attrs[i].ids[0], + opHeader.h_attrs[i].ids.size() * sizeof(u64))); + } } int operf_record::prepareToRecord(int counter, int cpu, int fd) @@ -360,7 +340,7 @@ void operf_record::setup() closedir(dir); write_op_header_info(); if (pid_started || system_wide) { - if (op_record_process_info(system_wide, pid, this, outputFile) < 0) { + if (op_record_process_info(system_wide, pid, this, output_fd) < 0) { for (int i = 0; i < num_cpus; i++) { for (unsigned int evt = 0; evt < evts.size(); evt++) ioctl(perfCounters[i][evt].get_fd(), PERF_EVENT_IOC_DISABLE); @@ -368,7 +348,7 @@ void operf_record::setup() goto error; } } - op_record_kernel_info(vmlinux_file, kernel_start, kernel_end, outputFile, this); + op_record_kernel_info(vmlinux_file, kernel_start, kernel_end, output_fd, this); // Set bit to indicate we're set to go. valid = true; @@ -376,7 +356,7 @@ void operf_record::setup() error: delete[] poll_data; - close(outputFile); + close(output_fd); if (rc != OP_PERF_HANDLED_ERROR) throw runtime_error(err_msg); } @@ -409,15 +389,33 @@ void operf_record::recordPerfData(void) cverb << vperf << "operf_record::recordPerfData received signal to quit." << endl; } } + close(output_fd); cverb << vdebug << "operf recording finished." << endl; } -void operf_read::init(string infilename, string samples_loc, op_cpu cputype, vector<operf_event_t> & events) +void operf_read::init(int sample_data_pipe_fd, string samples_loc, op_cpu cputype, vector<operf_event_t> & events) { - inputFname = infilename; + struct sigaction sa; + sigset_t ss; + sample_data_fd = sample_data_pipe_fd; sampledir = samples_loc; evts = events; cpu_type = cputype; + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_sigaction = op_perfread_sigusr1_handler; + sigemptyset(&sa.sa_mask); + sigemptyset(&ss); + sigaddset(&ss, SIGUSR1); + sigprocmask(SIG_UNBLOCK, &ss, NULL); + sa.sa_mask = ss; + sa.sa_flags = SA_NOCLDSTOP | SA_SIGINFO; + cverb << vperf << "operf-read calling sigaction" << endl; + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + cverb << vperf << "operf-read init: sigaction failed; errno is: " + << strerror(errno) << endl; + _exit(EXIT_FAILURE); + } + } operf_read::~operf_read() @@ -427,72 +425,66 @@ operf_read::~operf_read() int operf_read::readPerfHeader(void) { - int ret = 0; - - opHeader.data_size = 0; - istrm.open(inputFname.c_str(), ios_base::in); - if (!istrm.good()) { - return -1; - } - istrm.peek(); - if (istrm.eof()) { - cverb << vperf << "operf_read::readPerfHeader: Empty profile data file." << endl; - valid = false; - return OP_PERF_HANDLED_ERROR; - } - cverb << vperf << "operf_read: successfully opened input file " << inputFname << endl; - read_op_header_info_with_ifstream(); - valid = true; - cverb << vperf << "Successfully read perf header" << endl; - - return ret; -} - -void operf_read::read_op_header_info_with_ifstream(void) -{ struct OP_file_header fheader; - istrm.seekg(0, ios_base::beg); - - if (op_read_from_stream(istrm, (char *)&fheader, sizeof(fheader)) != sizeof(fheader)) { - throw runtime_error("Error: input file " + inputFname + " does not have enough data for header"); + string errmsg; + int num_fattrs; + size_t fattr_size; + vector<struct op_file_attr> f_attr_cache; + + errno = 0; + if (read(sample_data_fd, &fheader, sizeof(fheader)) != sizeof(fheader)) { + errmsg = "Error reading header on sample data pipe: " + string(strerror(errno)); + goto fail; } - if (memcmp(&fheader.magic, __op_magic, sizeof(fheader.magic))) - throw runtime_error("Error: input file " + inputFname + " does not have expected header data"); + if (memcmp(&fheader.magic, __op_magic, sizeof(fheader.magic))) { + errmsg = "Error: operf sample data does not have expected header data"; + goto fail; + } cverb << vperf << "operf magic number " << (char *)&fheader.magic << " matches expected __op_magic " << __op_magic << endl; - opHeader.attr_offset = fheader.attrs.offset; - opHeader.data_offset = fheader.data.offset; - opHeader.data_size = fheader.data.size; - size_t fattr_size = sizeof(struct op_file_attr); + fattr_size = sizeof(struct op_file_attr); if (fattr_size != fheader.attr_size) { - string msg = "Error: perf_events binary incompatibility. Event data collection was apparently " + errmsg = "Error: perf_events binary incompatibility. Event data collection was apparently " "performed under a different kernel version than current."; - throw runtime_error(msg); + goto fail; } - int num_fattrs = fheader.attrs.size/fheader.attr_size; + num_fattrs = fheader.attrs.size/fheader.attr_size; cverb << vperf << "num_fattrs is " << num_fattrs << endl; - istrm.seekg(opHeader.attr_offset, ios_base::beg); for (int i = 0; i < num_fattrs; i++) { struct op_file_attr f_attr; streamsize fattr_size = sizeof(f_attr); - if (op_read_from_stream(istrm, (char *)&f_attr, fattr_size) != fattr_size) - throw runtime_error("Error: Unexpected end of input file " + inputFname + "."); + if (read(sample_data_fd, (char *)&f_attr, fattr_size) != fattr_size) { + errmsg = "Error reading file attr on sample data pipe: " + string(strerror(errno)); + goto fail; + } opHeader.h_attrs[i].attr = f_attr.attr; - streampos next_f_attr = istrm.tellg(); + f_attr_cache.push_back(f_attr); + } + for (int i = 0; i < num_fattrs; i++) { + vector<struct op_file_attr>::iterator it = f_attr_cache.begin(); + struct op_file_attr f_attr = *(it); int num_ids = f_attr.ids.size/sizeof(u64); - istrm.seekg(f_attr.ids.offset, ios_base::beg); + for (int id = 0; id < num_ids; id++) { u64 perf_id; streamsize perfid_size = sizeof(perf_id); - if (op_read_from_stream(istrm, (char *)& perf_id, perfid_size) != perfid_size) - throw runtime_error("Error: Unexpected end of input file " + inputFname + "."); + if (read(sample_data_fd, (char *)& perf_id, perfid_size) != perfid_size) { + errmsg = "Error reading perf ID on sample data pipe: " + string(strerror(errno)); + goto fail; + } cverb << vperf << "Perf header: id = " << hex << (unsigned long long)perf_id << endl; opHeader.h_attrs[i].ids.push_back(perf_id); } - istrm.seekg(next_f_attr, ios_base::beg); + } - istrm.close(); + valid = true; + cverb << vperf << "Successfully read perf header" << endl; + return 0; + +fail: + cerr << errmsg; + return -1; } int operf_read::get_eventnum_by_perf_event_id(u64 id) const @@ -507,32 +499,50 @@ int operf_read::get_eventnum_by_perf_event_id(u64 id) const return -1; } +int operf_read::_get_one_perf_event(event_t * event) +{ + static size_t pe_header_size = sizeof(perf_event_header); + char * evt = (char *)event; + ssize_t num_read; + perf_event_header * header = (perf_event_header *)event; + errno = 0; + if ((num_read = read(sample_data_fd, header, pe_header_size)) < 0) { + cverb << vdebug << "Read 1 of sample data pipe returned with " << strerror(errno) << endl; + return -1; + } else if (num_read == 0) { + return -1; + } + evt += pe_header_size; + if (!header->size) + return -1; + if ((num_read = read(sample_data_fd, evt, header->size - pe_header_size)) < 0) { + cverb << vdebug << "Read 2 of sample data pipe returned with " << strerror(errno) << endl; + return -1; + } else if (num_read == 0) { + return -1; + } + return 0; +} + int operf_read::convertPerfData(void) { int num_bytes = 0; - struct mmap_info info; - info.file_data_offset = opHeader.data_offset; - info.file_data_size = opHeader.data_size; - info.traceFD = open(inputFname.c_str(), O_RDONLY); - if (op_mmap_trace_file(info) < 0) { - close(info.traceFD); - throw runtime_error("Error: Unable to mmap operf data file"); - } + // Allocate way more than enough space for a really big event with a long callchain + event_t * event = (event_t *)xmalloc(65536); - cverb << vdebug << "Converting operf.data to oprofile sample data format" << endl; - cverb << vdebug << "data size is " << hex << info.file_data_size << "; data offset is " << info.file_data_offset << endl; - cverb << vdebug << "head is " << hex << info.head << endl; + cverb << vdebug << "Converting operf data to oprofile sample data format" << endl; cverb << vdebug << "sample type is " << hex << opHeader.h_attrs[0].attr.sample_type << endl; first_time_processing = true; + memset(event, '\0', 65536); while (1) { streamsize rec_size = 0; - event_t * event = op_get_perf_event(info); - if (event == NULL) { + if (_get_one_perf_event(event) < 0) { break; } rec_size = event->header.size; - op_write_event(event, opHeader.h_attrs[0].attr.sample_type); + if (op_write_event(event, opHeader.h_attrs[0].attr.sample_type) < 0) + break; num_bytes += rec_size; } first_time_processing = false; @@ -548,12 +558,12 @@ int operf_read::convertPerfData(void) delete images_it++->second; all_images_map.clear(); - close(info.traceFD); char * cbuf; cbuf = (char *)xmalloc(operf_options::session_dir.length() + 5); strcpy(cbuf, operf_options::session_dir.c_str()); strcat(cbuf, "/abi"); op_write_abi_to_file(cbuf); free(cbuf); + free(event); return num_bytes; } diff --git a/libperf_events/operf_counter.h b/libperf_events/operf_counter.h index 984fc01..ce1198e 100644 --- a/libperf_events/operf_counter.h +++ b/libperf_events/operf_counter.h @@ -77,12 +77,12 @@ public: * For single app profiling, set sys_wide=false, the_pid=<processID-to-profile>, * and pid_running=true if profiling an already active process; otherwise false. */ - operf_record(std::string outfile, bool sys_wide, pid_t the_pid, bool pid_running, + operf_record(int output_pipe_fd, bool sys_wide, pid_t the_pid, bool pid_running, std::vector<operf_event_t> & evts, OP_perf_utils::vmlinux_info_t vi, bool callgraph, bool separate_by_cpu); ~operf_record(); void recordPerfData(void); - int out_fd(void) const { return outputFile; } + int out_fd(void) const { return output_fd; } void add_to_total(int n) { total_bytes_recorded += n; } int get_total_bytes_recorded(void) const { return total_bytes_recorded; } void register_perf_event_id(unsigned counter, u64 id, perf_event_attr evt_attr); @@ -93,7 +93,7 @@ private: void setup(void); int prepareToRecord(int counter, int cpu, int fd); void write_op_header_info(void); - int outputFile; + int output_fd; struct pollfd * poll_data; std::vector< std::vector<struct mmap_data> > samples_array; int num_cpus; @@ -115,7 +115,7 @@ private: class operf_read { public: operf_read(void) { valid = false; } - void init(std::string infile, std::string samples_dir, op_cpu cputype, + void init(int sample_data_pipe_fd, std::string samples_dir, op_cpu cputype, std::vector<operf_event_t> & evts); ~operf_read(); int readPerfHeader(void); @@ -125,14 +125,14 @@ public: inline const operf_event_t * get_event_by_counter(u32 counter) { return &evts[counter]; } private: - std::string inputFname; + int sample_data_fd; std::string sampledir; std::ifstream istrm; struct OP_header opHeader; std::vector<operf_event_t> evts; bool valid; op_cpu cpu_type; - void read_op_header_info_with_ifstream(void); + int _get_one_perf_event(event_t *); }; diff --git a/libperf_events/operf_event.h b/libperf_events/operf_event.h index 1513b0e..95feb9b 100644 --- a/libperf_events/operf_event.h +++ b/libperf_events/operf_event.h @@ -107,12 +107,6 @@ struct sample_data { struct ip_callchain * callchain; }; -struct mmap_info { - u64 offset, file_data_size, file_data_offset, head; - char * buf; - int traceFD; -}; - typedef struct operf_event { char name[OP_MAX_EVT_NAME_LEN]; @@ -130,7 +124,6 @@ typedef struct operf_event { } operf_event_t; struct op_file_section { - u64 offset; u64 size; }; @@ -142,7 +135,6 @@ struct op_file_attr { struct op_header_evt_info { struct perf_event_attr attr; std::vector<u64> ids; - off_t id_offset; }; struct OP_file_header { @@ -155,9 +147,6 @@ struct OP_file_header { struct OP_header { struct op_header_evt_info h_attrs[OP_MAX_NUM_EVENTS]; - off_t attr_offset; - off_t data_offset; - u64 data_size; }; /* Some of the above definitions were borrowed from the perf tool's util/event.h file. */ diff --git a/libperf_events/operf_sfile.cpp b/libperf_events/operf_sfile.cpp index cc71439..ada47e1 100644 --- a/libperf_events/operf_sfile.cpp +++ b/libperf_events/operf_sfile.cpp @@ -41,17 +41,7 @@ static unsigned long sfile_hash(struct operf_transient const * trans, struct operf_kernel_image * ki) { unsigned long val = 0; - size_t fname_len = strlen(trans->image_name); - /* fname_ptr will point at the first character in the image name - * which we'll use for hashing. We don't need to hash on the whole - * image name to get a decent hash. Arbitrarily, we'll hash on the - * first 16 chars or fname_len (if fname_len < 16). - */ - unsigned int fname_hash_len = (fname_len < 16) ? fname_len : 16; - const char * fname_ptr = trans->image_name + fname_len - fname_hash_len; - - val ^= trans->tid << 2; val ^= trans->tgid << 2; if (operf_options::separate_cpu) @@ -61,12 +51,28 @@ sfile_hash(struct operf_transient const * trans, struct operf_kernel_image * ki) val ^= ki->start >> 14; val ^= ki->end >> 7; } - for (unsigned int i = 0; i < fname_hash_len; i++) - val = ((val << 5) + val) ^ fname_ptr[i]; if (trans->is_anon) { val ^= trans->start_addr >> VMA_SHIFT; val ^= trans->end_addr >> (VMA_SHIFT + 1); + } else { + size_t fname_len = trans->app_len; + /* fname_ptr will point at the first character in the binary file's name + * which we'll use for hashing. We don't need to hash on the whole + * pathname to get a decent hash. Arbitrarily, we'll hash on the + * last 16 chars (or last fname_len chars if fname_len < 16). + */ + unsigned int fname_hash_len = (fname_len < 16) ? fname_len : 16; + const char * fname_ptr = trans->app_filename + fname_len - fname_hash_len; + for (unsigned int i = 0; i < fname_hash_len; i++) + val = ((val << 5) + val) ^ fname_ptr[i]; + + // Now do the same for image name + fname_len = trans->image_len; + fname_hash_len = (fname_len < 16) ? fname_len : 16; + fname_ptr = trans->image_name + fname_len - fname_hash_len; + for (unsigned int i = 0; i < fname_hash_len; i++) + val = ((val << 5) + val) ^ fname_ptr[i]; } return val & HASH_BITS; @@ -75,10 +81,11 @@ sfile_hash(struct operf_transient const * trans, struct operf_kernel_image * ki) static int do_match(struct operf_sfile const * sf, struct operf_kernel_image const * ki, - bool is_anon, const char * image_name, const char * appname, pid_t tgid, - pid_t tid, unsigned int cpu) + bool is_anon, const char * image_name, size_t image_len, + const char * appname, size_t app_len, + pid_t tgid, pid_t tid, unsigned int cpu) { - size_t len1, len2; + size_t shortest_image_len, shortest_app_len; /* this is a simplified check for "is a kernel image" AND * "is the right kernel image". Also handles no-vmlinux @@ -87,15 +94,13 @@ do_match(struct operf_sfile const * sf, struct operf_kernel_image const * ki, if (sf->kernel != ki) return 0; - if (sf->is_anon != is_anon) + if (sf->tid != tid || sf->tgid != tgid) return 0; - len1 = strlen(sf->app_filename); - len2 = strlen(appname); - if ((len1 != len2) || strncmp(sf->app_filename, appname, len1)) + if (sf->is_anon != is_anon) return 0; - if (sf->tid != tid || sf->tgid != tgid) + if ((sf->app_len != app_len) || (sf->image_len != image_len)) return 0; if (operf_options::separate_cpu) { @@ -106,28 +111,22 @@ do_match(struct operf_sfile const * sf, struct operf_kernel_image const * ki, if (ki) return 1; - return (!strncmp(sf->image_name, image_name, len1)); + shortest_image_len = sf->image_len < image_len ? sf->image_len : image_len; + if (strncmp(sf->image_name, image_name, shortest_image_len)) + return 0; -} - -static int -trans_match(struct operf_transient const * trans, struct operf_sfile const * sfile, - struct operf_kernel_image const * ki) -{ - return do_match(sfile, ki, - trans->is_anon, - trans->image_name, trans->app_filename, - trans->tgid, trans->tid, trans->cpu); + shortest_app_len = sf->app_len < app_len ? sf->app_len : app_len; + return !strncmp(sf->app_filename, appname, shortest_app_len); } - int operf_sfile_equal(struct operf_sfile const * sf, struct operf_sfile const * sf2) { return do_match(sf, sf2->kernel, sf2->is_anon, - sf2->image_name, sf2->app_filename, + sf2->image_name, sf2->image_len, + sf2->app_filename, sf2->app_len, sf2->tgid, sf2->tid, sf2->cpu); } @@ -143,13 +142,14 @@ create_sfile(unsigned long hash, struct operf_transient const * trans, sf = (operf_sfile *)xmalloc(sizeof(struct operf_sfile)); sf->hashval = hash; - sf->tid = trans->tid; sf->tgid = trans->tgid; sf->cpu = 0; sf->kernel = ki; sf->image_name = trans->image_name; sf->app_filename = trans->app_filename; + sf->image_len = trans->image_len; + sf->app_len = trans->app_len; sf->is_anon = trans->is_anon; sf->start_addr = trans->start_addr; sf->end_addr = trans->end_addr; @@ -196,7 +196,11 @@ struct operf_sfile * operf_sfile_find(struct operf_transient const * trans) hash = sfile_hash(trans, ki); list_for_each(pos, &hashes[hash]) { sf = list_entry(pos, struct operf_sfile, hash); - if (trans_match(trans, sf, ki)) { + if (do_match(sf, ki, + trans->is_anon, + trans->image_name, trans->image_len, + trans->app_filename, trans->app_len, + trans->tgid, trans->tid, trans->cpu)) { operf_sfile_get(sf); goto lru; } diff --git a/libperf_events/operf_sfile.h b/libperf_events/operf_sfile.h index 5ea02e5..9b91180 100644 --- a/libperf_events/operf_sfile.h +++ b/libperf_events/operf_sfile.h @@ -41,6 +41,7 @@ struct operf_sfile { unsigned long hashval; const char * image_name; const char * app_filename; + size_t image_len, app_len; /** thread ID, -1 if not set */ pid_t tid; /** thread group ID, -1 if not set */ @@ -89,6 +90,7 @@ struct operf_transient { vma_t pc; const char * image_name; const char * app_filename; + size_t image_len, app_len; vma_t last_pc; int event; u64 sample_id; diff --git a/libperf_events/operf_utils.cpp b/libperf_events/operf_utils.cpp index 4ff99c0..42eecce 100644 --- a/libperf_events/operf_utils.cpp +++ b/libperf_events/operf_utils.cpp @@ -38,6 +38,7 @@ extern verbose vmisc; extern volatile bool quit; +extern volatile bool read_quit; extern operf_read operfRead; extern int sample_reads; extern unsigned int pagesize; @@ -161,22 +162,26 @@ static void __handle_comm_event(event_t * event) return; } #endif - cverb << vperf << "PERF_RECORD_COMM for " << event->comm.comm << endl; + if (cverb << vperf) + cout << "PERF_RECORD_COMM for " << event->comm.comm << endl; map<pid_t, operf_process_info *>::iterator it; it = process_map.find(event->comm.pid); if (it == process_map.end()) { operf_process_info * proc = new operf_process_info(event->comm.pid, app_name ? app_name : event->comm.comm, app_name != NULL, true); - cverb << vperf << "Adding new proc info to collection for PID " << event->comm.pid << endl; + if (cverb << vperf) + cout << "Adding new proc info to collection for PID " << event->comm.pid << endl; process_map[event->comm.pid] = proc; } else { // sanity check -- should not get a second COMM event for same PID if (it->second->is_valid()) { - cverb << vperf << "Received extraneous COMM event for " << event->comm.comm - << ", PID " << event->comm.pid << endl; + if (cverb << vperf) + cout << "Received extraneous COMM event for " << event->comm.comm + << ", PID " << event->comm.pid << endl; } else { - cverb << vperf << "Processing deferred mappings" << endl; + if (cverb << vperf) + cout << "Processing deferred mappings" << endl; it->second->process_deferred_mappings(event->comm.comm); } } @@ -212,9 +217,11 @@ static void __handle_mmap_event(event_t * event) mapping->end_addr = (event->mmap.len == 0ULL)? 0ULL : mapping->start_addr + event->mmap.len - 1; mapping->pgoff = event->mmap.pgoff; - cverb << vperf << "PERF_RECORD_MMAP for " << event->mmap.filename << endl; - cverb << vperf << "\tstart_addr: " << hex << mapping->start_addr; - cverb << vperf << "; end addr: " << mapping->end_addr << endl; + if (cverb << vperf) { + cout << "PERF_RECORD_MMAP for " << event->mmap.filename << endl; + cout << "\tstart_addr: " << hex << mapping->start_addr; + cout << "; end addr: " << mapping->end_addr << endl; + } if (event->header.misc & PERF_RECORD_MISC_USER) all_images_map.insert(pair<string, struct operf_mmap *>(image_basename, mapping)); @@ -244,8 +251,9 @@ static void __handle_mmap_event(event_t * event) operf_process_info * proc = new operf_process_info(event->mmap.pid, app_name ? app_name : NULL, app_name != NULL, false); proc->add_deferred_mapping(mapping); - cverb << vperf << "Added deferred mapping " << event->mmap.filename - << " for new process_info object" << endl; + if (cverb << vperf) + cout << "Added deferred mapping " << event->mmap.filename + << " for new process_info object" << endl; process_map[event->mmap.pid] = proc; #ifdef _TEST_DEFERRED_MAPPING if (!do_comm_event) { @@ -255,8 +263,9 @@ static void __handle_mmap_event(event_t * event) #endif } else if (!it->second->is_valid()) { it->second->add_deferred_mapping(mapping); - cverb << vperf << "Added deferred mapping " << event->mmap.filename - << " for existing but incomplete process_info object" << endl; + if (cverb << vperf) + cout << "Added deferred mapping " << event->mmap.filename + << " for existing but incomplete process_info object" << endl; } else { it->second->process_new_mapping(mapping); } @@ -271,7 +280,8 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data) if (trans.tgid == data->pid) { proc = trans.cur_procinfo; - cverb << vmisc << "trans.tgid == data->pid : " << data->pid << endl; + if (cverb << vmisc) + cout << "trans.tgid == data->pid : " << data->pid << endl; } else { // Find operf_process info for data.tgid. @@ -330,9 +340,12 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data) op_mmap = proc->find_mapping_for_sample(data->ip); } if (op_mmap) { - cverb << vperf << "Found mmap for sample; image_name is " << op_mmap->filename << endl; + if (cverb << vperf) + cout << "Found mmap for sample; image_name is " << op_mmap->filename << endl; trans.image_name = op_mmap->filename; trans.app_filename = proc->get_app_name().c_str(); + trans.image_len = strlen(trans.image_name); + trans.app_len = strlen(trans.app_filename); trans.start_addr = op_mmap->start_addr; trans.end_addr = op_mmap->end_addr; trans.tgid = data->pid; @@ -367,7 +380,8 @@ static void __handle_callchain(u64 * array, struct sample_data * data) { data->callchain = (struct ip_callchain *) array; if (data->callchain->nr) { - cverb << vperf << "Processing callchain" << endl; + if (cverb << vperf) + cout << "Processing callchain" << endl; for (int i = 0; i < data->callchain->nr; i++) { data->ip = data->callchain->ips[i]; if (data->ip >= PERF_CONTEXT_MAX) { @@ -402,7 +416,7 @@ static void __handle_callchain(u64 * array, struct sample_data * data) static void __handle_sample_event(event_t * event, u64 sample_type) { struct sample_data data; - bool early_match = false; + bool found_trans = false; const struct operf_mmap * op_mmap = NULL; u64 *array = event->sample.array; @@ -457,9 +471,10 @@ static void __handle_sample_event(event_t * event, u64 sample_type) goto out; } - cverb << vperf << "(IP, " << event->header.misc << "): " << dec << data.pid << "/" - << data.tid << ": " << hex << (unsigned long long)data.ip - << endl << "\tdata ID: " << data.id << endl; + if (cverb << vperf) + cout << "(IP, " << event->header.misc << "): " << dec << data.pid << "/" + << data.tid << ": " << hex << (unsigned long long)data.ip + << endl << "\tdata ID: " << data.id << endl; // Verify the sample. trans.event = operfRead.get_eventnum_by_perf_event_id(data.id); @@ -471,42 +486,46 @@ static void __handle_sample_event(event_t * event, u64 sample_type) /* Check for the common case first -- i.e., where the current sample is from * the same context as the previous sample. For the "no-vmlinux" case, start_addr * and end_addr will be zero, so need to make sure we detect that. + * The last resort (and most expensive) is to call __get_operf_trans() if the + * sample cannot be matched up with a previous tran object. */ if (trans.in_kernel) { if (trans.image_name && trans.tgid == data.pid) { // For the no-vmlinux case . . . if ((trans.start_addr == 0ULL) && (trans.end_addr == 0ULL)) { trans.pc = data.ip; - early_match = true; + found_trans = true; // For the case where a vmlinux file is passed in . . . } else if (data.ip >= trans.start_addr && data.ip < trans.end_addr) { trans.pc = data.ip; - early_match = true; + found_trans = true; } } } else { if (trans.tgid == data.pid && data.ip >= trans.start_addr && data.ip < trans.end_addr) { trans.tid = data.tid; trans.pc = data.ip - trans.start_addr; - early_match = true; + found_trans = true; + } else if (__get_operf_trans(&data)) { + trans.current = operf_sfile_find(&trans); + found_trans = true; } + } - if (early_match || __get_operf_trans(&data)) { - trans.current = operf_sfile_find(&trans); - /* - * trans.current may be NULL if a kernel sample falls through - * the cracks, or if it's a sample from an anon region we couldn't find - */ - if (trans.current) { - /* log the sample or arc */ - operf_sfile_log_sample(&trans); + /* + * trans.current may be NULL if a kernel sample falls through + * the cracks, or if it's a sample from an anon region we couldn't find + */ + if (found_trans && trans.current) { + /* log the sample or arc */ + operf_sfile_log_sample(&trans); - update_trans_last(&trans); - if (sample_type & PERF_SAMPLE_CALLCHAIN) - __handle_callchain(array, &data); - goto out; - } + update_trans_last(&trans); + if (sample_type & PERF_SAMPLE_CALLCHAIN) + __handle_callchain(array, &data); + goto out; } + if (first_time_processing) { event_t * ev = (event_t *)xmalloc(event->header.size); memcpy(ev, event, event->header.size); @@ -519,17 +538,13 @@ out: /* This function is used by operf_read::convertPerfData() to convert perf-formatted - * data to oprofile sample data files. After the header information in the perf data file, - * the next piece of data is the PERF_RECORD_COMM record which tells us the name of the + * data to oprofile sample data files. After the header information in the perf sample data, + * the next piece of data is typically the PERF_RECORD_COMM record which tells us the name of the * application/command being profiled. This is followed by PERF_RECORD_MMAP records * which indicate what binary executables and libraries were mmap'ed into process memory * when profiling began. Additional PERF_RECORD_MMAP records may appear later in the data * stream (e.g., dlopen for single-process profiling or new process startup for system-wide - * profiling. For such additional PERF_RECORD_MMAP records, it's possible that samples - * may be collected and written to the perf data file for those mmap'ed binaries prior - * to the reception/recording of the corresponding PERF_RECORD_MMAP record, in which case, - * those samples are discarded and we increment the appropriate "lost sample" stat. - * TODO: What's the name of the stat we increment? + * profiling. */ int OP_perf_utils::op_write_event(event_t * event, u64 sample_type) { @@ -554,7 +569,6 @@ int OP_perf_utils::op_write_event(event_t * event, u64 sample_type) __handle_comm_event(event); return 0; case PERF_RECORD_EXIT: - cverb << vperf << "PERF_RECORD_EXIT found in trace" << endl; return 0; default: // OK, ignore all other header types. @@ -586,17 +600,11 @@ void OP_perf_utils::op_perfrecord_sigusr1_handler(int sig __attribute__((unused) quit = true; } -int OP_perf_utils::op_read_from_stream(ifstream & is, char * buf, streamsize sz) +void OP_perf_utils::op_perfread_sigusr1_handler(int sig __attribute__((unused)), + siginfo_t * siginfo __attribute__((unused)), + void *u_context __attribute__((unused))) { - int rc = 0; - is.read(buf, sz); - if (!is.eof() && is.fail()) { - cerr << "Internal error: Failed to read from input file." << endl; - rc = -1; - } else { - rc = is.gcount(); - } - return rc; + read_quit = true; } int OP_perf_utils::op_write_output(int output, void *buf, size_t size) @@ -606,7 +614,7 @@ int OP_perf_utils::op_write_output(int output, void *buf, size_t size) int ret = write(output, buf, size); if (ret < 0) { - string errmsg = "Internal error: Failed to write to output file. errno is "; + string errmsg = "Internal error: Failed to write sample data to pipe. errno is "; errmsg += strerror(errno); throw runtime_error(errmsg); } @@ -670,7 +678,8 @@ static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, o mmap.header.size = (sizeof(mmap) - (sizeof(mmap.filename) - size)); int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size); - cverb << vperf << "Created MMAP event for " << imagename << endl; + if (cverb << vperf) + cout << "Created MMAP event for " << imagename << endl; pr->add_to_total(num); } } @@ -769,11 +778,13 @@ out: op_record_process_exec_mmaps(pid, tgid, output_fd, pr); fclose(fp); - if (ret) + if (ret) { cverb << vperf << "couldn't get app name and tgid for pid " << dec << pid << " from /proc fs." << endl; - else - cverb << vperf << "Created COMM event for " << comm.comm << endl; + } else { + if (cverb << vperf) + cout << "Created COMM event for " << comm.comm << endl; + } return ret; } @@ -787,7 +798,8 @@ int OP_perf_utils::op_record_process_info(bool system_wide, pid_t pid, operf_rec int output_fd) { int ret; - cverb << vperf << "op_record_process_info" << endl; + if (cverb << vperf) + cout << "op_record_process_info" << endl; if (!system_wide) { ret = _record_one_process_info(pid, system_wide, pr, output_fd); } else { @@ -883,8 +895,9 @@ static void _record_module_info(int output_fd, operf_record * pr) mmap.header.size = (sizeof(mmap) - (sizeof(mmap.filename) - size)); int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size); - cverb << vperf << "Created MMAP event for " << module_name << ". Size: " - << module_size << "; start addr: " << start_address << endl; + if (cverb << vperf) + cout << "Created MMAP event for " << module_name << ". Size: " + << module_size << "; start addr: " << start_address << endl; pr->add_to_total(num); free(line); } @@ -917,8 +930,9 @@ void OP_perf_utils::op_record_kernel_info(string vmlinux_file, u64 start_addr, u mmap.header.size = (sizeof(mmap) - (sizeof(mmap.filename) - size)); int num = op_write_output(output_fd, &mmap, mmap.header.size); - cverb << vperf << "Created MMAP event for " <<mmap.filename << ". Size: " - << mmap.len << "; start addr: " << mmap.start << endl; + if (cverb << vperf) + cout << "Created MMAP event of size " << mmap.header.size << " for " <<mmap.filename << ". length: " + << mmap.len << "; start addr: " << mmap.start << endl; pr->add_to_total(num); _record_module_info(output_fd, pr); } @@ -965,101 +979,6 @@ void OP_perf_utils::op_get_kernel_event_data(struct mmap_data *md, operf_record } -static size_t mmap_size; -static size_t pg_sz; - -static int __mmap_trace_file(struct mmap_info & info) -{ - int mmap_prot = PROT_READ; - int mmap_flags = MAP_SHARED; - - info.buf = (char *) mmap(NULL, mmap_size, mmap_prot, - mmap_flags, info.traceFD, info.offset); - if (info.buf == MAP_FAILED) { - cerr << "Error: mmap failed with errno:\n\t" << strerror(errno) << endl; - return -1; - } - else { - cverb << vperf << hex << "mmap with the following parameters" << endl - << "\tinfo.head: " << info.head << endl - << "\tinfo.offset: " << info.offset << endl; - return 0; - } -} - -int OP_perf_utils::op_mmap_trace_file(struct mmap_info & info) -{ - u64 shift; - if (!pg_sz) - pg_sz = sysconf(_SC_PAGESIZE); - if (!mmap_size) { - if (MMAP_WINDOW_SZ > info.file_data_size) { - mmap_size = info.file_data_size; - } else { - mmap_size = MMAP_WINDOW_SZ; - } - } - info.offset = 0; - info.head = info.file_data_offset; - shift = pg_sz * (info.head / pg_sz); - info.offset += shift; - info.head -= shift; - return __mmap_trace_file(info); -} - -event_t * OP_perf_utils::op_get_perf_event(struct mmap_info & info) -{ - uint32_t size; - event_t * event; - - if (info.offset + info.head >= info.file_data_offset + info.file_data_size) - return NULL; - - if (!pg_sz) - pg_sz = sysconf(_SC_PAGESIZE); - -try_again: - event = (event_t *)(info.buf + info.head); - - if ((mmap_size != info.file_data_size) && - (((info.head + sizeof(event->header)) > mmap_size) || - (info.head + event->header.size > mmap_size))) { - int ret; - u64 shift = pg_sz * (info.head / pg_sz); - cverb << vperf << "Remapping perf data file" << endl; - ret = munmap(info.buf, mmap_size); - if (ret) { - string errmsg = "Internal error: munmap of perf data file failed with errno: "; - errmsg += strerror(errno); - throw runtime_error(errmsg); - } - - info.offset += shift; - info.head -= shift; - ret = __mmap_trace_file(info); - if (ret) { - string errmsg = "Internal error: mmap of perf data file failed with errno: "; - errmsg += strerror(errno); - throw runtime_error(errmsg); - } - goto try_again; - } - - size = event->header.size; - - // The tail end of the operf data file may be zero'ed out, so we assume if we - // find size==0, we're now in that area of the file, so we're done. - if (size == 0) - return NULL; - - info.head += size; - if (info.offset + info.head >= info.file_data_offset + info.file_data_size) - return NULL; - - return event; -} - - int OP_perf_utils::op_get_next_online_cpu(DIR * dir, struct dirent *entry) { #define OFFLINE 0x30 diff --git a/libperf_events/operf_utils.h b/libperf_events/operf_utils.h index 060ee2b..9359c82 100644 --- a/libperf_events/operf_utils.h +++ b/libperf_events/operf_utils.h @@ -38,12 +38,6 @@ extern bool no_vmlinux; #define OP_APPNAME_LEN 1024 -#if BITS_PER_LONG == 64 -#define MMAP_WINDOW_SZ ULLONG_MAX -#else -#define MMAP_WINDOW_SZ (32 * 1024 * 1024ULL) -#endif - extern unsigned int op_nr_counters; static inline size_t align_64bit(u64 x) @@ -64,12 +58,12 @@ void op_get_kernel_event_data(struct mmap_data *md, operf_record * pr); void op_perfrecord_sigusr1_handler(int sig __attribute__((unused)), siginfo_t * siginfo __attribute__((unused)), void *u_context __attribute__((unused))); +void op_perfread_sigusr1_handler(int sig __attribute__((unused)), + siginfo_t * siginfo __attribute__((unused)), + void *u_context __attribute__((unused))); int op_record_process_info(bool system_wide, pid_t pid, operf_record * pr, int output_fd); int op_write_output(int output, void *buf, size_t size); int op_write_event(event_t * event, u64 sample_type); -int op_read_from_stream(std::ifstream & is, char * buf, std::streamsize sz); -int op_mmap_trace_file(struct mmap_info & info); -event_t * op_get_perf_event(struct mmap_info & info); int op_get_next_online_cpu(DIR * dir, struct dirent *entry); bool op_convert_event_vals(std::vector<operf_event_t> * evt_vec); void op_reprocess_unresolved_events(u64 sample_type); diff --git a/pe_profiling/operf.cpp b/pe_profiling/operf.cpp index 95bf183..ee4e3ba 100644 --- a/pe_profiling/operf.cpp +++ b/pe_profiling/operf.cpp @@ -47,8 +47,10 @@ using namespace std; typedef enum END_CODE { ALL_OK = 0, - APP_ABNORMAL_END = -2, - PERF_RECORD_ERROR = -3 + APP_ABNORMAL_END = 1, + PERF_RECORD_ERROR = 2, + PERF_READ_ERROR = 4, + PERF_BOTH_ERROR = 8 } end_code_t; // Globals @@ -60,9 +62,10 @@ double cpu_speed; char op_samples_current_dir[PATH_MAX]; uint op_nr_counters; verbose vmisc("misc"); +static void convert_sample_data(void); +static int sample_data_pipe[2]; -#define DEFAULT_OPERF_OUTFILE "operf.data" #define CALLGRAPH_MIN_COUNT_SCALE 15 static char full_pathname[PATH_MAX]; @@ -72,10 +75,9 @@ static pid_t app_PID = -1; static pid_t jitconv_pid = -1; static bool app_started; static pid_t operf_pid; +static pid_t convert_pid; static string samples_dir; -static string outputfile; static bool startApp; -static bool reset_done = false; static char start_time_str[32]; static vector<operf_event_t> events; static bool jit_conversion_running; @@ -255,6 +257,8 @@ int start_profiling_app(void) perror("Internal error: fork failed"); _exit(EXIT_FAILURE); } else if (app_PID == 0) { // child process for exec'ing app + close(sample_data_pipe[0]); + close(sample_data_pipe[1]); run_app(); } // parent @@ -270,6 +274,7 @@ int start_profiling_app(void) return -1; } else if (operf_pid == 0) { // operf-record process int ready = 0; + close(sample_data_pipe[0]); /* * Since an informative message will be displayed to the user if * an error occurs, we don't want to blow chunks here; instead, we'll @@ -281,7 +286,7 @@ int start_profiling_app(void) vi.image_name = operf_options::vmlinux; vi.start = kernel_start; vi.end = kernel_end; - operf_record operfRecord(outputfile, operf_options::system_wide, app_PID, + operf_record operfRecord(sample_data_pipe[1], operf_options::system_wide, app_PID, (operf_options::pid == app_PID), events, vi, operf_options::callgraph, operf_options::separate_cpu); @@ -308,7 +313,7 @@ int start_profiling_app(void) // start recording operfRecord.recordPerfData(); - cerr << "Total bytes recorded from perf events: " + cverb << vmisc << "Total bytes recorded from perf events: " << dec << operfRecord.get_total_bytes_recorded() << endl; operfRecord.~operf_record(); @@ -319,9 +324,6 @@ int start_profiling_app(void) goto fail_out; } fail_out: - ofstream of; - of.open(outputfile.c_str(), ios_base::trunc); - of.close(); if (startApp && !ready){ /* ready==0 means we've not yet told parent we're ready, * but the parent is reading our pipe. So we tell the @@ -376,6 +378,10 @@ static end_code_t _kill_operf_pid(void) { int waitpid_status = 0; end_code_t rc = ALL_OK; + struct timeval tv; + long long start_time_sec; + long long usec_timer; + bool keep_trying = true; // stop operf-record process if (kill(operf_pid, SIGUSR1) < 0) { @@ -387,14 +393,67 @@ static end_code_t _kill_operf_pid(void) rc = PERF_RECORD_ERROR; } else { if (WIFEXITED(waitpid_status) && (!WEXITSTATUS(waitpid_status))) { - cverb << vdebug << "waitpid for operf-record process returned OK" << endl; + cverb << vdebug << "operf-record process returned OK" << endl; } else { cerr << "operf-record process ended abnormally: " - << WEXITSTATUS(waitpid_status) << endl; + << WEXITSTATUS(waitpid_status) << endl; rc = PERF_RECORD_ERROR; } } } + + // Now stop the operf-read process (aka "convert_pid") + waitpid_status = 0; + gettimeofday(&tv, NULL); + start_time_sec = tv.tv_sec; + usec_timer = tv.tv_usec; + /* We'll initially try the waitpid with WNOHANG once every 100,000 usecs. + * If it hasn't ended within 5 seconds, we'll kill it and do one + * final wait. + */ + while (keep_trying) { + int option = WNOHANG; + gettimeofday(&tv, NULL); + if (tv.tv_sec > start_time_sec + 5) { + keep_trying = false; + option = 0; + cerr << "now trying to kill convert pid..." << endl; + + if (kill(convert_pid, SIGUSR1) < 0) { + perror("Attempt to stop operf-read process failed"); + rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR; + break; + } + } else { + /* If we exceed the 100000 usec interval or if the tv_usec + * value has rolled over to restart at 0, then we reset + * the usec_timer to current tv_usec and try waitpid. + */ + if ((tv.tv_usec % 1000000) > (usec_timer + 100000) + || (tv.tv_usec < usec_timer)) + usec_timer = tv.tv_usec; + else + continue; + } + if (waitpid(convert_pid, &waitpid_status, option) < 0) { + keep_trying = false; + if (errno != ECHILD) { + perror("waitpid for operf-read process failed"); + rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR; + } + } else { + if (WIFEXITED(waitpid_status)) { + keep_trying = false; + if (!WEXITSTATUS(waitpid_status)) { + cverb << vdebug << "operf-read process returned OK" << endl; + } else { + cerr << "operf-read process ended abnormally. Status = " + << WEXITSTATUS(waitpid_status) << endl; + rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR; + } + } + } + } return rc; } @@ -408,13 +467,40 @@ static end_code_t _run(void) sigfillset(&ss); sigprocmask(SIG_BLOCK, &ss, NULL); + // Create pipe to which operf-record process writes sample data and + // from which the operf-read process reads. + if (pipe(sample_data_pipe) < 0) { + perror("Internal error: operf-record could not create pipe"); + _exit(EXIT_FAILURE); + } + if (start_profiling_app() < 0) { return PERF_RECORD_ERROR; } // parent continues here if (startApp) cverb << vdebug << "app " << app_PID << " is running" << endl; + + /* If we're not doing system wide profiling and no app is started, then + * there's no profile data to convert. So if this condition is NOT true, + * then we'll do the convert. + */ + if (!(!app_started && !operf_options::system_wide)) { + convert_pid = fork(); + if (convert_pid < 0) { + perror("Internal error: fork failed"); + _exit(EXIT_FAILURE); + } else if (convert_pid == 0) { // child process + close(sample_data_pipe[1]); + convert_sample_data(); + } + // parent + close(sample_data_pipe[0]); + close(sample_data_pipe[1]); + } + set_signals(); + cout << "operf: Profiler started" << endl; if (startApp) { // User passed in command or program name to start cverb << vdebug << "going into waitpid on profiled app " << app_PID << endl; @@ -438,6 +524,7 @@ static end_code_t _run(void) rc = _kill_operf_pid(); } else { // User passed in --pid or --system-wide + cout << "operf: Press Ctl-c to stop profiling" << endl; cverb << vdebug << "going into waitpid on operf record process " << operf_pid << endl; if (waitpid(operf_pid, &waitpid_status, 0) < 0) { if (errno == EINTR) { @@ -453,7 +540,7 @@ static end_code_t _run(void) cverb << vdebug << "waitpid for operf-record process returned OK" << endl; } else if (WIFEXITED(waitpid_status)) { cerr << "operf-record process ended abnormally: " - << WEXITSTATUS(waitpid_status) << endl; + << WEXITSTATUS(waitpid_status) << endl; rc = PERF_RECORD_ERROR; } else if (WIFSIGNALED(waitpid_status)) { cerr << "operf-record process killed by signal " @@ -467,12 +554,10 @@ static end_code_t _run(void) static void cleanup(void) { - string cmd = "rm -f " + outputfile; free(app_name_SAVE); free(app_args); events.clear(); verbose_string.clear(); - system(cmd.c_str()); } static void _jitconv_complete(int val __attribute__((unused))) @@ -572,17 +657,17 @@ static int __delete_old_previous_sample_data(const char *fpath, } } -static void complete(void) +/* Read perf_events sample data written by the operf-record process + * through the sample_data_pipe and convert this to oprofile format + * sample files. + */ +static void convert_sample_data(void) { int rc; string current_sampledir = samples_dir + "/current/"; string previous_sampledir = samples_dir + "/previous"; current_sampledir.copy(op_samples_current_dir, current_sampledir.length(), 0); - if (!app_started && !operf_options::system_wide) { - cleanup(); - return; - } if (!operf_options::append) { int flags = FTW_DEPTH | FTW_ACTIONRETVAL; errno = 0; @@ -592,47 +677,40 @@ static void complete(void) if (errno) cerr << strerror(errno) << endl; cleanup(); - exit(1); + _exit(EXIT_FAILURE); } if (rename(current_sampledir.c_str(), previous_sampledir.c_str()) < 0) { if (errno && (errno != ENOENT)) { cerr << "Unable to move old profile data to " << previous_sampledir << endl; cerr << strerror(errno) << endl; cleanup(); - exit(1); + _exit(EXIT_FAILURE); } } - reset_done = true; } rc = mkdir(current_sampledir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); if (rc && (errno != EEXIST)) { cerr << "Error trying to create " << current_sampledir << " dir." << endl; perror("mkdir failed with"); - exit(EXIT_FAILURE); + _exit(EXIT_FAILURE); } - operfRead.init(outputfile, current_sampledir, cpu_type, events); + operfRead.init(sample_data_pipe[0], current_sampledir, cpu_type, events); if ((rc = operfRead.readPerfHeader()) < 0) { if (rc != OP_PERF_HANDLED_ERROR) - cerr << "Error: Cannot create read header info for sample file " << outputfile << endl; + cerr << "Error: Cannot create read header info for sample data " << endl; cleanup(); - exit(1); + _exit(EXIT_FAILURE); } - cverb << vdebug << "Successfully read header info for sample file " << outputfile << endl; - // TODO: We may want to do incremental conversion of the perf data, since the perf sample format - // is very inefficient to store. For example, using a simple test program that does many - // millions of memcpy's over a 12 second span of time, a profile taken via legacy oprofile, - // with --separate=all and --image=<app_name> requires ~300K of storage. Using the perf tool - // (not operf) to profile the same application creates an 18MB perf.data file!! + cverb << vdebug << "Successfully read header info for sample data " << endl; if (operfRead.is_valid()) { try { operfRead.convertPerfData(); - cerr << endl << "Use '--session-dir=" << operf_options::session_dir << "'" << endl - << "with opreport and other post-processing tools to view your profile data." - << endl; } catch (runtime_error e) { cerr << "Caught exception from operf_read::convertPerfData" << endl; cerr << e.what() << endl; + cleanup(); + _exit(EXIT_FAILURE); } } // Invoke opjitconv and set up a SIGCHLD signal for when it's done @@ -645,7 +723,8 @@ static void complete(void) if (jit_conversion_running) { kill(jitconv_pid, SIGKILL); } - cleanup(); + _exit(EXIT_SUCCESS); + } @@ -1072,7 +1151,6 @@ static void process_args(int argc, char const ** argv) } _process_session_dir(); - outputfile = samples_dir + "/" + DEFAULT_OPERF_OUTFILE; if (operf_options::evts.empty()) { // Use default event @@ -1137,6 +1215,7 @@ static void _precheck_permissions_to_samplesdir(string sampledir, bool for_curre cleanup(); exit(1); } + afile.close(); } @@ -1200,13 +1279,18 @@ int main(int argc, char const *argv[]) perror("Attempt to kill profiled app failed."); } } - if (run_result == PERF_RECORD_ERROR) { + if ((run_result == PERF_RECORD_ERROR) || (run_result == PERF_BOTH_ERROR)) { cerr << "Error running profiler" << endl; - exit(1); + } else if (run_result == PERF_READ_ERROR) { + cerr << "Error converting operf sample data to oprofile sample format" << endl; } else { cerr << "WARNING: Profile results may be incomplete due to to abend of profiled app." << endl; } + } else { + cout << endl << "Use '--session-dir=" << operf_options::session_dir << "'" << endl + << "with opreport and other post-processing tools to view your profile data." + << endl; } - complete(); - return 0; + cleanup(); + return run_result;; } -- 1.6.2.rc2 |