From: Maynard J. <may...@us...> - 2012-03-21 23:32:31
|
Add support for system-wide profiling to libperf_events and operf. While making the changes in libperf_events to support the system-wide profiling that the oprofile daemon will need to use, it seemed logical to add support for the --system-wide option in operf as an initial step to validate the functionality of libperf_events. Signed-off-by: Maynard Johnson <may...@us...> --- doc/operf.1.in | 24 ++++++++++++---- libperf_events/operf_counter.cpp | 21 +++++++++----- libperf_events/operf_counter.h | 8 +++++- libperf_events/operf_utils.cpp | 56 +++++++++++++++++++++++++++++++------- libperf_events/operf_utils.h | 2 +- pe_profiling/operf.cpp | 56 +++++++++++++++++++++++++------------ 6 files changed, 123 insertions(+), 44 deletions(-) diff --git a/doc/operf.1.in b/doc/operf.1.in index f283d25..5b7964c 100644 --- a/doc/operf.1.in +++ b/doc/operf.1.in @@ -9,17 +9,19 @@ operf \- Performance profiler tool for Linux [ .I options ] -[ --pid <pid> | [ command [ args ] ] ] +[ --system-wide | --pid <pid> | [ command [ args ] ] ] .SH DESCRIPTION Operf is a Linux tool that can be used in place of opcontrol for profiling. Operf uses Linux Performance Events Subsystem, and hence, does not need the opcontrol daemon to be running when running operf. More than that, opcontrol shouldn't be running when running operf. -.br -The profile data is saved in the file system directory (usually at -.I oprofile_data -in the current directory). +.P +By default, operf stores profiling data in <current_dir>/oprofile_data. You can change +this by way of the +.I --session-dir +option. +.P The usual post-profiling analysis tools such as .BI opreport(1) and @@ -34,8 +36,10 @@ The command or application to be profiled. .I args are the input arguments that the command or application requires. Either .I command -or +, .I --pid +or +.I --system-wide is required, but .B cannot be used simultaneously. @@ -46,6 +50,14 @@ This option enables operf to profile a running application. <PID> should be the process ID of the process you wish to profile. .br .TP +.BI "--system-wide / -s" +This option is for performing a system-wide profile. You must +have root authority to run operf in this mode. It is recommended +that when running operf with this option, the user's current working +directory should be /root or a subdirectory of /root to avoid +storing sample data files in locations accessible by regular users. +.br +.TP .BI "--verbose / -V [level]" This increases the verbosity of the output. Level could be: debug, perf_events, misc, all. diff --git a/libperf_events/operf_counter.cpp b/libperf_events/operf_counter.cpp index b83ad3f..c21a766 100644 --- a/libperf_events/operf_counter.cpp +++ b/libperf_events/operf_counter.cpp @@ -100,7 +100,6 @@ int operf_counter::perf_event_open(pid_t ppid, int cpu, unsigned event, operf_re ret = OP_PERF_HANDLED_ERROR; } else { cerr << "perf_event_open failed with " << strerror(errno) << endl; - cerr << "cpu is " << cpu << endl; } return ret; } @@ -127,23 +126,26 @@ operf_record::~operf_record() perfCounters.clear(); } -operf_record::operf_record(string outfile, pid_t the_pid, bool pid_running, +operf_record::operf_record(string outfile, bool sys_wide, pid_t the_pid, bool pid_running, vector<operf_event_t> & events, vmlinux_info_t vi) { int flags = O_CREAT|O_RDWR|O_TRUNC; struct sigaction sa; sigset_t ss; - vmlinux_file = vi.image_name; kernel_start = vi.start; kernel_end = vi.end; pid = the_pid; pid_started = pid_running; + system_wide = sys_wide; total_bytes_recorded = 0; poll_count = 0; evts = events; valid = false; + if (system_wide && (pid != -1 || pid_started)) + return; // object is not valid + opHeader.data_size = 0; outputFile = open(outfile.c_str(), flags, S_IRUSR|S_IWUSR); if (outputFile < 0) { @@ -257,9 +259,12 @@ void operf_record::setup() string err_msg; char cpus_online[129]; - cverb << vperf << "operf_record::setup() with pid_started = " << pid_started << endl; + if (system_wide) + cverb << vperf << "operf_record::setup() for system-wide profiling" << endl; + else + cverb << vperf << "operf_record::setup() with pid_started = " << pid_started << endl; - if (pid_started) { + if (!system_wide && pid_started) { /* We need to verify the existence of the passed PID before trying * perf_event_open or all hell will break loose. */ @@ -330,7 +335,7 @@ void operf_record::setup() perfCounters.push_back(tmp_pcvec); for (unsigned event = 0; event < evts.size(); event++) { evts[event].counter = event; - perfCounters[cpu].push_back(operf_counter(evts[event], !pid_started)); + perfCounters[cpu].push_back(operf_counter(evts[event], (!pid_started && !system_wide))); if ((rc = perfCounters[cpu][event].perf_event_open(pid, real_cpu, event, this)) < 0) { err_msg = "Internal Error. Perf event setup failed."; goto error; @@ -344,8 +349,8 @@ void operf_record::setup() if (!all_cpus_avail) closedir(dir); write_op_header_info(); - if (pid_started) { - if (op_record_process_info(pid, this, outputFile) < 0) { + if (pid_started || system_wide) { + if (op_record_process_info(system_wide, pid, this, outputFile) < 0) { for (int i = 0; i < num_cpus; i++) { for (unsigned int evt = 0; evt < evts.size(); evt++) ioctl(perfCounters[i][evt].get_fd(), PERF_EVENT_IOC_DISABLE); diff --git a/libperf_events/operf_counter.h b/libperf_events/operf_counter.h index f0e4ca4..20c5577 100644 --- a/libperf_events/operf_counter.h +++ b/libperf_events/operf_counter.h @@ -71,7 +71,11 @@ private: class operf_record { public: - operf_record(std::string outfile, pid_t the_pid, bool pid_running, + /* For system-wide profiling, set sys_wide=true, the_pid=-1, and pid_running=false. + * For single app profiling, set sys_wide=false, the_pid=<processID-to-profile>, + * and pid_running=true if profiling an already active process; otherwise false. + */ + operf_record(std::string outfile, bool sys_wide, pid_t the_pid, bool pid_running, std::vector<operf_event_t> & evts, OP_perf_utils::vmlinux_info_t vi); ~operf_record(); void recordPerfData(void); @@ -82,6 +86,7 @@ public: bool get_valid(void) { return valid; } private: + void create(std::string outfile, std::vector<operf_event_t> & evts); void setup(void); int prepareToRecord(int counter, int cpu, int fd); void write_op_header_info(void); @@ -91,6 +96,7 @@ private: int num_cpus; pid_t pid; bool pid_started; + bool system_wide; std::vector< std::vector<operf_counter> > perfCounters; int total_bytes_recorded; int poll_count; diff --git a/libperf_events/operf_utils.cpp b/libperf_events/operf_utils.cpp index 15f9363..8472e30 100644 --- a/libperf_events/operf_utils.cpp +++ b/libperf_events/operf_utils.cpp @@ -568,12 +568,8 @@ static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, o return; } -/* Obtain process information for an active process (where the user has - * passed in a process ID via the --pid option), and generate the - * necessary PERF_RECORD_COMM and PERF_RECORD_MMAP entries into the - * profile data stream. - */ -int OP_perf_utils::op_record_process_info(pid_t pid, operf_record * pr, int output_fd) +static int _record_one_process_info(pid_t pid, operf_record * pr, + int output_fd) { struct comm_event comm; char fname[PATH_MAX]; @@ -585,8 +581,6 @@ int OP_perf_utils::op_record_process_info(pid_t pid, operf_record * pr, int outp struct dirent dirent, *next; int ret = 0; - cverb << vperf << "op_record_process_info" << endl; - snprintf(fname, sizeof(fname), "/proc/%d/status", pid); fp = fopen(fname, "r"); if (fp == NULL) { @@ -628,7 +622,7 @@ int OP_perf_utils::op_record_process_info(pid_t pid, operf_record * pr, int outp if (tgid != pid) { // passed pid must have been a secondary thread comm.tid = pid; - int num = op_write_output(output_fd, &comm, comm.header.size); + int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size); pr->add_to_total(num); goto out; } @@ -650,7 +644,7 @@ int OP_perf_utils::op_record_process_info(pid_t pid, operf_record * pr, int outp comm.tid = pid; - int num = op_write_output(output_fd, &comm, comm.header.size); + int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size); pr->add_to_total(num); } closedir(tids); @@ -665,6 +659,48 @@ out: else cverb << vperf << "Created COMM event for " << comm.comm << endl; return ret; + +} + +/* Obtain process information for an active process (where the user has + * passed in a process ID via the --pid option) or all active processes + * (where system_wide==true). Then generate the necessary PERF_RECORD_COMM + * and PERF_RECORD_MMAP entries into the profile data stream. + */ +int OP_perf_utils::op_record_process_info(bool system_wide, pid_t pid, operf_record * pr, + int output_fd) +{ + int ret; + cverb << vperf << "op_record_process_info" << endl; + if (!system_wide) { + ret = _record_one_process_info(pid, pr, output_fd); + } else { + char buff[BUFSIZ]; + pid_t tgid = 0; + size_t size = 0; + DIR *pids; + struct dirent dirent, *next; + + pids = opendir("/proc"); + if (pids == NULL) { + cerr << "Unable to open /proc." << endl; + return -1; + } + + while (!readdir_r(pids, &dirent, &next) && next) { + char *end; + pid = strtol(dirent.d_name, &end, 10); + if (((errno == ERANGE && (pid == LONG_MAX || pid == LONG_MIN)) + || (errno != 0 && pid == 0)) || (end == dirent.d_name)) { + cverb << vmisc << "/proc entry " << dirent.d_name << " is not a PID" << endl; + continue; + } + if ((ret = _record_one_process_info(pid, pr, output_fd)) < 0) + break; + } + closedir(pids); + } + return ret; } void OP_perf_utils::op_record_kernel_info(string vmlinux_file, u64 start_addr, u64 end_addr, diff --git a/libperf_events/operf_utils.h b/libperf_events/operf_utils.h index 219704e..bc783c3 100644 --- a/libperf_events/operf_utils.h +++ b/libperf_events/operf_utils.h @@ -64,7 +64,7 @@ void op_get_kernel_event_data(struct mmap_data *md, operf_record * pr); void op_perfrecord_sigusr1_handler(int sig __attribute__((unused)), siginfo_t * siginfo __attribute__((unused)), void *u_context __attribute__((unused))); -int op_record_process_info(pid_t pid, operf_record * pr, int output_fd); +int op_record_process_info(bool system_wide, pid_t pid, operf_record * pr, int output_fd); int op_write_output(int output, void *buf, size_t size); int op_write_event(event_t * event); int op_read_from_stream(std::ifstream & is, char * buf, std::streamsize sz); diff --git a/pe_profiling/operf.cpp b/pe_profiling/operf.cpp index a20e2fd..e667d19 100644 --- a/pe_profiling/operf.cpp +++ b/pe_profiling/operf.cpp @@ -70,6 +70,7 @@ static pid_t operf_pid; static string samples_dir; static string outputfile; static bool startApp; +static bool reset_done = false; uint op_nr_counters; vector<operf_event_t> events; @@ -219,7 +220,7 @@ int start_profiling_app(void) { // The only process that should return from this function is the process // which invoked it. Any forked process must do _exit() rather than return(). - startApp = app_PID != operf_options::pid; + startApp = ((app_PID != operf_options::pid) && (operf_options::system_wide == false)); if (startApp) { if (pipe(app_ready_pipe) < 0 || pipe(start_app_pipe) < 0) { @@ -251,7 +252,7 @@ int start_profiling_app(void) vi.image_name = operf_options::vmlinux; vi.start = kernel_start; vi.end = kernel_end; - operf_record operfRecord(outputfile, app_PID, + operf_record operfRecord(outputfile, operf_options::system_wide, app_PID, (operf_options::pid == app_PID), events, vi); if (operfRecord.get_valid() == false) { /* If valid is false, it means that one of the "known" errors has @@ -267,6 +268,7 @@ int start_profiling_app(void) ofstream of; of.open(outputfile.c_str(), ios_base::trunc); of.close(); + cerr << "operf record init failed" << endl; cerr << "usage: operf [options] --pid=<PID> | appname [args]" << endl; // Exit with SUCCESS to avoid the unnecessary "operf-record process ended // abnormally" message @@ -322,7 +324,8 @@ int start_profiling_app(void) } } } - app_started = true; + if (!operf_options::system_wide) + app_started = true; // parent returns return 0; @@ -369,7 +372,8 @@ static end_code_t _run(void) return PERF_RECORD_ERROR; } // parent continues here - cverb << vdebug << "app " << app_PID << " is running" << endl; + if (startApp) + cverb << vdebug << "app " << app_PID << " is running" << endl; set_signals(); if (startApp) { // User passed in command or program name to start @@ -393,16 +397,16 @@ static end_code_t _run(void) } rc = _kill_operf_pid(); } else { - // User passed in --pid - cverb << vdebug << "going into waitpid on operf record process " << app_PID << endl; + // User passed in --pid or --system-wide + cverb << vdebug << "going into waitpid on operf record process " << operf_pid << endl; if (waitpid(operf_pid, &waitpid_status, 0) < 0) { if (errno == EINTR) { cverb << vdebug << "Caught ctrl-C. Killing operf-record process . . ." << endl; - _kill_operf_pid(); + rc = _kill_operf_pid(); } else { cerr << "waitpid errno is " << errno << endl; perror("waitpid for operf-record process failed"); - rc = APP_ABNORMAL_END; + rc = PERF_RECORD_ERROR; } } else { if (WIFEXITED(waitpid_status) && (!WEXITSTATUS(waitpid_status))) { @@ -410,7 +414,11 @@ static end_code_t _run(void) } else if (WIFEXITED(waitpid_status)) { cerr << "operf-record process ended abnormally: " << WEXITSTATUS(waitpid_status) << endl; - rc = APP_ABNORMAL_END; + rc = PERF_RECORD_ERROR; + } else if (WIFSIGNALED(waitpid_status)) { + cerr << "operf-record process killed by signal " + << WTERMSIG(waitpid_status) << endl; + rc = PERF_RECORD_ERROR; } } } @@ -446,7 +454,7 @@ static void complete(void) string current_sampledir = samples_dir + "/current/"; current_sampledir.copy(op_samples_current_dir, current_sampledir.length(), 0); - if (!app_started) { + if (!app_started && !operf_options::system_wide) { cleanup(); return; } @@ -461,8 +469,9 @@ static void complete(void) cleanup(); exit(1); } + reset_done = true; } - rc = mkdir(current_sampledir.c_str(), S_IRWXU); + rc = mkdir(current_sampledir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); if (rc && (errno != EEXIST)) { cerr << "Error trying to create " << current_sampledir << " dir." << endl; perror("mkdir failed with"); @@ -486,8 +495,14 @@ static void complete(void) try { operfRead.convertPerfData(); cerr << endl << "Use '--session-dir=" << operf_options::session_dir << "'" << endl - << "with opreport and other post processing tools to view your profile data." + << "with opreport and other post-processing tools to view your profile data." << endl; + if (operf_options::system_wide) + cerr << "\nNOTE: The system-wide profile you requested was collected " + "on a per-process basis." << endl + << "Adding '--merge=tgid' when using post-processing tools will make the output" + << endl << "more readable." << endl; + cerr << "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" << endl; } catch (runtime_error e) { cerr << "Caught exception from operf_read::convertPerfData" << endl; @@ -714,13 +729,13 @@ static void _process_session_dir(void) operf_options::session_dir +="/oprofile_data"; samples_dir = operf_options::session_dir + "/samples"; free(cwd); - rc = mkdir(operf_options::session_dir.c_str(), S_IRWXU); + rc = mkdir(operf_options::session_dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); if (rc && (errno != EEXIST)) { cerr << "Error trying to create " << operf_options::session_dir << " dir." << endl; perror("mkdir failed with"); exit(EXIT_FAILURE); } - rc = mkdir(samples_dir.c_str(), S_IRWXU); + rc = mkdir(samples_dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); if (rc && (errno != EEXIST)) { cerr << "Error trying to create " << samples_dir << " dir." << endl; perror("mkdir failed with"); @@ -893,10 +908,8 @@ static void process_args(int argc, char const ** argv) __print_usage_and_exit(NULL); app_PID = operf_options::pid; } else if (operf_options::system_wide) { - cerr << "The --system-wide option is not yet supported." << endl; - exit(EXIT_FAILURE); - } - else { + app_PID = -1; + } else { __print_usage_and_exit(NULL); } /* At this point, we know which of the three kinds of profiles the user requested: @@ -973,6 +986,11 @@ int main(int argc, char const *argv[]) cpu_type = op_get_cpu_type(); cpu_speed = op_cpu_frequency(); process_args(argc, argv); + uid_t uid = geteuid(); + if (operf_options::system_wide && uid != 0) { + cerr << "You must be root to do system-wide profiling." << endl; + exit(1); + } if (cpu_type == CPU_NO_GOOD) { cerr << "Unable to ascertain cpu type. Exiting." << endl; @@ -1002,5 +1020,7 @@ int main(int argc, char const *argv[]) } } complete(); + if (operf_options::reset && reset_done == false) + cerr << "Requested reset was not performed due to problem running operf command." << endl; return 0; } -- 1.7.1 |