From: John L. <le...@mo...> - 2005-04-20 00:40:17
|
On Tue, Apr 19, 2005 at 10:32:32PM +0100, John Levon wrote: > Here it is. Lightly tested what bits I could. Please try it out and > report back. Here's a newer one (more stuff moved over only) john Index: libutil++/Makefile.am =================================================================== RCS file: /cvsroot/oprofile/oprofile/libutil++/Makefile.am,v retrieving revision 1.18 diff -u -a -p -r1.18 Makefile.am --- libutil++/Makefile.am 12 Apr 2005 03:14:10 -0000 1.18 +++ libutil++/Makefile.am 20 Apr 2005 00:37:33 -0000 @@ -9,6 +9,8 @@ noinst_LIBRARIES = libutil++.a libutil___a_SOURCES = \ op_bfd.cpp \ op_bfd.h \ + bfd_support.cpp \ + bfd_support.h \ string_filter.cpp \ string_filter.h \ glob_filter.cpp \ Index: libutil++/bfd_support.cpp =================================================================== RCS file: libutil++/bfd_support.cpp diff -N libutil++/bfd_support.cpp --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ libutil++/bfd_support.cpp 20 Apr 2005 00:37:34 -0000 @@ -0,0 +1,532 @@ +/** + * @file bfd_support.cpp + * BFD muck we have to deal with. + * + * @remark Copyright 2005 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#include "bfd_support.h" + +#include "op_bfd.h" +#include "op_fileio.h" +#include "string_manip.h" +#include "cverb.h" + +#include <iostream> +#include <fstream> +#include <sstream> + +using namespace std; + +extern verbose vbfd; + +namespace { + + +void check_format(string const & file, bfd ** ibfd) +{ + if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) { + cverb << vbfd << "BFD format failure for " << file << endl; + bfd_close(*ibfd); + *ibfd = NULL; + } +} + + +bool separate_debug_file_exists(string const & name, unsigned long const crc) +{ + unsigned long file_crc = 0; + // The size of 2*1024 elements for the buffer is arbitrary. + char buffer[2*1024]; + + ifstream file(name.c_str()); + if (!file) + return false; + + cverb << vbfd << "found " << name; + while (file) { + file.read(buffer, sizeof(buffer)); + file_crc = calc_crc32(file_crc, + reinterpret_cast<unsigned char *>(&buffer[0]), + file.gcount()); + } + cverb << vbfd << " with crc32 = " << hex << file_crc << endl; + return crc == file_crc; +} + + +bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32) +{ + asection * sect; + + cverb << vbfd << "fetching .gnu_debuglink section" << endl; + sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink"); + + if (sect == NULL) + return false; + + bfd_size_type debuglink_size = bfd_section_size(ibfd, sect); + char contents[debuglink_size]; + cverb << vbfd + << ".gnu_debuglink section has size " << debuglink_size << endl; + + bfd_get_section_contents(ibfd, sect, + reinterpret_cast<unsigned char *>(contents), + static_cast<file_ptr>(0), debuglink_size); + + /* CRC value is stored after the filename, aligned up to 4 bytes. */ + size_t filename_len = strlen(contents); + size_t crc_offset = filename_len + 1; + crc_offset = (crc_offset + 3) & ~3; + + crc32 = bfd_get_32(ibfd, + reinterpret_cast<bfd_byte *>(contents + crc_offset)); + filename = string(contents, filename_len); + cverb << vbfd << ".gnu_debuglink filename is " << filename << endl; + return true; +} + + +/** + * With Objective C, we'll get strings like: + * + * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range + * + * for the symbol name, and: + * -[GSUnicodeString rangeOfCharacterFromSet:options:range:] + * + * for the function name, so we have to do some looser matching + * than for other languages (unfortunately, it's not possible + * to demangle Objective C symbols). + */ +bool objc_match(string const & sym, string const & method) +{ + if (method.length() < 3) + return false; + + string mangled; + + if (is_prefix(method, "-[")) { + mangled += "_i_"; + } else if (is_prefix(method, "+[")) { + mangled += "_c_"; + } else { + return false; + } + + string::const_iterator it = method.begin() + 2; + string::const_iterator const end = method.end(); + + bool found_paren = false; + + for (; it != end; ++it) { + switch (*it) { + case ' ': + mangled += '_'; + if (!found_paren) + mangled += '_'; + break; + case ':': + mangled += '_'; + break; + case ')': + case ']': + break; + case '(': + found_paren = true; + mangled += '_'; + break; + default: + mangled += *it; + } + } + + return sym == mangled; +} + + +/* + * With a binary image where some objects are missing debug + * info, we can end up attributing to a completely different + * function (#484660): bfd_nearest_line() will happily move from one + * symbol to the nearest one it can find with debug information. + * To mitigate this problem, we check that the symbol name + * matches the returned function name. + * + * However, this check fails in some cases it shouldn't: + * Objective C, and C++ static inline functions (as discussed in + * GCC bugzillla #11774). So, we have a looser check that + * accepts merely a substring, plus some magic for Objective C. + * + * If even the loose check fails, then we give up. + */ +bool is_correct_function(string const & function, string const & name) +{ + if (name == function) + return true; + + if (objc_match(name, function)) + return true; + + // warn the user if we had to use the loose check + if (name.find(function) != string::npos) { + static bool warned = false; + if (!warned) { + cerr << "warning: some functions compiled without " + << "debug information may have incorrect source " + << "line attributions" << endl; + warned = true; + } + cverb << vbfd << "is_correct_function(" << function << ", " + << name << ") fuzzy match." << endl; + return true; + } + + return false; +} + + +/* + * binutils 2.12 and below have a small bug where functions without a + * debug entry at the prologue start do not give a useful line number + * from bfd_find_nearest_line(). This can happen with certain gcc + * versions such as 2.95. + * + * We work around this problem by scanning forward for a vma with valid + * linenr info, if we can't get a valid line number. Problem uncovered + * by Norbert Kaufmann. The work-around decreases, on the tincas + * application, the number of failure to retrieve linenr info from 835 + * to 173. Most of the remaining are c++ inline functions mainly from + * the STL library. Fix #529622 + */ +void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms, + string const & name, bfd_vma pc, + char const ** filename, unsigned int * line) +{ + char const * cfilename; + char const * function; + unsigned int linenr; + + // FIXME: looking at debug info for all gcc version shows than + // the same problems can -perhaps- occur for epilog code: find a + // samples files with samples in epilog and try opreport -l -g + // on it, check it also with opannotate. + + // first restrict the search on a sensible range of vma, 16 is + // an intuitive value based on epilog code look + size_t max_search = 16; + size_t section_size = bfd_section_size(abfd, section); + if (pc + max_search > section_size) + max_search = section_size - pc; + + for (size_t i = 1; i < max_search; ++i) { + bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i, + &cfilename, &function, + &linenr); + + if (ret && cfilename && function && linenr != 0 + && is_correct_function(function, name)) { + *filename = cfilename; + *line = linenr; + return; + } + } +} + + +} // namespace anon + + +bfd * open_bfd(string const & file) +{ + /* bfd keeps its own reference to the filename char *, + * so it must have a lifetime longer than the ibfd */ + bfd * ibfd = bfd_openr(file.c_str(), NULL); + if (!ibfd) { + cverb << vbfd << "bfd_openr failed for " << file << endl; + return NULL; + } + + check_format(file, &ibfd); + + return ibfd; +} + + +bfd * fdopen_bfd(string const & file, int fd) +{ + /* bfd keeps its own reference to the filename char *, + * so it must have a lifetime longer than the ibfd */ + bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd); + if (!ibfd) { + cverb << vbfd << "bfd_openr failed for " << file << endl; + return NULL; + } + + check_format(file, &ibfd); + + return ibfd; +} + + +bool find_separate_debug_file(bfd * ibfd, string const & dir_in, + string const & global_in, string & filename) +{ + string dir(dir_in); + string global(global_in); + string basename; + unsigned long crc32; + + if (!get_debug_link_info(ibfd, basename, crc32)) + return false; + + if (dir.size() > 0 && dir.at(dir.size() - 1) != '/') + dir += '/'; + + if (global.size() > 0 && global.at(global.size() - 1) != '/') + global += '/'; + + cverb << vbfd << "looking for debugging file " << basename + << " with crc32 = " << hex << crc32 << endl; + + string first_try(dir + basename); + string second_try(dir + ".debug/" + basename); + + if (dir.size() > 0 && dir[0] == '/') + dir = dir.substr(1); + + string third_try(global + dir + basename); + + if (separate_debug_file_exists(first_try, crc32)) + filename = first_try; + else if (separate_debug_file_exists(second_try, crc32)) + filename = second_try; + else if (separate_debug_file_exists(third_try, crc32)) + filename = third_try; + else + return false; + + return true; +} + + +bool interesting_symbol(asymbol * sym) +{ + // #717720 some binutils are miscompiled by gcc 2.95, one of the + // typical symptom can be catched here. + if (!sym->section) { + ostringstream os; + os << "Your version of binutils seems to have a bug.\n" + << "Read http://oprofile.sf.net/faq/#binutilsbug\n"; + throw op_runtime_error(os.str()); + } + + if (!(sym->section->flags & SEC_CODE)) + return false; + + // returning true for fix up in op_bfd_symbol() + if (!sym->name || sym->name[0] == '\0') + return true; + + // C++ exception stuff + if (sym->name[0] == '.' && sym->name[1] == 'L') + return false; + + /* This case cannot be moved to boring_symbol(), + * because that's only used for duplicate VMAs, + * and sometimes this symbol appears at an address + * different from all other symbols. + */ + if (!strcmp("gcc2_compiled.", sym->name)) + return false; + + return true; +} + + +bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second) +{ + if (first.name() == "Letext") + return true; + else if (second.name() == "Letext") + return false; + + if (first.name().substr(0, 2) == "??") + return true; + else if (second.name().substr(0, 2) == "??") + return false; + + if (first.hidden() && !second.hidden()) + return true; + else if (!first.hidden() && second.hidden()) + return false; + + if (first.name()[0] == '_' && second.name()[0] != '_') + return true; + else if (first.name()[0] != '_' && second.name()[0] == '_') + return false; + + if (first.weak() && !second.weak()) + return true; + else if (!first.weak() && second.weak()) + return false; + + return false; +} + + +bool bfd_info::has_debug_info() const +{ + if (!valid()) + return false; + + for (asection const * sect = abfd->sections; sect; sect = sect->next) { + if (sect->flags & SEC_DEBUGGING) + return true; + } + + return false; +} + + +bfd_info::~bfd_info() +{ + close(); +} + + +void bfd_info::close() +{ + if (abfd) + bfd_close(abfd); +} + + +#if SYNTHESIZE_SYMBOLS +bool bfd_info::get_synth_symbols() +{ + extern const bfd_target bfd_elf64_powerpc_vec; + extern const bfd_target bfd_elf64_powerpcle_vec; + bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec) + || (abfd->xvec == &bfd_elf64_powerpcle_vec); + + if (!is_elf64_powerpc_target) + return false; + + void * mini_syms; + uint tmp; + long nr_mini_syms = bfd_read_minisymbols(abfd, 0, mini_syms, &tmp); + if (nr_mini_syms < 1) + return false; + + void * synth_syms; + long nr_synth_syms = bfd_get_synthetic_symtab(abfd, nr_mini_syms, 0, + NULL, synth_syms); + + if (nr_synth_syms < 0) + return false; + + nr_syms = nr_mini_syms + nr_synth_syms; + stored_syms.reset(new asymbol[nr_syms + 1]); + + for (size_t i = 0; i < nr_mini_syms; ++i) + stored_syms[i] = ((asymbol *)mini_syms)[i]; + + for (size_t i = 0; i < nr_synth_syms; ++i) + stored_syms[nr_mini_syms + i] = ((asymbol *)synth_syms)[i]; + + free(mini_syms); + free(synth_syms); + + // finally, make our normal symbols list + syms.reset(new asymbol *[nr_syms + 1]); + + for (size_t i = 0; i < nr_syms; ++i) + syms[i] = &stored_syms[i]; + + // bfd_canonicalize_symtab does this, so shall we + syms[nr_syms] = NULL; +} +#else +bool bfd_info::get_synth_symbols() +{ + return false; +} +#endif /* SYNTHESIZE_SYMBOLS */ + + +void bfd_info::get_symbols() +{ + if (!abfd) + return; + + if (get_synth_symbols()) + return; + + if (bfd_get_file_flags(abfd) & HAS_SYMS) + nr_syms = bfd_get_symtab_upper_bound(abfd); + + if (nr_syms < 1) + return; + + syms.reset(new asymbol *[nr_syms]); + + nr_syms = bfd_canonicalize_symtab(abfd, syms.get()); +} + + +linenr_info const +find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym, + unsigned int offset) +{ + char const * function = ""; + char const * cfilename = ""; + unsigned int linenr = 0; + linenr_info info; + + if (!b.valid()) + goto fail; + + // take care about artificial symbol + if (!sym.symbol()) + goto fail; + + bfd * abfd = b.abfd; + asymbol ** syms = b.syms.get(); + asection * section = sym.symbol()->section; + bfd_vma const pc = (sym.value() + offset) - sym.filepos(); + + if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) + goto fail; + + if (pc >= bfd_section_size(abfd, section)) + goto fail; + + bool ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename, + &function, &linenr); + + if (!ret || !cfilename) + goto fail; + + if (!is_correct_function(function, sym.name())) + goto fail; + + if (linenr == 0) { + fixup_linenr(abfd, section, syms, sym.name(), + pc, &cfilename, &linenr); + } + + info.found = true; + info.filename = cfilename; + info.line = linenr; + return info; + +fail: + info.found = false; + info.filename.clear(); + info.line = 0; + return info; +} Index: libutil++/bfd_support.h =================================================================== RCS file: libutil++/bfd_support.h diff -N libutil++/bfd_support.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ libutil++/bfd_support.h 20 Apr 2005 00:37:34 -0000 @@ -0,0 +1,119 @@ +/** + * @file bfd_support.h + * BFD muck we have to deal with. + * + * @remark Copyright 2005 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef BFD_SUPPORT_H +#define BFD_SUPPORT_H + +#include "utility.h" +#include "op_types.h" + +#include <bfd.h> + +#include <string> + +class op_bfd_symbol; + +/// holder for BFD state we must keep +struct bfd_info { + bfd_info() : abfd(0), nr_syms(0) {} + + ~bfd_info(); + + /// close the BFD, setting abfd to NULL + void close(); + + /// return true if BFD is readable + bool valid() const { return abfd; } + + /// return true if BFD has debug info + bool has_debug_info() const; + + /// pick out the symbols from the bfd, if we can + void get_symbols(); + + /// the actual BFD + bfd * abfd; + /// normal symbols (includes synthesized symbols) + scoped_array<asymbol *> syms; + /// nr. symbols + size_t nr_syms; + +private: + /** + * PPC64 causes us no end of effort, because we have to create some + * synthetic symbols. And we need the lifetime of those to outlast our + * pointers to them, so we have to use the bfd_info thingy. + */ + bool get_synth_symbols(); + + /// symbols read if needed + scoped_array<asymbol> stored_syms; +}; + + +/* + * find_separate_debug_file - return true if a valid separate debug file found + * @param ibfd binary file + * @param dir_in directory holding the binary file + * @param global_in + * @param filename path to valid debug file + * + * Search order for debug file and use first one found: + * 1) dir_in directory + * 2) dir_in/.debug directory + * 3) global_in/dir_in directory + * + * Newer binutils and Linux distributions (e.g. Fedora) allow the + * creation of debug files that are separate from the binary. The + * debugging information is stripped out of the binary file, placed in + * this separate file, and a link to the new file is placed in the + * binary. The debug files hold the information needed by the debugger + * (and OProfile) to map machine instructions back to source code. + */ +extern bool +find_separate_debug_file(bfd * ibfd, + std::string const & dir_in, + std::string const & global_in, + std::string & filename); + +/// open the given BFD +bfd * open_bfd(std::string const & file); + +/// open the given BFD from the fd +bfd * fdopen_bfd(std::string const & file, int fd); + +/// Return true if the symbol is worth looking at +bool interesting_symbol(asymbol * sym); + +/** + * return true if the first symbol is less interesting than the second symbol + * boring symbol are eliminated when multiple symbol exist at the same vma + */ +bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second); + +/// debug info for a given pc +struct linenr_info { + /// did we find something? + bool found; + /// filename + std::string filename; + /// line number + unsigned int line; +}; + +/** + * Attempt to locate a filename + line number for the given symbol and + * offset. + */ +linenr_info const +find_nearest_line(bfd_info const & ibfd, op_bfd_symbol const & sym, + unsigned int offset); + +#endif /* !BFD_SUPPORT_H */ Index: libutil++/growable_vector.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/libutil++/growable_vector.h,v retrieving revision 1.3 diff -u -a -p -r1.3 growable_vector.h --- libutil++/growable_vector.h 14 Apr 2005 03:28:17 -0000 1.3 +++ libutil++/growable_vector.h 20 Apr 2005 00:37:34 -0000 @@ -94,8 +94,8 @@ public: /// return true if all elements have the default constructed value bool zero() const { return std::find_if(container.begin(), container.end(), - std::bind2nd(std::not_equal_to<T>(), T( ))) - == container.end(); + std::bind2nd(std::not_equal_to<T>(), T())) + == container.end(); } private: Index: libutil++/op_bfd.cpp =================================================================== RCS file: /cvsroot/oprofile/oprofile/libutil++/op_bfd.cpp,v retrieving revision 1.65 diff -u -a -p -r1.65 op_bfd.cpp --- libutil++/op_bfd.cpp 13 Apr 2005 15:35:25 -0000 1.65 +++ libutil++/op_bfd.cpp 20 Apr 2005 00:37:35 -0000 @@ -13,241 +13,43 @@ #include "op_config.h" #include "config.h" -#include <sys/stat.h> -#include <sys/types.h> #include <fcntl.h> -#include <cerrno> -#include <cstring> #include <cstdlib> -#include <algorithm> #include <iostream> -#include <fstream> #include <iomanip> #include <sstream> -#include "op_exception.h" #include "op_bfd.h" -#include "string_manip.h" #include "string_filter.h" #include "stream_util.h" #include "cverb.h" -#include "op_fileio.h" using namespace std; -namespace { verbose vbfd("bfd"); -void check_format(string const & file, bfd ** ibfd) -{ - if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) { - cverb << vbfd << "BFD format failure for " << file << endl; - bfd_close(*ibfd); - *ibfd = NULL; - } -} - -/** - * With Objective C, we'll get strings like: - * - * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range - * - * for the symbol name, and: - * -[GSUnicodeString rangeOfCharacterFromSet:options:range:] - * - * for the function name, so we have to do some looser matching - * than for other languages (unfortunately, it's not possible - * to demangle Objective C symbols). - */ -bool objc_match(string const & sym, string const & method) -{ - if (method.length() < 3) - return false; - - string mangled; - - if (is_prefix(method, "-[")) { - mangled += "_i_"; - } else if (is_prefix(method, "+[")) { - mangled += "_c_"; - } else { - return false; - } - - string::const_iterator it = method.begin() + 2; - string::const_iterator const end = method.end(); - - bool found_paren = false; - - for (; it != end; ++it) { - switch (*it) { - case ' ': - mangled += '_'; - if (!found_paren) - mangled += '_'; - break; - case ':': - mangled += '_'; - break; - case ')': - case ']': - break; - case '(': - found_paren = true; - mangled += '_'; - break; - default: - mangled += *it; - } - } - - return sym == mangled; -} - -} // namespace anon - -bfd * open_bfd(string const & file) -{ - /* bfd keeps its own reference to the filename char *, - * so it must have a lifetime longer than the ibfd */ - bfd * ibfd = bfd_openr(file.c_str(), NULL); - if (!ibfd) { - cverb << vbfd << "bfd_openr failed for " << file << endl; - return NULL; - } - - check_format(file, &ibfd); - - return ibfd; -} - namespace { -bfd * fdopen_bfd(string const & file, int fd) -{ - /* bfd keeps its own reference to the filename char *, - * so it must have a lifetime longer than the ibfd */ - bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd); - if (!ibfd) { - cverb << vbfd << "bfd_openr failed for " << file << endl; - return NULL; - } - - check_format(file, &ibfd); - - return ibfd; -} - - -bool -separate_debug_file_exists(string const & name, - unsigned long const crc) -{ - unsigned long file_crc = 0; - // The size of 8*1024 element for the buffer is arbitrary. - char buffer[2*1024]; - - ifstream file(name.c_str()); - if (!file) - return false; +/// function object for filtering symbols to remove +struct remove_filter { + remove_filter(string_filter const & filter) + : filter_(filter) {} - cverb << vbfd << "found " << name; - while (file) { - file.read(buffer, sizeof(buffer)); - file_crc = calc_crc32(file_crc, - reinterpret_cast<unsigned char *>(&buffer[0]), - file.gcount()); + bool operator()(op_bfd_symbol const & symbol) { + return !filter_.match(symbol.name()); } - cverb << vbfd << " with crc32 = " << hex << file_crc << endl; - return crc == file_crc; -} + string_filter filter_; +}; -bool -get_debug_link_info(bfd * ibfd, - string & filename, - unsigned long & crc32) -{ - asection * sect; - - cverb << vbfd << "fetching .gnu_debuglink section" << endl; - sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink"); - - if (sect == NULL) - return false; - - bfd_size_type debuglink_size = bfd_section_size(ibfd, sect); - char contents[debuglink_size]; - cverb << vbfd - << ".gnu_debuglink section has size " << debuglink_size << endl; - - bfd_get_section_contents(ibfd, sect, - reinterpret_cast<unsigned char *>(contents), - static_cast<file_ptr>(0), debuglink_size); - - /* CRC value is stored after the filename, aligned up to 4 bytes. */ - size_t filename_len = strlen(contents); - size_t crc_offset = filename_len + 1; - crc_offset = (crc_offset + 3) & ~3; - - crc32 = bfd_get_32(ibfd, - reinterpret_cast<bfd_byte *>(contents + crc_offset)); - filename = string(contents, filename_len); - cverb << vbfd << ".gnu_debuglink filename is " << filename << endl; - return true; -} } // namespace anon -bool -find_separate_debug_file(bfd * ibfd, - string const & dir_in, - string const & global_in, - string & filename) -{ - string dir(dir_in); - string global(global_in); - string basename; - unsigned long crc32; - - if (!get_debug_link_info(ibfd, basename, crc32)) - return false; - - if (dir.size() > 0 && dir.at(dir.size() - 1) != '/') - dir += '/'; - - if (global.size() > 0 && global.at(global.size() - 1) != '/') - global += '/'; - - cverb << vbfd << "looking for debugging file " << basename - << " with crc32 = " << hex << crc32 << endl; - - string first_try(dir + basename); - string second_try(dir + ".debug/" + basename); - - if (dir.size() > 0 && dir[0] == '/') - dir = dir.substr(1); - - string third_try(global + dir + basename); - - if (separate_debug_file_exists(first_try, crc32)) - filename = first_try; - else if (separate_debug_file_exists(second_try, crc32)) - filename = second_try; - else if (separate_debug_file_exists(third_try, crc32)) - filename = third_try; - else - return false; - - return true; -} - - op_bfd_symbol::op_bfd_symbol(asymbol const * a) : bfd_symbol(a), symb_value(a->value), section_filepos(a->section->filepos), @@ -277,16 +79,19 @@ op_bfd_symbol::op_bfd_symbol(bfd_vma vma } +bool op_bfd_symbol::operator<(op_bfd_symbol const & rhs) const +{ + return filepos() < rhs.filepos(); +} + + op_bfd::op_bfd(string const & archive, string const & fname, string_filter const & symbol_filter, bool & ok) : filename(fname), archive_path(archive), file_size(-1), - ibfd(0), - dbfd(0), - text_offset(0), - prev_total_symcount(0) + text_offset(0) { int fd; struct stat st; @@ -319,16 +124,16 @@ op_bfd::op_bfd(string const & archive, s file_size = st.st_size; - ibfd = fdopen_bfd(image_path, fd); + ibfd.abfd = fdopen_bfd(image_path, fd); - if (!ibfd) { + if (!ibfd.valid()) { cverb << vbfd << "fdopen_bfd failed for " << image_path << endl; ok = false; goto out_fail; } // find the first text section and use that as text_offset - for (sect = ibfd->sections; sect; sect = sect->next) { + for (sect = ibfd.abfd->sections; sect; sect = sect->next) { if (sect->flags & SEC_CODE) { text_offset = sect->filepos; io_state state(cverb << vbfd); @@ -344,12 +149,8 @@ out: add_symbols(symbols, symbol_filter); return; out_fail: - if (ibfd) - bfd_close(ibfd); - ibfd = NULL; - if (dbfd) - bfd_close(dbfd); - dbfd = NULL; + ibfd.close(); + dbfd.close(); // make the fake symbol fit within the fake file file_size = -1; goto out; @@ -358,219 +159,12 @@ out_fail: op_bfd::~op_bfd() { - if (ibfd) - bfd_close(ibfd); - if (dbfd) - bfd_close(dbfd); -} - - -bool op_bfd_symbol::operator<(op_bfd_symbol const & rhs) const -{ - return filepos() < rhs.filepos(); -} - -namespace { - -/** - * Return true if the symbol is worth looking at - */ -bool interesting_symbol(asymbol * sym) -{ - // #717720 some binutils are miscompiled by gcc 2.95, one of the - // typical symptom can be catched here. - if (!sym->section) { - ostringstream os; - os << "Your version of binutils seems to have a bug.\n" - << "Read http://oprofile.sf.net/faq/#binutilsbug\n"; - throw op_runtime_error(os.str()); - } - - if (!(sym->section->flags & SEC_CODE)) - return false; - - // returning true for fix up in op_bfd_symbol() - if (!sym->name || sym->name[0] == '\0') - return true; - - // C++ exception stuff - if (sym->name[0] == '.' && sym->name[1] == 'L') - return false; - - /* This case cannot be moved to boring_symbol(), - * because that's only used for duplicate VMAs, - * and sometimes this symbol appears at an address - * different from all other symbols. - */ - if (!strcmp("gcc2_compiled.", sym->name)) - return false; - - return true; -} - -/** - * return true if the first symbol is less interesting than the second symbol - * boring symbol are eliminated when multiple symbol exist at the same vma - */ -bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second) -{ - if (first.name() == "Letext") - return true; - else if (second.name() == "Letext") - return false; - - if (first.name().substr(0, 2) == "??") - return true; - else if (second.name().substr(0, 2) == "??") - return false; - - if (first.hidden() && !second.hidden()) - return true; - else if (!first.hidden() && second.hidden()) - return false; - - if (first.name()[0] == '_' && second.name()[0] != '_') - return true; - else if (first.name()[0] != '_' && second.name()[0] == '_') - return false; - - if (first.weak() && !second.weak()) - return true; - else if (!first.weak() && second.weak()) - return false; - - return false; -} - - -/// function object for filtering symbols to remove -struct remove_filter { - remove_filter(string_filter const & filter) - : filter_(filter) {} - - bool operator()(op_bfd_symbol const & symbol) { - return !filter_.match(symbol.name()); - } - - string_filter filter_; -}; - - -} // namespace anon - - -#if SYNTHESIZE_SYMBOLS - -uint op_bfd::process_symtab(bfd * ibfd, size_t start) -{ - extern const bfd_target bfd_elf64_powerpc_vec; - extern const bfd_target bfd_elf64_powerpcle_vec; - bool is_elf64_powerpc_target = (ibfd->xvec == &bfd_elf64_powerpc_vec) - || (ibfd->xvec == &bfd_elf64_powerpcle_vec); - - if (!is_elf64_powerpc_target) - return bfd_canonicalize_symtab(ibfd, bfd_syms.get() + start); - - void * minisyms; - uint minisym_count = 0; - uint size; - - minisym_count = bfd_read_minisymbols(ibfd, 0, &minisyms, &size); - if (minisym_count < 1) - return 0; - - asymbol ** mysyms; - asymbol * synthsyms; - long synth_count; - - mysyms = (asymbol **)minisyms; - synth_count = bfd_get_synthetic_symtab(ibfd, minisym_count, mysyms, - 0, NULL, &synthsyms); - - /* synth_count will be zero for binaries that already have - * dot symbols, so that's a valid return value that's handled - * by the code below, But if synth_count is < 0, this indicates - * an error, so we return immediately. - */ - if (synth_count < 0) - return 0; - - uint cur_symcount = (uint) synth_count + minisym_count; - - asymbol ** symp; - asymbol ** new_mini; - - new_mini = (asymbol **) malloc((cur_symcount + 1) * sizeof(*symp)); - symp = new_mini; - memcpy(symp, minisyms, minisym_count * sizeof(*symp)); - symp += minisym_count; - - for (long i = 0; i < synth_count; i++) - *symp++ = synthsyms + i; - - scoped_array<asymbol *> synth_syms; - synth_syms.reset(new asymbol * [cur_symcount + start]); - - for (symbol_index_t i = 0; i < start; i++) - synth_syms[i + cur_symcount] = bfd_syms[i]; - - for (symbol_index_t i = start; i < start + cur_symcount; i++) - synth_syms[i] = new_mini[i]; - - bfd_syms.swap(synth_syms); - free(new_mini); - - free(minisyms); - - prev_total_symcount = cur_symcount; - - return cur_symcount; - -} - -#else -uint op_bfd::process_symtab(bfd * ibfd, size_t start) -{ - return bfd_canonicalize_symtab(ibfd, bfd_syms.get() + start); -} -#endif - - -void op_bfd::get_symbols_from_file(bfd * ibfd, size_t start, - op_bfd::symbols_found_t & symbols, bool debug_file) -{ - uint nr_all_syms; - - if (prev_total_symcount) - start = prev_total_symcount; - nr_all_syms = process_symtab(ibfd, start); - if (nr_all_syms < 1) - return; - - for (symbol_index_t i = start; i < start + nr_all_syms; i++) { - if (interesting_symbol(bfd_syms[i])) { - // need to use filepos of original file for debug - // file symbs - if (debug_file) - // FIXME: this is not enough, we must get the - // offset where this symbol live in the - // original file. - bfd_syms[i]->section->filepos = text_offset; - symbols.push_back(op_bfd_symbol(bfd_syms[i])); - } - } - } void op_bfd::get_symbols(op_bfd::symbols_found_t & symbols) { - size_t size; - size_t size_binary = 0; - size_t size_debug = 0; - - if (bfd_get_file_flags(ibfd) & HAS_SYMS) - size_binary = bfd_get_symtab_upper_bound(ibfd); + ibfd.get_symbols(); // On separate debug file systems, the main bfd has no symbols, // so even for non -g reports, we want to process the dbfd. @@ -578,22 +172,23 @@ void op_bfd::get_symbols(op_bfd::symbols // have much choice at the moment. has_debug_info(); - if (dbfd && (bfd_get_file_flags(dbfd) & HAS_SYMS)) - size_debug += bfd_get_symtab_upper_bound(dbfd); - - size = size_binary + size_debug; - - /* HAS_SYMS can be set with no symbols */ - if (size < 1) - return; + dbfd.get_symbols(); - bfd_syms.reset(new asymbol *[size]); + size_t i; + for (i = 0; i < ibfd.nr_syms; i++) { + if (interesting_symbol(ibfd.syms[i])) + symbols.push_back(op_bfd_symbol(ibfd.syms[i])); + } + + for (i = 0; i < dbfd.nr_syms; i++) { + // need to use filepos of original file for debug + // file symbols. FIXME: this is not enough, we must get the + // offset where this symbol live in the original file. + dbfd.syms[i]->section->filepos = text_offset; - if (size_binary > 0) - get_symbols_from_file(ibfd, 0, symbols, false); - - if (size_debug > 0) - get_symbols_from_file(dbfd, size_binary, symbols, true); + if (interesting_symbol(dbfd.syms[i])) + symbols.push_back(op_bfd_symbol(dbfd.syms[i])); + } symbols.sort(); @@ -659,7 +254,7 @@ unsigned long op_bfd::sym_offset(symbol_ bfd_vma op_bfd::offset_to_pc(bfd_vma offset) const { - asection const * sect = ibfd->sections; + asection const * sect = ibfd.abfd->sections; for (; sect; sect = sect->next) { if (offset >= bfd_vma(sect->filepos) && @@ -677,30 +272,22 @@ bool op_bfd::has_debug_info() const if (debug_info.cached()) return debug_info.get(); - if (!ibfd) + if (!ibfd.valid()) return debug_info.reset(true); - asection const * sect; - - for (sect = ibfd->sections; sect; sect = sect->next) { - if (sect->flags & SEC_DEBUGGING) - return debug_info.reset(true); - } + if (ibfd.has_debug_info()) + return debug_info.reset(true); // check to see if there is an .debug file string const global(archive_path + DEBUGDIR); string const image_path = archive_path + filename; string const dirname(image_path.substr(0, image_path.rfind('/'))); - if (find_separate_debug_file(ibfd, dirname, global, debug_filename)) { + if (find_separate_debug_file(ibfd.abfd, dirname, global, debug_filename)) { cverb << vbfd << "now loading: " << debug_filename << endl; - dbfd = open_bfd(debug_filename); - if (dbfd) { - for (sect = dbfd->sections; sect; sect = sect->next) { - if (sect->flags & SEC_DEBUGGING) - return debug_info.reset(true); - } - } + dbfd.abfd = open_bfd(debug_filename); + if (dbfd.has_debug_info()) + return debug_info.reset(true); } // .debug is optional, so will not fail if there's a problem @@ -714,140 +301,22 @@ bool op_bfd::has_debug_info() const bool op_bfd::get_linenr(symbol_index_t sym_idx, unsigned int offset, string & source_filename, unsigned int & linenr) const { - linenr = 0; - if (!has_debug_info()) return false; - char const * functionname; - char const * cfilename = ""; - bfd_vma pc; - - op_bfd_symbol const & sym = syms[sym_idx]; - - // take care about artificial symbol - if (sym.symbol() == 0) - return false; - - // Section symbols with no name are problematic for - // some versions of BFD, so we'll skip the unnecessary - // attempt to find a line number for a section symbol - if ((sym.name().substr(0,2) == "??") && - (sym.symbol()->flags & BSF_SECTION_SYM)) - return false; - - asection * section = sym.symbol()->section; - - if ((bfd_get_section_flags(ibfd, section) & SEC_ALLOC) == 0) - return false; - - pc = sym_offset(sym_idx, offset) + sym.value(); - // FIXME: to test, I'm unsure if from this point we must use abfd // or the check if (pc >= bfd_section_size(abfd, section)) must be done // with ibfd. - bfd * abfd = dbfd ? dbfd : ibfd; - - if (pc >= bfd_section_size(abfd, section)) - return false; - - bool ret = bfd_find_nearest_line(abfd, section, bfd_syms.get(), pc, - &cfilename, &functionname, &linenr); + bfd_info const & b = dbfd.valid() ? dbfd : ibfd; - if (cfilename == 0 || !ret) { - cfilename = ""; - linenr = 0; - ret = false; - } - - // functionname and symbol name can be different if we accept it we - // can get samples for the wrong symbol (#484660) - // Note this break static inline function, since for these functions we - // get a different symbol name than symbol name but we recover later. - if (ret && functionname && sym.name() != string(functionname)) { - // gcc doesn't emit mangled name for C++ static function so we - // try to recover by accepting this linenr info if functionname - // is a substring of sym.name, this is not a bug see gcc - // bugzilla #11774. Check against the filename part of the - // is error prone error (e.g. namespace A { static int f1(); }) - // so we check only for a substring and warn the user. - static bool warned = false; - if (!warned) { - // FIXME: enough precise message ? We will get this - // message for static C++ function too, must we - // warn only if the following check fails ? - cerr << "warning: \"" << get_filename() << "\" some " - << "functions compiled without debug information " - << "may have incorrect source line attributions" - << endl; - warned = true; - } - if (sym.name().find(functionname) == string::npos) - ret = false; - if (!ret) - ret = objc_match(sym.name(), functionname); - } - - /* binutils 2.12 and below have a small bug where functions without a - * debug entry at the prologue start do not give a useful line number - * from bfd_find_nearest_line(). This can happen with certain gcc - * versions such as 2.95. - * - * We work around this problem by scanning forward for a vma with - * valid linenr info, if we can't get a valid line number. - * Problem uncovered by Norbert Kaufmann. The work-around decreases, - * on the tincas application, the number of failure to retrieve linenr - * info from 835 to 173. Most of the remaining are c++ inline functions - * mainly from the STL library. Fix #529622 - */ - if (linenr == 0) { - // FIXME: looking at debug info for all gcc version shows - // than the same problems can -perhaps- occur for epilog code: - // find a samples files with samples in epilog and try oreport - // -l -g on it, check it also with opannotate. - - // first restrict the search on a sensible range of vma, - // 16 is an intuitive value based on epilog code look - size_t max_search = 16; - size_t section_size = bfd_section_size(abfd, section); - if (pc + max_search > section_size) - max_search = section_size - pc; - - for (size_t i = 1 ; i < max_search ; ++i) { - bool ret = bfd_find_nearest_line(abfd, section, - bfd_syms.get(), pc+i, - &cfilename, - &functionname, - &linenr); - - if (ret && functionname && linenr != 0 - && sym.name() == string(functionname)) { - return true; - } - } - - // We lose it's pointless to try more. - - // bfd_find_nearest_line clobber the memory pointed by filename - // from a previous call when the filename change across - // multiple calls. The more easy way to recover is to reissue - // the first call, we don't need to recheck return value, we - // know that the call will succeed. - // As mentioned above a previous work-around break static - // inline function. We recover here by not checking than - // functionname == sym.name - bfd_find_nearest_line(abfd, section, bfd_syms.get(), pc, - &cfilename, &functionname, &linenr); - } + linenr_info const info = find_nearest_line(b, syms[sym_idx], offset); - if (cfilename) { - source_filename = cfilename; - } else { - source_filename = ""; - linenr = 0; - } + if (!info.found) + return false; - return ret; + source_filename = info.filename; + linenr = info.line; + return true; } @@ -946,8 +415,8 @@ string op_bfd::get_filename() const size_t op_bfd::bfd_arch_bits_per_address() const { - if (ibfd) - return ::bfd_arch_bits_per_address(ibfd); + if (ibfd.valid()) + return ::bfd_arch_bits_per_address(ibfd.abfd); // FIXME: this function should be called only if the underlined ibfd // is ok, must we throw ? return sizeof(bfd_vma); Index: libutil++/op_bfd.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/libutil++/op_bfd.h,v retrieving revision 1.36 diff -u -a -p -r1.36 op_bfd.h --- libutil++/op_bfd.h 9 Apr 2005 03:21:26 -0000 1.36 +++ libutil++/op_bfd.h 20 Apr 2005 00:37:35 -0000 @@ -14,12 +14,11 @@ #include "config.h" -#include <bfd.h> - #include <vector> #include <string> #include <list> +#include "bfd_support.h" #include "utility.h" #include "cached_value.h" #include "op_types.h" @@ -174,50 +173,10 @@ public: bool has_debug_info() const; private: - /// filename we open (not including archive path) - std::string filename; - - /// path to archive - std::string archive_path; - - /// file size in bytes - off_t file_size; - - // the bfd object, NULL if the binary file can't be accessed. - bfd * ibfd; - - // The following member variables: debug_filename and dbfd are - // used to access the optional debugging information file. See - // the comment for find_separate_debug_file() for additional - // information. - - // corresponding debug file name - mutable std::string debug_filename; - - // corresponding debug bfd object. - mutable bfd * dbfd; - - // vector of symbol filled by the bfd lib. - scoped_array<asymbol*> bfd_syms; - // image file such the linux kernel need than all vma are offset - // by this value. - unsigned long text_offset; - - /// true if at least one section has (flags & SEC_DEBUGGING) != 0 - mutable cached_value<bool> debug_info; - - /// temporary container for getting symbols + /// temporary container type for getting symbols typedef std::list<op_bfd_symbol> symbols_found_t; /** - * Helper function for get_symbols. - * Populates bfd_syms and extracts the "interesting_symbol"s. - */ - void get_symbols_from_file(bfd * ibfd, size_t start, - op_bfd::symbols_found_t & symbols, - bool debug_file); - - /** * Parse and sort in ascending order all symbols * in the file pointed to by abfd that reside in * a %SEC_CODE section. @@ -229,8 +188,16 @@ private: void get_symbols(symbols_found_t & symbols); /** + * Helper function for get_symbols. + * Populates bfd_syms and extracts the "interesting_symbol"s. + */ + void get_symbols_from_file(bfd_info & bfd, size_t start, + op_bfd::symbols_found_t & symbols, + bool debug_file); + + /** * Add the symbols in the binary, applying filtering, - * and handling artificial symbol. + * and handling artificial symbols. */ void add_symbols(symbols_found_t & symbols, string_filter const & symbol_filter); @@ -238,7 +205,7 @@ private: /** * symbol_size - return the size of a symbol * @param sym symbol to get size - * @param next next symbol in vma roder if any + * @param next next symbol in vma order if any */ size_t symbol_size(op_bfd_symbol const & sym, op_bfd_symbol const * next) const; @@ -249,41 +216,33 @@ private: /* Generate symbols using bfd functions for * the image file associated with the ibfd arg. */ - uint process_symtab(bfd * ibfd, size_t start); + uint process_symtab(bfd_info * bfd, uint start); - /* Since process_symtab may occur twice for a given image file, - * this instance variable may be used as needed to keep track - * of the number of symbols generated in the previous call. - */ - uint prev_total_symcount; + /// filename we open (not including archive path) + std::string filename; -}; + /// path to archive + std::string archive_path; -/* - * find_separate_debug_file - return true if a valid separate debug file found - * @param ibfd binary file - * @param dir_in directory holding the binary file - * @param global_in - * @param filename path to valid debug file - * - * Search order for debug file and use first one found: - * 1) dir_in directory - * 2) dir_in/.debug directory - * 3) global_in/dir_in directory - * - * Newer binutils and Linux distributions (e.g. Fedora) allow the - * creation of debug files that are separate from the binary. The - * debugging information is stripped out of the binary file, placed in - * this separate file, and a link to the new file is placed in the - * binary. The debug files hold the information needed by the debugger - * (and OProfile) to map machine instructions back to source code. - */ + /// file size in bytes + off_t file_size; + + /// corresponding debug file name + mutable std::string debug_filename; + + /// true if at least one section has (flags & SEC_DEBUGGING) != 0 + mutable cached_value<bool> debug_info; + + /// our main bfd object: .bfd may be NULL + bfd_info ibfd; + + // corresponding debug bfd object, if one is found + mutable bfd_info dbfd; + + // image file such the linux kernel need than all vma are offset + // by this value. + unsigned long text_offset; +}; -extern bool -find_separate_debug_file(bfd * ibfd, - std::string const & dir_in, - std::string const & global_in, - std::string & filename); -extern bfd * open_bfd(std::string const & file); #endif /* !OP_BFD_H */ |