From: <com...@bi...> - 2013-07-05 14:01:35
|
14 new commits in net-responsibility: https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/df24d8f0e056/ Changeset: df24d8f0e056 User: roggan87 Date: 2012-11-17 18:33:21 Summary: No need to rename the new plugin locale files any more. Affected #: 1 file diff -r 9e74594abafeade82e16d0178ef7310824d746ce -r df24d8f0e056da2ab376c563e746df7c056d3053 plugins/create.sh --- a/plugins/create.sh +++ b/plugins/create.sh @@ -69,8 +69,6 @@ fi done -mv ${C_NAME}/locale/en_EN/general.xml ${C_NAME}/locale/en_EN/${C_NAME}.xml - echo "" echo "Your plugin is set up. Customize README, write your code, tweak " echo "configure.ac and Makefile.am if needed, then run the following " https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/98da4d89d8a0/ Changeset: 98da4d89d8a0 User: roggan87 Date: 2012-11-17 23:48:35 Summary: Putting the configuration inside a ConfigurationMapper, and closing #53. Restructuring the configuration file to more reflect the plugin separation. Affected #: 8 files diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd include/Options.h --- a/include/Options.h +++ b/include/Options.h @@ -39,6 +39,7 @@ #include "Poco/Environment.h" #include "Poco/Util/Application.h" #include "Poco/Util/LayeredConfiguration.h" +#include "Poco/Util/ConfigurationMapper.h" #include "Poco/Logger.h" #include "Poco/SharedLibrary.h" #include "Poco/DynamicAny.h" @@ -83,6 +84,7 @@ using ::Poco::SingletonHolder; using ::Poco::Util::Application; using ::Poco::Util::LayeredConfiguration; +using ::Poco::Util::ConfigurationMapper; using namespace ::std; class MainApplication; @@ -130,20 +132,10 @@ virtual ~Options(); - bool doSaveHistory() const; - - bool doSendImprovementData() const; - /// Returns true if we're supposed to send the improvement data to - /// the server. - - bool doCompressAttachedReport() const; - /// Returns true if the report should be zipped. - - bool isReportPart(string part) const; - /// Returns true if the given part should be included in the regular report. - - bool isAttachedReportPart(string part) const; - /// Returns true if the given part should be included in the attached report. + bool hasChildValue(string key, string value) const; + /// Returns true if the given value is found in one of the + /// children to given key. Useful for checking up report parts + /// etc. bool isAccountEnabled() const; /// Returns false if accounts are disabled at compilation time. @@ -189,7 +181,6 @@ map<string, string> _txt; MyLogger *_logger; vector<Bypass> _initBypasses; - AutoPtr<MyXml> _xml; void loadDefaultValues(); void loadLocale(string plugin = "general"); diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd plugins/DefaultReport/src/Report.cpp --- a/plugins/DefaultReport/src/Report.cpp +++ b/plugins/DefaultReport/src/Report.cpp @@ -64,10 +64,10 @@ } } - if (_options->isReportPart("bypasses")) + if (_options->hasChildValue("cfg.plugins.DefaultReport.reportParts", "bypasses")) _body <<makeTableBranch( _options->getString("report.bypassesTitle"), content); - if (_options->isAttachedReportPart("bypasses")) + if (_options->hasChildValue("cfg.plugins.DefaultReport.attachedReportParts", "bypasses")) _attached <<makeJavascriptBranch( jsContent(_options->getString("report.bypassesTitle")), attContent); @@ -120,10 +120,10 @@ + jsContent(_options->getString("report.noWarnings")) + "', ['']]"; } - if (_options->isReportPart("warnings")) + if (_options->hasChildValue("cfg.plugins.DefaultReport.reportParts", "warnings")) _body <<makeTableBranch( _options->getString("report.warningsTitle"), keywordsContent); - if (_options->isAttachedReportPart("warnings")) + if (_options->hasChildValue("cfg.plugins.DefaultReport.attachedReportParts", "warnings")) _attached <<makeJavascriptBranch( jsContent(_options->getString("report.warningsTitle")), attKeywordsContent); @@ -148,10 +148,10 @@ } } - if (_options->isReportPart("whitelist")) + if (_options->hasChildValue("cfg.plugins.DefaultReport.reportParts", "whitelist")) _body <<makeTableBranch( _options->getString("report.whitelistTitle"), urlsContent); - if (_options->isAttachedReportPart("whitelist")) + if (_options->hasChildValue("cfg.plugins.DefaultReport.attachedReportParts", "whitelist")) _attached <<makeJavascriptBranch( jsContent(_options->getString("report.whitelistTitle")), attUrlsContent); @@ -160,8 +160,19 @@ -void Report::makeHistorySection() { - bool doIncludePaths = _options->isAttachedReportPart("history_paths"); +void Report::makeHistorySection() { + // Don't bother running through the complete history if it's not going to + // be included in the report anyway. + if (!_options->hasChildValue("cfg.plugins.DefaultReport.attachedReportParts", + "history_hostnames") && !_options->hasChildValue + ("cfg.plugins.DefaultReport.attachedReportParts", "history_paths")) + { + return; + } + + bool doIncludePaths = _options->hasChildValue + ("cfg.plugins.DefaultReport.reportParts", "history_paths"); + NodePtr history = _db->getHistory(Node::STRUCTURE_DOMAIN); string domains; @@ -252,7 +263,9 @@ StreamCopier::copyStream(_attached, htmFile); htmFile.close(); int fsize = File(fname).getSize(); - if (_options->doCompressAttachedReport() || fsize > (10 * 1024 * 1024)) { + if (_options->getBool("cfg.plugins.DefaultReport.compressAttachedReport") + || fsize > (10 * 1024 * 1024)) + { string zfname(dir + ss.str() + ".zip"); ofstream zipFile(zfname.c_str(), std::ios::binary); Compress c(zipFile, true); diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd src/ConfigSubsystem.cpp --- a/src/ConfigSubsystem.cpp +++ b/src/ConfigSubsystem.cpp @@ -38,15 +38,15 @@ Options* options = &Options::instance(); if (options->getBool("arg.config", false)) { if (options->isAccountEnabled()) { - string username = options->getString("username", ""), - password = options->getString("password", ""); + string username = options->getString("cfg.username", ""), + password = options->getString("cfg.password", ""); if ((username == "" || password == "") && MainApplication::instance().isInteractive()) { getLogin(username, password); } else if (username == "") - username = options->getString("username", ""); + username = options->getString("cfg.username", ""); options->setUsername(username); Request::addMac(options, password); } diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd src/MainApplication.cpp --- a/src/MainApplication.cpp +++ b/src/MainApplication.cpp @@ -118,7 +118,7 @@ void MainApplication::defineOptions(OptionSet& options) { ServerApplication::defineOptions(options); - config().setString("logfile", ""); + config().setString("arg.logfile", ""); options.addOption( Option("install", "", "Send installation report") .required(false) @@ -201,7 +201,7 @@ .required(false) .repeatable(false) .argument("file") - .binding("logfile")); + .binding("arg.logfile")); options.addOption( Option("no-sniffer", "ns", "Skip the sniffer on this instance") @@ -229,7 +229,7 @@ .required(false) .repeatable(false) .argument("xx_ZZ") - .binding("locale")); + .binding("arg.locale")); } diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd src/MyLogger.cpp --- a/src/MyLogger.cpp +++ b/src/MyLogger.cpp @@ -99,7 +99,7 @@ LayeredConfiguration *appConfig = &Application::instance().config(); string level; string logfile = ""; - string customLogfile = appConfig->getString("logfile", ""); + string customLogfile = appConfig->getString("arg.logfile", ""); bool isInteractive = !(appConfig->getBool("application.runAsDaemon", false) || appConfig->getBool("application.runAsService", false)); bool debug = appConfig->getBool("arg.debug", false); diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd src/Options.cpp --- a/src/Options.cpp +++ b/src/Options.cpp @@ -54,39 +54,11 @@ -bool Options::doSaveHistory() const { - return isAttachedReportPart("history_hostnames") - || isAttachedReportPart("history_paths"); -} - - - -bool Options::doSendImprovementData() const { - return _sendImprovementData; -} - - - -bool Options::doCompressAttachedReport() const { - return _compressAttachedReport; -} - - - -bool Options::isReportPart(string part) const { - vector<string> parts = this->getVector<string>("reportParts"); +bool Options::hasChildValue(string key, string value) const { + vector<string> children = this->getVector<string>(key.c_str()); vector<string>::const_iterator it; - it = find (parts.begin(), parts.end(), part); - return it != parts.end(); -} - - - -bool Options::isAttachedReportPart(string part) const { - vector<string> parts = this->getVector<string>("attachedReportParts"); - vector<string>::const_iterator it; - it = find (parts.begin(), parts.end(), part); - return it != parts.end(); + it = find (children.begin(), children.end(), value); + return it != children.end(); } @@ -175,11 +147,9 @@ string pluginName; string path(PLUGIN_DIR); if (pluginType == "report") - pluginName = this->getString - ("plugin[@type='report'][@selected='true'][@name]", "DefaultReport"); + pluginName = this->getString("cfg.plugins.selected.report", "DefaultReport"); else - pluginName = this->getString - ("plugin[@type='sniffer'][@selected='true'][@name]", "DefaultSniffer"); + pluginName = this->getString("cfg.plugins.selected.sniffer", "DefaultSniffer"); path += pluginName; path += ::Poco::SharedLibrary::suffix(); loadLocale(pluginName); @@ -196,7 +166,7 @@ void Options::setUsername(string username) { - this->setString("username", username); + this->setString("cfg.username", username); } @@ -210,10 +180,9 @@ this->setString("path.reports", REPORT_DIR); this->setString("server.hostname", SERVER); this->setString("program.version", VERSION); - this->setInt("reportFrequency", 7); - this->setInt("reportStrengthThreshold", 0); - this->setBool("improveData", false); - this->setBool("compressAttachedReport", false); + this->setInt("cfg.reportFrequency", 7); + this->setInt("cfg.reportStrengthThreshold", 0); + this->setBool("cfg.improvementData", false); _logger->debug("Version ${program.version}"); } @@ -224,11 +193,14 @@ bool doUpdate = this->getBool("arg.update-files", false); for (int moreTries = 3; moreTries > 0; moreTries--) { try { - _xml = new MyXml(this->getString("path.config")); + AutoPtr<MyXml> unmapped = new MyXml(this->getString("path.config")); + + AutoPtr<ConfigurationMapper> mapper; + mapper = new ConfigurationMapper("", "cfg", unmapped); // Add the configurations from our XML file to the LayeredConfiguration. // This is top priority. - this->add(_xml, 0); + this->add(mapper, 0); // If the account is enabled, update and retry if needed. if (!this->isAccountEnabled() || !Request::updateConfig(this, doUpdate)) @@ -306,7 +278,7 @@ // TODO: Configuration file // Command argument - string cliLocale = this->getString("locale", ""); + string cliLocale = this->getString("arg.locale", ""); if (cliLocale != "") _locales.push_back(cliLocale); diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd src/ReportBase.cpp --- a/src/ReportBase.cpp +++ b/src/ReportBase.cpp @@ -100,12 +100,13 @@ int ReportBase::send(bool receiveCopy) { try { - MailMessage message; - message.setSender(_options->get<string>("name") + " <re...@ne...>"); - message.set("Reply-To", _options->get<string>("emailFrom")); - vector<string> recipients = _options->getVector<string>("emailTo"); + MailMessage message; + string userEmail = _options->getString("cfg.email"); + message.setSender(_options->getString("cfg.name") + " <re...@ne...>"); + message.set("Reply-To", userEmail); + vector<string> recipients = _options->getVector<string>("accountabilityPartners"); if (receiveCopy) - recipients.push_back(_options->get<string>("emailFrom")); + recipients.push_back(userEmail); for (vector<string>::iterator it = recipients.begin(); it != recipients.end(); it++) { @@ -128,8 +129,8 @@ new FilePartSource(it->toString(), mimeType)); } - string host = _options->getString("smtp.host", "dummy"); - int port = _options->getInt("smtp.port", 25); + string host = _options->getString("cfg.smtp.host", "dummy"); + int port = _options->getInt("cfg.smtp.port", 25); SMTPClientSession session(host, port); session.login(SMTPClientSession::AUTH_LOGIN, @@ -182,7 +183,7 @@ void ReportBase::sendImprovementData() { NodePtr warnings = _db->getImprovementData(); - if (_options->getBool("improveData", false) && warnings->size() > 0) { + if (_options->getBool("cfg.improvementData", false) && warnings->size() > 0) { stringstream impData; int i = 0; for (NodeIterator it = warnings->begin(); it != warnings->end(); it++) { diff -r df24d8f0e056da2ab376c563e746df7c056d3053 -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd src/Request.cpp --- a/src/Request.cpp +++ b/src/Request.cpp @@ -165,7 +165,7 @@ HTTPRequest req(HTTPRequest::HTTP_POST, path); req.setContentType("application/x-www-form-urlencoded"); string reqBody = ""; - reqBody += "online_user=" + options->getString("username", "") + reqBody += "online_user=" + options->getString("cfg.username", "") + "&mac=" + options->getMacAddress() + "&version=" + options->getString("program.version"); if (morePostVars != "") https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/58b41a68360a/ Changeset: 58b41a68360a User: roggan87 Date: 2012-11-18 23:06:04 Summary: Send improvement data regardless of Report plugin used. Affected #: 2 files diff -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd -r 58b41a68360a85ec578dbe1683db404fab06afa5 src/ReportBase.cpp --- a/src/ReportBase.cpp +++ b/src/ReportBase.cpp @@ -138,7 +138,6 @@ "407298f00758c47a635065f7bfa1954d"); session.sendMessage(message); session.close(); - sendImprovementData(); } catch (::Poco::Net::NoAddressFoundException& exc) { return 1; diff -r 98da4d89d8a04feb0fd6d677bd33fcb1dfbc97fd -r 58b41a68360a85ec578dbe1683db404fab06afa5 src/ReportSubsystem.cpp --- a/src/ReportSubsystem.cpp +++ b/src/ReportSubsystem.cpp @@ -69,8 +69,9 @@ ::Poco::Thread::sleep(20000); errorCode = report->send(); } - if (errorCode == 0) { - _logger->notice("reportFinished"); + if (errorCode == 0) { + _logger->notice("reportFinished"); + report->sendImprovementData(); report->logFinish(); _options->setInt("arg.report", REPORT_FALSE); } https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/6d02885c1f9b/ Changeset: 6d02885c1f9b User: roggan87 Date: 2013-03-25 20:42:41 Summary: Cleaned up some small warnings. Affected #: 7 files diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 configure.ac --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. AC_PREREQ(2.58) -AC_INIT([Net Responsibility],[3.1r323],[co...@ne...]) +AC_INIT([Net Responsibility],[3.1.0-dev],[co...@ne...]) AC_SUBST([LIBTOOL_DEPS]) AC_CONFIG_AUX_DIR([.]) AC_CONFIG_SRCDIR([src/main.cpp]) diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 include/Filter.h --- a/include/Filter.h +++ b/include/Filter.h @@ -53,7 +53,7 @@ /// This class will run all test to find out if the URLs are appropriate or /// not. It is done by initially loading the blacklists, and then testing /// each URL against them. When keywords are found within the given text, - /// Filter thries to determine how strong the match is. This means every + /// Filter tries to determine how strong the match is. This means every /// match will be given an individual strength to indicate how likely it is /// to be an inappropriate site. (The higher number, the more likely). /// @@ -66,7 +66,7 @@ Filter(string blacklistFile); /// Load the Filter, given the path to the blacklist. This is - /// especially useful when improvign the algorithms. + /// especially useful when improving the algorithms. void loadBlacklist(string path); diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 plugins/DefaultReport/include/Report.h --- a/plugins/DefaultReport/include/Report.h +++ b/plugins/DefaultReport/include/Report.h @@ -62,17 +62,16 @@ /// Generates the report. This method invokes a number of other methods protected: - /*Inherited: - stringstream _body; - string _subject; - string _contentType; - vector<Path> _attachments; - Options *_options; - Database *_db; - Logger *_logger; - - */ - + /* Inherited members: + * + * stringstream _body; + * string _subject; + * string _contentType; + * vector<Path> _attachments; + * Options *_options; + * Database *_db; + * MyLogger *_logger; + */ private: stringstream _attached; void makeBypassesSection(); diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 plugins/Template/include/Report.h --- a/plugins/Template/include/Report.h +++ b/plugins/Template/include/Report.h @@ -48,7 +48,7 @@ * vector<Path> _attachments; * Options *_options; * Database *_db; - * Logger *_logger; + * MyLogger *_logger; */ private: diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 src/BlacklistKeyword.cpp --- a/src/BlacklistKeyword.cpp +++ b/src/BlacklistKeyword.cpp @@ -66,6 +66,7 @@ _category = keyword._category; _strength = keyword._strength; _regexps = keyword._regexps; + return *this; } diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -15,7 +15,7 @@ // // <Filter> determines whether the URLs are appropriate or not. -#include "Filter.h" +#include "Filter.h" namespace NetResponsibility { diff -r 58b41a68360a85ec578dbe1683db404fab06afa5 -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 src/Options.cpp --- a/src/Options.cpp +++ b/src/Options.cpp @@ -106,7 +106,7 @@ vector<T> vec; vector<string> subkeys; vector<string>::iterator it; - string prefix = (property == "" ? "" : string(property) + string(".")); + string prefix = (property == string("") ? string("") : string(property) + string(".")); this->keys(property, subkeys); for (it = subkeys.begin(); it != subkeys.end(); ++it) { string p = prefix + *it; @@ -121,7 +121,7 @@ map<string, T> Options::getMap(const char* property) const { map<string, T> m; vector<string> subkeys; - string prefix = (property == "" ? "" : string(property) + string(".")); + string prefix = (property == string("") ? string("") : string(property) + string(".")); this->keys(property, subkeys); for (vector<string>::iterator it = subkeys.begin(); it != subkeys.end(); ++it) @@ -235,7 +235,6 @@ void Options::loadLocale(string plugin) { string path; - bool isFound = false; for (vector<string>::reverse_iterator it = _locales.rbegin(); it != _locales.rend(); it++) { @@ -247,7 +246,6 @@ this->setString("path.txt", path); _logger->debug("printPath.txt"); - isFound = true; } } catch (::Poco::Exception &err) { https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/463bc7245ea7/ Changeset: 463bc7245ea7 User: roggan87 Date: 2013-03-26 18:28:21 Summary: Added the Blacklist as a real class. Bringing consistency to the terminology. BlacklistCategory -> Blacklist vector<Blacklist> -> Blacklists Affected #: 14 files diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e Makefile.am --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,7 @@ lib_LTLIBRARIES = libNetResponsibility.la libNetResponsibility_la_SOURCES = \ @top_srcdir@/src/MainApplication.cpp\ + @top_srcdir@/src/Blacklist.cpp\ @top_srcdir@/src/BlacklistKeyword.cpp\ @top_srcdir@/src/BootHistory.cpp\ @top_srcdir@/src/Bypass.cpp\ diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e include/Blacklist.h --- a/include/Blacklist.h +++ b/include/Blacklist.h @@ -29,68 +29,34 @@ #include "BlacklistKeyword.h" -#include "Poco/RegularExpression.h" #include "Poco/SharedPtr.h" -#include "Poco/Timestamp.h" -#include "Poco/Data/Common.h" -#include "Poco/Data/Connector.h" -#include "Poco/Data/SQLite/Connector.h" + namespace NetResponsibility { - -using ::Poco::Timestamp; + using ::Poco::SharedPtr; -using ::Poco::RegularExpression; -using namespace ::Poco::Data; using namespace ::std; - - -struct BlacklistCategory - /// This struct holds each specific category. -{ - string name; - /// The name of the category. +class Blacklist +{ + public: + Blacklist(); + Blacklist(string name); + Blacklist(const Blacklist& blacklist); - vector<BlacklistKeyword> keyword; // Change to keywords since its more than one - /// All the keywords. -}; - - - -struct BlacklistMatch - /// If the filter catches a match, in other words a suspicious URL, it's - /// stored inside a BlacklistMatch. Here we have all the important - /// information related to the match. -{ - string boldUrl; - /// The URL with every suspicious word in bold. + string getName() const; + bool isComplementary() const; - string abbrUrl; - /// The same as boldUrl, but abbreviated to be easier to read and save - /// some space. + void addKeyword(BlacklistKeyword keyword); - int strength; - /// The total strength of the match. This is displayed in the reports - /// if this strength is lower than reportStrengthThreshold. - - bool whitelist; - /// This bool is set to true if it is a whitelist match. In that case it - /// will show up in the whitelist section of the report. - - vector<BlacklistKeyword> keyword; // Change to keywords, more than one. - /// The keywords that are found in the URL. -}; - - - -typedef vector<BlacklistCategory> Blacklist; - /// Blacklist is the top-level container for all the blacklists. - // Change this typedef to: - // typedef map<string, vector<BlacklistKeyword> > Blacklist; - // This way we can skip BlacklistCategory. - + const vector<BlacklistKeyword> &getKeywords(); + private: + string _name; + vector<BlacklistKeyword> _keywords; +}; + +typedef vector<Blacklist> Blacklists; struct Extension { /// The strength of the BlacklistMatch will also take the type of the URL in @@ -115,96 +81,4 @@ } // namespace NetResponsibility -namespace Poco { -namespace Data { - -template <> -class TypeHandler<class ::NetResponsibility::BlacklistMatch> -{ -public: - static size_t size() - { - return 4; // we handle four columns of the Table! - } - - static void bind(size_t pos, const ::NetResponsibility::BlacklistMatch& obj, - AbstractBinder* pBinder) - { - poco_assert_dbg (pBinder != 0); - TypeHandler<std::string>::bind(pos++, obj.boldUrl, pBinder); - TypeHandler<std::string>::bind(pos++, obj.abbrUrl, pBinder); - TypeHandler<int>::bind(pos++, obj.strength, pBinder); - TypeHandler<int>::bind(pos++, (int)obj.whitelist, pBinder); - } - - static void prepare(size_t pos, const ::NetResponsibility::BlacklistMatch& obj, - AbstractPreparation* pPrepare) - { - poco_assert_dbg (pBinder != 0); - TypeHandler<std::string>::prepare(pos++, obj.boldUrl, pPrepare); - TypeHandler<std::string>::prepare(pos++, obj.abbrUrl, pPrepare); - TypeHandler<int>::prepare(pos++, obj.strength, pPrepare); - TypeHandler<int>::prepare(pos++, (int)obj.whitelist, pPrepare); - } - - static void extract(size_t pos, ::NetResponsibility::BlacklistMatch& obj, - const ::NetResponsibility::BlacklistMatch& defVal, AbstractExtractor* pExt) - /// obj will contain the result, defVal contains values we should use when one column is NULL - { - poco_assert_dbg (pExt != 0); - int w; - TypeHandler<std::string>::extract(pos++, obj.boldUrl, defVal.boldUrl, pExt); - TypeHandler<std::string>::extract(pos++, obj.abbrUrl, defVal.abbrUrl, pExt); - TypeHandler<int>::extract(pos++, obj.strength, defVal.strength, pExt); - TypeHandler<int>::extract(pos++, w, defVal.whitelist, pExt); - obj.whitelist = (w != 0); - } -}; - - -template <> -class TypeHandler<class ::NetResponsibility::BlacklistKeyword> -{ -public: - static size_t size() - { - return 3; // we handle four columns of the Table! - } - - static void bind(size_t pos, const ::NetResponsibility::BlacklistKeyword& obj, - AbstractBinder* pBinder) - { - poco_assert_dbg (pBinder != 0); - TypeHandler<std::string>::bind(pos++, obj.getKeyword(), pBinder); - TypeHandler<std::string>::bind(pos++, obj.getCategory(), pBinder); - TypeHandler<int>::bind(pos++, obj.getStrength(), pBinder); - } - - static void prepare(size_t pos, const ::NetResponsibility::BlacklistKeyword& obj, - AbstractPreparation* pPrepare) - { - poco_assert_dbg (pBinder != 0); - TypeHandler<std::string>::prepare(pos++, obj.getKeyword(), pPrepare); - TypeHandler<std::string>::prepare(pos++, obj.getCategory(), pPrepare); - TypeHandler<int>::prepare(pos++, obj.getStrength(), pPrepare); - } - - static void extract(size_t pos, ::NetResponsibility::BlacklistKeyword& obj, - const ::NetResponsibility::BlacklistKeyword& defVal, AbstractExtractor* pExt) - /// obj will contain the result, defVal contains values we should use when one column is NULL - { - poco_assert_dbg (pExt != 0); - std::string keyword; - std::string category; - int strength; - TypeHandler<std::string>::extract(pos++, keyword, defVal.getKeyword(), pExt); - TypeHandler<std::string>::extract(pos++, category, defVal.getCategory(), pExt); - TypeHandler<int>::extract(pos++, strength, defVal.getStrength(), pExt); - obj = ::NetResponsibility::BlacklistKeyword(keyword, category, strength); - } -}; - -} } // namespace Poco::Data - - #endif // BLACKLIST_H_INCLUDED diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e include/BlacklistKeyword.h --- a/include/BlacklistKeyword.h +++ b/include/BlacklistKeyword.h @@ -29,13 +29,17 @@ #include <vector> #include "Poco/RegularExpression.h" -#include "Poco/SharedPtr.h" +#include "Poco/SharedPtr.h" +#include "Poco/Data/Common.h" +#include "Poco/Data/Connector.h" +#include "Poco/Data/SQLite/Connector.h" namespace NetResponsibility { using ::Poco::SharedPtr; -using ::Poco::RegularExpression; +using ::Poco::RegularExpression; + using namespace ::std; typedef vector< SharedPtr<RegularExpression> >::const_iterator RegexpIterator; @@ -79,4 +83,51 @@ } // namespace NetResponsibility +namespace Poco { +namespace Data { + +template <> +class TypeHandler<class ::NetResponsibility::BlacklistKeyword> +{ +public: + static size_t size() + { + return 3; // we handle four columns of the Table! + } + + static void bind(size_t pos, const ::NetResponsibility::BlacklistKeyword& obj, + AbstractBinder* pBinder) + { + poco_assert_dbg (pBinder != 0); + TypeHandler<std::string>::bind(pos++, obj.getKeyword(), pBinder); + TypeHandler<std::string>::bind(pos++, obj.getCategory(), pBinder); + TypeHandler<int>::bind(pos++, obj.getStrength(), pBinder); + } + + static void prepare(size_t pos, const ::NetResponsibility::BlacklistKeyword& obj, + AbstractPreparation* pPrepare) + { + poco_assert_dbg (pBinder != 0); + TypeHandler<std::string>::prepare(pos++, obj.getKeyword(), pPrepare); + TypeHandler<std::string>::prepare(pos++, obj.getCategory(), pPrepare); + TypeHandler<int>::prepare(pos++, obj.getStrength(), pPrepare); + } + + static void extract(size_t pos, ::NetResponsibility::BlacklistKeyword& obj, + const ::NetResponsibility::BlacklistKeyword& defVal, AbstractExtractor* pExt) + /// obj will contain the result, defVal contains values we should use when one column is NULL + { + poco_assert_dbg (pExt != 0); + std::string keyword; + std::string category; + int strength; + TypeHandler<std::string>::extract(pos++, keyword, defVal.getKeyword(), pExt); + TypeHandler<std::string>::extract(pos++, category, defVal.getCategory(), pExt); + TypeHandler<int>::extract(pos++, strength, defVal.getStrength(), pExt); + obj = ::NetResponsibility::BlacklistKeyword(keyword, category, strength); + } +}; + +} } // namespace Poco::Data + #endif // BLACKLISTKEYWORD_H diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e include/Filter.h --- a/include/Filter.h +++ b/include/Filter.h @@ -68,9 +68,9 @@ /// Load the Filter, given the path to the blacklist. This is /// especially useful when improving the algorithms. - void loadBlacklist(string path); + void loadBlacklists(string path); - void loadBlacklist(Options* options, Database* db); + void loadBlacklists(Options* options, Database* db); void run(HTTPHit& hit); /// This is the method used for running a complete scan on the @@ -82,7 +82,7 @@ bool isTokenMatch(HTTPHit& hit); private: - Blacklist _blacklist; + Blacklists _blacklists; Extensions _extensions; SharedPtr<RegularExpression> _splitToken; SharedPtr<RegularExpression> _splitExtension; diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e include/MyXml.h --- a/include/MyXml.h +++ b/include/MyXml.h @@ -77,7 +77,7 @@ map<string, string> getStringMap() const; /// Returns a map<string, string> with all values found in the document. - Blacklist getBlacklist(); + Blacklists getBlacklists(); /// Extracts and compiles the keywords in the blacklist file. Extensions getExtensions(); diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e include/Node.h --- a/include/Node.h +++ b/include/Node.h @@ -157,7 +157,7 @@ virtual ~Node(); virtual Node& operator = (const Node& node); virtual Node& operator += (NodePtr node); - virtual NodePtr operator [] (int i); + virtual NodePtr operator [] (unsigned int i); int getStrength() const; int size() const; diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e include/Request.h --- a/include/Request.h +++ b/include/Request.h @@ -77,8 +77,8 @@ static void downloadConfig(Options* options, string password = ""); /// Download a new config file. - static void downloadBlacklist(Options* options); - /// Download a new blacklist + static void downloadBlacklists(Options* options); + /// Download a new blacklist file. static string sendImprovementData(Options* options, string impData); /// Send the improvement data to the server. It's given in impData. @@ -87,7 +87,7 @@ /// Updates the config file from the server if it's modifed or doForce /// is true. Returns true if the file was updated. - static bool updateBlacklist(Options* options, bool doForce = false); + static bool updateBlacklists(Options* options, bool doForce = false); /// Updates the blacklist file from the server if it's modifed or /// doForce is true. Returns true if the file was updated. @@ -95,7 +95,7 @@ /// The last time the config file was downloaded from the server. Used /// to determine if the file has been modified. - static int blacklistDownloaded; + static int blacklistsDownloaded; /// The last time the blacklists were downloaded from the server. Used /// to determine if the file has been modified. diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e projectFiles/linux/net-responsibility.cbp --- a/projectFiles/linux/net-responsibility.cbp +++ b/projectFiles/linux/net-responsibility.cbp @@ -168,6 +168,7 @@ <Unit filename="../../plugins/DefaultSniffer/include/SnifferThread.h" /><Unit filename="../../plugins/DefaultSniffer/src/Sniffer.cpp" /><Unit filename="../../plugins/DefaultSniffer/src/SnifferThread.cpp" /> + <Unit filename="../../src/Blacklist.cpp" /><Unit filename="../../src/BlacklistKeyword.cpp" /><Unit filename="../../src/BootHistory.cpp" /><Unit filename="../../src/Bypass.cpp" /> diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e src/Blacklist.cpp --- /dev/null +++ b/src/Blacklist.cpp @@ -0,0 +1,75 @@ +// This file is part of Net Responsibility. +// +// Net Responsibility is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// Net Responsibility is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Net Responsibility. If not, see <http://www.gnu.org/licenses/>. +// +// <Blacklist> is what we're using to filter the HTTPHits + + +#include "Blacklist.h" + + + +namespace NetResponsibility { + + + +Blacklist::Blacklist() +{ + //ctor +} + + + +Blacklist::Blacklist(string name) +{ + _name = name; +} + + +Blacklist::Blacklist(const Blacklist& blacklist) +{ + _name = blacklist._name; + _keywords = blacklist._keywords; +} + + + +string Blacklist::getName() const +{ + return _name; +} + + + +bool Blacklist::isComplementary() const +{ + // TODO determine! + return false; +} + +void Blacklist::addKeyword(BlacklistKeyword keyword) +{ + _keywords.push_back(keyword); +} + + + +const vector<BlacklistKeyword> &Blacklist::getKeywords() +{ + return _keywords; +} + + + +} // namespace NetResponsibility diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -25,25 +25,26 @@ Options* options = &Options::instance(); Database* db = &Database::instance(); setRegexps(); - loadBlacklist(options, db); + loadBlacklists(options, db); } Filter::Filter(string blacklistFile) { setRegexps(); - loadBlacklist(blacklistFile); + loadBlacklists(blacklistFile); } -void Filter::loadBlacklist(string path) { +void Filter::loadBlacklists(string path) { try { - AutoPtr<MyXml> xmlBlacklist (new MyXml(path)); - _blacklist = xmlBlacklist->getBlacklist(); - _extensions = xmlBlacklist->getExtensions(); + AutoPtr<MyXml> xmlBlacklists (new MyXml(path)); + _blacklists = xmlBlacklists->getBlacklists(); + _extensions = xmlBlacklists->getExtensions(); } - catch (Exception &err) { + catch (Exception &err) { + cout <<err.displayText() <<endl; Application::instance().logger().information("Couldn't load blacklist"); //download new } @@ -52,19 +53,19 @@ -void Filter::loadBlacklist(Options *options, Database *db) { +void Filter::loadBlacklists(Options *options, Database *db) { for (int moreTries = 3; moreTries > 0; moreTries--) { try { if (options->isAccountEnabled()) { // Update the blacklist if it's modified bool doUpdate = options->getBool("arg.update-files", false); - Request::updateBlacklist(options, doUpdate); + Request::updateBlacklists(options, doUpdate); } // Load it - AutoPtr<MyXml> xmlBlacklist (new MyXml(options->get<string>("path.blacklist"))); - _blacklist = xmlBlacklist->getBlacklist(); - _extensions = xmlBlacklist->getExtensions(); + AutoPtr<MyXml> xmlBlacklists (new MyXml(options->get<string>("path.blacklist"))); + _blacklists = xmlBlacklists->getBlacklists(); + _extensions = xmlBlacklists->getExtensions(); moreTries = 0; } catch (::Poco::FileNotFoundException &err) { @@ -75,7 +76,7 @@ db->logBypass(Bypass(Bypass::BYPASS_MISSING_FILE, "Blacklist")); if (options->isAccountEnabled()) - Request::downloadBlacklist(options); + Request::downloadBlacklists(options); else moreTries = 0; } @@ -87,7 +88,7 @@ db->logBypass(Bypass(Bypass::BYPASS_MODIFIED_FILE, "Blacklist")); if (options->isAccountEnabled()) - Request::downloadBlacklist(options); + Request::downloadBlacklists(options); else moreTries = 0; } @@ -111,9 +112,10 @@ int strength = 0; RegularExpression::Match m; - for (Blacklist::iterator c = _blacklist.begin(); c != _blacklist.end(); c++) { - for (vector<BlacklistKeyword>::iterator k = c->keyword.begin(); - k != c->keyword.end(); k++) + for (Blacklists::iterator b = _blacklists.begin(); b != _blacklists.end(); b++) { + vector<BlacklistKeyword> keywords = b->getKeywords(); + for (vector<BlacklistKeyword>::iterator k = keywords.begin(); + k != keywords.end(); k++) { isSubMatch = true; for (RegexpIterator r = k->begin(); r != k->end(); r++) @@ -126,7 +128,7 @@ if (isSubMatch) { BlacklistKeyword keyword = *k; hit.addBlacklistKeyword(keyword); - if (c->name != "Whitelist") { + if (b->getName() != "Whitelist") { strength += k->getStrength(); isMatch = true; } diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e src/HTTPHit.cpp --- a/src/HTTPHit.cpp +++ b/src/HTTPHit.cpp @@ -138,7 +138,7 @@ bool HTTPHit::isWhitelistMatch(int threshold) { - int whitelistKeywords = 0; + unsigned int whitelistKeywords = 0; for (std::set<BlacklistKeyword>::const_iterator k = _blacklistKeywords.begin(); k != _blacklistKeywords.end(); k++) { diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e src/MyXml.cpp --- a/src/MyXml.cpp +++ b/src/MyXml.cpp @@ -72,7 +72,7 @@ vector<T> vec; vector<string> subkeys; vector<string>::iterator it; - string prefix = (property == "" ? "" : string(property) + string(".")); + string prefix = (property == string("") ? string("") : string(property) + string(".")); this->keys(property, subkeys); for (it = subkeys.begin(); it != subkeys.end(); ++it) { string p = prefix + *it; @@ -87,7 +87,7 @@ map<string, T> MyXml::getMap(const char* property) const { map<string, T> m; vector<string> subkeys; - string prefix = (property == "" ? "" : string(property) + string(".")); + string prefix = (property == string("") ? string("") : string(property) + string(".")); this->keys(property, subkeys); for (vector<string>::iterator it = subkeys.begin(); it != subkeys.end(); ++it) @@ -127,43 +127,43 @@ -Blacklist MyXml::getBlacklist() { - Blacklist blacklist; - BlacklistCategory tempCategory; - vector<string> categories, keywords; +Blacklists MyXml::getBlacklists() { + Blacklists blacklists; + // TODO: change the elements 'category' to 'blacklist' in blacklist file! + vector<string> stringBlacklists, keywords; const int DEFAULT_STRENGTH = 100; - this->keys(categories); + this->keys(stringBlacklists); - for (vector<string>::iterator c = categories.begin(); - c != categories.end(); c++) + for (vector<string>::iterator b = stringBlacklists.begin(); + b != stringBlacklists.end(); b++) { - if (this->hasProperty(*c + "[@name]")) { - tempCategory.name = this->getString(*c + "[@name]"); - blacklist.push_back(tempCategory); - this->keys(*c, keywords); + if (this->hasProperty(*b + "[@name]")) { + Blacklist blacklist(this->getString(*b + "[@name]")); + this->keys(*b, keywords); for (vector<string>::iterator k = keywords.begin(); k != keywords.end(); k++) { string keyword; try { - keyword = this->getString(*c + '.' + *k); - int strength = (this->hasProperty(*c + '.' + *k + "[@s]") - ? this->getInt(*c + '.' + *k + "[@s]") : DEFAULT_STRENGTH); - BlacklistKeyword tempKeyword(keyword, tempCategory.name, strength); - blacklist.back().keyword.push_back(tempKeyword); + keyword = this->getString(*b + '.' + *k); + int strength = (this->hasProperty(*b + '.' + *k + "[@s]") + ? this->getInt(*b + '.' + *k + "[@s]") : DEFAULT_STRENGTH); + BlacklistKeyword tempKeyword(keyword, blacklist.getName(), strength); + blacklist.addKeyword(tempKeyword); } catch (::Poco::Exception &err) { _logger->information(err.displayText() + (string)": " + keyword); continue; } - } + } + blacklists.push_back(blacklist); } } - return blacklist; + return blacklists; } diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e src/Node.cpp --- a/src/Node.cpp +++ b/src/Node.cpp @@ -61,16 +61,18 @@ _children = node._children; _order = node._order; _nodeType = Node::TYPE_NODE; + return *this; } Node& Node::operator += (NodePtr node) { addChild(node); + return *this; } -NodePtr Node::operator [] (int i) { +NodePtr Node::operator [] (unsigned int i) { if (_children.size() > i) return _children[i]; else diff -r 6d02885c1f9bc02b10be7d78fc133d3d9f0026d2 -r 463bc7245ea71b2b05f131e0e4325c9b242d836e src/Request.cpp --- a/src/Request.cpp +++ b/src/Request.cpp @@ -24,7 +24,7 @@ int Request::configDownloaded = -1; -int Request::blacklistDownloaded = -1; +int Request::blacklistsDownloaded = -1; void Request::addMac(Options *options, string password) { @@ -51,7 +51,7 @@ -void Request::downloadBlacklist(Options *options) { +void Request::downloadBlacklists(Options *options) { MyLogger::instance().information("downloading.blacklists"); send(options, "/request/blacklist.php", options->get<string>("path.blacklist")); setDownloadedTime(options); @@ -120,15 +120,15 @@ -bool Request::updateBlacklist(Options *options, bool doForce) { +bool Request::updateBlacklists(Options *options, bool doForce) { try { - if (blacklistDownloaded == -1) + if (blacklistsDownloaded == -1) setDownloadedTime(options); // Determine what time the blacklist file was edited last Timestamp ft = File(options->get<string>("path.blacklist")).getLastModified(); - if (ft.epochTime() > (blacklistDownloaded + 15)) { - downloadBlacklist(options); + if (ft.epochTime() > (blacklistsDownloaded + 15)) { + downloadBlacklists(options); // This won't get caught as a bypass, but maybe it doesn't have to // either, as long as we get the correct blacklist? Bypass bypass(Bypass::BYPASS_MODIFIED_FILE, "Blacklist file"); @@ -136,7 +136,7 @@ return true; } else if (doForce) { - downloadBlacklist(options); + downloadBlacklists(options); return true; } else @@ -206,7 +206,7 @@ if (downloaded != "") { // Read values and set them accordingly configDownloaded = xml->getInt("config", 0); - blacklistDownloaded = xml->getInt("blacklist", 0); + blacklistsDownloaded = xml->getInt("blacklist", 0); } } catch (Exception& exc) { https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/e15bfcd6123c/ Changeset: e15bfcd6123c User: roggan87 Date: 2013-03-26 20:20:04 Summary: Now possible to create HTTPHit by URL directly. Affected #: 2 files diff -r 463bc7245ea71b2b05f131e0e4325c9b242d836e -r e15bfcd6123c05daa59def5071f064a15f18e108 include/HTTPHit.h --- a/include/HTTPHit.h +++ b/include/HTTPHit.h @@ -65,6 +65,7 @@ public: HTTPHit(); + HTTPHit(string url); HTTPHit(const HTTPRequest& request); HTTPHit(const HTTPResponse& response); HTTPHit(const HTTPHit& hit); @@ -112,6 +113,7 @@ void setHost(string hostname); void setPath(string path); + void setUrl(string url); void addBlacklistKeyword(BlacklistKeyword keyword); void setBlacklistKeywords(const std::set<BlacklistKeyword> keywords); void setBlacklistKeywords(const string keywords); diff -r 463bc7245ea71b2b05f131e0e4325c9b242d836e -r e15bfcd6123c05daa59def5071f064a15f18e108 src/HTTPHit.cpp --- a/src/HTTPHit.cpp +++ b/src/HTTPHit.cpp @@ -8,6 +8,13 @@ +HTTPHit::HTTPHit(string url) { + setDefaultValues(); + setUrl(url); +} + + + HTTPHit::HTTPHit(const HTTPRequest& request) { setDefaultValues(); @@ -168,6 +175,19 @@ +void HTTPHit::setUrl(string url) { + RegularExpression::MatchVec m; + SharedPtr<RegularExpression> regexp = new RegularExpression("^(\\w+:\\/\\/)?([^\\/]+)", 0, true); + regexp->match(url, 0, m); + unsigned int off = m[2].offset; + unsigned int len = m[2].length; + _host = url.substr(off, len); + if (url.length() > off + len) + _path = url.substr(off + len); +} + + + void HTTPHit::addBlacklistKeyword(BlacklistKeyword keyword) { _blacklistKeywords.insert(keyword); } https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/35f188063572/ Changeset: 35f188063572 User: roggan87 Date: 2013-03-26 20:20:56 Summary: Moved the most basic NR filtering method to BlacklistKeyword. Affected #: 3 files diff -r e15bfcd6123c05daa59def5071f064a15f18e108 -r 35f18806357231523ab4d779fb52014fa5aabf86 include/BlacklistKeyword.h --- a/include/BlacklistKeyword.h +++ b/include/BlacklistKeyword.h @@ -68,6 +68,8 @@ RegexpIterator end() const; + RegularExpression::MatchVec match(string token) const; + protected: int _strength; /// The strength of this keyword. 100 is default. diff -r e15bfcd6123c05daa59def5071f064a15f18e108 -r 35f18806357231523ab4d779fb52014fa5aabf86 src/BlacklistKeyword.cpp --- a/src/BlacklistKeyword.cpp +++ b/src/BlacklistKeyword.cpp @@ -123,4 +123,17 @@ } + +RegularExpression::MatchVec BlacklistKeyword::match(string token) const { + RegularExpression::MatchVec matches, empty; + for (RegexpIterator r = _regexps.begin(); r != _regexps.end(); r++) { + RegularExpression::Match m; + if ((**r).match(token, m)) + matches.push_back(m); + else + return empty; + } + return matches; +} + } // namespace NetResponsibility diff -r e15bfcd6123c05daa59def5071f064a15f18e108 -r 35f18806357231523ab4d779fb52014fa5aabf86 src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -106,26 +106,17 @@ bool Filter::isUrlMatch(HTTPHit& hit) { - string url = hit.getUrl(); - bool isSubMatch; bool isMatch = false; int strength = 0; - RegularExpression::Match m; for (Blacklists::iterator b = _blacklists.begin(); b != _blacklists.end(); b++) { vector<BlacklistKeyword> keywords = b->getKeywords(); for (vector<BlacklistKeyword>::iterator k = keywords.begin(); k != keywords.end(); k++) - { - isSubMatch = true; - for (RegexpIterator r = k->begin(); r != k->end(); r++) - { - if (!(**r).match(url, m)) { - isSubMatch = false; - break; - } - } - if (isSubMatch) { + { + string url = hit.getUrl(); + RegularExpression::MatchVec matches = k->match(url); + if (matches.size() > 0) { BlacklistKeyword keyword = *k; hit.addBlacklistKeyword(keyword); if (b->getName() != "Whitelist") { https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/fb5d7d6b7fb8/ Changeset: fb5d7d6b7fb8 User: roggan87 Date: 2013-03-26 22:42:55 Summary: Added a Token struct to be able to give tokens different strength. Splitting of tokens now happens right in the HTTPHit. Affected #: 4 files diff -r 35f18806357231523ab4d779fb52014fa5aabf86 -r fb5d7d6b7fb832dccc786b6e07f575a4bbaa6ef0 include/Blacklist.h --- a/include/Blacklist.h +++ b/include/Blacklist.h @@ -58,6 +58,10 @@ typedef vector<Blacklist> Blacklists; +// TODO: Put extensions, or more correctly; MimeTypes into each Blacklist. +// Different blacklists may have different viewpoints on what kind of files +// that are harmful. + struct Extension { /// The strength of the BlacklistMatch will also take the type of the URL in /// consideration. If it's an image or video it will be much stronger than diff -r 35f18806357231523ab4d779fb52014fa5aabf86 -r fb5d7d6b7fb832dccc786b6e07f575a4bbaa6ef0 include/HTTPHit.h --- a/include/HTTPHit.h +++ b/include/HTTPHit.h @@ -37,6 +37,7 @@ #include "Poco/SharedPtr.h" #include "Poco/Timestamp.h" #include "Poco/DateTimeFormatter.h" +#include "Poco/URI.h" #include "Poco/Net/HTTPRequest.h" #include "Poco/Net/HTTPResponse.h" #include "Poco/Net/HTTPMessage.h" @@ -64,6 +65,16 @@ { public: + + struct Token + /// This struct makes it possible to make each different + /// token have its own strength. + { + Token(string c, int s): content(c), strength(s) {} + string content; + int strength; + }; + HTTPHit(); HTTPHit(string url); HTTPHit(const HTTPRequest& request); @@ -120,6 +131,8 @@ std::set<BlacklistKeyword> getBlacklistKeywords(); + vector<HTTPHit::Token> getTokens(); + Node::StrengthLevel getStrengthLevel(); /// Returns the StrengthLevel of the HTTPHit. Slightly modified /// version of Node::getStrengthLevel(). @@ -128,6 +141,7 @@ string _host; string _path; int _parent; + vector<Token> _tokens; std::set<BlacklistKeyword> _blacklistKeywords; diff -r 35f18806357231523ab4d779fb52014fa5aabf86 -r fb5d7d6b7fb832dccc786b6e07f575a4bbaa6ef0 src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -134,67 +134,55 @@ bool Filter::isTokenMatch(HTTPHit& hit) { bool isMatch = false; - RegularExpression::Match m, n; + RegularExpression::Match n; float strength = 0; float wordFactor = 0.5; int tokenMatches = 0; - unsigned int o = 0; - string url = hit.getUrl(); - string decodedUrl = ""; - try { - ::Poco::URI::decode(url, decodedUrl); - } - catch (::Poco::Exception &exc) { - decodedUrl = url; - } - while (o < decodedUrl.length()) { - _splitToken->match(decodedUrl, o, m); - if (m.offset == string::npos) - m.offset = decodedUrl.length(); - string token = decodedUrl.substr(o, m.offset-o); - if (token.length() > 2) { - tokenMatches = 0; - std::set<BlacklistKeyword> keywords = hit.getBlacklistKeywords(); - for (std::set<BlacklistKeyword>::const_iterator - k = keywords.begin(); k != keywords.end(); k++) + + vector<HTTPHit::Token> tokens = hit.getTokens(); + for(vector<HTTPHit::Token>::iterator it = tokens.begin(); + it != tokens.end(); it++) + { + tokenMatches = 0; + std::set<BlacklistKeyword> keywords = hit.getBlacklistKeywords(); + for (std::set<BlacklistKeyword>::const_iterator + k = keywords.begin(); k != keywords.end(); k++) + { + bool isSubMatch = true; + float strengthFactor = 0; + string token = it->content; + for (RegexpIterator r = k->begin(); r != k->end(); r++) { - bool isSubMatch = true; - float strengthFactor = 0; - for (RegexpIterator r = k->begin(); r != k->end(); r++) - { - if ((**r).match(token, n)) { - while (n.offset != string::npos) { - if (n.offset == 0 - || _wordDelimiter-> - match(token.substr(n.offset-1, 1))) - wordFactor += 0.5; - if (n.offset + n.length == token.length() - || _wordDelimiter->match( - token.substr(n.offset + n.length, 1))) - wordFactor += 0.5; - strengthFactor += (float)n.length/token.length() - * wordFactor; - wordFactor = 0.5; - tokenMatches++; - (**r).match(token, n.offset + n.length, n); - } - } - else { - isSubMatch = false; - break; + if ((**r).match(token, n)) { + while (n.offset != string::npos) { + if (n.offset == 0 + || _wordDelimiter-> + match(token.substr(n.offset-1, 1))) + wordFactor += 0.5; + if (n.offset + n.length == token.length() + || _wordDelimiter->match( + token.substr(n.offset + n.length, 1))) + wordFactor += 0.5; + strengthFactor += (float)n.length/token.length() + * wordFactor; + wordFactor = 0.5; + tokenMatches++; + (**r).match(token, n.offset + n.length, n); } } - if (isSubMatch) { - isMatch = true; - strengthFactor += strengthFactor/tokenMatches; - strength += strengthFactor * k->getStrength(); + else { + isSubMatch = false; + break; } - + } + if (isSubMatch) { + isMatch = true; + strengthFactor += strengthFactor/tokenMatches; + strength += strengthFactor * k->getStrength(); } } - o = m.offset + m.length; } - strength *= getExtensionFactor(url); + strength *= getExtensionFactor(hit.getUrl()); if (isMatch) hit.setStrength((int)strength); diff -r 35f18806357231523ab4d779fb52014fa5aabf86 -r fb5d7d6b7fb832dccc786b6e07f575a4bbaa6ef0 src/HTTPHit.cpp --- a/src/HTTPHit.cpp +++ b/src/HTTPHit.cpp @@ -44,6 +44,7 @@ copyHeaders(hit); _host = hit._host; _path = hit._path; + _tokens = hit._tokens; _strength = hit._strength; _dateTime = hit._dateTime; _blacklistKeywords = hit._blacklistKeywords; @@ -221,6 +222,40 @@ +vector<HTTPHit::Token> HTTPHit::getTokens() { + // Adding host and path to tokens at request. + // Not before, in case they would change. + vector<Token> ret = _tokens; + ret.push_back(Token(_host, 150)); + + string decoded; + unsigned int o = 0; + RegularExpression::Match m; + SharedPtr<RegularExpression> _splitToken + = new RegularExpression("((\\?|\\&|;).*?\\=)|\\/|(\\%2F)|#", 0, true); + + try { + ::Poco::URI::decode(_path, decoded); + } + catch (::Poco::Exception &exc) { + decoded = _path; + } + + while (o < decoded.length()) { + _splitToken->match(decoded, o, m); + if (m.offset == string::npos) + m.offset = decoded.length(); + string token = decoded.substr(o, m.offset-o); + if (token.length() > 2) + ret.push_back(Token(token, 100)); + o = m.offset + m.length; + } + + return ret; +} + + + Node::StrengthLevel HTTPHit::getStrengthLevel() { if (isMatch()) return (Node::StrengthLevel)(((_strength + 12) / 25) + 2); https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/9533905bcc12/ Changeset: 9533905bcc12 User: roggan87 Date: 2013-03-26 22:44:28 Summary: Removed _splitToken from Filter too. Affected #: 2 files diff -r fb5d7d6b7fb832dccc786b6e07f575a4bbaa6ef0 -r 9533905bcc12e37a529ccdedf96b6e51f3251040 include/Filter.h --- a/include/Filter.h +++ b/include/Filter.h @@ -84,7 +84,6 @@ private: Blacklists _blacklists; Extensions _extensions; - SharedPtr<RegularExpression> _splitToken; SharedPtr<RegularExpression> _splitExtension; SharedPtr<RegularExpression> _wordDelimiter; diff -r fb5d7d6b7fb832dccc786b6e07f575a4bbaa6ef0 -r 9533905bcc12e37a529ccdedf96b6e51f3251040 src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -192,8 +192,6 @@ void Filter::setRegexps() { - _splitToken = - new RegularExpression("((\\?|\\&|;).*?\\=)|\\/|(\\%2F)|#", 0, true); _wordDelimiter = new RegularExpression("[\\s-_+\"']|\\.", 0, true); _splitExtension = new RegularExpression("/?(?:[^/?#]+/)+(?:[^?#]+\\.)" "([a-zA-Z0-9]{1,4})(?:$|\\?|#).*", 0, true); https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/cc3313daf508/ Changeset: cc3313daf508 User: roggan87 Date: 2013-03-26 23:06:21 Summary: Added Filter::findMatches() to eventually replace isUrlMatch(). Affected #: 2 files diff -r 9533905bcc12e37a529ccdedf96b6e51f3251040 -r cc3313daf508b5f52a1ed7fe74dd74a1b8b616fb include/Filter.h --- a/include/Filter.h +++ b/include/Filter.h @@ -77,6 +77,10 @@ /// HTTPHit. The result is writted to the hit in terms of /// strength, keywords found etc. + void findMatches(HTTPHit& hit); + /// This method runs a quick scan on the HTTPHit to sort + /// out which BlacklistKeywords that are found in it. + bool isUrlMatch(HTTPHit& hit); bool isTokenMatch(HTTPHit& hit); diff -r 9533905bcc12e37a529ccdedf96b6e51f3251040 -r cc3313daf508b5f52a1ed7fe74dd74a1b8b616fb src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -101,6 +101,40 @@ isUrlMatch(hit); isTokenMatch(hit); // TODO: Don't count as a match unless isTokenMatch +} + + + +void Filter::findMatches(HTTPHit& hit) { + set<BlacklistKeyword> keywords; + string url = hit.getUrl(); + + // First run through all blacklists and put matches => keywords + for (Blacklists::iterator b = _blacklists.begin(); b != _blacklists.end(); b++) { + vector<BlacklistKeyword> bk = b->getKeywords(); + for (vector<BlacklistKeyword>::iterator k =bk.begin(); k != bk.end(); k++) { + RegularExpression::MatchVec matches = k->match(url); + if (matches.size() > 0) + keywords.insert(*k); + } + } + + // Next find out if the keywords survive the token test + vector<HTTPHit::Token> tokens = hit.getTokens(); + for (set<BlacklistKeyword>::iterator k = keywords.begin(); + k != keywords.end(); k++) + { + bool doPass = false; + for (vector<HTTPHit::Token>::iterator t = tokens.begin(); t != tokens.end(); t++) { + RegularExpression::MatchVec m = k->match(t->content); + if (m.size() > 0) + doPass = true; + } + if (!doPass) + keywords.erase(k); + } + + hit.setBlacklistKeywords(keywords); } https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/1c8a1fdc4073/ Changeset: 1c8a1fdc4073 User: roggan87 Date: 2013-03-26 23:20:18 Summary: Added Filter::determineStrength() to eventually replace isTokenMatch(). Affected #: 2 files diff -r cc3313daf508b5f52a1ed7fe74dd74a1b8b616fb -r 1c8a1fdc4073bfdc41b834916d8d1b0cf6e61104 include/Filter.h --- a/include/Filter.h +++ b/include/Filter.h @@ -81,6 +81,11 @@ /// This method runs a quick scan on the HTTPHit to sort /// out which BlacklistKeywords that are found in it. + void determineStrength(HTTPHit& hit); + /// Here we use the BlacklistKeywords found with findMatches() + /// and run several tests on the HTTPHit to determine how + /// strong the match is. + bool isUrlMatch(HTTPHit& hit); bool isTokenMatch(HTTPHit& hit); diff -r cc3313daf508b5f52a1ed7fe74dd74a1b8b616fb -r 1c8a1fdc4073bfdc41b834916d8d1b0cf6e61104 src/Filter.cpp --- a/src/Filter.cpp +++ b/src/Filter.cpp @@ -136,7 +136,25 @@ hit.setBlacklistKeywords(keywords); } - + + + +void Filter::determineStrength(HTTPHit& hit) { + float strength = 0; // TODO: Base strength on number of keywords #45 + vector<HTTPHit::Token> tokens = hit.getTokens(); + set<BlacklistKeyword> keywords = hit.getBlacklistKeywords(); + for (vector<HTTPHit::Token>::iterator t = tokens.begin(); t != tokens.end(); t++) { + for (set<BlacklistKeyword>::iterator k = keywords.begin(); + k != keywords.end(); k++) + { + RegularExpression::MatchVec m = k->match(t->content); + // TODO: Handle suborder #46 + } + // TODO: Handle total length of matches vs. tokenlength + } + hit.setStrength((int)strength); +} + bool Filter::isUrlMatch(HTTPHit& hit) { https://bitbucket.org/netresponsibilityteam/net-responsibility/commits/dfbfbacfa58f/ Changeset: dfbfbacfa58f User: roggan87 Date: 2013-07-05 11:17:08 Summary: Using utmpx to parse the boot history (optional) Now it's possible to use either utmp or utmpx to extract the boot history. For Windows we'll need something else. It's possible to disable either technique at build time like $ ./configure --disable-utmpx --disable-utmp This closes bug #34. Affected #: 6 files diff -r 1c8a1fdc4073bfdc41b834916d8d1b0cf6e61104 -r dfbfbacfa58f306ace1643080d6de990ca2dedd1 Makefile.am --- a/Makefile.am +++ b/Makefile.am @@ -55,11 +55,20 @@ ldFlags += -L$(top_srcdir)/lib endif +defines = -DPKGDATADIR='$(pkgdatadir)'\ + -DPKGLIBDIR='$(pkglibdir)'\ + -DDATABASEDIR='$(databasedir)'\ + -DPIDDIR='$(piddir)' + +if UTMPX_ENABLED + defines += -DUTMPX_ENABLED=1 +endif +if UTMP_ENABLED + defines += -DUTMP_ENABLED=1 +endif + cppFlags = $(incDirs)\ - -DPKGDATADIR='$(pkgdatadir)'\ - -DPKGLIBDIR='$(pkglibdir)'\ - -DDATABASEDIR='$(databasedir)'\ - -DPIDDIR='$(piddir)'\ + $(defines)\ @CPPFLAGS@ AM_CXXFLAGS = $(incDirs) @CXXFLAGS@ diff -r 1c8a1fdc4073bfdc41b834916d8d1b0cf6e61104 -r dfbfbacfa58f306ace1643080d6de990ca2dedd1 configure.ac --- a/configure.ac +++ b/configure.ac @@ -22,7 +22,7 @@ [AS_HELP_STRING( [--with-initdir=/path], [Install init script to /path.] - )], + )], [AC_SUBST([initdir], [$withval])], [AC_SUBST([initdir], ['/etc/init.d'])] ) @@ -31,7 +31,7 @@ [AS_HELP_STRING( [--with-databasedir=/path], [Save the database in /path.] - )], + )], [AC_SUBST([databasedir], [$withv... [truncated message content] |