From: <arn...@us...> - 2007-12-17 23:13:38
|
Revision: 106 http://adchpp.svn.sourceforge.net/adchpp/?rev=106&view=rev Author: arnetheduck Date: 2007-12-17 15:13:35 -0800 (Mon, 17 Dec 2007) Log Message: ----------- Update bloom filter to use multiple hashes, fix access script Modified Paths: -------------- adchpp/trunk/adchpp/CID.h adchpp/trunk/adchpp/ClientManager.cpp adchpp/trunk/adchpp/ClientManager.h adchpp/trunk/plugins/Bloom/src/BloomManager.cpp adchpp/trunk/plugins/Bloom/src/BloomManager.h adchpp/trunk/plugins/Bloom/src/HashBloom.h adchpp/trunk/plugins/Bloom/src/HashValue.h adchpp/trunk/plugins/Script/examples/access.lua Added Paths: ----------- adchpp/trunk/plugins/Bloom/src/HashBloom.cpp Modified: adchpp/trunk/adchpp/CID.h =================================================================== --- adchpp/trunk/adchpp/CID.h 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/adchpp/CID.h 2007-12-17 23:13:35 UTC (rev 106) @@ -29,10 +29,6 @@ enum { SIZE = 192 / 8 }; enum { BASE32_SIZE = 39 }; - struct Hash { - size_t operator()(const CID& c) const { return c.toHash(); } - bool operator()(const CID& a, const CID& b) const { return a < b; } - }; CID() { memset(cid, 0, sizeof(cid)); } explicit CID(const uint8_t* data) { memcpy(cid, data, sizeof(cid)); } explicit CID(const std::string& base32) { Encoder::fromBase32(base32.c_str(), cid, sizeof(cid)); } @@ -62,4 +58,14 @@ } +namespace std { namespace tr1 { +template<> +struct hash<adchpp::CID> { + size_t operator()(const adchpp::CID& rhs) const { + return *reinterpret_cast<const size_t*>(rhs.data()); + } +}; +} +} + #endif Modified: adchpp/trunk/adchpp/ClientManager.cpp =================================================================== --- adchpp/trunk/adchpp/ClientManager.cpp 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/adchpp/ClientManager.cpp 2007-12-17 23:13:35 UTC (rev 106) @@ -431,10 +431,10 @@ setState(c, Client::STATE_IDENTIFY); } -vector<uint8_t> ClientManager::enterVerify(Client& c, bool sendData) throw() { +ByteVector ClientManager::enterVerify(Client& c, bool sendData) throw() { dcassert(c.getState() == Client::STATE_IDENTIFY); dcdebug("%s entering VERIFY\n", AdcCommand::fromSID(c.getSID()).c_str()); - vector<uint8_t> challenge; + ByteVector challenge; if (sendData) { for (int i = 0; i < 32/4; ++i) { uint32_t r = Util::rand(); Modified: adchpp/trunk/adchpp/ClientManager.h =================================================================== --- adchpp/trunk/adchpp/ClientManager.h 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/adchpp/ClientManager.h 2007-12-17 23:13:35 UTC (rev 106) @@ -176,7 +176,7 @@ ClientMap clients; typedef std::tr1::unordered_map<std::string, uint32_t> NickMap; NickMap nicks; - typedef std::tr1::unordered_map<CID, uint32_t, CID::Hash> CIDMap; + typedef std::tr1::unordered_map<CID, uint32_t> CIDMap; CIDMap cids; typedef std::tr1::unordered_set<uint32_t> SIDSet; SIDSet sids; Modified: adchpp/trunk/plugins/Bloom/src/BloomManager.cpp =================================================================== --- adchpp/trunk/plugins/Bloom/src/BloomManager.cpp 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/plugins/Bloom/src/BloomManager.cpp 2007-12-17 23:13:35 UTC (rev 106) @@ -20,9 +20,13 @@ #include "BloomManager.h" #include <adchpp/LogManager.h> +#include <adchpp/Client.h> +#include <adchpp/AdcCommand.h> +#include <adchpp/Util.h> using namespace std; using namespace std::tr1::placeholders; +using namespace adchpp; BloomManager* BloomManager::instance = 0; const string BloomManager::className = "BloomManager"; @@ -38,18 +42,33 @@ LOG(className, "Shutting down"); } -static const std::string FEATURE = "BLOM"; +static const std::string FEATURE = "BLO0"; void BloomManager::onReceive(Client& c, AdcCommand& cmd, int& override) { - std::string tth; + string tmp; if(cmd.getCommand() == AdcCommand::CMD_INF && c.supports(FEATURE)) { - AdcCommand get(AdcCommand::CMD_GET); - get.addParam("blom"); - get.addParam("/"); - get.addParam("0"); - get.addParam("-1"); - c.send(get); + if(cmd.getParam("SF", 0, tmp)) { + size_t n = adchpp::Util::toInt(tmp); + if(n == 0) { + return; + } + + size_t k = HashBloom::get_k(n); + size_t m = HashBloom::get_m(n, k); + + HashBloom bloom = blooms[c.getCID()]; + + bloom.reset(k); + + AdcCommand get(AdcCommand::CMD_GET); + get.addParam("blom"); + get.addParam("/"); + get.addParam("0"); + get.addParam(Util::toString(m/8)); + get.addParam("BK", Util::toString(k)); + c.send(get); + } } else if(cmd.getCommand() == AdcCommand::CMD_SND) { if(cmd.getParameters().size() < 4) { return; @@ -58,14 +77,15 @@ return; } - int64_t bytes = Util::toInt(cmd.getParam(4)); + int64_t bytes = Util::toInt(cmd.getParam(3)); c.setDataMode(std::tr1::bind(&BloomManager::onData, this, _1, _2, _3), bytes); - } else if(cmd.getCommand() == AdcCommand::CMD_SCH && cmd.getParam("TH", 0, tth)) { - + override |= ClientManager::DONT_DISPATCH | ClientManager::DONT_SEND; + } else if(cmd.getCommand() == AdcCommand::CMD_SCH && cmd.getParam("TR", 0, tmp)) { BloomMap::const_iterator i = blooms.find(c.getCID()); - if(i != blooms.end() && i->second.match(TTHValue(tth))) { + if(i != blooms.end() && !i->second.match(TTHValue(tmp))) { // Stop it + dcdebug("Stopping search\n"); override |= ClientManager::DONT_DISPATCH | ClientManager::DONT_SEND; } } @@ -83,4 +103,3 @@ void BloomManager::onDisconnected(Client& c) { blooms.erase(c.getCID()); } - Modified: adchpp/trunk/plugins/Bloom/src/BloomManager.h =================================================================== --- adchpp/trunk/plugins/Bloom/src/BloomManager.h 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/plugins/Bloom/src/BloomManager.h 2007-12-17 23:13:35 UTC (rev 106) @@ -64,7 +64,7 @@ friend class Singleton<BloomManager>; static BloomManager* instance; - typedef std::tr1::unordered_map<CID, HashBloom, CID::Hash> BloomMap; + typedef std::tr1::unordered_map<CID, HashBloom> BloomMap; BloomMap blooms; ClientManager::SignalReceive::ManagedConnection receiveConn; Added: adchpp/trunk/plugins/Bloom/src/HashBloom.cpp =================================================================== --- adchpp/trunk/plugins/Bloom/src/HashBloom.cpp (rev 0) +++ adchpp/trunk/plugins/Bloom/src/HashBloom.cpp 2007-12-17 23:13:35 UTC (rev 106) @@ -0,0 +1,54 @@ +#include "stdinc.h" +#include "HashBloom.h" + + +size_t HashBloom::get_k(size_t n) { + const size_t bits = TTHValue::SIZE * 8; + for(size_t k = static_cast<size_t>(sqrt(bits)); k > 1; --k) { + // We only want the k's where the bits will end up on a byte boundary to ease hash implementation + if((bits % k) == 0 && (bits / k) % 8 == 0) { + uint64_t m = get_m(n, k); + if(m >> (TTHValue::SIZE * 8 / k) == 0) { + return k; + } + } + } + return 1; +} + +uint64_t HashBloom::get_m(size_t n, size_t k) { + uint64_t m = (static_cast<uint64_t>(ceil(static_cast<double>(n) * k / log(2.)))); + return ((m / 8) + 1) * 8; +} + +void HashBloom::add(const TTHValue& tth) { + for(size_t i = 0; i < k; ++i) { + bloom[pos(tth, i)] = true; + } +} + +bool HashBloom::match(const TTHValue& tth) const { + if(bloom.empty()) { + return true; + } + for(size_t i = 0; i < k; ++i) { + if(!bloom[pos(tth, i)]) { + return false; + } + } + return true; +} + +void HashBloom::push_back(bool v) { + bloom.push_back(v); +} + +void HashBloom::reset(size_t k_) { + bloom.resize(0); + k = k; +} + +size_t HashBloom::pos(const TTHValue& tth, size_t n) const { + return (*(size_t*)(tth.data + (TTHValue::SIZE / k) * n)) % bloom.size(); +} + Modified: adchpp/trunk/plugins/Bloom/src/HashBloom.h =================================================================== --- adchpp/trunk/plugins/Bloom/src/HashBloom.h 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/plugins/Bloom/src/HashBloom.h 2007-12-17 23:13:35 UTC (rev 106) @@ -3,19 +3,33 @@ #include "HashValue.h" +/** + * According to http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf + * the optimal number of hashes k is (m/n)*ln(2), m = number of bits in the filter and n = number + * of items added. The largest k that we can get from a single TTH value depends on the number of + * bits we need to address the bloom structure, which in turn depends on m, so the optimal size + * for our filter is m = n * k / ln(2) where n is the number of TTH values, or in our case, number of + * files in share since each file is identified by one TTH value. We try that for each even dividend + * of the key size (2, 3, 4, 6, 8, 12) and if m fits within the bits we're able to address (2^(keysize/k)), + * we can use that value when requesting the bloom filter. + */ class HashBloom { public: - void add(const TTHValue& tth) { bloom[pos(tth)] = true; } - bool match(const TTHValue& tth) const { return bloom[pos(tth)]; } - void resize(size_t hashes) { bloom.resize(hashes); std::fill(bloom.begin(), bloom.end(), false); } - void push_back(bool v) { bloom.push_back(v); } + /** Return the largest k such that get_m returns a value smaller than 2^(TTHValue::SIZE/k) */ + static size_t get_k(size_t n); + /** Optimal number of bits to allocate for n elements when using k hashes */ + static uint64_t get_m(size_t n, size_t k); + + void add(const TTHValue& tth); + bool match(const TTHValue& tth) const; + void reset(size_t k); + void push_back(bool v); private: - size_t pos(const TTHValue& tth) const { - return (*(size_t*)tth.data) % bloom.size(); - } + size_t pos(const TTHValue& tth, size_t n) const; std::vector<bool> bloom; + size_t k; }; #endif /*HASHBLOOM_H_*/ Modified: adchpp/trunk/plugins/Bloom/src/HashValue.h =================================================================== --- adchpp/trunk/plugins/Bloom/src/HashValue.h 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/plugins/Bloom/src/HashValue.h 2007-12-17 23:13:35 UTC (rev 106) @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001-2006 Jacek Sieka, arnetheduck on gmail point com + * Copyright (C) 2001-2007 Jacek Sieka, arnetheduck on gmail point com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,24 +16,16 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#if !defined(HASH_VALUE_H) -#define HASH_VALUE_H +#ifndef BLOOM_HASH_VALUE_H +#define BLOOM_HASH_VALUE_H #include <adchpp/TigerHash.h> +#include <adchpp/Encoder.h> template<class Hasher> struct HashValue { static const size_t SIZE = Hasher::HASH_SIZE; - struct Hash { -#ifdef _MSC_VER - static const size_t bucket_size = 4; - static const size_t min_buckets = 8; -#endif - size_t operator()(const HashValue& rhs) const { return *(size_t*)&rhs; } - bool operator()(const HashValue& a, const HashValue& b) const { return a < b; } - }; - HashValue() { } explicit HashValue(uint8_t* aData) { memcpy(data, aData, SIZE); } explicit HashValue(const std::string& base32) { Encoder::fromBase32(base32.c_str(), data, SIZE); } @@ -49,6 +41,15 @@ uint8_t data[SIZE]; }; +namespace std { namespace tr1 { +template<> +template<typename T> +struct hash<HashValue<T> > { + size_t operator()(const HashValue<T>& rhs) const { return *(size_t*)rhs.data; } +}; +} +} + typedef HashValue<TigerHash> TTHValue; #endif // !defined(HASH_VALUE_H) Modified: adchpp/trunk/plugins/Script/examples/access.lua =================================================================== --- adchpp/trunk/plugins/Script/examples/access.lua 2007-12-14 15:16:58 UTC (rev 105) +++ adchpp/trunk/plugins/Script/examples/access.lua 2007-12-17 23:13:35 UTC (rev 106) @@ -355,7 +355,7 @@ reply(c, "Welcome back") cm:enterNormal(c, true, true) - if user.level > 1 and (c:supports("UCMD") or c:supports("UCM0") then + if user.level > 1 and (c:supports("UCMD") or c:supports("UCM0")) then for k, v in pairs(user_commands) do ucmd = adchpp.AdcCommand(adchpp.AdcCommand_CMD_CMD, adchpp.AdcCommand_TYPE_INFO, adchpp.AdcCommand_HUB_SID) ucmd:addParam(k) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |