From: <arn...@us...> - 2008-01-15 13:53:59
|
Revision: 127 http://adchpp.svn.sourceforge.net/adchpp/?rev=127&view=rev Author: arnetheduck Date: 2008-01-15 05:53:57 -0800 (Tue, 15 Jan 2008) Log Message: ----------- Rework bloom filter implementation Modified Paths: -------------- adchpp/trunk/adchpp/ClientManager.cpp adchpp/trunk/adchpp/SocketManager.cpp adchpp/trunk/adchpp/TigerHash.h adchpp/trunk/changelog.txt adchpp/trunk/plugins/Bloom/src/BloomManager.cpp adchpp/trunk/plugins/Bloom/src/HashBloom.cpp adchpp/trunk/plugins/Bloom/src/HashBloom.h adchpp/trunk/plugins/Bloom/src/HashValue.h adchpp/trunk/swig/adchpp.i Modified: adchpp/trunk/adchpp/ClientManager.cpp =================================================================== --- adchpp/trunk/adchpp/ClientManager.cpp 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/adchpp/ClientManager.cpp 2008-01-15 13:53:57 UTC (rev 127) @@ -269,9 +269,9 @@ TigerHash tiger; tiger.update(&password[0], password.size()); tiger.update(&salt[0], salt.size()); - uint8_t tmp[TigerHash::HASH_SIZE]; - Encoder::fromBase32(suppliedHash.c_str(), tmp, TigerHash::HASH_SIZE); - if (memcmp(tiger.finalize(), tmp, TigerHash::HASH_SIZE) == 0) { + uint8_t tmp[TigerHash::BYTES]; + Encoder::fromBase32(suppliedHash.c_str(), tmp, TigerHash::BYTES); + if (memcmp(tiger.finalize(), tmp, TigerHash::BYTES) == 0) { return true; } @@ -284,7 +284,7 @@ tiger2.update(c.getCID().data(), CID::SIZE); tiger2.update(&password[0], password.size()); tiger2.update(&salt[0], salt.size()); - if (memcmp(tiger2.finalize(), tmp, TigerHash::HASH_SIZE) == 0) { + if (memcmp(tiger2.finalize(), tmp, TigerHash::BYTES) == 0) { c.send(AdcCommand(AdcCommand::CMD_MSG).addParam("Your client uses an old PAS encoding, please upgrade")); return true; } Modified: adchpp/trunk/adchpp/SocketManager.cpp =================================================================== --- adchpp/trunk/adchpp/SocketManager.cpp 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/adchpp/SocketManager.cpp 2008-01-15 13:53:57 UTC (rev 127) @@ -534,12 +534,16 @@ ssize_t bytes = ::recv(ms->getSocket(), buf->data(), buf->size(), MSG_DONTWAIT); if(bytes == -1) { int error = errno; - if(error != EAGAIN && error != EINTR) { - ms->close(); - disconnect(ms, error); - return false; + if(error == EINTR) { + continue; } - break; + if(error == EAGAIN) { + break; + } + + ms->close(); + disconnect(ms, error); + return false; } else if(bytes == 0) { ms->close(); disconnect(ms, 0); @@ -557,33 +561,31 @@ return; } BufferList buffers; - while(true) { - ms->prepareWrite(buffers); - if(buffers.empty()) { - uint32_t now = GET_TICK(); - if(ms->disc || (ms->isBlocked() && ms->disc < now)) { - disconnect(ms, 0); - } - return; + ms->prepareWrite(buffers); + if(buffers.empty()) { + uint32_t now = GET_TICK(); + if(ms->disc || (ms->isBlocked() && ms->disc < now)) { + disconnect(ms, 0); } - std::vector<iovec> iov(buffers.size()); - for(size_t i = 0; i < buffers.size(); ++i) { - iov[i].iov_base = buffers[i]->data(); - iov[i].iov_len = buffers[i]->size(); - } - ssize_t bytes = ::writev(ms->getSocket(), &iov[0], iov.size()); - if(bytes == -1) { - int error = errno; - if(error == EAGAIN) { - ms->completeWrite(buffers, 0); - return; - } + return; + } + std::vector<iovec> iov(buffers.size()); + for(size_t i = 0; i < buffers.size(); ++i) { + iov[i].iov_base = buffers[i]->data(); + iov[i].iov_len = buffers[i]->size(); + } + ssize_t bytes = ::writev(ms->getSocket(), &iov[0], iov.size()); + if(bytes == -1) { + int error = errno; + if(error == EAGAIN) { + ms->setBlocked(true); + } else if(error != EINTR) { disconnect(ms, error); return; } - if(!ms->completeWrite(buffers, bytes)) { - break; - } + ms->completeWrite(buffers, 0); + } else { + ms->completeWrite(buffers, bytes); } } Modified: adchpp/trunk/adchpp/TigerHash.h =================================================================== --- adchpp/trunk/adchpp/TigerHash.h 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/adchpp/TigerHash.h 2008-01-15 13:53:57 UTC (rev 127) @@ -23,8 +23,9 @@ class TigerHash { public: - /** Hash size in bytes */ - enum { HASH_SIZE = 24 }; + /** Hash size */ + static const size_t BITS = 192; + static const size_t BYTES = BITS / 8; TigerHash() : pos(0) { res[0]=_ULL(0x0123456789ABCDEF); Modified: adchpp/trunk/changelog.txt =================================================================== --- adchpp/trunk/changelog.txt 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/changelog.txt 2008-01-15 13:53:57 UTC (rev 127) @@ -1,2 +1,7 @@ --- 2.1 06.01.2008 -- - * Initial ADC 1.0 release \ No newline at end of file +-- -- +* access.lua: fixed message type for MSG +* Some minor socket fixes +* Initial Bloom filter implementation + +-- 2.1 2008-01-06 -- +* Initial ADC 1.0 release Modified: adchpp/trunk/plugins/Bloom/src/BloomManager.cpp =================================================================== --- adchpp/trunk/plugins/Bloom/src/BloomManager.cpp 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/plugins/Bloom/src/BloomManager.cpp 2008-01-15 13:53:57 UTC (rev 127) @@ -32,6 +32,9 @@ BloomManager* BloomManager::instance = 0; const string BloomManager::className = "BloomManager"; +// TODO Make configurable +const size_t h = 24; + BloomManager::BloomManager() : searches(0), tthSearches(0), stopped(0) { LOG(className, "Starting"); ClientManager* cm = ClientManager::getInstance(); @@ -56,7 +59,7 @@ return; } - size_t k = HashBloom::get_k(n); + size_t k = HashBloom::get_k(n, h); size_t m = HashBloom::get_m(n, k); blooms.erase(c.getSID()); @@ -68,6 +71,7 @@ get.addParam("0"); get.addParam(Util::toString(m/8)); get.addParam("BK", Util::toString(k)); + get.addParam("BH", Util::toString(h)); c.send(get); } } else if(cmd.getCommand() == AdcCommand::CMD_SND) { @@ -149,7 +153,7 @@ if(v.size() == get<1>(i->second) / 8) { HashBloom& bloom = blooms[c.getSID()]; - bloom.reset(v, get<2>(i->second)); + bloom.reset(v, get<2>(i->second), h); pending.erase(i); } } Modified: adchpp/trunk/plugins/Bloom/src/HashBloom.cpp =================================================================== --- adchpp/trunk/plugins/Bloom/src/HashBloom.cpp 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/plugins/Bloom/src/HashBloom.cpp 2008-01-15 13:53:57 UTC (rev 127) @@ -1,8 +1,9 @@ #include "stdinc.h" + #include "HashBloom.h" -size_t HashBloom::get_k(size_t n) { - for(size_t k = TTHValue::SIZE/3; k > 1; --k) { +size_t HashBloom::get_k(size_t n, size_t h) { + for(size_t k = TTHValue::BITS/h; k > 1; --k) { uint64_t m = get_m(n, k); if(m >> 24 == 0) { return k; @@ -13,8 +14,8 @@ uint64_t HashBloom::get_m(size_t n, size_t k) { uint64_t m = (static_cast<uint64_t>(ceil(static_cast<double>(n) * k / log(2.)))); - // 64-bit boundary allows us to use a bitset based on uint64_t's - return ((m / 64) + 1) * 64; + // 64-bit boundary as per spec + return ((m + 63 )/ 64) * 64; } void HashBloom::add(const TTHValue& tth) { @@ -39,8 +40,9 @@ bloom.push_back(v); } -void HashBloom::reset(ByteVector& v, size_t k_) { +void HashBloom::reset(ByteVector& v, size_t k_, size_t h_) { k = k_; + h = h_; bloom.resize(v.size() * 8); for(size_t i = 0; i < v.size(); ++i) { @@ -51,12 +53,22 @@ } size_t HashBloom::pos(const TTHValue& tth, size_t n) const { - uint32_t x = 0; - for(size_t i = n*3; i < TTHValue::SIZE; i += 3*k) { - x ^= static_cast<uint32_t>(tth.data[i]) << 2*8; - x ^= static_cast<uint32_t>(tth.data[i+1]) << 8; - x ^= static_cast<uint32_t>(tth.data[i+2]); + uint64_t x = 0; + + size_t start = n * h; + if((n+1)*h > TTHValue::BITS) { + return 0; } + + for(size_t i = 0; i < h; ++i) { + size_t bit = start + i; + size_t byte = bit / 8; + size_t pos = bit % 8; + + if(tth.data[byte] & (1 << pos)) { + x |= (1 << i); + } + } return x % bloom.size(); } Modified: adchpp/trunk/plugins/Bloom/src/HashBloom.h =================================================================== --- adchpp/trunk/plugins/Bloom/src/HashBloom.h 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/plugins/Bloom/src/HashBloom.h 2008-01-15 13:53:57 UTC (rev 127) @@ -15,14 +15,16 @@ */ class HashBloom { public: + HashBloom() : k(0), h(0) { } + /** Return a suitable value for k based on n */ - static size_t get_k(size_t n); + static size_t get_k(size_t n, size_t h); /** Optimal number of bits to allocate for n elements when using k hashes */ static uint64_t get_m(size_t n, size_t k); void add(const TTHValue& tth); bool match(const TTHValue& tth) const; - void reset(ByteVector& v, size_t k); + void reset(ByteVector& v, size_t k, size_t h); void push_back(bool v); size_t size() const { return bloom.size(); } @@ -32,6 +34,7 @@ std::vector<bool> bloom; size_t k; + size_t h; }; #endif /*HASHBLOOM_H_*/ Modified: adchpp/trunk/plugins/Bloom/src/HashValue.h =================================================================== --- adchpp/trunk/plugins/Bloom/src/HashValue.h 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/plugins/Bloom/src/HashValue.h 2008-01-15 13:53:57 UTC (rev 127) @@ -24,21 +24,22 @@ template<class Hasher> struct HashValue { - static const size_t SIZE = Hasher::HASH_SIZE; + static const size_t BITS = Hasher::BITS; + static const size_t BYTES = Hasher::BYTES; HashValue() { } - explicit HashValue(uint8_t* aData) { memcpy(data, aData, SIZE); } - explicit HashValue(const std::string& base32) { Encoder::fromBase32(base32.c_str(), data, SIZE); } - HashValue(const HashValue& rhs) { memcpy(data, rhs.data, SIZE); } - HashValue& operator=(const HashValue& rhs) { memcpy(data, rhs.data, SIZE); return *this; } + explicit HashValue(uint8_t* aData) { memcpy(data, aData, BYTES); } + explicit HashValue(const std::string& base32) { Encoder::fromBase32(base32.c_str(), data, BYTES); } + HashValue(const HashValue& rhs) { memcpy(data, rhs.data, BYTES); } + HashValue& operator=(const HashValue& rhs) { memcpy(data, rhs.data, BYTES); return *this; } bool operator!=(const HashValue& rhs) const { return !(*this == rhs); } - bool operator==(const HashValue& rhs) const { return memcmp(data, rhs.data, SIZE) == 0; } - bool operator<(const HashValue& rhs) const { return memcmp(data, rhs.data, SIZE) < 0; } + bool operator==(const HashValue& rhs) const { return memcmp(data, rhs.data, BYTES) == 0; } + bool operator<(const HashValue& rhs) const { return memcmp(data, rhs.data, BYTES) < 0; } - std::string toBase32() const { return Encoder::toBase32(data, SIZE); } - std::string& toBase32(std::string& tmp) const { return Encoder::toBase32(data, SIZE, tmp); } + std::string toBase32() const { return Encoder::toBase32(data, BYTES); } + std::string& toBase32(std::string& tmp) const { return Encoder::toBase32(data, BYTES, tmp); } - uint8_t data[SIZE]; + uint8_t data[BYTES]; }; namespace std { namespace tr1 { Modified: adchpp/trunk/swig/adchpp.i =================================================================== --- adchpp/trunk/swig/adchpp.i 2008-01-06 19:18:26 UTC (rev 126) +++ adchpp/trunk/swig/adchpp.i 2008-01-15 13:53:57 UTC (rev 127) @@ -680,7 +680,7 @@ class TigerHash { public: /** Hash size in bytes */ - enum { HASH_SIZE = 24 }; + enum { BITS = 192, BYTES = BITS / 8 }; // Keep old name for a while TigerHash(); @@ -689,7 +689,7 @@ self->update(data.data(), data.size()); } std::string finalize() { - return std::string(reinterpret_cast<const char*>(self->finalize()), TigerHash::HASH_SIZE); + return std::string(reinterpret_cast<const char*>(self->finalize()), TigerHash::BYTES); } } }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |