Thread: [Assorted-commits] SF.net SVN: assorted:[1166] ydb/trunk/src
Brought to you by:
yangzhang
From: <yan...@us...> - 2009-02-06 00:32:37
|
Revision: 1166 http://assorted.svn.sourceforge.net/assorted/?rev=1166&view=rev Author: yangzhang Date: 2009-02-06 00:32:30 +0000 (Fri, 06 Feb 2009) Log Message: ----------- - cleaned up the makefile a bit - cleaned up and added some more info to the thread profiling - added TxnBatch - added serperf benchmark Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ydb.proto Added Paths: ----------- ydb/trunk/src/serperf.cc Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-05 09:41:13 UTC (rev 1165) +++ ydb/trunk/src/Makefile 2009-02-06 00:32:30 UTC (rev 1166) @@ -25,28 +25,38 @@ ifneq ($(GCOV),) GCOV := -fprofile-arcs -ftest-coverage endif -LDFLAGS := -pthread -lstx -lst -lresolv -lprotobuf -lgtest \ +ifneq ($(PPROF),) + PPROF := -lprofiler +endif +ifneq ($(OPT),) + OPT := -O3 +else + OPT := -g3 +endif +CXX := $(WTF) $(CXX) +LDFLAGS := -pthread $(GPROF) +LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ -lboost_program_options-gcc43-mt -lboost_thread-gcc43-mt \ - -lboost_serialization-gcc43-mt $(GPROF) + -lboost_serialization-gcc43-mt $(PPROF) # The -Wno- warnings are for boost. 
-CXXFLAGS := -g3 -pthread $(GPROF) -Wall -Werror -Wextra -Woverloaded-virtual \ +CXXFLAGS := $(OPT) -pthread $(GPROF) -Wall -Werror -Wextra -Woverloaded-virtual \ -Wconversion -Wno-conversion -Wno-ignored-qualifiers \ -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings \ -Winit-self -Wsign-promo -Wno-unused-parameter -Wc++0x-compat \ -Wparentheses -Wmissing-format-attribute -Wfloat-equal \ - -Winline -Wsynth -PBCXXFLAGS := -g3 -Wall -Werror $(GPROF) + -Winline -Wsynth $(CXXFLAGS) +PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) $(TARGET): $(OBJS) - $(CXX) -o $@ $^ $(LDFLAGS) + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ %.o: %.cc $(PBHDRS) - $(WTF) $(CXX) $(CXXFLAGS) -c -o $@ $< + $(COMPILE.cc) $(OUTPUT_OPTION) $< %.o: %.pb.cc %.pb.h - $(WTF) $(CXX) $(PBCXXFLAGS) -c -o $@ $< + $(CXX) -c $(PBCXXFLAGS) $(OUTPUT_OPTION) $< %.cc: %.lzz lzz -hx hh -sx cc -hl -sl -hd -sd $< @@ -72,3 +82,10 @@ .PHONY: clean .SECONDARY: $(SRCS) $(HDRS) $(OBJS) main.lzz + +### + +serperf: serperf.o ydb.o + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + +# serperf.cc ydb.pb.h \ No newline at end of file Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-05 09:41:13 UTC (rev 1165) +++ ydb/trunk/src/main.lzz.clamp 2009-02-06 00:32:30 UTC (rev 1166) @@ -1125,7 +1125,6 @@ } else if (sig == SIGUSR1) { toggle(do_pause); } - //break; } } @@ -1291,24 +1290,29 @@ my_spawn(memmon, "memmon"); } + long long start = thread_start_time = current_time_millis(); // At the end, print thread profiling information. 
finally f(lambda() { if (profile_threads) { + long long end = current_time_millis(); + long long all = end - __ref(start); cout << "thread profiling results:" << endl; - long long total; + long long total = 0; typedef pair<st_thread_t, long long> entry; foreach (entry p, threadtimes) { - const string &name = threadname(p.first); - if (name != "main" && name != "handle_sig_sync") - total += p.second; + total += p.second; } + cout << "total " << total << " all " << all << endl; foreach (entry p, threadtimes) { - const string &name = threadname(p.first); - if (name != "main" && name != "handle_sig_sync") - cout << "- " << threadname(p.first) << ": " << p.second - << " (" << (static_cast<double>(p.second) / total) << "%)" - << endl; + cout << "- " << threadname(p.first) << ": " << p.second << " ms (" + << (static_cast<double>(p.second) / total) << "% of total, " + << (static_cast<double>(p.second) / all) << "% of all)" << endl; } + cout << "- total: " << total << " ms (" << double(total) / all + << "% of all)" << endl; + cout << "- unaccounted: " << all - total << " ms (" + << double(all - total) / all << "% of all)" << endl; + cout << "- all: " << all << " ms" << endl; } }); Added: ydb/trunk/src/serperf.cc =================================================================== --- ydb/trunk/src/serperf.cc (rev 0) +++ ydb/trunk/src/serperf.cc 2009-02-06 00:32:30 UTC (rev 1166) @@ -0,0 +1,80 @@ +#include <iostream> +#include <sstream> +#include <commons/time.h> +#include "ydb.pb.h" +#include <boost/archive/binary_oarchive.hpp> + +using namespace boost::archive; +using namespace std; +using namespace commons; + +int main(int argc, char **argv) { + const int count = atoi(argv[1]), batchsize = atoi(argv[2]); + + TxnBatch batch; + for (int i = 0; i < batchsize; ++i) { + Txn &txn = *batch.add_txn(); + txn.set_seqno(i); + for (int j = 0; j < 5; j++) { + Op *op = txn.add_op(); + op->set_key(j); + op->set_value(-j); + op->set_type(Op::read); + } + } + + { + long long start = 
current_time_millis(); + for (int i = 0; i < count; ++i) { + stringstream ss; + batch.SerializeToOstream(&ss); + } + long long time = current_time_millis() - start; + double tps = 100 * static_cast<double>(count * batchsize) / time; + cout << "protobuf: " << time << " ms, " << tps << " tps" << endl; + } + + { + long long start = current_time_millis(); + for (int i = 0; i < count; ++i) { + stringbuf sb; + binary_oarchive oa(sb); + for (int j = 0; j < batchsize; ++j) { + const Txn &txn = batch.txn(j); + int seqno = txn.seqno(); + oa << seqno; + for (int k = 0; k < 5; ++k) { + const Op &op = txn.op(k); + int key = op.key(), value = op.value(), type = op.value(); + oa << key << value << type; + } + } + } + long long time = current_time_millis() - start; + double tps = 100 * static_cast<double>(count * batchsize) / time; + cout << "boost: " << time << " ms, " << tps << " tps" << endl; + } + + { + long long start = current_time_millis(); + for (int i = 0; i < count; ++i) { + stringbuf sb; + for (int j = 0; j < batchsize; ++j) { + const Txn &txn = batch.txn(j); +#define write(x) { typeof(x) __x = x; sb.sputn((char*)(&__x), sizeof __x); } + write(txn.seqno()); + for (int k = 0; k < 5; ++k) { + const Op &op = txn.op(k); + write(op.key()); + write(op.value()); + write(op.type()); + } + } + } + long long time = current_time_millis() - start; + double tps = 100 * static_cast<double>(count * batchsize) / time; + cout << "streambuf.sputn: " << time << " ms, " << tps << " tps" << endl; + } + + return 0; +} Modified: ydb/trunk/src/ydb.proto =================================================================== --- ydb/trunk/src/ydb.proto 2009-02-05 09:41:13 UTC (rev 1165) +++ ydb/trunk/src/ydb.proto 2009-02-06 00:32:30 UTC (rev 1166) @@ -75,3 +75,7 @@ // into action. message Ready { } + +message TxnBatch { + repeated Txn txn = 1; +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-13 20:57:10
|
Revision: 1178 http://assorted.svn.sourceforge.net/assorted/?rev=1178&view=rev Author: yangzhang Date: 2009-02-13 20:57:05 +0000 (Fri, 13 Feb 2009) Log Message: ----------- - added p2 - added ResponseBatch - added gch rules (but not yet incorporated into main build) - moved to g++0x - lifted break_exception - removed bcastmsg_fake - changed bcastmsg_async to queue up (dst,msg) pairs (rather than rely on a cached version of the dsts vector) - changed the core readmsg calls to use st_reader - replaced bcast_async macros with fn ptrs - reworked process_txn interface - added leader summary as well - tolerate EINTR on joiner accept - added --fake-bcast, --bcast-async Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ydb.proto Added Paths: ----------- ydb/trunk/src/p2.cc Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-13 20:54:46 UTC (rev 1177) +++ ydb/trunk/src/Makefile 2009-02-13 20:57:05 UTC (rev 1178) @@ -44,7 +44,7 @@ -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings \ -Winit-self -Wsign-promo -Wno-unused-parameter -Wc++0x-compat \ -Wparentheses -Wmissing-format-attribute -Wfloat-equal \ - -Wno-inline -Wsynth $(CXXFLAGS) + -Wno-inline -Wsynth -std=gnu++0x $(CXXFLAGS) PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) @@ -73,6 +73,12 @@ %.lzz: %.lzz.clamp clamp < $< | sed "`echo -e '1i#src\n1a#end'`" > $@ +all.h: + fgrep '#include' main.lzz.clamp > all.h + +all.h.gch: all.h + $(COMPILE.cc) $(PBHDRS) $(OUTPUT_OPTION) $< + clean: rm -f $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) main.lzz *.clamp_h serperf @@ -89,6 +95,9 @@ ### serperf: serperf.o ydb.o - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) # serperf.cc ydb.pb.h + +p2: p2.cc + $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) Modified: ydb/trunk/src/main.lzz.clamp 
=================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-13 20:54:46 UTC (rev 1177) +++ ydb/trunk/src/main.lzz.clamp 2009-02-13 20:57:05 UTC (rev 1178) @@ -29,6 +29,8 @@ #include <vector> #include "ydb.pb.h" #define foreach BOOST_FOREACH +#define shared_ptr boost::shared_ptr +#define ref boost::ref using namespace boost; using namespace boost::archive; using namespace commons; @@ -55,7 +57,7 @@ bool verbose, yield_during_build_up, yield_during_catch_up, dump, show_updates, count_updates, stop_on_recovery, general_txns, profile_threads, debug_threads, multirecover, disk, debug_memory, use_wal, - suppress_txn_msgs; + suppress_txn_msgs, use_bcast_async, fake_bcast; long long timelim, read_thresh, write_thresh; // Control. @@ -68,13 +70,14 @@ /** * Convenience function for calculating percentages. */ -double -pct(double sub, double tot) -{ - return 100 * sub / tot; -} +double pct(double sub, double tot) { return 100 * sub / tot; } /** + * Convenience class for performing long-jumping break. + */ +class break_exception : public std::exception {}; + +/** * The list of all threads. Keep track of these so that we may cleanly shut * down all threads. */ @@ -241,81 +244,30 @@ const vector<st_netfd_t> &rs_; }; -/** - * XXX - */ -template<typename T> -void -bcastmsg_fake(const vector<st_netfd_t> &dsts, const T & msg) -{ - // Serialize message to a buffer. - string s; - check(msg.SerializeToString(&s)); - const char *buf = s.c_str(); +st_channel<pair<st_netfd_t, shared_ptr<string> > > msgs; - if (s.size() > 1000000) - cout << "sending large message to " << dsts.size() << " dsts, size = " - << s.size() << " bytes" << endl; - - // Prefix the message with a four-byte length. - uint32_t len = htonl(static_cast<uint32_t>(s.size())); - - // Broadcast the length-prefixed message to replicas. 
- int dstno = 0; - foreach (st_netfd_t dst, dsts) { - size_t resid = sizeof len; -#define checksize(x,y) checkeqnneg(x, static_cast<ssize_t>(y)) - int res = true ? 0 : st_write_resid(dst, static_cast<void*>(&len), &resid, timeout); - long long before_write = -1; - if (write_thresh > 0) { - before_write = current_time_millis(); - } - if (res == -1 && errno == ETIME) { - checksize(st_write(dst, - reinterpret_cast<char*>(&len) + sizeof len - resid, - resid, - ST_UTIME_NO_TIMEOUT), - resid); - } else { - check0x(res); - } - if (write_thresh > 0) { - long long write_time = current_time_millis() - before_write; - if (write_time > write_thresh) { - cout << "thread " << threadname() - << ": write to dst #" << dstno - << " took " << write_time << " ms" << endl; - } - } - if (false) - checksize(st_write(dst, buf, s.size(), ST_UTIME_NO_TIMEOUT), - s.size()); - ++dstno; - } -} - -st_channel<shared_ptr<string> > msgs; - -const vector<st_netfd_t> *gdsts; - /** - * XXX + * The worker that performs the actual broadcasting. */ void bcaster() { int counter = 0; while (!kill_hub) { - shared_ptr<string> p; + pair<st_netfd_t, shared_ptr<string> > pr; { st_intr intr(kill_hub); - p = msgs.take(); + pr = msgs.take(); } + st_netfd_t dst = pr.first; + shared_ptr<string> &p = pr.second; if (p.get() == nullptr) break; string &s = *p.get(); int dstno = 0; - foreach (st_netfd_t dst, *gdsts) { + // XXX + // foreach (st_netfd_t dst, *gdsts) { + if (!fake_bcast) { long long before_write = -1; if (write_thresh > 0) { before_write = current_time_millis(); @@ -330,7 +282,8 @@ cout << "thread " << threadname() << ": write #" << counter << " of size " << s.size() - << " bytes to dst #" << dstno + //<< " bytes to dst #" << dstno + << " bytes" << " took " << write_time << " ms" << endl; } } @@ -341,14 +294,12 @@ } /** - * XXX + * Asynchronous version of the broadcaster. 
*/ template<typename T> void bcastmsg_async(const vector<st_netfd_t> &dsts, const T & msg) { - gdsts = &dsts; - // Serialize message to a buffer. uint32_t len; shared_ptr<string> p(new string(sizeof len, '\0')); @@ -362,13 +313,11 @@ // Prefix the message with a four-byte length. len = htonl(static_cast<uint32_t>(s.size() - sizeof len)); char *plen = reinterpret_cast<char*>(&len); - for (size_t i = 0; i < sizeof len; ++i) - s[i] = plen[i]; + copy(plen, plen + sizeof len, s.begin()); - msgs.push(p); + foreach (st_netfd_t dst, dsts) msgs.push(make_pair(dst, p)); } - /** * Send a message to some destinations (sequentially). */ @@ -432,6 +381,14 @@ bcastmsg(dsts, msg); } +template<typename T> +void +sendmsg_async(st_netfd_t dst, const T &msg) +{ + vector<st_netfd_t> dsts(1, dst); + bcastmsg_async(dsts, msg); +} + /** * Read a message. This is done in two steps: first by reading the length * prefix, then by reading the actual body. This function also provides a way @@ -476,7 +433,7 @@ char buf[len]; GETMSG(buf); } else { - cout << "receiving large msg; heap-allocating " << len << " bytes" << endl; + //cout << "receiving large msg; heap-allocating " << len << " bytes" << endl; scoped_array<char> buf(new char[len]); GETMSG(buf.get()); } @@ -499,6 +456,19 @@ } /** + * Same as the above readmsg() but uses an st_reader instead of a raw + * st_netfd_t. + */ +template <typename T> +void +readmsg(st_reader &src, T & msg) +{ + array_view<char> a = src.read(sizeof(uint32_t)); + uint32_t len = ntohl(*reinterpret_cast<const uint32_t*>(a.get())); + check(msg.ParseFromArray(src.read(len), len)); +} + +/** * ARIES write-ahead log. No undo logging necessary (no steal). */ class wal @@ -527,6 +497,15 @@ mii g_map; wal *g_wal; +// Function pointer types. 
+typedef void (*bcasttxn_t)(const vector<st_netfd_t> &dsts, const TxnBatch &msg); +bcasttxn_t bcasttxn_async = bcastmsg_async<TxnBatch>; +bcasttxn_t bcasttxn_sync = bcastmsg<TxnBatch>; + +typedef void (*sendres_t)(st_netfd_t dst, const ResponseBatch &msg); +sendres_t sendres_async = sendmsg_async<ResponseBatch>; +sendres_t sendres_sync = sendmsg<ResponseBatch>; + /** * Keep issuing transactions to the replicas. */ @@ -534,16 +513,16 @@ issue_txns(st_channel<replica_info> &newreps, int &seqno, st_bool &accept_joiner) { -#define bcastmsg bcastmsg_async + bcasttxn_t bcast = use_bcast_async ? bcasttxn_async : bcasttxn_sync; + st_thread_t bcaster_thread = bcast == bcasttxn_async ? + my_spawn(bcaster, "bcaster") : nullptr; + Op_OpType types[] = {Op::read, Op::write, Op::del}; vector<st_netfd_t> fds; long long start_time = current_time_millis(); -#if bcastmsg == bcastmsg_async - st_joining join_bcaster(my_spawn(bcaster, "bcaster")); -#endif - finally f(lambda () { + if (__ref(bcaster_thread) != nullptr) st_join(__ref(bcaster_thread)); showtput("issued", current_time_millis(), __ref(start_time), __ref(seqno), 0); }); @@ -554,7 +533,7 @@ // empty/default Txn). if (!newreps.empty() && seqno > 0) { if (multirecover) { - bcastmsg(fds, TxnBatch()); + bcast(fds, TxnBatch()); } else { sendmsg(fds[0], TxnBatch()); } @@ -583,7 +562,7 @@ // Process immediately if not bcasting. if (fds.empty()) { --seqno; - process_txn(nullptr, g_map, txn, seqno, true); + process_txn(g_map, txn, seqno, nullptr); } ++seqno; @@ -619,7 +598,7 @@ // Broadcast. if (!fds.empty() && !suppress_txn_msgs) - bcastmsg(fds, batch); + bcast(fds, batch); // Pause? 
if (do_pause) @@ -630,11 +609,10 @@ TxnBatch batch; Txn &txn = *batch.add_txn(); txn.set_seqno(-1); - bcastmsg(fds, batch); -#if bcastmsg == bcastmsg_any - msgs.push(shared_ptr<string>()); -#endif -#undef bcastmsg + bcast(fds, batch); + if (bcaster_thread != nullptr) { + msgs.push(make_pair(nullptr, shared_ptr<string>())); + } } /** @@ -642,15 +620,15 @@ * leader. */ void -process_txn(st_netfd_t leader, mii &map, const Txn &txn, int &seqno, - bool caught_up) +process_txn(mii &map, const Txn &txn, int &seqno, Response *res) { wal &wal = *g_wal; checkeq(txn.seqno(), seqno + 1); - Response res; - res.set_seqno(txn.seqno()); - res.set_caught_up(caught_up); seqno = txn.seqno(); + if (res != nullptr) { + res->set_seqno(seqno); + res->set_caught_up(true); + } for (int o = 0; o < txn.op_size(); ++o) { const Op &op = txn.op(o); const int key = op.key(); @@ -663,8 +641,10 @@ } switch (op.type()) { case Op::read: - if (it == map.end()) res.add_result(0); - else res.add_result(it->second); + if (res != nullptr) { + if (it == map.end()) res->add_result(0); + else res->add_result(it->second); + } break; case Op::write: if (use_wal) wal.write(key, op.value()); @@ -680,7 +660,6 @@ } } if (use_wal) wal.commit(); - if (caught_up && leader != nullptr) sendmsg(leader, res); } void @@ -760,7 +739,16 @@ // issued more since the Init message). int first_seqno = -1; + st_thread_t bcaster_thread = use_bcast_async ? + my_spawn(bcaster, "bcaster") : nullptr; + sendres_t sendmsg = use_bcast_async ? 
sendres_async : sendres_sync; + finally f(lambda () { + if (__ref(bcaster_thread) != nullptr) { + msgs.push(make_pair(nullptr, shared_ptr<string>())); + st_join(__ref(bcaster_thread)); + } + long long now = current_time_millis(); showtput("processed", now, __ref(start_time), __ref(seqno), __ref(init_seqno)); @@ -773,8 +761,6 @@ __ref(send_states).push(shared_ptr<Recovery>()); }); - class break_exception : public std::exception {}; - try { while (true) { TxnBatch batch; @@ -794,6 +780,7 @@ } } if (batch.txn_size() > 0) { + ResponseBatch resbatch; for (int t = 0; t < batch.txn_size(); ++t) { const Txn &txn = batch.txn(t); // Regular transaction. @@ -809,7 +796,8 @@ first_seqno == -1 ? init_seqno - 1 : first_seqno); caught_up = true; } - process_txn(leader, map, txn, seqno, true); + Response *res = resbatch.add_res(); + process_txn(map, txn, seqno, res); action = "processed"; } else { if (first_seqno == -1) @@ -829,6 +817,8 @@ st_sleep(0); } } + if (resbatch.res_size() > 0) + sendmsg(leader, resbatch); } else { // Empty (default) Txn means "generate a snapshot." // TODO make this faster @@ -876,39 +866,28 @@ start_time(current_time_millis()), recovery_start_time(caught_up ? -1 : start_time), recovery_end_time(-1), + start_seqno(seqno), recovery_start_seqno(caught_up ? -1 : seqno), recovery_end_seqno(-1), last_seqno(-1) {} void run() { - //start_time = current_time_millis(); - //recovery_start_time = caught_up ? -1 : start_time; - //recovery_end_time = -1; - //recovery_start_seqno = caught_up ? 
-1 : seqno; - //recovery_end_seqno = -1; - //last_seqno = -1; + finally f(boost::bind(&response_handler::cleanup, this)); - finally f(lambda () { - long long end_time = current_time_millis(); - if (__ref(recovery_end_time) > -1) { - cout << __ref(rid) << ": "; - showtput("after recovery, finished", end_time, __ref(recovery_end_time), - __ref(seqno), __ref(recovery_end_seqno)); - } - }); + st_reader reader(replica); while (true) { finally f(boost::bind(&response_handler::loop_cleanup, this)); - Response res; + ResponseBatch batch; // Read the message, but correctly respond to interrupts so that we can // cleanly exit (slightly tricky). if (last_seqno + 1 == seqno) { // Stop-interruptible in case we're already caught up. try { st_intr intr(stop_hub); - readmsg(replica, res); + readmsg(reader, batch); } catch (...) { // TODO: only catch interruptions // This check on seqnos is OK for termination since the seqno will // never grow again if stop_hub is set. @@ -925,34 +904,38 @@ // Only kill-interruptible because we want a clean termination (want // to get all the acks back). st_intr intr(kill_hub); - readmsg(replica, res); + readmsg(reader, batch); } - // Determine if this response handler's host (the only joiner) has finished - // catching up. If it has, then broadcast a signal so that all response - // handlers will know about this event. - if (!caught_up && res.caught_up()) { - long long now = current_time_millis(), timediff = now - start_time; - caught_up = true; - recover_signals.push(now); - cout << rid << ": "; - cout << "recovering node caught up; took " - << timediff << " ms" << endl; - // This will cause the program to exit eventually, but cleanly, such that - // the recovery time will be set first, before the eventual exit (which - // may not even happen in the current iteration). 
- if (stop_on_recovery) { - cout << "stopping on recovery" << endl; - stop_hub.set(); - } - } - if (res.seqno() % chkpt == 0) { - if (verbose) { + + for (int i = 0; i < batch.res_size(); ++i) { + const Response &res = batch.res(i); + // Determine if this response handler's host (the only joiner) has finished + // catching up. If it has, then broadcast a signal so that all response + // handlers will know about this event. + if (!caught_up && res.caught_up()) { + long long now = current_time_millis(), timediff = now - start_time; + caught_up = true; + recover_signals.push(now); cout << rid << ": "; - cout << "got response " << res.seqno() << " from " << replica << endl; + cout << "recovering node caught up; took " + << timediff << " ms" << endl; + // This will cause the program to exit eventually, but cleanly, such that + // the recovery time will be set first, before the eventual exit (which + // may not even happen in the current iteration). + if (stop_on_recovery) { + cout << "stopping on recovery" << endl; + stop_hub.set(); + } } - st_sleep(0); + if (res.seqno() % chkpt == 0) { + if (verbose) { + cout << rid << ": "; + cout << "got response " << res.seqno() << " from " << replica << endl; + } + st_sleep(0); + } + last_seqno = res.seqno(); } - last_seqno = res.seqno(); } } @@ -977,6 +960,17 @@ } } + void cleanup() { + long long end_time = current_time_millis(); + cout << rid << ": "; + showtput("handled", end_time, start_time, seqno, start_seqno); + if (recovery_end_time > -1) { + cout << rid << ": "; + showtput("after recovery, finished", end_time, recovery_end_time, + seqno, recovery_end_seqno); + } + } + st_netfd_t replica; const int &seqno; int rid; @@ -984,7 +978,7 @@ bool caught_up; st_channel<long long> &sub; long long start_time, recovery_start_time, recovery_end_time; - int recovery_start_seqno, recovery_end_seqno, last_seqno; + int start_seqno, recovery_start_seqno, recovery_end_seqno, last_seqno; }; /** @@ -1101,6 +1095,23 @@ foreach (const replica_info
&r, replicas) newreps.push(r); st_joining join_swallower(swallower); + // XXX + finally fin(lambda () { + cout << "LEADER SUMMARY" << endl; + cout << "- total updates = " << updates << endl; + cout << "- final DB state: seqno = " << __ref(seqno) << ", size = " + << g_map.size() << endl; + string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); + if (dump) { + cout << "- dumping to " << fname << endl; + ofstream of(fname.c_str()); + of << "seqno: " << __ref(seqno) << endl; + foreach (const pii &p, g_map) { + of << p.first << ": " << p.second << endl; + } + } + }); + try { // Start handling responses. st_thread_group handlers; @@ -1113,11 +1124,17 @@ // Accept the recovering node, and tell it about the online replicas. st_netfd_t joiner; - { + try { st_intr intr(stop_hub); joiner = checkerr(st_accept(listener, nullptr, nullptr, ST_UTIME_NO_TIMEOUT)); accept_joiner.waitset(); + } catch (std::exception &ex) { + string s(ex.what()); + if (s.find("Interrupted system call") == s.npos) + throw; + else + throw break_exception(); } Join join = readmsg<Join>(joiner); replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); @@ -1133,6 +1150,7 @@ handlers.insert(my_spawn(bind(handle_responses, joiner, ref(seqno), rid++, ref(recover_signals), false), "handle_responses_joiner")); + } catch (break_exception &ex) { } catch (std::exception &ex) { // TODO: maybe there's a cleaner way to do this final step before waiting with the join cerr_thread_ex(ex) << endl; @@ -1276,7 +1294,7 @@ int mid_seqno = seqno; while (!backlog.empty()) { shared_ptr<Txn> p = backlog.take(); - process_txn(leader, map, *p, seqno, false); + process_txn(map, *p, seqno, nullptr); if (p->seqno() % chkpt == 0) { if (verbose) cout << "processed txn " << p->seqno() << " off the backlog; " @@ -1390,8 +1408,12 @@ "inspection/diffing") ("suppress-txn-msgs", po::bool_switch(&suppress_txn_msgs), "suppress txn msgs") + ("fake-bcast", po::bool_switch(&fake_bcast), + "when using 
--bcast-async, don't actually perform the socket write") ("show-updates,U", po::bool_switch(&show_updates), "log operations that touch (update/read/delete) an existing key") + ("bcast-async", po::bool_switch(&use_bcast_async), + "broadcast messages asynchronously") ("count-updates,u",po::bool_switch(&count_updates), "count operations that touch (update/read/delete) an existing key") ("general-txns,g", po::bool_switch(&general_txns), Added: ydb/trunk/src/p2.cc =================================================================== --- ydb/trunk/src/p2.cc (rev 0) +++ ydb/trunk/src/p2.cc 2009-02-13 20:57:05 UTC (rev 1178) @@ -0,0 +1,353 @@ +#include <algorithm> +#include <boost/foreach.hpp> +#include <boost/program_options.hpp> +#include <commons/array.h> +#include <commons/nullptr.h> +#include <commons/rand.h> +#include <commons/sockets.h> +#include <commons/time.h> +#include <exception> +#include <iostream> +#include <set> +#include <string> +#include <sys/select.h> +#include <tr1/unordered_map> +#include <vector> +using namespace commons; +using namespace std; +using namespace tr1; +#define foreach BOOST_FOREACH +#define exception std::exception +#define STAT(t, c, x) \ + long long start_time = current_time_millis(); \ + x \ + ++c; \ + t += current_time_millis() - start_time; + +int bufsize = 1e8, chkpt = 1e4, batch_size = 1e4, thresh = 1e6; +bool verbose = true; +long long start = 0, seltime = 0, readtime = 0, writetime = 0; +int selcnt = 0, readcnt = 0, writecnt = 0; + +typedef array_view<char> arr; +arr mkarr(char *p = nullptr) { return arr(p, false); } + +typedef unordered_map<int, int> map_t; + +fd_set rfds, wfds, efds; + +class reader +{ +private: + array<char> buf_; + char *start_; + char *end_; + int fd_; +public: + reader(int fd) : buf_(bufsize), start_(buf_.get()), end_(start_), fd_(fd) {} + size_t rem() { return buf_.end() - end_; } + size_t amt() { return end_ - start_; } + int fd() { return fd_; } + arr read(size_t req) { + if (req <= amt()) { + arr a = 
mkarr(start_); + start_ += req; + return a; + } + // make sure we have enough space + check(req < buf_.size()); + // shift if necessary + if (req > rem()) { + memmove(buf_.get(), start_, amt()); + size_t diff = start_ - buf_.get(); + start_ -= diff; + end_ -= diff; + } + // read; advance end_ + STAT(readtime, readcnt, int res = ::read(fd(), end_, rem());) + int e = errno; + errno = 0; + //cout << "read res " << res << endl; + if (e == EAGAIN) return mkarr(); + if (res < 1) { close(fd()); throw exception(); } + end_ += res; + // if we still haven't read enough (requested), ret null + if (amt() < req) return mkarr(); + // advance start_ and return the newly consumed range + arr a = mkarr(start_); + start_ += req; + //cout << "offset " << a.get() - buf_.get() << endl; + return a; + } +}; + +class msg_reader +{ +private: + // len_ of 0 means we haven't read the prefix yet + uint32_t len_; + reader r_; +public: + msg_reader(int fd) : len_(0), r_(fd) {} + arr read(uint32_t &len) { + // read prefix + if (len_ == 0) { + arr prefix = r_.read(sizeof len_); + if (prefix.get() == nullptr) return mkarr(); + uint32_t tmp = *reinterpret_cast<uint32_t*>(prefix.get()); + //cout << "tmp " << tmp << endl; + len_ = tmp; + } + // read body + check(len_ > 0); + arr body = r_.read(len_); + if (body.get() != nullptr) { + len = len_; + len_ = 0; + } + return body; + } +}; + +class writer +{ +private: + array<char> buf_; + // start/end of unsent, prepared range + char *start_; + char *end_; + int fd_; +public: + writer(int fd) : buf_(bufsize), start_(buf_.get()), end_(start_), fd_(fd) {} + int fd() { return fd_; }; + fd_set &wfds() { return ::wfds; } + size_t amt() { return end_ - start_; } + size_t rem() { return buf_.end() - end_; } + + arr getbuf(uint32_t req) { + uint32_t tot = req + sizeof req; + check(tot > 0); + check(tot <= buf_.size()); + //cout << "getbuf req " << req << endl; + + // make space? 
+ if (tot > rem()) { + if (tot > buf_.size() - amt()) return mkarr(); // not enough space + memmove(buf_.get(), start_, amt()); // shift + size_t diff = start_ - buf_.get(); + //if (diff > 0) cout << "shifting amt " << amt() << " diff " << diff << endl; + start_ -= diff; + end_ -= diff; + assert(rem() >= tot); + } + + // write length prefix + allocate/return body + *(reinterpret_cast<uint32_t*>(end_)) = req; + end_ += sizeof req; + arr p = mkarr(end_); + end_ += req; + return p; + } + + void write() { + // perform the write + STAT(writetime, writecnt, int res = ::write(fd(), start_, end_ - start_);) + if (res < 0) { close(fd()); throw exception(); } + //cout << "write res " << res << " amt " << amt() << endl; + // advance start_ + start_ += res; + // re-register for writes if we still have things to write + if (end_ - start_ > 0) { + FD_SET(fd(), &wfds()); + } + } +}; + +class replica_channel +{ +private: + int fd_; + writer w_; + char *buf_; + +public: + replica_channel(int fd) : fd_(fd), w_(fd) {} + int fd() { return fd_; } + + void writeint(uint32_t i) { + *(reinterpret_cast<uint32_t*>(buf_)) = i; + buf_ += sizeof i; + } + + void handle_write() { + //cout << "writing" << endl; + uint32_t npairs = batch_size; + uint32_t len = 2 * sizeof(uint32_t) * npairs; + arr a = w_.getbuf(len); + buf_ = a; + if (buf_ == nullptr) return; + for (uint32_t i = 0; i < npairs; ++i) { + writeint(1); + writeint(2); + } + w_.write(); + } + +}; + +class replica +{ +private: + int fd_; + msg_reader r_; + const char *buf_; + map_t map_; + int counter_; + int readcount_; + long long start_; + +public: + replica(int fd) : fd_(fd), r_(fd), counter_(0), readcount_(0), start_(current_time_millis()) {} + int fd() { return fd_; } + + uint32_t readint() { + uint32_t i = *reinterpret_cast<const uint32_t*>(buf_); + buf_ += sizeof i; + return i; + } + + void handle_read() { + ++readcount_; + while (true) { + uint32_t len = 0; + arr a = r_.read(len); + buf_ = a.get(); + if (buf_ == nullptr) break; + 
uint32_t npairs = len / sizeof(uint32_t) / 2; + check(2 * sizeof(uint32_t) * npairs == len); // should be whole count + for (uint32_t i = 0; i < npairs; ++i) { + uint32_t k = readint(); + uint32_t v = readint(); + map_[k] = v; + ++counter_; + if (counter_ % chkpt == 0) { + //if (verbose) cout << current_time_millis() << ": count " << counter_ << endl; + if (counter_ > thresh) { + long long end = current_time_millis(); + double rate = double(counter_) / (end - start_) * 1000; + cout << "rate " << rate << " pairs/s " << rate / 5 << " tps; readcount " << readcount_ << endl; + throw exception(); + } + } + } + } + } + +}; + +class mainer +{ +private: + vector<replica_channel*> rs; + +public: + int main(int argc, char **argv) { + bool is_leader; + string host; + + namespace po = boost::program_options; + po::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "show this help message") + ("leader,l", po::bool_switch(&is_leader), "leader") + ("verbose,v",po::bool_switch(&verbose), "verbose") + ("host,H", + po::value<string>(&host)->default_value(string("localhost")), + "hostname or address of the leader") + ("batch,b", po::value<int>(&batch_size)->default_value(1e4), "batch size"); + po::variables_map vm; + try { + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + if (vm.count("help")) { + cout << desc << endl; + return 0; + } + } catch (exception &ex) { + cerr << ex.what() << endl << endl << desc << endl; + return 1; + } + + struct timeval tv; + tv.tv_sec = 5; + tv.tv_usec = 0; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&efds); + + int srv = is_leader ? tcp_listen(7654, true) : -1; + int cli = is_leader ? 
-1 : tcp_connect(host.c_str(), 7654); + if (cli >= 0) checknnegerr(fcntl(cli, F_SETFL, O_NONBLOCK | fcntl(cli, F_GETFL, 0))); + int nfds = max(srv, cli); + if (srv >= 0) FD_SET(srv, &rfds); + if (cli >= 0) FD_SET(cli, &rfds); + replica rep(cli); + if (cli >= 0) { start = current_time_millis(); seltime = 0; } + + while (true) { + //sleep(1); + //cout << endl; + STAT(seltime, selcnt, checknnegerr(select(nfds + 1, &rfds, &wfds, &efds, nullptr));) + //cout << "select waited " << diff << endl; + + // accept new connections + if (srv >= 0 && FD_ISSET(srv, &rfds)) { + if (start == 0) { start = current_time_millis(); seltime = 0; } + cout << "accept" << endl; + int r = checknnegerr(accept(srv, nullptr, nullptr)); + cout << fcntl(r, F_GETFL, 0) << ' '; + checknnegerr(fcntl(r, F_SETFL, O_NONBLOCK | fcntl(r, F_GETFL, 0))); + cout << fcntl(r, F_GETFL, 0) << endl; + rs.push_back(new replica_channel(r)); + nfds = max(nfds, r); + FD_SET(r, &wfds); + } + + if (cli >= 0 && FD_ISSET(cli, &rfds)) { + rep.handle_read(); + } + + // handle ready events + foreach (replica_channel *p, rs) { + replica_channel &r = *p; + if (FD_ISSET(r.fd(), &rfds)) { + //r.handle_read(); + } + if (FD_ISSET(r.fd(), &wfds)) { + r.handle_write(); + } + FD_SET(r.fd(), &rfds); + } + } + + return 0; + } +}; + +void dump() { + long long tot = current_time_millis() - start; + cout << "readtime " << readtime << " writetime " << writetime << " seltime " << seltime << " tot " << tot << endl; + cout << "readcnt " << readcnt << " writecnt " << writecnt << " selcnt " << selcnt << endl; +} + +int main(int argc, char **argv) { + int ret; + atexit(dump); + try { + ret = mainer().main(argc, argv); + } catch (...) 
{ + ret = 1; + } + return ret; +} Modified: ydb/trunk/src/ydb.proto =================================================================== --- ydb/trunk/src/ydb.proto 2009-02-13 20:54:46 UTC (rev 1177) +++ ydb/trunk/src/ydb.proto 2009-02-13 20:57:05 UTC (rev 1178) @@ -78,4 +78,8 @@ message TxnBatch { repeated Txn txn = 1; -} \ No newline at end of file +} + +message ResponseBatch { + repeated Response res = 1; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-15 03:20:54
|
Revision: 1181 http://assorted.svn.sourceforge.net/assorted/?rev=1181&view=rev Author: yangzhang Date: 2009-02-15 03:20:46 +0000 (Sun, 15 Feb 2009) Log Message: ----------- - rename array_view to managed_array - tried adding google dense_hash_map - added REUSE_SER macro to control whether serialized stuff is reused - refactored serialization/network code: added ser(), st_timed_write() - added --force-ser - added --fake-exec - changed wal to use same txn serialization as net rather than its own in-line serialization - added st_reader for txnbatches Modified Paths: -------------- ydb/trunk/src/main.lzz.clamp ydb/trunk/src/p2.cc Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-13 20:57:48 UTC (rev 1180) +++ ydb/trunk/src/main.lzz.clamp 2009-02-15 03:20:46 UTC (rev 1181) @@ -7,7 +7,6 @@ #include <boost/range/iterator_range.hpp> #include <boost/scoped_array.hpp> #include <boost/shared_ptr.hpp> -//#include <boost/thread.hpp> #include <commons/nullptr.h> #include <commons/rand.h> #include <commons/st/st.h> @@ -17,6 +16,7 @@ #include <cstring> // strsignal #include <iostream> #include <fstream> // ofstream +#include <google/dense_hash_map> #include <gtest/gtest.h> #include <malloc.h> #include <map> @@ -31,9 +31,11 @@ #define foreach BOOST_FOREACH #define shared_ptr boost::shared_ptr #define ref boost::ref +#define REUSE_SER using namespace boost; using namespace boost::archive; using namespace commons; +using namespace google; using namespace std; using namespace testing; using namespace tr1; @@ -46,8 +48,11 @@ #end #define map_t unordered_map +//#define map_t map +//#define map_t dense_hash_map typedef pair<int, int> pii; typedef map_t<int, int> mii; +typedef string ser_t; // Configuration. 
st_utime_t timeout; @@ -57,7 +62,7 @@ bool verbose, yield_during_build_up, yield_during_catch_up, dump, show_updates, count_updates, stop_on_recovery, general_txns, profile_threads, debug_threads, multirecover, disk, debug_memory, use_wal, - suppress_txn_msgs, use_bcast_async, fake_bcast; + suppress_txn_msgs, use_bcast_async, fake_bcast, force_ser, fake_exec; long long timelim, read_thresh, write_thresh; // Control. @@ -247,12 +252,59 @@ st_channel<pair<st_netfd_t, shared_ptr<string> > > msgs; /** + * Serialization. + * + * TODO: experiment with which method is the fastest: using a string as shown + * here or computing the bytesize then allocating (or grabbing/reserving) the + * array. + */ +template<typename T> +void +ser(string &s, const T &msg) +{ + // Serialize message to a buffer. + uint32_t len; + s.append(sizeof len, '\0'); + check(msg.AppendToString(&s)); + + // Warn if the message is large. + if (s.size() > 1000000) + cout << "serializing large message of " << s.size() << " bytes" << endl; + + // Prefix the message with a four-byte length. + len = htonl(static_cast<uint32_t>(s.size() - sizeof len)); + char *plen = reinterpret_cast<char*>(&len); + copy(plen, plen + sizeof len, s.begin()); +} + +/** + * Helper for getting the cached ByteSize of a message. + */ +template <typename T> +int +pb_size(const T &msg) { + int len = msg.GetCachedSize(); + return len == 0 ? msg.ByteSize() : len; +} + +/** + * Serialization. + */ +template<typename T> +void +ser(ostream &s, const T &msg) +{ + uint32_t len = htonl(pb_size(msg)); + s.write(reinterpret_cast<const char*>(&len), sizeof len); + check(msg.SerializeToOstream(&s)); +} + +/** * The worker that performs the actual broadcasting. 
*/ void bcaster() { - int counter = 0; while (!kill_hub) { pair<st_netfd_t, shared_ptr<string> > pr; { @@ -264,32 +316,8 @@ if (p.get() == nullptr) break; string &s = *p.get(); - int dstno = 0; - // XXX - // foreach (st_netfd_t dst, *gdsts) { - if (!fake_bcast) { - long long before_write = -1; - if (write_thresh > 0) { - before_write = current_time_millis(); - } - - checksize(st_write(dst, s.data(), s.size(), ST_UTIME_NO_TIMEOUT), - s.size()); - - if (write_thresh > 0) { - long long write_time = current_time_millis() - before_write; - if (write_time > write_thresh) { - cout << "thread " << threadname() - << ": write #" << counter - << " of size " << s.size() - //<< " bytes to dst #" << dstno - << " bytes" - << " took " << write_time << " ms" << endl; - } - } - ++dstno; - } - ++counter; + if (!fake_bcast) + st_timed_write(dst, s.data(), s.size()); } } @@ -298,24 +326,34 @@ */ template<typename T> void -bcastmsg_async(const vector<st_netfd_t> &dsts, const T & msg) +bcastmsg_async(const vector<st_netfd_t> &dsts, const T &msg) { - // Serialize message to a buffer. - uint32_t len; - shared_ptr<string> p(new string(sizeof len, '\0')); - string &s = *p.get(); - check(msg.AppendToString(&s)); + shared_ptr<string> p(new string); + ser(*p.get(), msg); + foreach (st_netfd_t dst, dsts) msgs.push(make_pair(dst, p)); +} - if (s.size() > 1000000) - cout << "sending large message to " << dsts.size() << " dsts, size = " - << s.size() << " bytes" << endl; +/** + * Perform an st_write but warn if it took over write_thresh ms. + */ +void +st_timed_write(st_netfd_t dst, const void *buf, size_t len) +{ + long long before_write = -1; + if (write_thresh > 0) { + before_write = current_time_millis(); + } - // Prefix the message with a four-byte length. 
- len = htonl(static_cast<uint32_t>(s.size() - sizeof len)); - char *plen = reinterpret_cast<char*>(&len); - copy(plen, plen + sizeof len, s.begin()); + checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), + static_cast<ssize_t>(len)); - foreach (st_netfd_t dst, dsts) msgs.push(make_pair(dst, p)); + if (write_thresh > 0) { + long long write_time = current_time_millis() - before_write; + if (write_time > write_thresh) { + cout << "thread " << threadname() << " write of " << len + << " bytes took " << write_time << " ms" << endl; + } + } } /** @@ -325,48 +363,12 @@ void bcastmsg(const vector<st_netfd_t> &dsts, const T & msg) { - // Serialize message to a buffer. - string s; - check(msg.SerializeToString(&s)); - const char *buf = s.c_str(); - - if (s.size() > 1000000) - cout << "sending large message to " << dsts.size() << " dsts, size = " - << s.size() << " bytes" << endl; - - // Prefix the message with a four-byte length. - uint32_t len = htonl(static_cast<uint32_t>(s.size())); - - // Broadcast the length-prefixed message to replicas. 
- int dstno = 0; - foreach (st_netfd_t dst, dsts) { - size_t resid = sizeof len; -#define checksize(x,y) checkeqnneg(x, static_cast<ssize_t>(y)) - int res = st_write_resid(dst, static_cast<void*>(&len), &resid, timeout); - long long before_write = -1; - if (write_thresh > 0) { - before_write = current_time_millis(); + ser_t s; + ser(s, msg); + if (!fake_bcast) { + foreach (st_netfd_t dst, dsts) { + st_timed_write(dst, s.data(), s.size()); } - if (res == -1 && errno == ETIME) { - checksize(st_write(dst, - reinterpret_cast<char*>(&len) + sizeof len - resid, - resid, - ST_UTIME_NO_TIMEOUT), - resid); - } else { - check0x(res); - } - if (write_thresh > 0) { - long long write_time = current_time_millis() - before_write; - if (write_time > write_thresh) { - cout << "thread " << threadname() - << ": write to dst #" << dstno - << " took " << write_time << " ms" << endl; - } - } - checksize(st_write(dst, buf, s.size(), ST_UTIME_NO_TIMEOUT), - s.size()); - ++dstno; } } @@ -463,7 +465,7 @@ void readmsg(st_reader &src, T & msg) { - array_view<char> a = src.read(sizeof(uint32_t)); + managed_array<char> a = src.read(sizeof(uint32_t)); uint32_t len = ntohl(*reinterpret_cast<const uint32_t*>(a.get())); check(msg.ParseFromArray(src.read(len), len)); } @@ -475,6 +477,9 @@ { public: wal() : of("wal"), out(of) {} + template <typename T> + void log(const T &msg) { ser(of, msg); } +#if 0 void del(int key) { int op = op_del; // TODO: is this really necessary? out & op & key; @@ -487,6 +492,7 @@ int op = op_commit; out & op; } +#endif private: enum { op_del, op_write, op_commit }; ofstream of; @@ -527,15 +533,24 @@ 0); }); + TxnBatch batch; + for (int t = 0; t < batch_size; ++t) batch.add_txn(); + while (!stop_hub) { +#ifdef REUSE_SER + batch.Clear(); +#else + TxnBatch batch; +#endif + // Did we get a new member? If so, notify an arbitrary member (the first // one) to prepare to send recovery information (by sending an // empty/default Txn). 
if (!newreps.empty() && seqno > 0) { if (multirecover) { - bcast(fds, TxnBatch()); + bcast(fds, batch); } else { - sendmsg(fds[0], TxnBatch()); + sendmsg(fds[0], batch); } } // Bring in any new members. @@ -544,7 +559,6 @@ } // Generate some random transactions. - TxnBatch batch; for (int t = 0; t < batch_size; ++t) { Txn &txn = *batch.add_txn(); txn.set_seqno(seqno); @@ -597,8 +611,14 @@ } // Broadcast. - if (!fds.empty() && !suppress_txn_msgs) + if (!fds.empty() && !suppress_txn_msgs) { bcast(fds, batch); + } else if (use_wal) { + g_wal->log(batch); + } else if (force_ser) { + string s; + ser(s, batch); + } // Pause? if (do_pause) @@ -606,7 +626,7 @@ } // This means "The End." - TxnBatch batch; + batch.Clear(); Txn &txn = *batch.add_txn(); txn.set_seqno(-1); bcast(fds, batch); @@ -622,44 +642,46 @@ void process_txn(mii &map, const Txn &txn, int &seqno, Response *res) { - wal &wal = *g_wal; + //wal &wal = *g_wal; checkeq(txn.seqno(), seqno + 1); seqno = txn.seqno(); if (res != nullptr) { res->set_seqno(seqno); res->set_caught_up(true); } - for (int o = 0; o < txn.op_size(); ++o) { - const Op &op = txn.op(o); - const int key = op.key(); - mii::iterator it = map.find(key); - if (show_updates || count_updates) { - if (it != map.end()) { - if (show_updates) cout << "existing key: " << key << endl; - if (count_updates) ++updates; - } - } - switch (op.type()) { - case Op::read: - if (res != nullptr) { - if (it == map.end()) res->add_result(0); - else res->add_result(it->second); - } - break; - case Op::write: - if (use_wal) wal.write(key, op.value()); - if (it == map.end()) map[key] = op.value(); - else it->second = op.value(); - break; - case Op::del: + if (!fake_exec) { + for (int o = 0; o < txn.op_size(); ++o) { + const Op &op = txn.op(o); + const int key = op.key(); + mii::iterator it = map.find(key); + if (show_updates || count_updates) { if (it != map.end()) { - if (use_wal) wal.del(key); - map.erase(it); + if (show_updates) cout << "existing key: " << key << 
endl; + if (count_updates) ++updates; } - break; + } + switch (op.type()) { + case Op::read: + if (res != nullptr) { + if (it == map.end()) res->add_result(0); + else res->add_result(it->second); + } + break; + case Op::write: + //if (use_wal) wal.write(key, op.value()); + if (it == map.end()) map[key] = op.value(); + else it->second = op.value(); + break; + case Op::del: + if (it != map.end()) { + //if (use_wal) wal.del(key); + map.erase(it); + } + break; + } } } - if (use_wal) wal.commit(); + //if (use_wal) wal.commit(); } void @@ -761,16 +783,19 @@ __ref(send_states).push(shared_ptr<Recovery>()); }); + st_reader reader(leader); + try { + TxnBatch batch; + ResponseBatch resbatch; while (true) { - TxnBatch batch; long long before_read = -1; if (read_thresh > 0) { before_read = current_time_millis(); } { st_intr intr(stop_hub); - readmsg(leader, batch); + readmsg(reader, batch); } if (read_thresh > 0) { long long read_time = current_time_millis() - before_read; @@ -780,7 +805,11 @@ } } if (batch.txn_size() > 0) { +#ifdef REUSE_SER + resbatch.Clear(); +#else ResponseBatch resbatch; +#endif for (int t = 0; t < batch.txn_size(); ++t) { const Txn &txn = batch.txn(t); // Regular transaction. @@ -876,11 +905,11 @@ finally f(boost::bind(&response_handler::cleanup, this)); st_reader reader(replica); + ResponseBatch batch; while (true) { finally f(boost::bind(&response_handler::loop_cleanup, this)); - ResponseBatch batch; // Read the message, but correctly respond to interrupts so that we can // cleanly exit (slightly tricky). 
if (last_seqno + 1 == seqno) { @@ -1095,7 +1124,6 @@ foreach (const replica_info &r, replicas) newreps.push(r); st_joining join_swallower(swallower); - // XXX finally fin(lambda () { cout << "LEADER SUMMARY" << endl; cout << "- total updates = " << updates << endl; @@ -1408,6 +1436,8 @@ "inspection/diffing") ("suppress-txn-msgs", po::bool_switch(&suppress_txn_msgs), "suppress txn msgs") + ("fake-exec", po::bool_switch(&fake_exec), + "don't actually execute txns") ("fake-bcast", po::bool_switch(&fake_bcast), "when using --bcast-async, don't actually perform the socket write") ("show-updates,U", po::bool_switch(&show_updates), @@ -1420,6 +1450,8 @@ "issue read and delete transactions as well as the default of (only) insertion/update transactions (for leader only)") ("wal", po::bool_switch(&use_wal), "enable ARIES write-ahead logging") + ("force-ser", po::bool_switch(&force_ser), + "force issue_txns to serialize its Txns") ("leader,l", po::bool_switch(&is_leader), "run the leader (run replica by default)") ("exit-on-recovery,x", po::bool_switch(&stop_on_recovery), Modified: ydb/trunk/src/p2.cc =================================================================== --- ydb/trunk/src/p2.cc 2009-02-13 20:57:48 UTC (rev 1180) +++ ydb/trunk/src/p2.cc 2009-02-15 03:20:46 UTC (rev 1181) @@ -29,7 +29,7 @@ long long start = 0, seltime = 0, readtime = 0, writetime = 0; int selcnt = 0, readcnt = 0, writecnt = 0; -typedef array_view<char> arr; +typedef managed_array<char> arr; arr mkarr(char *p = nullptr) { return arr(p, false); } typedef unordered_map<int, int> map_t; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-16 21:05:31
|
Revision: 1185 http://assorted.svn.sourceforge.net/assorted/?rev=1185&view=rev Author: yangzhang Date: 2009-02-16 21:05:26 +0000 (Mon, 16 Feb 2009) Log Message: ----------- - moved protobufs to ydb::pb - added ser Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/ydb.proto Added Paths: ----------- ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-15 03:22:34 UTC (rev 1184) +++ ydb/trunk/src/Makefile 2009-02-16 21:05:26 UTC (rev 1185) @@ -101,3 +101,6 @@ p2: p2.cc $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) + +ser: ser.cc ser.h ydb.o + $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) Added: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc (rev 0) +++ ydb/trunk/src/ser.cc 2009-02-16 21:05:26 UTC (rev 1185) @@ -0,0 +1,86 @@ +#include "ser.h" + +//#define USE_PB +using ydb::msg::reader; +using ydb::msg::writer; +using ydb::msg::stream; +using namespace commons; +using namespace std; +#ifdef USE_PB +using namespace ydb::pb; +#else +using namespace ydb::msg; +#endif + +#ifdef USE_PB +#define PBSWITCH(a,b) a +#define PBONLY(x) x +#define NPBONLY(x) +#else +#define PBSWITCH(a,b) b +#define PBONLY(x) +#define NPBONLY(x) x +#endif + +//template<typename TxnBatch, typename Txn, typename Op> +void run() +{ + array<char> a(1e8); + writer w(a); + reader r(a); + stream s(r,w); + string str; + const int nreps = 2; + + { + TxnBatch batch NPBONLY((s)); + for (int i = 0; i < nreps; ++i) { + w.mark(); + batch.Clear(); + NPBONLY(batch.start_txn()); + for (int t = 0; t < 2; ++t) { + Txn &txn = *batch.add_txn(); + txn.set_seqno(t + 5); + NPBONLY(txn.start_op()); + for (int o = 0; o < 2; ++o) { + Op &op = *txn.add_op(); + op.set_type (Op::del); + op.set_key (3 * (o+1)); + op.set_value(4 * (o+1)); + } + NPBONLY(txn.fin_op()); + } + NPBONLY(batch.fin_txn()); + cout << 
w.pos() << '/' << w.size() << endl; + PBONLY(check(batch.SerializeToString(&str))); + } + } + w.flush(); + + const bool show = true; + { + TxnBatch batch NPBONLY((s)); + for (int i = 0; i < nreps; ++i) { + batch.Clear(); + PBONLY(check(batch.ParseFromString(str))); + if (show) cout << "ntxn " << batch.txn_size() << endl; + for (int t = 0; t < batch.txn_size(); ++t) { + const Txn &txn = batch.txn(t); + if (show) cout << "txn seqno " << txn.seqno() << endl; + for (int o = 0; o < txn.op_size(); ++o) { + const Op &op = txn.op(o); + int otype = op.type(); + int okey = op.key(); + int oval = op.value(); + if (show) cout << "op type " << otype << " key " << okey << " value " << oval << endl; + } + } + } + } +} + +int main() +{ + run(); + return 0; +} Added: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h (rev 0) +++ ydb/trunk/src/ser.h 2009-02-16 21:05:26 UTC (rev 1185) @@ -0,0 +1,158 @@ +#ifndef YDB_MSG_H +#define YDB_MSG_H + +#include <commons/array.h> +#include <iomanip> +#include <iostream> +#include "ydb.pb.h" + +#define BEGIN_NAMESPACE(ns) namespace ns { +#define END_NAMESPACE } + +BEGIN_NAMESPACE(ydb) +BEGIN_NAMESPACE(msg) + +using namespace commons; +using namespace std; + +short unset = -1; + +using ydb::pb::Op_OpType; + +class writer +{ + private: + array<char> a_; + char *p_; + char *mark_; + array<char> &out_; + template<typename T> + void write_(T x, char *p) { + reserve(sizeof x, p); + *reinterpret_cast<T*>(p) = x; + } + public: + writer(array<char> &out) : a_(90), p_(a_.get()), mark_(p_), out_(out) {} + array<char> &buf() { return a_; } + size_t pos() { return p_ - mark_; } + size_t size() { return a_.size(); } + void mark() { mark_ = p_; /*skip<int>();*/ } + void flush() { memcpy(out_.get(), a_.get(), mark_ - a_.get()); } + void reserve(int n) { reserve(n, p_); } + void reserve(int n, char *p) { + if (p + n > a_.end()) { + flush(); + memmove(a_.get(), mark_, a_.end() - mark_); + size_t diff = 
mark_ - a_.get(); + mark_ -= diff; + p_ -= diff; + p -= diff; + } + } + void show() { + cout << (void*) p_; + for (size_t i = 0; i < a_.size(); ++i) + cout << " " << hex << setfill('0') << setw(2) << int(mark_[i]); + cout << endl; + cout << (void*) p_; + for (size_t i = 0; i < a_.size(); ++i) + cout << " " << setfill(' ') << setw(2) << (i == pos() ? "^^" : ""); + cout << endl; + } + template<typename T> void skip() { reserve(sizeof(T)); p_ += sizeof(T); } + template<typename T> void write(T x) { write_(x, p_); p_ += sizeof x; } + template<typename T> void write(T x, size_t off) { write_(x, mark_ + off); } +}; + +class reader +{ + private: + array<char> &a_; + const char *p_; + public: + reader(array<char> &a) : a_(a), p_(a.get()) {} + template<typename T> T read() { + T x = *reinterpret_cast<const T*>(p_); + p_ += sizeof(T); + return x; + } + void jump(ssize_t off) { p_ += off; } +}; + +class stream +{ + private: + reader &r_; + writer &w_; + public: + stream(reader &r, writer &w) : r_(r), w_(w) {} + reader &get_reader() { return r_; } + writer &get_writer() { return w_; } +}; + +class Op +{ + private: + stream &s_; + reader &r_; + writer &w_; + public: + static const Op_OpType read = ydb::pb::Op_OpType_read; + static const Op_OpType write = ydb::pb::Op_OpType_write; + static const Op_OpType del = ydb::pb::Op_OpType_del; + Op(stream &s) : s_(s), r_(s.get_reader()), w_(s.get_writer()) {} + void set_type (char x) { w_.write(x); } + void set_key (int x) { w_.write(x); } + void set_value(int x) { w_.write(x); } + char type() const { return r_.read<char>(); } + int key() const { return r_.read<int>(); } + int value() const { return r_.read<int>(); } +}; + +class Txn +{ + private: + stream &s_; + reader &r_; + writer &w_; + size_t off_; + Op op_; + mutable short nop_; + mutable int seqno_; + public: + Txn(stream &s) : + s_(s), r_(s.get_reader()), w_(s.get_writer()), off_(w_.pos()), op_(s), + nop_(unset) {} + void Clear() { w_.reserve(0*50); nop_ = unset; off_ = w_.pos(); 
} + void set_seqno(int x) { w_.write(x); } + int seqno() const { return r_.read<int>(); } + void start_op() { w_.skip<typeof(nop_)>(); } + Op *add_op() { if (nop_ == unset) nop_ = 0; ++nop_; return &op_; } + void fin_op() { w_.write(nop_, off_ + sizeof(int)); } + int op_size() const { if (nop_ == unset) nop_ = r_.read<typeof(nop_)>(); return nop_; } + const Op &op(int o) const { return op_; } +}; + +class TxnBatch +{ + private: + stream &s_; + reader &r_; + writer &w_; + size_t off_; + mutable Txn txn_; + mutable short ntxn_; + public: + TxnBatch(stream &s) : s_(s), r_(s.get_reader()), w_(s.get_writer()), off_(w_.pos()), txn_(s), ntxn_(unset) {} + void Clear() { w_.reserve(0*100); txn_.Clear(); ntxn_ = unset; off_ = w_.pos(); } + void start_txn() { w_.skip<typeof(ntxn_)>(); } + Txn *add_txn() { if (ntxn_ == unset) ntxn_ = 0; ++ntxn_; txn_.Clear(); return &txn_; } + void fin_txn() { w_.write(ntxn_, off_); } + int txn_size() const { if (ntxn_ == unset) ntxn_ = r_.read<typeof(ntxn_)>(); return ntxn_; } + const Txn &txn(int t) const { txn_.Clear(); return txn_; } +}; + +END_NAMESPACE +END_NAMESPACE + +#endif Modified: ydb/trunk/src/ydb.proto =================================================================== --- ydb/trunk/src/ydb.proto 2009-02-15 03:22:34 UTC (rev 1184) +++ ydb/trunk/src/ydb.proto 2009-02-16 21:05:26 UTC (rev 1185) @@ -1,3 +1,5 @@ +package ydb.pb; + option optimize_for = SPEED; // A socket address (host:port). This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-17 04:36:21
|
Revision: 1187 http://assorted.svn.sourceforge.net/assorted/?rev=1187&view=rev Author: yangzhang Date: 2009-02-17 04:36:14 +0000 (Tue, 17 Feb 2009) Log Message: ----------- - added st-based separate client/server to ser demo - added seqno-caching - using st_reader as reader - added writer::flush() Modified Paths: -------------- ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc 2009-02-17 04:33:52 UTC (rev 1186) +++ ydb/trunk/src/ser.cc 2009-02-17 04:36:14 UTC (rev 1187) @@ -1,4 +1,5 @@ #include "ser.h" +#include <commons/st/st.h> //#define USE_PB using ydb::msg::reader; @@ -22,65 +23,85 @@ #define NPBONLY(x) x #endif -//template<typename TxnBatch, typename Txn, typename Op> -void run() +const int nreps = 2; + +void producer(st_netfd_t dst) { - array<char> a(1e8); - writer w(a); - reader r(a); + writer w(dst); + reader r(dst); stream s(r,w); string str; - const int nreps = 2; - - { - TxnBatch batch NPBONLY((s)); - for (int i = 0; i < nreps; ++i) { - w.mark(); - batch.Clear(); - NPBONLY(batch.start_txn()); - for (int t = 0; t < 2; ++t) { - Txn &txn = *batch.add_txn(); - txn.set_seqno(t + 5); - NPBONLY(txn.start_op()); - for (int o = 0; o < 2; ++o) { - Op &op = *txn.add_op(); - op.set_type (Op::del); - op.set_key (3 * (o+1)); - op.set_value(4 * (o+1)); - } - NPBONLY(txn.fin_op()); + const bool show = true; + TxnBatch batch NPBONLY((s)); + for (int i = 0; i < nreps; ++i) { + w.mark(); + batch.Clear(); + NPBONLY(batch.start_txn()); + for (int t = 0; t < 2; ++t) { + Txn &txn = *batch.add_txn(); + txn.set_seqno(t + 5); + NPBONLY(txn.start_op()); + for (int o = 0; o < 2; ++o) { + Op &op = *txn.add_op(); + op.set_type (Op::del); + op.set_key (3 * (o+1)); + op.set_value(4 * (o+1)); } - NPBONLY(batch.fin_txn()); - cout << w.pos() << '/' << w.size() << endl; - PBONLY(check(batch.SerializeToString(&str))); + NPBONLY(txn.fin_op()); } + NPBONLY(batch.fin_txn()); + if 
(show) cout << w.pos() << '/' << w.size() << endl; + PBONLY(check(batch.SerializeToString(&str))); } + batch.Clear(); + NPBONLY(batch.start_txn()); + NPBONLY(batch.fin_txn()); + w.mark(); w.flush(); +} +void consumer(st_netfd_t src) +{ + array<char> a(1e8); + writer w(src); + reader r(src); + stream s(r,w); const bool show = true; - { - TxnBatch batch NPBONLY((s)); - for (int i = 0; i < nreps; ++i) { - batch.Clear(); - PBONLY(check(batch.ParseFromString(str))); - if (show) cout << "ntxn " << batch.txn_size() << endl; - for (int t = 0; t < batch.txn_size(); ++t) { - const Txn &txn = batch.txn(t); - if (show) cout << "txn seqno " << txn.seqno() << endl; - for (int o = 0; o < txn.op_size(); ++o) { - const Op &op = txn.op(o); - int otype = op.type(); - int okey = op.key(); - int oval = op.value(); - if (show) cout << "op type " << otype << " key " << okey << " value " << oval << endl; - } + TxnBatch batch NPBONLY((s)); + for (int i = 0; i < nreps; ++i) { + batch.Clear(); + PBONLY(check(batch.ParseFromString(str))); + if (show) cout << "ntxn " << batch.txn_size() << endl; + //if (batch.txn_size() == 0) break; + for (int t = 0; t < batch.txn_size(); ++t) { + const Txn &txn = batch.txn(t); + if (show) cout << "txn seqno " << txn.seqno() << " " << txn.seqno() << endl; + for (int o = 0; o < txn.op_size(); ++o) { + const Op &op = txn.op(o); + int otype = op.type(); + int okey = op.key(); + int oval = op.value(); + if (show) + cout << "op type " << otype + << " key " << okey + << " value " << oval << endl; } } } } -int main() +int main(int argc, char **argv) { - run(); + st_init(); + bool is_leader = argc == 1; + if (is_leader) { + st_netfd_t listener = st_tcp_listen(7654); + st_netfd_t dst = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + producer(dst); + } else { + st_netfd_t src = st_tcp_connect(argv[1], 7654, ST_UTIME_NO_TIMEOUT); + consumer(src); + } return 0; } Modified: ydb/trunk/src/ser.h 
=================================================================== --- ydb/trunk/src/ser.h 2009-02-17 04:33:52 UTC (rev 1186) +++ ydb/trunk/src/ser.h 2009-02-17 04:36:14 UTC (rev 1187) @@ -2,6 +2,7 @@ #define YDB_MSG_H #include <commons/array.h> +#include <commons/st/st.h> #include <iomanip> #include <iostream> #include "ydb.pb.h" @@ -15,7 +16,7 @@ using namespace commons; using namespace std; -short unset = -1; +short unset = -7654; using ydb::pb::Op_OpType; @@ -25,30 +26,37 @@ array<char> a_; char *p_; char *mark_; - array<char> &out_; + char *unsent_; + st_netfd_t out_; template<typename T> void write_(T x, char *p) { reserve(sizeof x, p); *reinterpret_cast<T*>(p) = x; } public: - writer(array<char> &out) : a_(90), p_(a_.get()), mark_(p_), out_(out) {} + writer(st_netfd_t out) : + a_(90), p_(a_.get()), mark_(p_), unsent_(a_.get()), out_(out) {} array<char> &buf() { return a_; } size_t pos() { return p_ - mark_; } size_t size() { return a_.size(); } - void mark() { mark_ = p_; /*skip<int>();*/ } - void flush() { memcpy(out_.get(), a_.get(), mark_ - a_.get()); } + void mark() { mark_ = p_; } void reserve(int n) { reserve(n, p_); } void reserve(int n, char *p) { if (p + n > a_.end()) { flush(); - memmove(a_.get(), mark_, a_.end() - mark_); size_t diff = mark_ - a_.get(); - mark_ -= diff; + memmove(a_.get(), mark_, diff); + unsent_ = mark_ = a_.get(); p_ -= diff; p -= diff; } } + void flush() { + if (mark_ - unsent_ > 0) { + st_write(out_, unsent_, mark_ - unsent_, ST_UTIME_NO_TIMEOUT); + unsent_ = mark_; + } + } void show() { cout << (void*) p_; for (size_t i = 0; i < a_.size(); ++i) @@ -64,20 +72,7 @@ template<typename T> void write(T x, size_t off) { write_(x, mark_ + off); } }; -class reader -{ - private: - array<char> &a_; - const char *p_; - public: - reader(array<char> &a) : a_(a), p_(a.get()) {} - template<typename T> T read() { - T x = *reinterpret_cast<const T*>(p_); - p_ += sizeof(T); - return x; - } - void jump(ssize_t off) { p_ += off; } -}; +typedef 
st_reader reader; class stream { @@ -122,10 +117,10 @@ public: Txn(stream &s) : s_(s), r_(s.get_reader()), w_(s.get_writer()), off_(w_.pos()), op_(s), - nop_(unset) {} - void Clear() { w_.reserve(0*50); nop_ = unset; off_ = w_.pos(); } + nop_(unset), seqno_(unset) {} + void Clear() { w_.reserve(0*50); nop_ = unset; seqno_ = unset; off_ = w_.pos(); } void set_seqno(int x) { w_.write(x); } - int seqno() const { return r_.read<int>(); } + int seqno() const { return seqno_ == unset ? seqno_ = r_.read<int>() : seqno_; } void start_op() { w_.skip<typeof(nop_)>(); } Op *add_op() { if (nop_ == unset) nop_ = 0; ++nop_; return &op_; } void fin_op() { w_.write(nop_, off_ + sizeof(int)); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-17 23:16:08
|
Revision: 1188 http://assorted.svn.sourceforge.net/assorted/?rev=1188&view=rev Author: yangzhang Date: 2009-02-17 22:38:36 +0000 (Tue, 17 Feb 2009) Log Message: ----------- - made writer more generic; takes any callback for flush/overflow - added zero-copy (ydb::msg) to main ydb - fixed some bugs in reader/writer - updated default --batch-size=100 and --write-buf=10000 Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-17 04:36:14 UTC (rev 1187) +++ ydb/trunk/src/Makefile 2009-02-17 22:38:36 UTC (rev 1188) @@ -33,6 +33,9 @@ else OPT := -g3 endif +ifneq ($(PB),) + PB := -DUSE_PB +endif CXX := $(WTF) $(CXX) LDFLAGS := -pthread $(GPROF) LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ @@ -44,7 +47,7 @@ -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings \ -Winit-self -Wsign-promo -Wno-unused-parameter -Wc++0x-compat \ -Wparentheses -Wmissing-format-attribute -Wfloat-equal \ - -Wno-inline -Wsynth -std=gnu++0x $(CXXFLAGS) + -Wno-inline -Wsynth -std=gnu++0x $(PB) $(CXXFLAGS) PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) @@ -73,6 +76,8 @@ %.lzz: %.lzz.clamp clamp < $< | sed "`echo -e '1i#src\n1a#end'`" > $@ +main.o: ser.h + all.h: fgrep '#include' main.lzz.clamp > all.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-17 04:36:14 UTC (rev 1187) +++ ydb/trunk/src/main.lzz.clamp 2009-02-17 22:38:36 UTC (rev 1188) @@ -28,18 +28,40 @@ #include <unistd.h> // pipe, write #include <vector> #include "ydb.pb.h" +//#define USE_PB +#include "ser.h" + +#define function boost::function #define foreach BOOST_FOREACH #define shared_ptr boost::shared_ptr #define ref boost::ref #define REUSE_SER + using namespace boost; using namespace boost::archive; using namespace 
commons; using namespace google; using namespace std; +using namespace std::tr1; using namespace testing; -using namespace tr1; +using ydb::msg::reader; +using ydb::msg::writer; +using ydb::msg::stream; +using ydb::msg::outstream; +using ydb::pb::ResponseBatch; +using ydb::pb::Response; +using ydb::pb::Recovery; +using ydb::pb::Recovery_Pair; +using ydb::pb::Init; +using ydb::pb::Join; +using ydb::pb::SockAddr; +#ifdef USE_PB +using namespace ydb::pb; +#else +using namespace ydb::msg; +#endif + #define GETMSG(buf) \ checkeqnneg(st_read_fully(src, buf, len, timeout), (int) len); \ if (stop_time != nullptr) \ @@ -58,7 +80,7 @@ st_utime_t timeout; int chkpt, accept_joiner_seqno, issuing_interval, min_ops, max_ops, stop_on_seqno, batch_size; -size_t accept_joiner_size; +size_t accept_joiner_size, buf_size; bool verbose, yield_during_build_up, yield_during_catch_up, dump, show_updates, count_updates, stop_on_recovery, general_txns, profile_threads, debug_threads, multirecover, disk, debug_memory, use_wal, @@ -479,6 +501,9 @@ wal() : of("wal"), out(of) {} template <typename T> void log(const T &msg) { ser(of, msg); } + void logbuf(const void *buf, size_t len) { + of.write(reinterpret_cast<const char*>(buf), len); + } #if 0 void del(int key) { int op = op_del; // TODO: is this really necessary? @@ -533,10 +558,20 @@ 0); }); - TxnBatch batch; + reader r(nullptr); + outstream os(fds); + function<void(const void*, size_t)> fn; + if (use_wal) fn = os; + else fn = lambda(const void *buf, size_t len) { g_wal->logbuf(buf, len); }; + // TODO why doesn't this work? + // else fn = boost::bind(&wal::logbuf, g_wal); + writer w(fn, buf_size); + stream s(r,w); + TxnBatch batch NPBONLY((s)); for (int t = 0; t < batch_size; ++t) batch.add_txn(); while (!stop_hub) { + w.mark(); #ifdef REUSE_SER batch.Clear(); #else @@ -559,10 +594,12 @@ } // Generate some random transactions. 
+ NPBONLY(batch.start_txn()); for (int t = 0; t < batch_size; ++t) { Txn &txn = *batch.add_txn(); txn.set_seqno(seqno); int count = randint(min_ops, max_ops + 1); + NPBONLY(txn.start_op()); for (int o = 0; o < count; ++o) { Op *op = txn.add_op(); int rtype = general_txns ? randint(3) : 1, @@ -572,20 +609,20 @@ op->set_key(rkey); op->set_value(rvalue); } + NPBONLY(txn.fin_op()); // Process immediately if not bcasting. if (fds.empty()) { --seqno; process_txn(g_map, txn, seqno, nullptr); } - ++seqno; // Checkpoint. - if (txn.seqno() % chkpt == 0) { + if (seqno % chkpt == 0) { if (verbose) - cout << "issued txn " << txn.seqno() << endl; + cout << "issued txn " << seqno << endl; if (timelim > 0 && current_time_millis() - start_time > timelim) { - cout << "time's up; issued " << txn.seqno() << " txns in " << timelim + cout << "time's up; issued " << seqno << " txns in " << timelim << " ms" << endl; stop_hub.set(); } @@ -598,19 +635,24 @@ } // Are we to accept a new joiner? - if (txn.seqno() == accept_joiner_seqno) { + if (seqno == accept_joiner_seqno) { accept_joiner.set(); } // Set the stopping seqno. - if (txn.seqno() == stop_on_seqno) { - cout << "stopping on issue of seqno " << txn.seqno() << endl; + if (seqno == stop_on_seqno) { + cout << "stopping on issue of seqno " << seqno << endl; stop_hub.set(); break; } + + ++seqno; } + NPBONLY(batch.fin_txn()); + NPBONLY(if (batch.txn_size() == 0) w.reset()); // Broadcast. +#ifdef USE_PB if (!fds.empty() && !suppress_txn_msgs) { bcast(fds, batch); } else if (use_wal) { @@ -619,6 +661,7 @@ string s; ser(s, batch); } +#endif // Pause? if (do_pause) @@ -626,10 +669,17 @@ } // This means "The End." 
+ w.mark(); batch.Clear(); + NPBONLY(batch.start_txn()); Txn &txn = *batch.add_txn(); txn.set_seqno(-1); - bcast(fds, batch); + NPBONLY(txn.start_op()); + NPBONLY(txn.fin_op()); + NPBONLY(batch.fin_txn()); + PBONLY(bcast(fds, batch)); + w.mark(); + w.flush(); if (bcaster_thread != nullptr) { msgs.push(make_pair(nullptr, shared_ptr<string>())); } @@ -652,6 +702,7 @@ if (!fake_exec) { for (int o = 0; o < txn.op_size(); ++o) { const Op &op = txn.op(o); + const char type = op.type(); const int key = op.key(); mii::iterator it = map.find(key); if (show_updates || count_updates) { @@ -660,7 +711,7 @@ if (count_updates) ++updates; } } - switch (op.type()) { + switch (type) { case Op::read: if (res != nullptr) { if (it == map.end()) res->add_result(0); @@ -668,10 +719,13 @@ } break; case Op::write: - //if (use_wal) wal.write(key, op.value()); - if (it == map.end()) map[key] = op.value(); - else it->second = op.value(); - break; + { + int value = op.value(); + //if (use_wal) wal.write(key, value); + if (it == map.end()) map[key] = value; + else it->second = value; + break; + } case Op::del: if (it != map.end()) { //if (use_wal) wal.del(key); @@ -784,9 +838,13 @@ }); st_reader reader(leader); + vector<st_netfd_t> leader_v(1, leader); + outstream os(leader_v); + writer w(os, buf_size); + stream s(reader, w); try { - TxnBatch batch; + TxnBatch batch NPBONLY((s)); ResponseBatch resbatch; while (true) { long long before_read = -1; @@ -795,7 +853,8 @@ } { st_intr intr(stop_hub); - readmsg(reader, batch); + PBONLY(readmsg(reader, batch)); + NPBONLY(batch.Clear()); } if (read_thresh > 0) { long long read_time = current_time_millis() - before_read; @@ -849,7 +908,7 @@ if (resbatch.res_size() > 0) sendmsg(leader, resbatch); } else { - // Empty (default) Txn means "generate a snapshot." + // Empty (default) TxnBatch means "generate a snapshot." 
// TODO make this faster shared_ptr<Recovery> recovery(new Recovery); typedef ::map<int, int> mii_; @@ -1456,7 +1515,7 @@ "run the leader (run replica by default)") ("exit-on-recovery,x", po::bool_switch(&stop_on_recovery), "exit after the joiner fully recovers (for leader only)") - ("batch-size,b", po::value<int>(&batch_size)->default_value(10), + ("batch-size,b", po::value<int>(&batch_size)->default_value(100), "number of txns to batch up in each msg (for leader only)") ("exit-on-seqno,X",po::value<int>(&stop_on_seqno)->default_value(-1), "exit after txn seqno is issued (for leader only)") @@ -1483,6 +1542,8 @@ ("leader-port,P", po::value<uint16_t>(&leader_port)->default_value(7654), "port the leader listens on") + ("write-buf", po::value<size_t>(&buf_size)->default_value(1e5), + "size of the outgoing (write) buffer in bytes") ("chkpt,c", po::value<int>(&chkpt)->default_value(1000), "number of txns before yielding/verbose printing") ("timelim,T", po::value<long long>(&timelim)->default_value(0), @@ -1592,3 +1653,11 @@ return 1; } } + +/* + * Compile-time options: + * + * - REUSE_SER + * - map, unordered_map, dense_hash_map + * - SERIALIZATION METHOD + */ Modified: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc 2009-02-17 04:36:14 UTC (rev 1187) +++ ydb/trunk/src/ser.cc 2009-02-17 22:38:36 UTC (rev 1188) @@ -5,6 +5,7 @@ using ydb::msg::reader; using ydb::msg::writer; using ydb::msg::stream; +using ydb::msg::outstream; using namespace commons; using namespace std; #ifdef USE_PB @@ -13,21 +14,13 @@ using namespace ydb::msg; #endif -#ifdef USE_PB -#define PBSWITCH(a,b) a -#define PBONLY(x) x -#define NPBONLY(x) -#else -#define PBSWITCH(a,b) b -#define PBONLY(x) -#define NPBONLY(x) x -#endif - const int nreps = 2; void producer(st_netfd_t dst) { - writer w(dst); + vector<st_netfd_t> dsts(1, dst); + outstream os(dsts); + writer w(os, 90); reader r(dst); stream s(r,w); string str; @@ -62,8 +55,9 @@ void 
consumer(st_netfd_t src) { - array<char> a(1e8); - writer w(src); + vector<st_netfd_t> v; + outstream os(v); + writer w(os, 90); reader r(src); stream s(r,w); const bool show = true; Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-02-17 04:36:14 UTC (rev 1187) +++ ydb/trunk/src/ser.h 2009-02-17 22:38:36 UTC (rev 1188) @@ -7,6 +7,16 @@ #include <iostream> #include "ydb.pb.h" +#ifdef USE_PB +#define PBSWITCH(a,b) a +#define PBONLY(x) x +#define NPBONLY(x) +#else +#define PBSWITCH(a,b) b +#define PBONLY(x) +#define NPBONLY(x) x +#endif + #define BEGIN_NAMESPACE(ns) namespace ns { #define END_NAMESPACE } @@ -20,29 +30,32 @@ using ydb::pb::Op_OpType; +// TODO try to make all of the following conform to the std interfaces, if +// amenable + +class outstream +{ + private: + const vector<st_netfd_t> &dsts; + public: + outstream(const vector<st_netfd_t> &dsts) : dsts(dsts) {} + void operator()(const void *buf, size_t len) { + foreach (st_netfd_t dst, dsts) + checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), ssize_t(len)); + } +}; + class writer { private: - array<char> a_; + commons::array<char> a_; char *p_; char *mark_; char *unsent_; - st_netfd_t out_; - template<typename T> - void write_(T x, char *p) { - reserve(sizeof x, p); - *reinterpret_cast<T*>(p) = x; - } - public: - writer(st_netfd_t out) : - a_(90), p_(a_.get()), mark_(p_), unsent_(a_.get()), out_(out) {} - array<char> &buf() { return a_; } - size_t pos() { return p_ - mark_; } - size_t size() { return a_.size(); } - void mark() { mark_ = p_; } - void reserve(int n) { reserve(n, p_); } - void reserve(int n, char *p) { + boost::function<void(void*, size_t)> flushcb; + char *reserve(int n, char *p) { if (p + n > a_.end()) { + assert(size_t(p - mark_ + n) <= a_.size()); flush(); size_t diff = mark_ - a_.get(); memmove(a_.get(), mark_, diff); @@ -50,10 +63,24 @@ p_ -= diff; p -= diff; } + return p; } + template<typename T> + void 
write_(T x, char *p) { + *reinterpret_cast<T*>(reserve(sizeof x, p)) = x; + } + public: + writer(boost::function<void(void*, size_t)> flushcb, size_t buf_size) : + a_(buf_size), p_(a_.get()), mark_(p_), unsent_(a_.get()), flushcb(flushcb) {} + commons::array<char> &buf() { return a_; } + size_t pos() { return p_ - mark_; } + size_t size() { return a_.size(); } + void mark() { mark_ = p_; } + void reset() { p_ = mark_; } + void reserve(int n) { reserve(n, p_); } void flush() { if (mark_ - unsent_ > 0) { - st_write(out_, unsent_, mark_ - unsent_, ST_UTIME_NO_TIMEOUT); + flushcb(unsent_, mark_ - unsent_); unsent_ = mark_; } } @@ -143,8 +170,15 @@ void start_txn() { w_.skip<typeof(ntxn_)>(); } Txn *add_txn() { if (ntxn_ == unset) ntxn_ = 0; ++ntxn_; txn_.Clear(); return &txn_; } void fin_txn() { w_.write(ntxn_, off_); } - int txn_size() const { if (ntxn_ == unset) ntxn_ = r_.read<typeof(ntxn_)>(); return ntxn_; } + int txn_size() const { + if (ntxn_ == unset) + ntxn_ = r_.read<typeof(ntxn_)>(); + return ntxn_; + } const Txn &txn(int t) const { txn_.Clear(); return txn_; } + bool AppendToString(string *s) const { throw std::exception(); } + bool SerializeToString(string *s) const { throw std::exception(); } + bool SerializeToOstream(ostream *s) const { throw std::exception(); } }; END_NAMESPACE This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-19 08:30:23
|
Revision: 1202 http://assorted.svn.sourceforge.net/assorted/?rev=1202&view=rev Author: yangzhang Date: 2009-02-19 08:30:19 +0000 (Thu, 19 Feb 2009) Log Message: ----------- - simplified bcast-switching logic - removed REUSE_SER logic; pointless to keep orig - tried adding aspectc++, no work - removed outstream, replaced with lambdas (and moved into ser.cc) Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-19 08:15:53 UTC (rev 1201) +++ ydb/trunk/src/Makefile 2009-02-19 08:30:19 UTC (rev 1202) @@ -36,6 +36,7 @@ ifneq ($(PB),) PB := -DUSE_PB endif +# CXX := $(WTF) ag++ -k --Xcompiler # $(CXX) CXX := $(WTF) $(CXX) LDFLAGS := -pthread $(GPROF) LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-19 08:15:53 UTC (rev 1201) +++ ydb/trunk/src/main.lzz.clamp 2009-02-19 08:30:19 UTC (rev 1202) @@ -48,7 +48,6 @@ using ydb::msg::reader; using ydb::msg::writer; using ydb::msg::stream; -using ydb::msg::outstream; using ydb::pb::ResponseBatch; using ydb::pb::Response; using ydb::pb::Recovery; @@ -383,7 +382,7 @@ */ template<typename T> void -bcastmsg(const vector<st_netfd_t> &dsts, const T & msg) +bcastmsg_sync(const vector<st_netfd_t> &dsts, const T &msg) { ser_t s; ser(s, msg); @@ -395,22 +394,26 @@ } /** - * Send a message to a single recipient. + * Send a message to some destinations, using whichever method of network IO + * was chosen (sync or async). 
*/ template<typename T> void -sendmsg(st_netfd_t dst, const T &msg) +bcastmsg(const vector<st_netfd_t> &dsts, const T &msg) { - vector<st_netfd_t> dsts(1, dst); - bcastmsg(dsts, msg); + if (use_bcast_async) bcastmsg_async(dsts, msg); + else bcastmsg_sync(dsts, msg); } +/** + * Send a message to a single recipient. + */ template<typename T> void -sendmsg_async(st_netfd_t dst, const T &msg) +sendmsg(st_netfd_t dst, const T &msg) { vector<st_netfd_t> dsts(1, dst); - bcastmsg_async(dsts, msg); + bcastmsg(dsts, msg); } /** @@ -528,15 +531,6 @@ mii g_map; wal *g_wal; -// Function pointer types. -typedef void (*bcasttxn_t)(const vector<st_netfd_t> &dsts, const TxnBatch &msg); -bcasttxn_t bcasttxn_async = bcastmsg_async<TxnBatch>; -bcasttxn_t bcasttxn_sync = bcastmsg<TxnBatch>; - -typedef void (*sendres_t)(st_netfd_t dst, const ResponseBatch &msg); -sendres_t sendres_async = sendmsg_async<ResponseBatch>; -sendres_t sendres_sync = sendmsg<ResponseBatch>; - /** * Keep issuing transactions to the replicas. */ @@ -544,46 +538,47 @@ issue_txns(st_channel<replica_info> &newreps, int &seqno, st_bool &accept_joiner) { - bcasttxn_t bcast = use_bcast_async ? bcasttxn_async : bcasttxn_sync; - st_thread_t bcaster_thread = bcast == bcasttxn_async ? - my_spawn(bcaster, "bcaster") : nullptr; Op_OpType types[] = {Op::read, Op::write, Op::del}; vector<st_netfd_t> fds; long long start_time = current_time_millis(); finally f(lambda () { - if (__ref(bcaster_thread) != nullptr) st_join(__ref(bcaster_thread)); showtput("issued", current_time_millis(), __ref(start_time), __ref(seqno), 0); }); reader r(nullptr); - outstream os(fds); - function<void(const void*, size_t)> fn; - if (use_wal) fn = os; - else fn = lambda(const void *buf, size_t len) { g_wal->logbuf(buf, len); }; + //function<void(const void*, size_t)> fn = use_wal ? 
+ // lambda(const void *buf, size_t len) { g_wal->logbuf(buf, len); } : + // lambda(const void *buf, size_t len) { + // }; + //if (use_wal) fn = lambda(const void *buf, size_t len) {}; + //else fn = lambda(const void *buf, size_t len) { g_wal->logbuf(buf, len); }; // TODO why doesn't this work? // else fn = boost::bind(&wal::logbuf, g_wal); - writer w(fn, buf_size); + + writer w(lambda(const void *buf, size_t len) { + if (__ref(use_wal)) + g_wal->logbuf(buf, len); + else + foreach (st_netfd_t dst, __ref(fds)) + checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), ssize_t(len)); + }, buf_size); stream s(r,w); TxnBatch batch NPBONLY((s)); for (int t = 0; t < batch_size; ++t) batch.add_txn(); while (!stop_hub) { w.mark(); -#ifdef REUSE_SER batch.Clear(); -#else - TxnBatch batch; -#endif // Did we get a new member? If so, notify an arbitrary member (the first // one) to prepare to send recovery information (by sending an // empty/default Txn). if (!newreps.empty() && seqno > 0) { if (multirecover) { - bcast(fds, batch); + bcastmsg(fds, batch); } else { sendmsg(fds[0], batch); } @@ -654,7 +649,7 @@ // Broadcast. #ifdef USE_PB if (!fds.empty() && !suppress_txn_msgs) { - bcast(fds, batch); + bcastmsg(fds, batch); } else if (use_wal) { g_wal->log(batch); } else if (force_ser) { @@ -663,6 +658,9 @@ } #endif + if (fds.empty()) + w.reset(); + // Pause? if (do_pause) do_pause.waitreset(); @@ -677,12 +675,9 @@ NPBONLY(txn.start_op()); NPBONLY(txn.fin_op()); NPBONLY(batch.fin_txn()); - PBONLY(bcast(fds, batch)); + PBONLY(bcastmsg(fds, batch)); w.mark(); w.flush(); - if (bcaster_thread != nullptr) { - msgs.push(make_pair(nullptr, shared_ptr<string>())); - } } /** @@ -815,16 +810,7 @@ // issued more since the Init message). int first_seqno = -1; - st_thread_t bcaster_thread = use_bcast_async ? - my_spawn(bcaster, "bcaster") : nullptr; - sendres_t sendmsg = use_bcast_async ? 
sendres_async : sendres_sync; - finally f(lambda () { - if (__ref(bcaster_thread) != nullptr) { - msgs.push(make_pair(nullptr, shared_ptr<string>())); - st_join(__ref(bcaster_thread)); - } - long long now = current_time_millis(); showtput("processed", now, __ref(start_time), __ref(seqno), __ref(init_seqno)); @@ -839,8 +825,7 @@ st_reader reader(leader); vector<st_netfd_t> leader_v(1, leader); - outstream os(leader_v); - writer w(os, buf_size); + writer w(lambda(const void*, size_t) { throw std::exception(); }, buf_size); stream s(reader, w); try { @@ -864,11 +849,7 @@ } } if (batch.txn_size() > 0) { -#ifdef REUSE_SER resbatch.Clear(); -#else - ResponseBatch resbatch; -#endif for (int t = 0; t < batch.txn_size(); ++t) { const Txn &txn = batch.txn(t); // Regular transaction. @@ -1614,9 +1595,20 @@ my_spawn(memmon, "memmon"); } + // Start the message broadcaster thread, if requested. + st_thread_t bcaster_thread = use_bcast_async ? + my_spawn(bcaster, "bcaster") : nullptr; + long long start = thread_start_time = current_time_millis(); - // At the end, print thread profiling information. + + // At the end, cleanly stop the bcaster thread and print thread profiling + // information. 
finally f(lambda() { + if (use_bcast_async) { + msgs.push(make_pair(nullptr, shared_ptr<string>())); + st_join(__ref(bcaster_thread)); + } + if (profile_threads) { long long end = current_time_millis(); long long all = end - __ref(start); @@ -1657,7 +1649,6 @@ /* * Compile-time options: * - * - REUSE_SER * - map, unordered_map, dense_hash_map * - SERIALIZATION METHOD */ Modified: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc 2009-02-19 08:15:53 UTC (rev 1201) +++ ydb/trunk/src/ser.cc 2009-02-19 08:30:19 UTC (rev 1202) @@ -5,7 +5,6 @@ using ydb::msg::reader; using ydb::msg::writer; using ydb::msg::stream; -using ydb::msg::outstream; using namespace commons; using namespace std; #ifdef USE_PB @@ -16,6 +15,18 @@ const int nreps = 2; +class outstream +{ + private: + const vector<st_netfd_t> &dsts; + public: + outstream(const vector<st_netfd_t> &dsts) : dsts(dsts) {} + void operator()(const void *buf, size_t len) { + foreach (st_netfd_t dst, dsts) + checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), ssize_t(len)); + } +}; + void producer(st_netfd_t dst) { vector<st_netfd_t> dsts(1, dst); Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-02-19 08:15:53 UTC (rev 1201) +++ ydb/trunk/src/ser.h 2009-02-19 08:30:19 UTC (rev 1202) @@ -33,18 +33,6 @@ // TODO try to make all of the following conform to the std interfaces, if // amenable -class outstream -{ - private: - const vector<st_netfd_t> &dsts; - public: - outstream(const vector<st_netfd_t> &dsts) : dsts(dsts) {} - void operator()(const void *buf, size_t len) { - foreach (st_netfd_t dst, dsts) - checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), ssize_t(len)); - } -}; - class writer { private: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-20 08:06:34
|
Revision: 1207 http://assorted.svn.sourceforge.net/assorted/?rev=1207&view=rev Author: yangzhang Date: 2009-02-20 08:06:28 +0000 (Fri, 20 Feb 2009) Log Message: ----------- quick fix: fall back to pb whenever minreps == 0 Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-20 06:22:02 UTC (rev 1206) +++ ydb/trunk/src/Makefile 2009-02-20 08:06:28 UTC (rev 1207) @@ -48,7 +48,7 @@ -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings \ -Winit-self -Wsign-promo -Wno-unused-parameter -Wc++0x-compat \ -Wparentheses -Wmissing-format-attribute -Wfloat-equal \ - -Wno-inline -Wsynth -std=gnu++0x $(PB) $(CXXFLAGS) + -Wno-inline -Wsynth -Wno-old-style-cast -std=gnu++0x $(PB) $(CXXFLAGS) PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-20 06:22:02 UTC (rev 1206) +++ ydb/trunk/src/main.lzz.clamp 2009-02-20 08:06:28 UTC (rev 1207) @@ -299,6 +299,7 @@ template <typename T> int pb_size(const T &msg) { + // GetCachedSize returns 0 if no cached size. int len = msg.GetCachedSize(); return len == 0 ? msg.ByteSize() : len; } @@ -563,7 +564,8 @@ g_wal->logbuf(buf, len); else foreach (st_netfd_t dst, __ref(fds)) - checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), ssize_t(len)); + checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), + static_cast<ssize_t>(len)); }, buf_size); stream s(r,w); scoped_ptr<TxnBatch> pbatch(new_TxnBatch<TxnBatch>(s)); @@ -611,6 +613,7 @@ if (fds.empty()) { --seqno; process_txn<Types>(g_map, txn, seqno, nullptr); + w.reset(); } // Checkpoint. @@ -648,7 +651,6 @@ if (batch.txn_size() == 0) w.reset(); // Broadcast. 
-#ifdef USE_PB if (!fds.empty() && !suppress_txn_msgs) { bcastmsg(fds, batch); } else if (use_wal) { @@ -657,7 +659,6 @@ string s; ser(s, batch); } -#endif if (fds.empty()) w.reset(); @@ -1584,6 +1585,8 @@ check(min_ops > 0); check(max_ops > 0); check(max_ops >= min_ops); + + if (minreps == 0) use_pb = true; // XXX } catch (std::exception &ex) { cerr << ex.what() << endl << endl << desc << endl; return 1; Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-02-20 06:22:02 UTC (rev 1206) +++ ydb/trunk/src/ser.h 2009-02-20 08:06:28 UTC (rev 1207) @@ -158,6 +158,8 @@ bool SerializeToString(string *s) const { throw std::exception(); } bool SerializeToOstream(ostream *s) const { throw std::exception(); } bool ParseFromArray(void *p, size_t len) { throw std::exception(); } + size_t GetCachedSize() const { throw std::exception(); } + size_t ByteSize() const { throw std::exception(); } }; template<typename T> void start_txn(T &batch); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-22 00:14:36
|
Revision: 1215 http://assorted.svn.sourceforge.net/assorted/?rev=1215&view=rev Author: yangzhang Date: 2009-02-22 00:14:14 +0000 (Sun, 22 Feb 2009) Log Message: ----------- fixed all warnings Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-21 23:55:46 UTC (rev 1214) +++ ydb/trunk/src/Makefile 2009-02-22 00:14:14 UTC (rev 1215) @@ -42,13 +42,12 @@ LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ -lboost_program_options-gcc43-mt -lboost_thread-gcc43-mt \ -lboost_serialization-gcc43-mt $(PPROF) -# The -Wno- warnings are for boost. CXXFLAGS := $(OPT) -pthread $(GPROF) -Wall -Werror -Wextra -Woverloaded-virtual \ - -Wconversion -Wno-conversion -Wno-ignored-qualifiers \ + -Wconversion -Wignored-qualifiers \ -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings \ - -Winit-self -Wsign-promo -Wno-unused-parameter -Wc++0x-compat \ + -Winit-self -Wsign-promo -Wunused-parameter -Wc++0x-compat \ -Wparentheses -Wmissing-format-attribute -Wfloat-equal \ - -Wno-inline -Wsynth -Wno-old-style-cast -std=gnu++0x $(PB) $(CXXFLAGS) + -Winline -Wsynth -Wno-old-style-cast -std=gnu++0x $(PB) $(CXXFLAGS) PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-21 23:55:46 UTC (rev 1214) +++ ydb/trunk/src/main.lzz.clamp 2009-02-22 00:14:14 UTC (rev 1215) @@ -57,7 +57,7 @@ using namespace ydb::msg; #define GETMSG(buf) \ -checkeqnneg(st_read_fully(src, buf, len, timeout), (int) len); \ +checkeqnneg(st_read_fully(src, buf, len, timeout), int(len)); \ if (stop_time != nullptr) \ *stop_time = current_time_millis(); \ check(msg.ParseFromArray(buf, len)); @@ -94,7 +94,8 @@ /** * Convenience function for calculating percentages. 
*/ -double pct(double sub, double tot) { return 100 * sub / tot; } +template<typename T> +double pct(T sub, T tot) { return 100 * double(sub) / double(tot); } /** * Convenience class for performing long-jumping break. @@ -300,10 +301,10 @@ * Helper for getting the cached ByteSize of a message. */ template <typename T> -int +size_t pb_size(const T &msg) { // GetCachedSize returns 0 if no cached size. - int len = msg.GetCachedSize(); + size_t len = msg.GetCachedSize(); return len == 0 ? msg.ByteSize() : len; } @@ -314,7 +315,7 @@ void ser(ostream &s, const T &msg) { - uint32_t len = htonl(pb_size(msg)); + uint32_t len = htonl(uint32_t(pb_size(msg))); s.write(reinterpret_cast<const char*>(&len), sizeof len); check(msg.SerializeToOstream(&s)); } @@ -741,7 +742,7 @@ { long long time_diff = stop_time - start_time; int count_diff = stop_count - start_count; - double rate = double(count_diff) * 1000 / time_diff; + double rate = count_diff * 1000 / double(time_diff); cout << action << " " << count_diff << " txns [" << start_count << ".." << stop_count << "] in " << time_diff << " ms [" @@ -1097,7 +1098,7 @@ * from process_txns. 
*/ void -recover_joiner(st_netfd_t listener, const mii &map, const int &seqno, +recover_joiner(st_netfd_t listener, st_channel<shared_ptr<Recovery> > &send_states) { st_netfd_t joiner; @@ -1334,8 +1335,8 @@ ref(send_states), ref(backlog), init.txnseqno(), mypos, init.node_size()); st_joining join_proc(my_spawn(process_fn, "process_txns")); - st_joining join_rec(my_spawn(bind(recover_joiner, listener, ref(map), - ref(seqno), ref(send_states)), + st_joining join_rec(my_spawn(bind(recover_joiner, listener, + ref(send_states)), "recover_joiner")); try { Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-02-21 23:55:46 UTC (rev 1214) +++ ydb/trunk/src/ser.h 2009-02-22 00:14:14 UTC (rev 1215) @@ -64,11 +64,12 @@ } } void show() { - cout << (void*) p_; + cout << static_cast<void*>(p_); for (size_t i = 0; i < a_.size(); ++i) - cout << " " << hex << setfill('0') << setw(2) << (int)(unsigned char)(a_.get()[i]); + cout << " " << hex << setfill('0') << setw(2) + << int(static_cast<unsigned char>(a_.get()[i])); cout << endl; - cout << (void*) p_; + cout << static_cast<void*>(p_); for (size_t i = 0; i < a_.size(); ++i) cout << " " << setfill(' ') << setw(2) << (i == pos() ? "^^" : ""); cout << endl; @@ -124,14 +125,14 @@ Txn(stream &s) : s_(s), r_(s.get_reader()), w_(s.get_writer()), off_(w_.pos()), op_(s), nop_(unset), seqno_(unset) {} - void Clear() { w_.reserve(0*50); nop_ = unset; seqno_ = unset; off_ = w_.pos(); } + void Clear() { nop_ = unset; seqno_ = unset; off_ = w_.pos(); } void set_seqno(int x) { w_.write(x); } int seqno() const { return seqno_ == unset ? 
seqno_ = r_.read<int>() : seqno_; } void start_op() { if (nop_ == unset) nop_ = 0; w_.skip<typeof(nop_)>(); } Op *add_op() { ++nop_; return &op_; } void fin_op() { w_.write(nop_, off_ + sizeof(int)); } int op_size() const { if (nop_ == unset) nop_ = r_.read<typeof(nop_)>(); return nop_; } - const Op &op(int o) const { return op_; } + const Op &op(int) const { return op_; } }; class TxnBatch @@ -145,7 +146,7 @@ mutable short ntxn_; public: TxnBatch(stream &s) : s_(s), r_(s.get_reader()), w_(s.get_writer()), off_(w_.pos()), txn_(s), ntxn_(unset) {} - void Clear() { w_.reserve(0*100); txn_.Clear(); ntxn_ = unset; off_ = w_.pos(); } + void Clear() { txn_.Clear(); ntxn_ = unset; off_ = w_.pos(); } void start_txn() { if (ntxn_ == unset) ntxn_ = 0; w_.skip<typeof(ntxn_)>(); } Txn *add_txn() { ++ntxn_; txn_.Clear(); return &txn_; } void fin_txn() { w_.write(ntxn_, off_); } @@ -154,33 +155,33 @@ ntxn_ = r_.read<typeof(ntxn_)>(); return ntxn_; } - const Txn &txn(int t) const { txn_.Clear(); return txn_; } - bool AppendToString(string *s) const { throw_operation_not_supported(); } - bool SerializeToString(string *s) const { throw_operation_not_supported(); } - bool SerializeToOstream(ostream *s) const { throw_operation_not_supported(); } - bool ParseFromArray(void *p, size_t len) { throw_operation_not_supported(); } + const Txn &txn(int) const { txn_.Clear(); return txn_; } + bool AppendToString(string*) const { throw_operation_not_supported(); } + bool SerializeToString(string*) const { throw_operation_not_supported(); } + bool SerializeToOstream(ostream*) const { throw_operation_not_supported(); } + bool ParseFromArray(void*, size_t) { throw_operation_not_supported(); } size_t GetCachedSize() const { throw_operation_not_supported(); } size_t ByteSize() const { throw_operation_not_supported(); } }; template<typename T> void start_txn(T &batch); -template<> void start_txn(ydb::pb::TxnBatch &batch) {} +template<> void start_txn(ydb::pb::TxnBatch &) {} template<> void 
start_txn(ydb::msg::TxnBatch &batch) { batch.start_txn(); } template<typename T> void fin_txn(T &batch); -template<> void fin_txn(ydb::pb::TxnBatch &batch) {} +template<> void fin_txn(ydb::pb::TxnBatch &) {} template<> void fin_txn(ydb::msg::TxnBatch &batch) { batch.fin_txn(); } template<typename T> void start_op(T &txn); -template<> void start_op(ydb::pb::Txn &txn) {} +template<> void start_op(ydb::pb::Txn &) {} template<> void start_op(ydb::msg::Txn &txn) { txn.start_op(); } template<typename T> void fin_op(T &txn); -template<> void fin_op(ydb::pb::Txn &txn) {} +template<> void fin_op(ydb::pb::Txn &) {} template<> void fin_op(ydb::msg::Txn &txn) { txn.fin_op(); } template<typename T> T *new_TxnBatch(stream &s); -template<> ydb::pb::TxnBatch *new_TxnBatch(stream &s) { return new ydb::pb::TxnBatch(); } +template<> ydb::pb::TxnBatch *new_TxnBatch(stream &) { return new ydb::pb::TxnBatch(); } template<> ydb::msg::TxnBatch *new_TxnBatch(stream &s) { return new ydb::msg::TxnBatch(s); } struct pb_types { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-22 06:33:46
|
Revision: 1217 http://assorted.svn.sourceforge.net/assorted/?rev=1217&view=rev Author: yangzhang Date: 2009-02-22 06:33:43 +0000 (Sun, 22 Feb 2009) Log Message: ----------- added extra warnings and fixed issues they pointed out Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/p2.cc ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-22 06:33:36 UTC (rev 1216) +++ ydb/trunk/src/Makefile 2009-02-22 06:33:43 UTC (rev 1217) @@ -29,7 +29,7 @@ PPROF := -lprofiler endif ifneq ($(OPT),) - OPT := -O3 + OPT := -O3 -Wdisabled-optimization else OPT := -g3 endif @@ -42,12 +42,52 @@ LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ -lboost_program_options-gcc43-mt -lboost_thread-gcc43-mt \ -lboost_serialization-gcc43-mt $(PPROF) -CXXFLAGS := $(OPT) -pthread $(GPROF) -Wall -Werror -Wextra -Woverloaded-virtual \ - -Wconversion -Wignored-qualifiers \ - -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings \ - -Winit-self -Wsign-promo -Wunused-parameter -Wc++0x-compat \ - -Wparentheses -Wmissing-format-attribute -Wfloat-equal \ - -Winline -Wsynth -Wno-old-style-cast -std=gnu++0x $(PB) $(CXXFLAGS) + +CXXFLAGS := $(OPT) -pthread $(GPROF) \ + -Wall \ + -Werror \ + -Wextra \ + -Wstrict-null-sentinel \ + -Wno-old-style-cast \ + -Woverloaded-virtual \ + -Wsign-promo \ + -Wformat=2 \ + -Winit-self \ + -Wswitch-enum \ + -Wunused \ + -Wstrict-overflow \ + -Wfloat-equal \ + -Wundef \ + -Wunsafe-loop-optimizations \ + -Wpointer-arith \ + -Wcast-qual \ + -Wcast-align \ + -Wwrite-strings \ + -Wconversion \ + -Wlogical-op \ + -Wno-aggregate-return \ + -Wno-missing-declarations \ + -Wno-missing-field-initializers \ + -Wmissing-format-attribute \ + -Wpacked \ + -Wredundant-decls \ + -Winline \ + -Winvalid-pch \ + -Wlong-long \ + -Wvolatile-register-var \ + -std=gnu++0x \ + $(PB) \ + $(CXXFLAGS) + + # -Wmissing-noreturn \ + # 
-Weffc++ \ + # -pedantic \ + # -Wshadow \ + # -Wswitch-default \ + # -Wpadded \ + # -Wunreachable-code \ + # -Wstack-protector \ + PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) @@ -83,7 +123,7 @@ $(COMPILE.cc) $(PBHDRS) $(OUTPUT_OPTION) $< clean: - rm -f $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) main.lzz *.clamp_h serperf + rm -f $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) main.lzz *.clamp_h distclean: clean rm -f all.h all.h.gch Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-22 06:33:36 UTC (rev 1216) +++ ydb/trunk/src/main.lzz.clamp 2009-02-22 06:33:43 UTC (rev 1217) @@ -457,8 +457,8 @@ len = ntohl(len); // Parse the message body. - if (len < 4096) { - char buf[len]; + if (len <= 4096) { + char buf[4096]; GETMSG(buf); } else { //cout << "receiving large msg; heap-allocating " << len << " bytes" << endl; @@ -1429,7 +1429,7 @@ * have marked themselves as interruptible. */ void handle_sig_sync() { - stfd fd = checkerr(st_netfd_open(sig_pipe[0])); + st_closing fd(checkerr(st_netfd_open(sig_pipe[0]))); while (true) { int sig; checkeqnneg(st_read(fd, &sig, sizeof sig, ST_UTIME_NO_TIMEOUT), Modified: ydb/trunk/src/p2.cc =================================================================== --- ydb/trunk/src/p2.cc 2009-02-22 06:33:36 UTC (rev 1216) +++ ydb/trunk/src/p2.cc 2009-02-22 06:33:43 UTC (rev 1217) @@ -64,7 +64,7 @@ end_ -= diff; } // read; advance end_ - STAT(readtime, readcnt, int res = ::read(fd(), end_, rem());) + STAT(readtime, readcnt, ssize_t res = ::read(fd(), end_, rem());) int e = errno; errno = 0; //cout << "read res " << res << endl; @@ -125,7 +125,7 @@ size_t rem() { return buf_.end() - end_; } arr getbuf(uint32_t req) { - uint32_t tot = req + sizeof req; + uint32_t tot = uint32_t(req + sizeof req); check(tot > 0); check(tot <= buf_.size()); //cout << "getbuf req " << req << endl; @@ -151,7 +151,7 @@ void write() { // perform the write - 
STAT(writetime, writecnt, int res = ::write(fd(), start_, end_ - start_);) + STAT(writetime, writecnt, ssize_t res = ::write(fd(), start_, end_ - start_);) if (res < 0) { close(fd()); throw exception(); } //cout << "write res " << res << " amt " << amt() << endl; // advance start_ @@ -182,7 +182,7 @@ void handle_write() { //cout << "writing" << endl; uint32_t npairs = batch_size; - uint32_t len = 2 * sizeof(uint32_t) * npairs; + uint32_t len = uint32_t(2 * sizeof(uint32_t) * npairs); arr a = w_.getbuf(len); buf_ = a; if (buf_ == nullptr) return; @@ -234,7 +234,7 @@ //if (verbose) cout << current_time_millis() << ": count " << counter_ << endl; if (counter_ > thresh) { long long end = current_time_millis(); - double rate = double(counter_) / (end - start_) * 1000; + double rate = counter_ / double(end - start_) * 1000; cout << "rate " << rate << " pairs/s " << rate / 5 << " tps; readcount " << readcount_ << endl; throw exception(); } Modified: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc 2009-02-22 06:33:36 UTC (rev 1216) +++ ydb/trunk/src/ser.cc 2009-02-22 06:33:43 UTC (rev 1217) @@ -28,7 +28,7 @@ uint32_t len = 0; str.append(sizeof len, '\0'); check(batch.AppendToString(&str)); - len = str.size() - sizeof len; + len = uint32_t(str.size() - sizeof len); copy((char*) &len, (char*) &len + sizeof len, str.begin()); os(str.data(), str.size()); } Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-02-22 06:33:36 UTC (rev 1216) +++ ydb/trunk/src/ser.h 2009-02-22 06:33:43 UTC (rev 1217) @@ -4,6 +4,7 @@ #include <commons/array.h> #include <commons/exceptions.h> #include <commons/st/st.h> +#include <commons/utility.h> #include <iomanip> #include <iostream> #include "ydb.pb.h" @@ -26,6 +27,7 @@ class writer { + NONCOPYABLE(writer) private: commons::array<char> a_; char *p_; This was sent by the SourceForge.net collaborative 
development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-02-25 19:26:01
|
Revision: 1233 http://assorted.svn.sourceforge.net/assorted/?rev=1233&view=rev Author: yangzhang Date: 2009-02-25 19:25:53 +0000 (Wed, 25 Feb 2009) Log Message: ----------- - fixed raw-buffer in 0-node mode and --wal modes: added buffer-sharing between reader and writer - added --read-buf-size - make main.lzz not writeable Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-02-25 19:25:50 UTC (rev 1232) +++ ydb/trunk/src/Makefile 2009-02-25 19:25:53 UTC (rev 1233) @@ -109,7 +109,9 @@ protoc --cpp_out=. $< %.lzz: %.lzz.clamp + rm -f $@ clamp < $< | sed '1d' > $@ + chmod -w $@ main.o: ser.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-02-25 19:25:50 UTC (rev 1232) +++ ydb/trunk/src/main.lzz.clamp 2009-02-25 19:25:53 UTC (rev 1233) @@ -64,7 +64,7 @@ st_utime_t timeout; int chkpt, accept_joiner_seqno, issuing_interval, min_ops, max_ops, stop_on_seqno, batch_size; -size_t accept_joiner_size, buf_size; +size_t accept_joiner_size, buf_size, read_buf_size; bool verbose, yield_during_build_up, yield_during_catch_up, dump, show_updates, count_updates, stop_on_recovery, general_txns, profile_threads, debug_threads, multirecover, disk, debug_memory, use_wal, use_pb, use_pb_res, @@ -537,10 +537,21 @@ 0); }); - reader r(nullptr); + commons::array<char> rbuf(read_buf_size), wbuf(buf_size); + reader r(nullptr, rbuf.get(), rbuf.size()); function<void(const void*, size_t)> fn; if (use_wal) fn = bind(&wal::logbuf, g_wal, _1, _2); + //else if (newreps.empty()) + // fn = lambda(const void *buf, size_t len) { + // // Prepare a new buffer to swap with the writer's current working buffer. + // new buffer; + // // Copy data past the end of the current buffer into the new buffer, so + // // that it's not lost. 
+ // copy(); + // // Swap the current buffer with the new buffer. + // swap(); + // }; else fn = lambda(const void *buf, size_t len) { foreach (st_netfd_t dst, __ref(fds)) @@ -548,11 +559,15 @@ static_cast<ssize_t>(len)); }; - writer w(fn, buf_size); + char *real_wbuf = newreps.empty() ? rbuf.get() : wbuf.get(); + size_t real_wbuf_size = newreps.empty() ? rbuf.size() : wbuf.size(); + writer w(fn, real_wbuf, real_wbuf_size); stream s(r,w); scoped_ptr<TxnBatch> pbatch(new_TxnBatch<TxnBatch>(s)); TxnBatch batch = *pbatch; - for (int t = 0; t < batch_size; ++t) batch.add_txn(); + if (Types::is_pb()) + for (int t = 0; t < batch_size; ++t) + batch.add_txn(); while (!stop_hub) { w.mark(); @@ -569,6 +584,7 @@ } } // Bring in any new members. + // TODO more efficient: copy/extend/append while (!newreps.empty()) { fds.push_back(newreps.take().fd()); } @@ -576,6 +592,7 @@ // Generate some random transactions. start_txn(batch); for (int t = 0; t < batch_size; ++t) { + char *txn_start = w.cur(); Txn &txn = *batch.add_txn(); txn.set_seqno(seqno); int count = randint(min_ops, max_ops + 1); @@ -594,8 +611,9 @@ // Process immediately if not bcasting. if (fds.empty()) { --seqno; + r.reset_range(txn_start, w.cur()); + if (!Types::is_pb()) txn.Clear(); process_txn<Types, pb_types>(g_map, txn, seqno, nullptr); - w.reset(); } // Checkpoint. @@ -630,38 +648,49 @@ ++seqno; } fin_txn(batch); - if (batch.txn_size() == 0) w.reset(); - // Broadcast. - if (Types::is_pb() && !fds.empty() && !suppress_txn_msgs) { - bcastmsg(fds, batch); - } else if (use_wal) { - g_wal->log(batch); - } else if (force_ser) { - string s; - ser(s, batch); + if (Types::is_pb()) { + // Broadcast/log/serialize. 
+ // TODO optimize: reuse serialization (have these functions take + // serialized buffers instead of message structures) + if (!fds.empty() && !suppress_txn_msgs) { + bcastmsg(fds, batch); + } + if (use_wal) { + g_wal->log(batch); + } + if (fds.empty() && suppress_txn_msgs && !use_wal && force_ser) { + string s; + ser(s, batch); + } + } else { + // Reset if we have nobody to send to (incl. disk) or if we actually have + // no txns (possible due to loop structure; want to avoid to avoid + // confusing with the 0-txn message signifying "prepare a recovery msg"). + if ((fds.empty() && !use_wal) || batch.txn_size() == 0) { + w.reset(); + } } - if (fds.empty()) - w.reset(); - // Pause? if (do_pause) do_pause.waitreset(); } // This means "The End." - w.mark(); - batch.Clear(); - start_txn(batch); - Txn &txn = *batch.add_txn(); - txn.set_seqno(-1); - start_op(txn); - fin_op(txn); - fin_txn(batch); - if (Types::is_pb()) bcastmsg(fds, batch); - w.mark(); - w.flush(); + if (!fds.empty()) { + w.mark(); + batch.Clear(); + start_txn(batch); + Txn &txn = *batch.add_txn(); + txn.set_seqno(-1); + start_op(txn); + fin_op(txn); + fin_txn(batch); + if (Types::is_pb()) bcastmsg(fds, batch); + w.mark(); + w.flush(); + } } /** @@ -831,12 +860,13 @@ __ref(send_states).push(shared_ptr<Recovery>()); }); - st_reader reader(leader); + commons::array<char> rbuf(read_buf_size), wbuf(buf_size); + st_reader reader(leader, rbuf.get(), rbuf.size()); vector<st_netfd_t> leader_v(1, leader); writer w(lambda(const void *buf, size_t len) { checkeqnneg(st_write(__ref(leader), buf, len, ST_UTIME_NO_TIMEOUT), static_cast<ssize_t>(len)); - }, buf_size); + }, wbuf.get(), wbuf.size()); stream s(reader, w); try { @@ -968,10 +998,11 @@ finally f(bind(&response_handler::cleanup, this)); - st_reader reader(replica); + commons::array<char> rbuf(read_buf_size), wbuf(buf_size); + st_reader reader(replica, rbuf.get(), rbuf.size()); writer w(lambda(const void*, size_t) { throw operation_not_supported("response 
handler should not be writing"); - }, buf_size); + }, wbuf.get(), wbuf.size()); stream s(reader,w); scoped_ptr<ResponseBatch> pbatch(new_ResponseBatch<ResponseBatch>(s)); @@ -1568,6 +1599,8 @@ ("leader-port,P", po::value<uint16_t>(&leader_port)->default_value(7654), "port the leader listens on") + ("read-buf", po::value<size_t>(&read_buf_size)->default_value(1e7), + "size of the incoming (read) buffer in bytes") ("write-buf", po::value<size_t>(&buf_size)->default_value(1e5), "size of the outgoing (write) buffer in bytes") ("chkpt,c", po::value<int>(&chkpt)->default_value(1000), @@ -1600,8 +1633,6 @@ check(min_ops > 0); check(max_ops > 0); check(max_ops >= min_ops); - - if (minreps == 0 && !use_wal) use_pb = true; // XXX } catch (std::exception &ex) { cerr << ex.what() << endl << endl << desc << endl; return 1; Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-02-25 19:25:50 UTC (rev 1232) +++ ydb/trunk/src/ser.h 2009-02-25 19:25:53 UTC (rev 1233) @@ -13,13 +13,25 @@ #define END_NAMESPACE } #define MAKE_START_FIN_HELPER(MsgType, field, action) \ - template<typename T> void action##_##field(T &msg); \ - template<> void action##_##field(ydb::pb::MsgType&) {} \ - template<> void action##_##field(ydb::msg::MsgType& msg) { msg.action##_##field(); } + template<typename T> inline void action##_##field(T &msg); \ + template<> inline void action##_##field(ydb::pb::MsgType&) {} \ + template<> inline void action##_##field(ydb::msg::MsgType& msg) { \ + msg.action##_##field(); \ + } + #define MAKE_START_FIN(MsgType, field) \ MAKE_START_FIN_HELPER(MsgType, field, start) \ MAKE_START_FIN_HELPER(MsgType, field, fin) +#define MAKE_TOP_MSG(MsgType) \ + template<typename T> inline T *new_##MsgType(stream &s); \ + template<> inline ydb::pb::MsgType *new_##MsgType(stream &) { \ + return new ydb::pb::MsgType(); \ + } \ + template<> inline ydb::msg::MsgType *new_##MsgType(stream &s) { \ + return new 
ydb::msg::MsgType(s); \ + } + #define EXPAND_PB \ bool AppendToString(string*) const { throw_operation_not_supported(); } \ bool SerializeToString(string*) const { throw_operation_not_supported(); } \ @@ -28,6 +40,16 @@ size_t GetCachedSize() const { throw_operation_not_supported(); } \ size_t ByteSize() const { throw_operation_not_supported(); } \ +#define MAKE_TYPE_BATCH(name, ns, b) \ + struct name##_types { \ + typedef ydb::ns::TxnBatch TxnBatch; \ + typedef ydb::ns::Txn Txn; \ + typedef ydb::ns::Op Op; \ + typedef ydb::ns::Response Response; \ + typedef ydb::ns::ResponseBatch ResponseBatch; \ + static bool is_pb() { return b; } \ + }; + BEGIN_NAMESPACE(ydb) BEGIN_NAMESPACE(msg) @@ -45,7 +67,7 @@ { NONCOPYABLE(writer) private: - commons::array<char> a_; + sized_array<char> a_; char *p_; char *mark_; char *unsent_; @@ -67,9 +89,10 @@ *reinterpret_cast<T*>(reserve(sizeof x, p)) = x; } public: - writer(boost::function<void(void*, size_t)> flushcb, size_t buf_size) : - a_(buf_size), p_(a_.get()), mark_(p_), unsent_(a_.get()), flushcb(flushcb) {} - commons::array<char> &buf() { return a_; } + writer(boost::function<void(void*, size_t)> flushcb, char *a, size_t buf_size) : + a_(a, buf_size), p_(a_.get()), mark_(p_), unsent_(a_.get()), flushcb(flushcb) {} + sized_array<char> &buf() { return a_; } + char *cur() { return p_; } size_t pos() { return p_ - mark_; } size_t size() { return a_.size(); } void mark() { mark_ = p_; } @@ -179,12 +202,9 @@ EXPAND_PB }; -template<typename T> T *new_TxnBatch(stream &s); -template<> ydb::pb::TxnBatch *new_TxnBatch(stream &) { return new ydb::pb::TxnBatch(); } -template<> ydb::msg::TxnBatch *new_TxnBatch(stream &s) { return new ydb::msg::TxnBatch(s); } - MAKE_START_FIN(Txn, op) MAKE_START_FIN(TxnBatch, txn) +MAKE_TOP_MSG(TxnBatch) class Response { @@ -234,32 +254,13 @@ EXPAND_PB }; -template<typename T> T *new_ResponseBatch(stream &s); -template<> ydb::pb::ResponseBatch *new_ResponseBatch(stream &) { return new 
ydb::pb::ResponseBatch(); } -template<> ydb::msg::ResponseBatch *new_ResponseBatch(stream &s) { return new ydb::msg::ResponseBatch(s); } - MAKE_START_FIN(Response, result) MAKE_START_FIN(ResponseBatch, res) +MAKE_TOP_MSG(ResponseBatch) -struct pb_types { - typedef ydb::pb::TxnBatch TxnBatch; - typedef ydb::pb::Txn Txn; - typedef ydb::pb::Op Op; - typedef ydb::pb::Response Response; - typedef ydb::pb::ResponseBatch ResponseBatch; - static bool is_pb() { return true; } -}; +MAKE_TYPE_BATCH(pb, pb, true) +MAKE_TYPE_BATCH(rb, msg, false) -// rb = raw buffer -struct rb_types { - typedef ydb::msg::TxnBatch TxnBatch; - typedef ydb::msg::Txn Txn; - typedef ydb::msg::Op Op; - typedef ydb::msg::Response Response; - typedef ydb::msg::ResponseBatch ResponseBatch; - static bool is_pb() { return false; } -}; - END_NAMESPACE END_NAMESPACE This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-03 18:50:44
|
Revision: 1245 http://assorted.svn.sourceforge.net/assorted/?rev=1245&view=rev Author: yangzhang Date: 2009-03-03 18:50:38 +0000 (Tue, 03 Mar 2009) Log Message: ----------- - fixed bug: was reading caught_up as int instead of char - fixed bug: was not clearing Response in ResponseBatch::add_res() - fixed bug: was serializing resbatch instead of batch in process_txns - add mark_and_flush() so that the buffer can be cleared out - replaced --wal with --twal and --pwal; reintroduced physical logging Modified Paths: -------------- ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-03 04:44:03 UTC (rev 1244) +++ ydb/trunk/src/main.lzz.clamp 2009-03-03 18:50:38 UTC (rev 1245) @@ -68,7 +68,8 @@ size_t accept_joiner_size, buf_size, read_buf_size; bool yield_during_build_up, yield_during_catch_up, dump, show_updates, count_updates, stop_on_recovery, general_txns, profile_threads, - debug_threads, multirecover, disk, debug_memory, use_wal, use_pb, use_pb_res, + debug_threads, multirecover, disk, debug_memory, use_pwal, use_twal, + use_pb, use_pb_res, suppress_txn_msgs, fake_bcast, force_ser, fake_exec; long long timelim, read_thresh, write_thresh; @@ -531,7 +532,6 @@ void logbuf(const void *buf, size_t len) { of.write(reinterpret_cast<const char*>(buf), len); } -#if 0 void del(int key) { int op = op_del; // TODO: is this really necessary? 
out & op & key; @@ -544,7 +544,6 @@ int op = op_commit; out & op; } -#endif private: enum { op_del, op_write, op_commit }; ofstream of; @@ -579,7 +578,7 @@ commons::array<char> rbuf(read_buf_size), wbuf(buf_size); reader r(nullptr, rbuf.get(), rbuf.size()); function<void(const void*, size_t)> fn; - if (use_wal) + if (use_twal) fn = bind(&wal::logbuf, g_wal, _1, _2); else fn = lambda(const void *buf, size_t len) { @@ -609,7 +608,9 @@ if (!newreps.empty() && seqno > 0) { start_txn(batch); fin_txn(batch); - w.mark(); + // TODO: verify that this made the catch-up stream more efficient, + // starting it only at the point necessary + w.mark_and_flush(); if (Types::is_pb()) { if (multirecover) bcastmsg(fds, batch); else sendmsg(fds[0], batch); @@ -684,17 +685,17 @@ bool do_bcast = !fds.empty() && !suppress_txn_msgs; if (Types::is_pb()) { // Broadcast/log/serialize. - if (force_ser || do_bcast || use_wal) { + if (force_ser || do_bcast || use_twal) { serbuf.clear(); ser(serbuf, batch); if (do_bcast) bcastbuf(fds, serbuf); - if (use_wal) g_wal->logbuf(serbuf); + if (use_twal) g_wal->logbuf(serbuf); } } else { // Reset if we have nobody to send to (incl. disk) or if we actually have // no txns (possible due to loop structure; want to avoid to avoid // confusing with the 0-txn message signifying "prepare a recovery msg"). 
- if (!do_bcast && !use_wal) { + if (!do_bcast && !use_twal) { w.reset(); } } @@ -715,8 +716,7 @@ fin_op(txn); fin_txn(batch); if (Types::is_pb()) bcastmsg(fds, batch); - w.mark(); - w.flush(); + w.mark_and_flush(); } } @@ -760,14 +760,14 @@ case Op::write: { int value = op.value(); - //if (use_wal) wal.write(key, value); + if (use_pwal) g_wal->write(key, value); if (it == map.end()) map[key] = value; else it->second = value; break; } case Op::del: if (it != map.end()) { - //if (use_wal) wal.del(key); + if (use_pwal) g_wal->del(key); map.erase(it); } break; @@ -776,7 +776,7 @@ } if (res != nullptr) fin_result(*res); - //if (use_wal) wal.commit(); + if (use_pwal) g_wal->commit(); } void @@ -880,6 +880,15 @@ // issued more since the Init message). int first_seqno = -1; + commons::array<char> rbuf(read_buf_size), wbuf(buf_size); + st_reader reader(leader, rbuf.get(), rbuf.size()); + vector<st_netfd_t> leader_v(1, leader); + writer w(lambda(const void *buf, size_t len) { + checkeqnneg(st_write(__ref(leader), buf, len, ST_UTIME_NO_TIMEOUT), + static_cast<ssize_t>(len)); + }, wbuf.get(), wbuf.size()); + stream s(reader, w); + finally f(lambda () { long long now = current_time_millis(); showtput("processed", now, __ref(start_time), __ref(seqno), @@ -891,17 +900,10 @@ __ref(seqno_caught_up)); } __ref(send_states).push(shared_ptr<Recovery>()); + __ref(w).mark_and_flush(); + st_sleep(1); }); - commons::array<char> rbuf(read_buf_size), wbuf(buf_size); - st_reader reader(leader, rbuf.get(), rbuf.size()); - vector<st_netfd_t> leader_v(1, leader); - writer w(lambda(const void *buf, size_t len) { - checkeqnneg(st_write(__ref(leader), buf, len, ST_UTIME_NO_TIMEOUT), - static_cast<ssize_t>(len)); - }, wbuf.get(), wbuf.size()); - stream s(reader, w); - try { scoped_ptr<TxnBatch> pbatch(new_TxnBatch<TxnBatch>(s)); TxnBatch &batch = *pbatch; @@ -954,7 +956,7 @@ if (first_seqno == -1) first_seqno = txn.seqno(); // Queue up for later processing once a snapshot has been received. 
- // XXX + // XXX speed up backlog.push(to_pb_Txn(txn)); action = "backlogged"; } @@ -970,7 +972,7 @@ fin_res(resbatch); if (RTypes::is_pb() && resbatch.res_size() > 0) { serbuf.clear(); - ser(serbuf, batch); + ser(serbuf, resbatch); sendbuf(leader, serbuf); } } else if (multirecover || mypos == 0) { @@ -1086,7 +1088,14 @@ // catching up. If it has, then broadcast a signal so that all response // handlers will know about this event. int rseqno = res.seqno(); + if (rseqno <= last_seqno) + throw msg_exception(string("response seqno decreased from ") + + lexical_cast<string>(last_seqno) + " to " + + lexical_cast<string>(rseqno)); bool rcaught_up = res.caught_up(); + for (int r = 0; r < res.result_size(); ++r) { + cout << rseqno << last_seqno << res.result_size() << " " << r << " " << res.result(r) << endl; + } if (!caught_up && rcaught_up) { long long now = current_time_millis(), timediff = now - start_time; caught_up = true; @@ -1476,18 +1485,16 @@ while (!backlog.empty()) { using pb::Txn; shared_ptr<Txn> p = backlog.take(); - if (p->seqno() > seqno) { - process_txn<pb_types, pb_types>(map, *p, seqno, nullptr); - if (check_interval(p->seqno(), catch_up_display)) { - cout << "processed txn " << p->seqno() << " off the backlog; " - << "backlog.size = " << backlog.queue().size() << endl; - } - if (check_interval(p->seqno(), yield_interval)) { - // Explicitly yield. (Note that yielding does still effectively - // happen anyway because process_txn is a yield point.) - st_sleep(0); - } + process_txn<pb_types, pb_types>(map, *p, seqno, nullptr); + if (check_interval(p->seqno(), catch_up_display)) { + cout << "processed txn " << p->seqno() << " off the backlog; " + << "backlog.size = " << backlog.queue().size() << endl; } + if (check_interval(p->seqno(), yield_interval)) { + // Explicitly yield. (Note that yielding does still effectively + // happen anyway because process_txn is a yield point.) 
+ st_sleep(0); + } } showtput("replayer caught up; from backlog replayed", current_time_millis(), mid_time, seqno, mid_seqno); @@ -1611,8 +1618,10 @@ "use protocol buffers instead of raw buffers for txns") ("use-pb-res", po::bool_switch(&use_pb_res), "use protocol buffers instead of raw buffers for responses") - ("wal", po::bool_switch(&use_wal), - "enable ARIES write-ahead logging") + ("twal", po::bool_switch(&use_twal), + "enable transactional write-ahead logging") + ("pwal", po::bool_switch(&use_pwal), + "enable physical write-ahead logging") ("force-ser", po::bool_switch(&force_ser), "force issue_txns to serialize its Txns") ("leader,l", po::bool_switch(&is_leader), Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-03-03 04:44:03 UTC (rev 1244) +++ ydb/trunk/src/ser.h 2009-03-03 18:50:38 UTC (rev 1245) @@ -91,7 +91,7 @@ } public: writer(boost::function<void(void*, size_t)> flushcb, char *a, size_t buf_size) : - a_(a, buf_size), p_(a_.get()), mark_(p_), unsent_(a_.get()), flushcb(flushcb) {} + a_(a, buf_size), p_(a_.get()), mark_(p_), unsent_(p_), flushcb(flushcb) {} sized_array<char> &buf() { return a_; } char *cur() { return p_; } size_t pos() { return p_ - mark_; } @@ -99,6 +99,11 @@ void mark() { mark_ = p_; } void reset() { p_ = mark_; } void reserve(int n) { reserve(n, p_); } + void mark_and_flush() { + mark(); + flush(); + unsent_ = mark_ = p_ = a_.get(); + } void flush() { if (mark_ - unsent_ > 0) { flushcb(unsent_, mark_ - unsent_); @@ -220,7 +225,7 @@ void set_seqno(int x) { w_.write(x); } void set_caught_up(char x) { w_.write(x); } int seqno() const { return r_.read<int>(); } - bool caught_up() const { return r_.read<int>(); } + bool caught_up() const { return r_.read<char>(); } void start_result() { if (nres_ == unset) nres_ = 0; w_.skip<typeof(nres_)>(); } void add_result(int x) { w_.write(x); } void fin_result() { w_.write(nres_, off_ + sizeof(int) + sizeof(char)); } @@ 
-244,7 +249,7 @@ ResponseBatch(stream &s) : s_(s), r_(s.get_reader()), w_(s.get_writer()), off_(w_.pos()), res_(s), nres_(unset) {} void Clear() { res_.Clear(); nres_ = unset; off_ = w_.pos(); } void start_res() { if (nres_ == unset) nres_ = 0; w_.skip<typeof(nres_)>(); } - Response *add_res() { ++nres_; return &res_; } + Response *add_res() { ++nres_; res_.Clear(); return &res_; } void fin_res() { w_.write(nres_, off_); } int res_size() const { if (nres_ == unset) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-04 06:55:47
|
Revision: 1246 http://assorted.svn.sourceforge.net/assorted/?rev=1246&view=rev Author: yangzhang Date: 2009-03-04 06:55:42 +0000 (Wed, 04 Mar 2009) Log Message: ----------- - added byte length prefixes to marked segments in writer - updated ser Modified Paths: -------------- ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-03 18:50:38 UTC (rev 1245) +++ ydb/trunk/src/main.lzz.clamp 2009-03-04 06:55:42 UTC (rev 1246) @@ -911,6 +911,7 @@ ResponseBatch &resbatch = *presbatch; ser_t serbuf; while (true) { + uint32_t prefix = 0; long long before_read = -1; if (read_thresh > 0) { before_read = current_time_millis(); @@ -918,7 +919,7 @@ { st_intr intr(stop_hub); if (Types::is_pb()) readmsg(reader, batch); - else batch.Clear(); + else { prefix = reader.read<uint32_t>(); batch.Clear(); } } if (read_thresh > 0) { long long read_time = current_time_millis() - before_read; @@ -931,7 +932,17 @@ w.mark(); resbatch.Clear(); start_res(resbatch); + // XXX + //char *start = reader.start(); + //const Txn &first_txn = batch.txn(0); + //if (txn.seqno() < 0) { + //} else if (txn.seqno() == seqno + 1) { + //} else { + // // Skip entire message. + // reader. + //} for (int t = 0; t < batch.txn_size(); ++t) { + // XXX const Txn &txn = t == 0 ? first_txn : batch.txn(t); const Txn &txn = batch.txn(t); // Regular transaction. const char *action; @@ -955,8 +966,11 @@ } else { if (first_seqno == -1) first_seqno = txn.seqno(); - // Queue up for later processing once a snapshot has been received. - // XXX speed up + // Queue up entire buffer for later processing once a snapshot has + // been received. + // XXX backlog.push(array()); + // Stop the loop. 
+ // XXX t = batch.txn_size(); backlog.push(to_pb_Txn(txn)); action = "backlogged"; } @@ -1053,6 +1067,7 @@ while (true) { finally f(loop_cleanup); + uint32_t prefix = 0; // Read the message, but correctly respond to interrupts so that we can // cleanly exit (slightly tricky). @@ -1061,7 +1076,7 @@ try { st_intr intr(stop_hub); if (Types::is_pb()) readmsg(reader, batch); - else batch.Clear(); + else { prefix = reader.read<uint32_t>(); batch.Clear(); } } catch (...) { // TODO: only catch interruptions // This check on seqnos is OK for termination since the seqno will // never grow again if stop_hub is set. @@ -1079,7 +1094,7 @@ // to get all the acks back). st_intr intr(kill_hub); if (Types::is_pb()) readmsg(reader, batch); - else batch.Clear(); + else { prefix = reader.read<uint32_t>(); batch.Clear(); } } for (int i = 0; i < batch.res_size(); ++i) { Modified: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc 2009-03-03 18:50:38 UTC (rev 1245) +++ ydb/trunk/src/ser.cc 2009-03-04 06:55:42 UTC (rev 1246) @@ -42,8 +42,9 @@ typedef typename types::Op Op; vector<st_netfd_t> dsts(1, dst); outstream os(dsts); - writer w(os, 90); - reader r(dst); + char *buf = new char[90]; + writer w(os, buf, 90); + reader r(dst, buf, 90); stream s(r,w); string str; const bool show = true; @@ -51,6 +52,7 @@ TxnBatch &batch = *p; for (int i = 0; i < nreps; ++i) { w.mark(); + w.show(); batch.Clear(); start_txn(batch); for (int t = 0; t < 2; ++t) { @@ -69,12 +71,13 @@ if (show) cout << w.pos() << '/' << w.size() << endl; if (types::is_pb()) push(batch, str, os); } + w.mark(); + w.show(); batch.Clear(); start_txn(batch); fin_txn(batch); - w.mark(); + w.mark_and_flush(); w.show(); - w.flush(); if (types::is_pb()) push(batch, str, os); } @@ -86,22 +89,26 @@ typedef typename types::Op Op; vector<st_netfd_t> v; outstream os(v); - writer w(os, 90); - reader r(src); + char *buf = new char[90]; + writer w(os, buf, 90); + reader r(src, 
buf, 90); stream s(r,w); string str; // XXX const bool show = true; scoped_ptr<TxnBatch> p(new_TxnBatch<TxnBatch>(s)); TxnBatch &batch = *p; while (true) { + uint32_t len; if (types::is_pb()) { - uint32_t len = r.read<uint32_t>(); + len = r.read<uint32_t>(); managed_array<char> a = r.read(len); check(batch.ParseFromArray(a.get(), len)); } else { + len = r.read<uint32_t>(); batch.Clear(); } - if (show) cout << "ntxn " << batch.txn_size() << endl; + if (show) w.show(); + if (show) cout << "len " << len << " ntxn " << batch.txn_size() << endl; if (batch.txn_size() == 0) break; for (int t = 0; t < batch.txn_size(); ++t) { const Txn &txn = batch.txn(t); Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-03-03 18:50:38 UTC (rev 1245) +++ ydb/trunk/src/ser.h 2009-03-04 06:55:42 UTC (rev 1246) @@ -69,45 +69,56 @@ NONCOPYABLE(writer) private: sized_array<char> a_; + char *unsent_; + char *mark_; char *p_; - char *mark_; - char *unsent_; boost::function<void(void*, size_t)> flushcb; char *reserve(int n, char *p) { if (p + n > a_.end()) { - assert(size_t(p - mark_ + n) <= a_.size()); + // check that the reserved space will fit + assert(size_t(p - mark_ + n + sizeof(uint32_t)) <= a_.size()); + // get rid of what we have flush(); - size_t diff = mark_ - a_.get(); - memmove(a_.get(), mark_, p_ - mark_); - unsent_ = mark_ = a_.get(); + size_t diff = mark_ - (a_.get() + sizeof(uint32_t)); + memmove(a_.get() + sizeof(uint32_t), mark_, p_ - mark_); + mark_ = (unsent_ = a_.get()) + sizeof(uint32_t); p_ -= diff; p -= diff; } return p; } + char *prefix() { return mark_ - sizeof(uint32_t); } template<typename T> void write_(T x, char *p) { *reinterpret_cast<T*>(reserve(sizeof x, p)) = x; } public: writer(boost::function<void(void*, size_t)> flushcb, char *a, size_t buf_size) : - a_(a, buf_size), p_(a_.get()), mark_(p_), unsent_(p_), flushcb(flushcb) {} + a_(a, buf_size), unsent_(a_.get()), mark_(unsent_ + 
sizeof(uint32_t)), + p_(mark_), flushcb(flushcb) {} sized_array<char> &buf() { return a_; } char *cur() { return p_; } size_t pos() { return p_ - mark_; } size_t size() { return a_.size(); } - void mark() { mark_ = p_; } + void mark() { + if (p_ > mark_) { + // prefix last segment with its length + *reinterpret_cast<uint32_t*>(prefix()) = uint32_t(p_ - mark_); + // start new segment + mark_ = (p_ += sizeof(uint32_t)); + } + } void reset() { p_ = mark_; } void reserve(int n) { reserve(n, p_); } void mark_and_flush() { mark(); flush(); - unsent_ = mark_ = p_ = a_.get(); + mark_ = p_ = (unsent_ = a_.get()) + sizeof(uint32_t); } void flush() { - if (mark_ - unsent_ > 0) { - flushcb(unsent_, mark_ - unsent_); - unsent_ = mark_; + if (prefix() > unsent_) { + flushcb(unsent_, prefix() - unsent_); + unsent_ = prefix(); } } void show() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-06 02:32:24
|
Revision: 1256 http://assorted.svn.sourceforge.net/assorted/?rev=1256&view=rev Author: yangzhang Date: 2009-03-06 02:32:19 +0000 (Fri, 06 Mar 2009) Log Message: ----------- pb_types -> pb_traits, rb_types -> rb_traits Modified Paths: -------------- ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.cc ydb/trunk/src/ser.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-05 23:16:46 UTC (rev 1255) +++ ydb/trunk/src/main.lzz.clamp 2009-03-06 02:32:19 UTC (rev 1256) @@ -652,7 +652,7 @@ --seqno; r.reset_range(txn_start, w.cur()); if (!Types::is_pb()) txn.Clear(); - process_txn<Types, pb_types>(g_map, txn, seqno, nullptr); + process_txn<Types, pb_traits>(g_map, txn, seqno, nullptr); } // Checkpoint. @@ -1571,7 +1571,7 @@ batch.Clear(); for (int t = 0; t < batch.txn_size(); ++t) { const Txn &txn = batch.txn(t); - process_txn<rb_types, rb_types>(map, txn, seqno, nullptr); + process_txn<rb_traits, rb_traits>(map, txn, seqno, nullptr); if (fake_exec && !Types::is_pb()) { reader.skip(txn.op_size() * Op_Size); } @@ -1592,7 +1592,7 @@ while (!backlog.empty()) { using pb::Txn; shared_ptr<Txn> p = backlog.take(); - process_txn<pb_types, pb_types>(map, *p, seqno, nullptr); + process_txn<pb_traits, pb_traits>(map, *p, seqno, nullptr); if (check_interval(p->seqno(), catch_up_display)) { cout << "processed txn " << p->seqno() << " off the backlog; " << "backlog.size = " << backlog.queue().size() << endl; @@ -1885,29 +1885,29 @@ if (is_leader) { if (use_pb) { if (use_pb_res) { - run_leader<pb_types, pb_types>(minreps, leader_port); + run_leader<pb_traits, pb_traits>(minreps, leader_port); } else { - run_leader<pb_types, rb_types>(minreps, leader_port); + run_leader<pb_traits, rb_traits>(minreps, leader_port); } } else { if (use_pb_res) { - run_leader<rb_types, pb_types>(minreps, leader_port); + run_leader<rb_traits, pb_traits>(minreps, leader_port); } else { - run_leader<rb_types, 
rb_types>(minreps, leader_port); + run_leader<rb_traits, rb_traits>(minreps, leader_port); } } } else { if (use_pb) { if (use_pb_res) { - run_replica<pb_types, pb_types>(leader_host, leader_port, listen_port); + run_replica<pb_traits, pb_traits>(leader_host, leader_port, listen_port); } else { - run_replica<pb_types, rb_types>(leader_host, leader_port, listen_port); + run_replica<pb_traits, rb_traits>(leader_host, leader_port, listen_port); } } else { if (use_pb_res) { - run_replica<rb_types, pb_types>(leader_host, leader_port, listen_port); + run_replica<rb_traits, pb_traits>(leader_host, leader_port, listen_port); } else { - run_replica<rb_types, rb_types>(leader_host, leader_port, listen_port); + run_replica<rb_traits, rb_traits>(leader_host, leader_port, listen_port); } } } Modified: ydb/trunk/src/ser.cc =================================================================== --- ydb/trunk/src/ser.cc 2009-03-05 23:16:46 UTC (rev 1255) +++ ydb/trunk/src/ser.cc 2009-03-06 02:32:19 UTC (rev 1256) @@ -138,15 +138,15 @@ st_netfd_t dst = checkerr(st_accept(listener, nullptr, nullptr, ST_UTIME_NO_TIMEOUT)); if (use_pb) - producer<pb_types>(dst); + producer<pb_traits>(dst); else - producer<rb_types>(dst); + producer<rb_traits>(dst); } else { st_netfd_t src = st_tcp_connect(argv[use_pb ? 
2 : 1], 7654, ST_UTIME_NO_TIMEOUT); if (use_pb) - consumer<pb_types>(src); + consumer<pb_traits>(src); else - consumer<rb_types>(src); + consumer<rb_traits>(src); } return 0; } Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-03-05 23:16:46 UTC (rev 1255) +++ ydb/trunk/src/ser.h 2009-03-06 02:32:19 UTC (rev 1256) @@ -43,7 +43,7 @@ int ByteSize() const { throw_operation_not_supported(); } \ #define MAKE_TYPE_BATCH(name, ns, b) \ - struct name##_types { \ + struct name##_traits { \ typedef ydb::ns::TxnBatch TxnBatch; \ typedef ydb::ns::Txn Txn; \ typedef ydb::ns::Op Op; \ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-12 19:35:07
|
Revision: 1285 http://assorted.svn.sourceforge.net/assorted/?rev=1285&view=rev Author: yangzhang Date: 2009-03-12 19:34:50 +0000 (Thu, 12 Mar 2009) Log Message: ----------- added tpcc Added Paths: ----------- ydb/trunk/src/tpcc/ ydb/trunk/src/tpcc/Makefile ydb/trunk/src/tpcc/assert.h ydb/trunk/src/tpcc/btree.h ydb/trunk/src/tpcc/clock.cc ydb/trunk/src/tpcc/clock.h ydb/trunk/src/tpcc/randomgenerator.cc ydb/trunk/src/tpcc/randomgenerator.h ydb/trunk/src/tpcc/stlutil.h ydb/trunk/src/tpcc/tpcc.cc ydb/trunk/src/tpcc/tpccclient.cc ydb/trunk/src/tpcc/tpccclient.h ydb/trunk/src/tpcc/tpccdb.cc ydb/trunk/src/tpcc/tpccdb.h ydb/trunk/src/tpcc/tpccgenerator.cc ydb/trunk/src/tpcc/tpccgenerator.h ydb/trunk/src/tpcc/tpcctables.cc ydb/trunk/src/tpcc/tpcctables.h Added: ydb/trunk/src/tpcc/Makefile =================================================================== --- ydb/trunk/src/tpcc/Makefile (rev 0) +++ ydb/trunk/src/tpcc/Makefile 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,25 @@ +WARNINGS = -Werror -Wall -Wextra -Wconversion -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare -Wno-unused-parameter + +# Debug flags +CXXFLAGS = -g -MD $(WARNINGS) -I.. 
+# Optimization flags +#CXXFLAGS = -g -O3 -DNDEBUG -MD $(WARNINGS) + +# Link withthe C++ standard library +LDFLAGS=-lstdc++ + +BINARIES = tpcc # btree_test randomgenerator_test tpccclient_test tpcctables_test tpccgenerator_test tpcc + +all: $(BINARIES) + +#btree_test: btree_test.o stupidunit.o +#randomgenerator_test: randomgenerator_test.o randomgenerator.o stupidunit.o +#tpccclient_test: tpccclient_test.o tpccclient.o randomgenerator.o stupidunit.o +#tpcctables_test: tpcctables_test.o tpcctables.o tpccdb.o randomgenerator.o stupidunit.o +#tpccgenerator_test: tpccgenerator_test.o tpccgenerator.o tpcctables.o tpccdb.o randomgenerator.o stupidunit.o +tpcc: tpcc.o tpccclient.o tpccgenerator.o tpcctables.o tpccdb.o clock.o randomgenerator.o + +clean : + rm -f *.o *.d $(BINARIES) + +-include *.d Added: ydb/trunk/src/tpcc/assert.h =================================================================== --- ydb/trunk/src/tpcc/assert.h (rev 0) +++ ydb/trunk/src/tpcc/assert.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,16 @@ +#ifndef ASSERT_H__ +#define ASSERT_H__ + +#include <cassert> + +// Wraps the standard assert macro to avoids "unused variable" warnings when compiled away. +// Inspired by: http://powerof2games.com/node/10 +// This is not the "default" because it does not conform to the requirements of the C standard, +// which requires that the NDEBUG version be ((void) 0). 
+#ifdef NDEBUG +#define ASSERT(x) do { (void)sizeof(x); } while(0) +#else +#define ASSERT(x) assert(x) +#endif + +#endif Added: ydb/trunk/src/tpcc/btree.h =================================================================== --- ydb/trunk/src/tpcc/btree.h (rev 0) +++ ydb/trunk/src/tpcc/btree.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,511 @@ +#if !defined BPLUSTREE_HPP_227824 +#define BPLUSTREE_HPP_227824 + +// This is required for glibc to define std::posix_memalign +#if !defined(_XOPEN_SOURCE) || (_XOPEN_SOURCE < 600) +#define _XOPEN_SOURCE 600 +#endif + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include <boost/static_assert.hpp> +#include <boost/pool/object_pool.hpp> + +// DEBUG +#include <iostream> +using std::cout; +using std::endl; + +#ifdef __linux__ +#define HAVE_POSIX_MEMALIGN +#endif + +#ifdef HAVE_POSIX_MEMALIGN +// Nothing to do +#else +// TODO: This is not aligned! It doesn't matter for this implementation, but it could +static inline int posix_memalign(void** result, int, size_t bytes) { + *result = malloc(bytes); + return *result == NULL; +} +#endif + +template <typename KEY, typename VALUE, unsigned N, unsigned M, + unsigned INNER_NODE_PADDING= 0, unsigned LEAF_NODE_PADDING= 0, + unsigned NODE_ALIGNMENT= 64> +class BPlusTree +{ +public: + // N must be greater than two to make the split of + // two inner nodes sensible. + BOOST_STATIC_ASSERT(N>2); + // Leaf nodes must be able to hold at least one element + BOOST_STATIC_ASSERT(M>0); + + // Builds a new empty tree. + BPlusTree() + : depth(0), + root(new_leaf_node()) + { + // DEBUG + // cout << "sizeof(LeafNode)==" << sizeof(LeafNode) << endl; + // cout << "sizeof(InnerNode)==" << sizeof(InnerNode) << endl; + } + + ~BPlusTree() { + // Empty. Memory deallocation is done automatically + // when innerPool and leafPool are destroyed. + } + + // Inserts a pair (key, value). If there is a previous pair with + // the same key, the old value is overwritten with the new one. 
+ void insert(KEY key, VALUE value) { + // GCC warns that this may be used uninitialized, even though that is untrue. + InsertionResult result = { KEY(), 0, 0 }; + bool was_split; + if( depth == 0 ) { + // The root is a leaf node + assert( *reinterpret_cast<NodeType*>(root) == + NODE_LEAF); + was_split= leaf_insert(reinterpret_cast<LeafNode*> + (root), key, value, &result); + } else { + // The root is an inner node + assert( *reinterpret_cast<NodeType*> + (root) == NODE_INNER ); + was_split= inner_insert(reinterpret_cast<InnerNode*> + (root), depth, key, value, &result); + } + if( was_split ) { + // The old root was splitted in two parts. + // We have to create a new root pointing to them + depth++; + root= new_inner_node(); + InnerNode* rootProxy= + reinterpret_cast<InnerNode*>(root); + rootProxy->num_keys= 1; + rootProxy->keys[0]= result.key; + rootProxy->children[0]= result.left; + rootProxy->children[1]= result.right; + } + } + +// Looks for the given key. If it is not found, it returns false, +// if it is found, it returns true and copies the associated value +// unless the pointer is null. +bool find(const KEY& key, VALUE* value= 0) const { + const InnerNode* inner; + register const void* node= root; + register unsigned d= depth, index; + while( d-- != 0 ) { + inner= reinterpret_cast<const InnerNode*>(node); + assert( inner->type == NODE_INNER ); + index= inner_position_for(key, inner->keys, inner->num_keys); + node= inner->children[index]; + } + const LeafNode* leaf= reinterpret_cast<const LeafNode*>(node); + assert( leaf->type == NODE_LEAF ); + index= leaf_position_for(key, leaf->keys, leaf->num_keys); + if( leaf->keys[index] == key ) { + if( value != 0 ) { + *value= leaf->values[index]; + } + if (leaf->values[index]) + return true; + else return false; + } else { + return false; + } +} + + +// Looks for the given key. 
If it is not found, it returns false, +// if it is found, it returns true and sets +// the associated value to NULL +// Note: del currently leaks memory. Fix later. +bool del(const KEY& key) { + InnerNode* inner; + register void* node= root; + register unsigned d= depth, index; + while( d-- != 0 ) { + inner= reinterpret_cast<InnerNode*>(node); + assert( inner->type == NODE_INNER ); + index= inner_position_for(key, inner->keys, inner->num_keys); + node= inner->children[index]; + } + LeafNode* leaf= reinterpret_cast<LeafNode*>(node); + assert( leaf->type == NODE_LEAF ); + index= leaf_position_for(key, leaf->keys, leaf->num_keys); + if( leaf->keys[index] == key ) { + leaf->values[index] = 0; + return true; + } else { + return false; + } +} + +// Finds the LAST item that is < key. That is, the next item in the tree is not < key, but this +// item is. If we were to insert key into the tree, it would go after this item. This is weird, +// but is easier than implementing iterators. In STL terms, this would be "lower_bound(key)--" +// WARNING: This does *not* work when values are deleted. Thankfully, TPC-C does not use deletes. 
+bool findLastLessThan(const KEY& key, VALUE* value = 0, KEY* out_key = 0) const { + const void* node = root; + unsigned int d = depth; + while( d-- != 0 ) { + const InnerNode* inner = reinterpret_cast<const InnerNode*>(node); + assert( inner->type == NODE_INNER ); + unsigned int pos = inner_position_for(key, inner->keys, inner->num_keys); + // We need to rewind in the case where they are equal + if (pos > 0 && key == inner->keys[pos-1]) { + pos -= 1; + } + assert(pos == 0 || inner->keys[pos-1] < key); + node = inner->children[pos]; + } + const LeafNode* leaf= reinterpret_cast<const LeafNode*>(node); + assert( leaf->type == NODE_LEAF ); + unsigned int pos = leaf_position_for(key, leaf->keys, leaf->num_keys); + if (pos <= leaf->num_keys) { + pos -= 1; + if (pos < leaf->num_keys && key == leaf->keys[pos]) { + pos -= 1; + } + + if (pos < leaf->num_keys) { + assert(leaf->keys[pos] < key); + if (leaf->values[pos]) { + if (value != NULL) { + *value = leaf->values[pos]; + } + if (out_key != NULL) { + *out_key = leaf->keys[pos]; + } + return true; + } + } + } + + return false; +} + + // Returns the size of an inner node + // It is useful when optimizing performance with cache alignment. + unsigned sizeof_inner_node() const { + return sizeof(InnerNode); + } + + // Returns the size of a leaf node. + // It is useful when optimizing performance with cache alignment. + unsigned sizeof_leaf_node() const { + return sizeof(LeafNode); + } + + +private: + // Used when debugging + enum NodeType {NODE_INNER=0xDEADBEEF, NODE_LEAF=0xC0FFEE}; + + // Leaf nodes store pairs of keys and values. + struct LeafNode { +#ifndef NDEBUG + LeafNode() : type(NODE_LEAF), num_keys(0) {memset(keys,0,sizeof(KEY)*M);} + const NodeType type; +#else + LeafNode() : num_keys(0) {memset(keys,0,sizeof(KEY)*M);} +#endif + unsigned num_keys; + KEY keys[M]; + VALUE values[M]; + // unsigned char _pad[LEAF_NODE_PADDING]; + }; + + // Inner nodes store pointers to other nodes interleaved with keys. 
+ struct InnerNode { +#ifndef NDEBUG + InnerNode() : type(NODE_INNER), num_keys(0) {memset(keys,0,sizeof(KEY)*M);} + const NodeType type; +#else + InnerNode() : num_keys(0) {memset(keys,0,sizeof(KEY)*M);} +#endif + unsigned num_keys; + KEY keys[N]; + void* children[N+1]; + // unsigned char _pad[INNER_NODE_PADDING]; + }; + + // Custom allocator that returns aligned blocks of memory + template <unsigned ALIGNMENT> + struct AlignedMemoryAllocator { + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + static char* malloc(const size_type bytes) + { + void* result; + if( posix_memalign(&result, ALIGNMENT, bytes) != 0 ) { + result= 0; + } + // Alternative: result= std::malloc(bytes); + return reinterpret_cast<char*>(result); + } + static void free(char* const block) + { std::free(block); } + }; + + // Returns a pointer to a fresh leaf node. + LeafNode* new_leaf_node() { + LeafNode* result; + //result= new LeafNode(); + result= leafPool.construct(); + //cout << "New LeafNode at " << result << endl; + return result; + } + + // Frees a leaf node previously allocated with new_leaf_node() + void delete_leaf_node(LeafNode* node) { + assert( node->type == NODE_LEAF ); + //cout << "Deleting LeafNode at " << node << endl; + // Alternatively: delete node; + leafPool.destroy(node); + } + + // Returns a pointer to a fresh inner node. + InnerNode* new_inner_node() { + InnerNode* result; + // Alternatively: result= new InnerNode(); + result= innerPool.construct(); + //cout << "New InnerNode at " << result << endl; + return result; + } + + // Frees an inner node previously allocated with new_inner_node() + void delete_inner_node(InnerNode* node) { + assert( node->type == NODE_INNER ); + //cout << "Deleting InnerNode at " << node << endl; + // Alternatively: delete node; + innerPool.destroy(node); + } + + // Data type returned by the private insertion methods. 
+ struct InsertionResult { + KEY key; + void* left; + void* right; + }; + + // Returns the position where 'key' should be inserted in a leaf node + // that has the given keys. + static unsigned leaf_position_for(const KEY& key, const KEY* keys, + unsigned num_keys) { + // Simple linear search. Faster for small values of N or M + unsigned k= 0; + while((k < num_keys) && (keys[k]<key)) { + ++k; + } + return k; + /* + // Binary search. It is faster when N or M is > 100, + // but the cost of the division renders it useless + // for smaller values of N or M. + XXX--- needs to be re-checked because the linear search + has changed since the last update to the following ---XXX + int left= -1, right= num_keys, middle; + while( right -left > 1 ) { + middle= (left+right)/2; + if( keys[middle] < key) { + left= middle; + } else { + right= middle; + } + } + //assert( right == k ); + return unsigned(right); + */ + } + + // Returns the position where 'key' should be inserted in an inner node + // that has the given keys. + static inline unsigned inner_position_for(const KEY& key, const KEY* keys, + unsigned num_keys) { + // Simple linear search. Faster for small values of N or M + unsigned k= 0; + while((k < num_keys) && ((keys[k]<key) || (keys[k]==key))) { + ++k; + } + return k; + // Binary search is faster when N or M is > 100, + // but the cost of the division renders it useless + // for smaller values of N or M. + } + + bool leaf_insert(LeafNode* node, KEY& key, + VALUE& value, InsertionResult* result) { + assert( node->type == NODE_LEAF ); + assert( node->num_keys <= M ); + bool was_split= false; + // Simple linear search + unsigned i= leaf_position_for(key, node->keys, node->num_keys); + if( node->num_keys == M ) { + // The node was full. 
We must split it + unsigned treshold= (M+1)/2; + LeafNode* new_sibling= new_leaf_node(); + new_sibling->num_keys= node->num_keys -treshold; + for(unsigned j=0; j < new_sibling->num_keys; ++j) { + new_sibling->keys[j]= node->keys[treshold+j]; + new_sibling->values[j]= + node->values[treshold+j]; + } + node->num_keys= treshold; + if( i < treshold ) { + // Inserted element goes to left sibling + leaf_insert_nonfull(node, key, value, i); + } else { + // Inserted element goes to right sibling + leaf_insert_nonfull(new_sibling, key, value, + i-treshold); + } + // Notify the parent about the split + was_split= true; + result->key= new_sibling->keys[0]; + result->left= node; + result->right= new_sibling; + } else { + // The node was not full + leaf_insert_nonfull(node, key, value, i); + } + return was_split; + } + + static void leaf_insert_nonfull(LeafNode* node, KEY& key, VALUE& value, + unsigned index) { + assert( node->type == NODE_LEAF ); + assert( node->num_keys < M ); + assert( index <= M ); + assert( index <= node->num_keys ); + if( (index < M) && + (node->keys[index] == key) ) { + // We are inserting a duplicate value. + // Simply overwrite the old one + node->values[index]= value; + } else { + // The key we are inserting is unique + for(unsigned i=node->num_keys; i > index; --i) { + node->keys[i]= node->keys[i-1]; + node->values[i]= node->values[i-1]; + } + node->num_keys++; + node->keys[index]= key; + node->values[index]= value; + } + } + + bool inner_insert(InnerNode* node, unsigned current_depth, KEY& key, + VALUE& value, InsertionResult* result) { + assert( node->type == NODE_INNER ); + assert( current_depth != 0 ); + // Early split if node is full. + // This is not the canonical algorithm for B+ trees, + // but it is simpler and does not break the definition. 
+ bool was_split= false; + if( node->num_keys == N ) { + // Split + unsigned treshold= (N+1)/2; + InnerNode* new_sibling= new_inner_node(); + new_sibling->num_keys= node->num_keys -treshold; + for(unsigned i=0; i < new_sibling->num_keys; ++i) { + new_sibling->keys[i]= node->keys[treshold+i]; + new_sibling->children[i]= + node->children[treshold+i]; + } + new_sibling->children[new_sibling->num_keys]= + node->children[node->num_keys]; + node->num_keys= treshold-1; + // Set up the return variable + was_split= true; + result->key= node->keys[treshold-1]; + result->left= node; + result->right= new_sibling; + // Now insert in the appropriate sibling + if( key < result->key ) { + inner_insert_nonfull(node, current_depth, key, value); + } else { + inner_insert_nonfull(new_sibling, current_depth, key, + value); + } + } else { + // No split + inner_insert_nonfull(node, current_depth, key, value); + } + return was_split; + } + + void inner_insert_nonfull(InnerNode* node, unsigned current_depth, KEY& key, + VALUE& value) { + assert( node->type == NODE_INNER ); + assert( node->num_keys < N ); + assert( current_depth != 0 ); + // Simple linear search + unsigned index= inner_position_for(key, node->keys, + node->num_keys); + // GCC warns that this may be used uninitialized, even though that is untrue. 
+ InsertionResult result = { KEY(), 0, 0 }; + bool was_split; + if( current_depth-1 == 0 ) { + // The children are leaf nodes + for(unsigned kk=0; kk < node->num_keys+1; ++kk) { + assert( *reinterpret_cast<NodeType*> + (node->children[kk]) == NODE_LEAF ); + } + was_split= leaf_insert(reinterpret_cast<LeafNode*> + (node->children[index]), key, value, &result); + } else { + // The children are inner nodes + for(unsigned kk=0; kk < node->num_keys+1; ++kk) { + assert( *reinterpret_cast<NodeType*> + (node->children[kk]) == NODE_INNER ); + } + InnerNode* child= reinterpret_cast<InnerNode*> + (node->children[index]); + was_split= inner_insert( child, current_depth-1, key, value, + &result); + } + if( was_split ) { + if( index == node->num_keys ) { + // Insertion at the rightmost key + node->keys[index]= result.key; + node->children[index]= result.left; + node->children[index+1]= result.right; + node->num_keys++; + } else { + // Insertion not at the rightmost key + node->children[node->num_keys+1]= + node->children[node->num_keys]; + for(unsigned i=node->num_keys; i!=index; --i) { + node->children[i]= node->children[i-1]; + node->keys[i]= node->keys[i-1]; + } + node->children[index]= result.left; + node->children[index+1]= result.right; + node->keys[index]= result.key; + node->num_keys++; + } + } // else the current node is not affected + } + + typedef AlignedMemoryAllocator<NODE_ALIGNMENT> AlignedAllocator; + + // Node memory allocators. IMPORTANT NOTE: they must be declared + // before the root to make sure that they are properly initialised + // before being used to allocate any node. + boost::object_pool<InnerNode, AlignedAllocator> innerPool; + boost::object_pool<LeafNode, AlignedAllocator> leafPool; + // Depth of the tree. A tree of depth 0 only has a leaf node. + unsigned depth; + // Pointer to the root node. It may be a leaf or an inner node, but + // it is never null. 
+ void* root; +}; + +#endif // !defined BPLUSTREE_HPP_227824 Added: ydb/trunk/src/tpcc/clock.cc =================================================================== --- ydb/trunk/src/tpcc/clock.cc (rev 0) +++ ydb/trunk/src/tpcc/clock.cc 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,58 @@ +#include "clock.h" + +#include <sys/time.h> + +#include <cstdio> +#include <ctime> + +#include "assert.h" + +// Fills output with the base-10 ASCII representation of value, using digits digits. +static char* makeInt(char* output, int value, int digits) { + char* last = output + digits; + char* next = last; + for (int i = 0; i < digits; ++i) { + int digit = value % 10; + value = value / 10; + next -= 1; + *next = static_cast<char>('0' + digit); + } + assert(value == 0); + return last; +} + +void SystemClock::getDateTimestamp(char* now) { + // Get the system time. Convert it to local time + time_t seconds_since_epoch = time(NULL); + assert(seconds_since_epoch != -1); + + struct tm local_calendar; + struct tm* result = localtime_r(&seconds_since_epoch, &local_calendar); + ASSERT(result == &local_calendar); + + // Format the time + // strftime is slow: it ends up consulting timezone info + // snprintf is also slow, since it needs to parse the input string. This is significantly + // faster, saving ~10% of the run time. 
+ //~ int bytes = snprintf(now, DATETIME_SIZE+1, "%04d%02d%02d%02d%02d%02d", + //~ local_calendar.tm_year+1900, local_calendar.tm_mon+1, local_calendar.tm_mday, + //~ local_calendar.tm_hour, local_calendar.tm_min, local_calendar.tm_sec); + //~ int bytes = strftime(now, DATETIME_SIZE+1, "%Y%m%d%H%M%S", &broken_down_local_time); + char* next = makeInt(now, local_calendar.tm_year+1900, 4); + next = makeInt(next, local_calendar.tm_mon+1, 2); + next = makeInt(next, local_calendar.tm_mday, 2); + next = makeInt(next, local_calendar.tm_hour, 2); + next = makeInt(next, local_calendar.tm_min, 2); + next = makeInt(next, local_calendar.tm_sec, 2); + *next = '\0'; + assert(next == now + DATETIME_SIZE); +} + +int64_t SystemClock::getMicroseconds() { + struct timeval time; + int error = gettimeofday(&time, NULL); + ASSERT(error == 0); + int64_t result = time.tv_sec * 1000000; + result += time.tv_usec; + return result; +} Added: ydb/trunk/src/tpcc/clock.h =================================================================== --- ydb/trunk/src/tpcc/clock.h (rev 0) +++ ydb/trunk/src/tpcc/clock.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,28 @@ +#ifndef CLOCK_H__ +#define CLOCK_H__ + +//~ #include <cstdint> +#include <stdint.h> + +// Interface to the real time system clock. +class Clock { +public: + virtual ~Clock() {} + + static const int DATETIME_SIZE = 14; + + // now must have at least DATETIME_SIZE+1 bytes. + virtual void getDateTimestamp(char* now) = 0; + + // Returns the number of microseconds since the epoch. + virtual int64_t getMicroseconds() = 0; +}; + +// Uses gettimeofday. 
+class SystemClock : public Clock { +public: + virtual void getDateTimestamp(char* now); + virtual int64_t getMicroseconds(); +}; + +#endif Added: ydb/trunk/src/tpcc/randomgenerator.cc =================================================================== --- ydb/trunk/src/tpcc/randomgenerator.cc (rev 0) +++ ydb/trunk/src/tpcc/randomgenerator.cc 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,178 @@ +#include "randomgenerator.h" + +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <ctime> + +#include "assert.h" + +NURandC NURandC::makeRandom(RandomGenerator* generator) { + NURandC c; + c.c_last_ = generator->number(0, 255); + c.c_id_ = generator->number(0, 1023); + c.ol_i_id_ = generator->number(0, 8191); + return c; +} + +// Returns true if the C-Run value is valid. See TPC-C 2.1.6.1 (page 20). +static bool validCRun(int cRun, int cLoad) { + int cDelta = abs(cRun - cLoad); + return 65 <= cDelta && cDelta <= 119 && cDelta != 96 && cDelta != 112; +} + +NURandC NURandC::makeRandomForRun(RandomGenerator* generator, const NURandC& c_load) { + NURandC c = makeRandom(generator); + + while (!validCRun(c.c_last_, c_load.c_last_)) { + c.c_last_ = generator->number(0, 255); + } + ASSERT(validCRun(c.c_last_, c_load.c_last_)); + + return c; +} + +int RandomGenerator::numberExcluding(int lower, int upper, int excluding) { + ASSERT(lower < upper); + ASSERT(lower <= excluding && excluding <= upper); + + // Generate 1 less number than the range + int num = number(lower, upper-1); + + // Adjust the numbers to remove excluding + if (num >= excluding) { + num += 1; + } + ASSERT(lower <= num && num <= upper && num != excluding); + return num; +} + +static void generateString(RandomGenerator* generator, char* s, int lower_length, int upper_length, + char base_character, int num_characters) { + int length = generator->number(lower_length, upper_length); + for (int i = 0; i < length; ++i) { + s[i] = static_cast<char>(base_character + 
generator->number(0, num_characters-1)); + } + s[length] = '\0'; +} + +void RandomGenerator::astring(char* s, int lower_length, int upper_length) { + generateString(this, s, lower_length, upper_length, 'a', 26); +} + +void RandomGenerator::nstring(char* s, int lower_length, int upper_length) { + generateString(this, s, lower_length, upper_length, '0', 10); +} + +void RandomGenerator::lastName(char* c_last, int max_cid) { + makeLastName(NURand(255, 0, std::min(999, max_cid-1)), c_last); +} + +float RandomGenerator::fixedPoint(int digits, float lower, float upper) { + int multiplier = 1; + for (int i = 0; i < digits; ++i) { + multiplier *= 10; + } + + int int_lower = static_cast<int>(lower * static_cast<double>(multiplier) + 0.5); + int int_upper = static_cast<int>(upper * static_cast<double>(multiplier) + 0.5); + return (float) number(int_lower, int_upper) / (float) multiplier; +} + +int RandomGenerator::NURand(int A, int x, int y) { + int C = 0; + switch(A) { + case 255: + C = c_values_.c_last_; + break; + case 1023: + C = c_values_.c_id_; + break; + case 8191: + C = c_values_.ol_i_id_; + break; + default: + fprintf(stderr, "Error: NURand: A = %d not supported\n", A); + exit(1); + } + return (((number(0, A) | number(x, y)) + C) % (y - x + 1)) + x; +} + +int* RandomGenerator::makePermutation(int lower, int upper) { + // initialize with consecutive values + int* array = new int[upper - lower + 1]; + for (int i = 0; i <= upper - lower; ++i) { + array[i] = lower + i; + } + + for (int i = 0; i < upper - lower; ++i) { + // choose a value to go into this position, including this position + int index = number(i, upper - lower); + int temp = array[i]; + array[i] = array[index]; + array[index] = temp; + } + + return array; +} + +// Defined by TPC-C 4.3.2.3. 
+void makeLastName(int num, char* name) { + static const char* const SYLLABLES[] = { + "BAR", "OUGHT", "ABLE", "PRI", "PRES", "ESE", "ANTI", "CALLY", "ATION", "EING", }; + static const int LENGTHS[] = { 3, 5, 4, 3, 4, 3, 4, 5, 5, 4, }; + + ASSERT(0 <= num && num <= 999); + int indicies[] = { num/100, (num/10)%10, num%10 }; + + int offset = 0; + for (int i = 0; i < sizeof(indicies)/sizeof(*indicies); ++i) { + ASSERT(strlen(SYLLABLES[indicies[i]]) == LENGTHS[indicies[i]]); + memcpy(name + offset, SYLLABLES[indicies[i]], LENGTHS[indicies[i]]); + offset += LENGTHS[indicies[i]]; + } + name[offset] = '\0'; +} + +RealRandomGenerator::RealRandomGenerator() { +#ifdef HAVE_RANDOM_R + // Set the random state to zeros. glibc will attempt to access the old state if not NULL. + memset(&state, 0, sizeof(state)); + int result = initstate_r(static_cast<unsigned int>(time(NULL)), state_array, + sizeof(state_array), &state); + ASSERT(result == 0); +#else + seed(time(NULL)); +#endif +} + +int RealRandomGenerator::number(int lower, int upper) { + int rand_int; +#ifdef HAVE_RANDOM_R + int error = random_r(&state, &rand_int); + ASSERT(error == 0); +#else + rand_int = nrand48(state); +#endif + ASSERT(0 <= rand_int && rand_int <= RAND_MAX); + + // Select a number in [0, range_size-1] + int range_size = upper - lower + 1; + rand_int %= range_size; + ASSERT(0 <= rand_int && rand_int < range_size); + + // Shift the range to [lower, upper] + rand_int += lower; + ASSERT(lower <= rand_int && rand_int <= upper); + return rand_int; +} + +void RealRandomGenerator::seed(unsigned int seed) { +#ifdef HAVE_RANDOM_R + int error = srandom_r(seed, &state); + ASSERT(error == 0); +#else + memcpy(state, &seed, std::min(sizeof(seed), sizeof(state))); +#endif +} Added: ydb/trunk/src/tpcc/randomgenerator.h =================================================================== --- ydb/trunk/src/tpcc/randomgenerator.h (rev 0) +++ ydb/trunk/src/tpcc/randomgenerator.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,102 
@@ +#ifndef RANDOMGENERATOR_H__ +#define RANDOMGENERATOR_H__ + +#include <cstdlib> // for struct random_data + +#ifdef __linux__ +#define HAVE_RANDOM_R +#endif + +class RandomGenerator; + +// Constant C values for the NURand function. +struct NURandC { + NURandC() : c_last_(0), c_id_(0), ol_i_id_(0) {} + + int c_last_; + int c_id_; + int ol_i_id_; + + // Sets the fields randomly. + static NURandC makeRandom(RandomGenerator* generator); + + // Sets the fields randomly, in a fashion acceptable for a test run. c_load is the value of + // c_last_ that was used to generate the tables. See TPC-C 2.1.6.1. (page 20). + static NURandC makeRandomForRun(RandomGenerator* generator, const NURandC& c_load); +}; + +class RandomGenerator { +public: + RandomGenerator() : c_values_(NURandC()) {} + virtual ~RandomGenerator() {} + + // Return a random integer in the range [lower, upper]. The range is inclusive. + virtual int number(int lower, int upper) = 0; + + // Return a random integer in the range [lower, upper] excluding excluded. The range is + // inclusive. + int numberExcluding(int lower, int upper, int excluding); + + void astring(char* s, int lower_length, int upper_length); + void nstring(char* s, int lower_length, int upper_length); + + // Fill name with a random last name, generated according to TPC-C rules. Limits the customer + // id for the generated name to cid. + void lastName(char* name, int max_cid); + + float fixedPoint(int digits, float lower, float upper); + + // Non-uniform random number function from TPC-C 2.1.6. (page 20). + int NURand(int A, int x, int y); + + int* makePermutation(int lower, int upper); + + void setC(const NURandC& c) { + c_values_ = c; + } + +private: + NURandC c_values_; +}; + +// A mock RandomGenerator for unit testing. 
+class MockRandomGenerator : public RandomGenerator { +public: + MockRandomGenerator() : minimum_(true) {} + + virtual int number(int lower, int upper) { + if (minimum_) return lower; + else return upper; + } + + bool minimum_; +}; + +static const int MAX_LAST_NAME = 16; + +// Generate a last name as defined by TPC-C 4.3.2.3. name must be at least MAX_LAST_NAME+1 bytes. +void makeLastName(int num, char* name); + +// A real RandomGenerator that uses random_r. +class RealRandomGenerator : public RandomGenerator { +public: + // Seeds the generator with the current time. + RealRandomGenerator(); + + virtual int number(int lower, int upper); + + // Seed the generator with seed. + void seed(unsigned int seed); + +private: +#ifdef HAVE_RANDOM_R + // man random says optimal sizes are 8, 32, 64, 128, 256 bytes + static const int RANDOM_STATE_SIZE = 64; + char state_array[RANDOM_STATE_SIZE]; + struct random_data state; +#else + unsigned short state[3]; +#endif +}; + +#endif Added: ydb/trunk/src/tpcc/stlutil.h =================================================================== --- ydb/trunk/src/tpcc/stlutil.h (rev 0) +++ ydb/trunk/src/tpcc/stlutil.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,24 @@ +#ifndef STLUTIL_H__ +#define STLUTIL_H__ + +// Deletes all elements in STL container. +template <typename T> +static void STLDeleteElements(T* container) { + const typename T::iterator end = container->end(); + for (typename T::iterator i = container->begin(); i != end; ++i) { + delete *i; + } + container->clear(); +}; + +// Deletes all values (iterator->second) in STL container. 
+template <typename T> +static void STLDeleteValues(T* container) { + const typename T::iterator end = container->end(); + for (typename T::iterator i = container->begin(); i != end; ++i) { + delete i->second; + } + container->clear(); +}; + +#endif Added: ydb/trunk/src/tpcc/tpcc.cc =================================================================== --- ydb/trunk/src/tpcc/tpcc.cc (rev 0) +++ ydb/trunk/src/tpcc/tpcc.cc 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,72 @@ +#define __STDC_FORMAT_MACROS +#include <climits> +#include <inttypes.h> + +#include "clock.h" +#include "randomgenerator.h" +#include "tpccclient.h" +#include "tpccgenerator.h" +#include "tpcctables.h" + + +int main(int argc, const char* argv[]) { + if (argc != 2) { + fprintf(stderr, "tpcc [num warehouses]\n"); + exit(1); + } + + long num_warehouses = strtol(argv[1], NULL, 10); + if (num_warehouses == LONG_MIN || num_warehouses == LONG_MAX) { + fprintf(stderr, "Bad warehouse number (%s)\n", argv[1]); + exit(1); + } + if (num_warehouses <= 0) { + fprintf(stderr, "Number of warehouses must be > 0 (was %ld)\n", num_warehouses); + exit(1); + } + if (num_warehouses > Warehouse::MAX_WAREHOUSE_ID) { + fprintf(stderr, "Number of warehouses must be <= %d (was %ld)\n", Warehouse::MAX_WAREHOUSE_ID, num_warehouses); + exit(1); + } + + TPCCTables* tables = new TPCCTables(); + SystemClock* clock = new SystemClock(); + + // Create a generator for filling the database. + RealRandomGenerator* random = new RealRandomGenerator(); + NURandC cLoad = NURandC::makeRandom(random); + random->setC(cLoad); + + // Generate the data + printf("Loading %ld warehouses... 
", num_warehouses); + fflush(stdout); + char now[Clock::DATETIME_SIZE+1]; + clock->getDateTimestamp(now); + TPCCGenerator generator(random, now, Item::NUM_ITEMS, District::NUM_PER_WAREHOUSE, + Customer::NUM_PER_DISTRICT, NewOrder::INITIAL_NUM_PER_DISTRICT); + int64_t begin = clock->getMicroseconds(); + generator.makeItemsTable(tables); + for (int i = 0; i < num_warehouses; ++i) { + generator.makeWarehouse(tables, i+1); + } + int64_t end = clock->getMicroseconds(); + printf("%"PRId64" ms\n", (end-begin)/1000); + + // Change the constants for run + random = new RealRandomGenerator(); + random->setC(NURandC::makeRandomForRun(random, cLoad)); + + // Client owns all the parameters + TPCCClient client(clock, random, tables, Item::NUM_ITEMS, static_cast<int>(num_warehouses), + District::NUM_PER_WAREHOUSE, Customer::NUM_PER_DISTRICT); + printf("Running... "); + fflush(stdout); + begin = clock->getMicroseconds(); + for (int i = 0; i < 200000; ++i) { + client.doOne(); + } + end = clock->getMicroseconds(); + printf("%"PRId64" ms\n", (end-begin)/1000); + + return 0; +} Added: ydb/trunk/src/tpcc/tpccclient.cc =================================================================== --- ydb/trunk/src/tpcc/tpccclient.cc (rev 0) +++ ydb/trunk/src/tpcc/tpccclient.cc 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,175 @@ +#include "tpccclient.h" + +#include <cstdio> +#include <vector> + +#include "assert.h" +#include "clock.h" +#include "randomgenerator.h" +#include "tpccdb.h" + +using std::vector; + +// Non-integral constants must be defined in a .cc file. Needed for Mac OS X. 
+// http://www.research.att.com/~bs/bs_faq2.html#in-class +const float TPCCClient::MIN_PAYMENT_AMOUNT; +const float TPCCClient::MAX_PAYMENT_AMOUNT; + +TPCCClient::TPCCClient(Clock* clock, RandomGenerator* generator, TPCCDB* db, int num_items, + int num_warehouses, int districts_per_warehouse, int customers_per_district) : + clock_(clock), + generator_(generator), + db_(db), + num_items_(num_items), + num_warehouses_(num_warehouses), + districts_per_warehouse_(districts_per_warehouse), + customers_per_district_(customers_per_district) { + ASSERT(clock_ != NULL); + ASSERT(generator_ != NULL); + ASSERT(db_ != NULL); + ASSERT(1 <= num_items_ && num_items_ <= Item::NUM_ITEMS); + ASSERT(1 <= num_warehouses_ && num_warehouses_ <= Warehouse::MAX_WAREHOUSE_ID); + ASSERT(1 <= districts_per_warehouse_ && + districts_per_warehouse_ <= District::NUM_PER_WAREHOUSE); + ASSERT(1 <= customers_per_district_ && customers_per_district_ <= Customer::NUM_PER_DISTRICT); +} + +TPCCClient::~TPCCClient() { + delete clock_; + delete generator_; + delete db_; +} + +void TPCCClient::doStockLevel() { + int32_t threshold = generator_->number(MIN_STOCK_LEVEL_THRESHOLD, MAX_STOCK_LEVEL_THRESHOLD); + int result = db_->stockLevel(generateWarehouse(), generateDistrict(), threshold); + ASSERT(result >= 0); +} + +void TPCCClient::doOrderStatus() { + OrderStatusOutput output; + int y = generator_->number(1, 100); + if (y <= 60) { + // 60%: order status by last name + char c_last[Customer::MAX_LAST+1]; + generator_->lastName(c_last, customers_per_district_); + db_->orderStatus(generateWarehouse(), generateDistrict(), c_last, &output); + } else { + // 40%: order status by id + ASSERT(y > 60); + db_->orderStatus(generateWarehouse(), generateDistrict(), generateCID(), &output); + } +} + +void TPCCClient::doDelivery() { + int carrier = generator_->number(Order::MIN_CARRIER_ID, Order::MAX_CARRIER_ID); + char now[Clock::DATETIME_SIZE+1]; + clock_->getDateTimestamp(now); + + vector<DeliveryOrderInfo> orders; + 
db_->delivery(generateWarehouse(), carrier, now, &orders); + if (orders.size() != District::NUM_PER_WAREHOUSE) { + printf("Only delivered from %zd districts\n", orders.size()); + } +} + +void TPCCClient::doPayment() { + PaymentOutput output; + int x = generator_->number(1, 100); + int y = generator_->number(1, 100); + + int32_t w_id = generateWarehouse(); + int32_t d_id = generateDistrict(); + + int32_t c_w_id; + int32_t c_d_id; + if (num_warehouses_ == 1 || x <= 85) { + // 85%: paying through own warehouse (or there is only 1 warehouse) + c_w_id = w_id; + c_d_id = d_id; + } else { + // 15%: paying through another warehouse: + // select in range [1, num_warehouses] excluding w_id + c_w_id = generator_->numberExcluding(1, num_warehouses_, w_id); + ASSERT(c_w_id != w_id); + c_d_id = generateDistrict(); + } + float h_amount = generator_->fixedPoint(2, MIN_PAYMENT_AMOUNT, MAX_PAYMENT_AMOUNT); + + char now[Clock::DATETIME_SIZE+1]; + clock_->getDateTimestamp(now); + if (y <= 60) { + // 60%: payment by last name + char c_last[Customer::MAX_LAST+1]; + generator_->lastName(c_last, customers_per_district_); + db_->payment(w_id, d_id, c_w_id, c_d_id, c_last, h_amount, now, &output); + } else { + // 40%: payment by id + ASSERT(y > 60); + db_->payment(w_id, d_id, c_w_id, c_d_id, generateCID(), h_amount, now, &output); + } +} + +void TPCCClient::doNewOrder() { + int32_t w_id = generateWarehouse(); + int ol_cnt = generator_->number(Order::MIN_OL_CNT, Order::MAX_OL_CNT); + + // 1% of transactions roll back + bool rollback = generator_->number(1, 100) == 1; + + vector<NewOrderItem> items(ol_cnt); + for (int i = 0; i < ol_cnt; ++i) { + if (rollback && i+1 == ol_cnt) { + items[i].i_id = Item::NUM_ITEMS + 1; + } else { + items[i].i_id = generateItemID(); + } + + bool remote = generator_->number(1, 100) == 1; + if (num_warehouses_ > 1 && remote) { + items[i].ol_supply_w_id = generator_->numberExcluding(1, num_warehouses_, w_id); + } else { + items[i].ol_supply_w_id = w_id; + } + 
items[i].ol_quantity = generator_->number(1, MAX_OL_QUANTITY); + } + + char now[Clock::DATETIME_SIZE+1]; + clock_->getDateTimestamp(now); + NewOrderOutput output; + db_->newOrder(w_id, generateDistrict(), generateCID(), items, now, &output); +} + +void TPCCClient::doOne() { + // This is not strictly accurate: The requirement is for certain *minimum* percentages to be + // maintained. This is close to the right thing, but not precisely correct. + // See TPC-C 5.2.4 (page 68). + int x = generator_->number(1, 100); + if (x <= 4) { // 4% + doStockLevel(); + } else if (x <= 8) { // 4% + doDelivery(); + } else if (x <= 12) { // 4% + doOrderStatus(); + } else if (x <= 12+43) { // 43% + doPayment(); + } else { // 45% + ASSERT(x > 100-45); + doNewOrder(); + } +} + +int32_t TPCCClient::generateWarehouse() { + return generator_->number(1, num_warehouses_); +} + +int32_t TPCCClient::generateDistrict() { + return generator_->number(1, districts_per_warehouse_); +} +int32_t TPCCClient::generateCID() { + return generator_->NURand(1023, 1, customers_per_district_); +} + +int32_t TPCCClient::generateItemID() { + return generator_->NURand(8191, 1, num_items_); +} Added: ydb/trunk/src/tpcc/tpccclient.h =================================================================== --- ydb/trunk/src/tpcc/tpccclient.h (rev 0) +++ ydb/trunk/src/tpcc/tpccclient.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,51 @@ +#ifndef TPCCCLIENT_H__ +#define TPCCCLIENT_H__ + +//~ #include <cstdint> +#include <stdint.h> + +class Clock; +class RandomGenerator; +class TPCCDB; + +// Generates transactions according to the TPC-C specification. This ignores the fact that +// terminals have a fixed w_id, d_id, and that requests should be made after a minimum keying time +// and a think time. +class TPCCClient { +public: + // Owns clock, generator and db. 
+ TPCCClient(Clock* clock, RandomGenerator* generator, TPCCDB* db, int num_items, + int num_warehouses, int districts_per_warehouse, int customers_per_district); + ~TPCCClient(); + + void doStockLevel(); + void doOrderStatus(); + void doDelivery(); + void doPayment(); + void doNewOrder(); + + void doOne(); + + static const int32_t MIN_STOCK_LEVEL_THRESHOLD = 10; + static const int32_t MAX_STOCK_LEVEL_THRESHOLD = 20; + // TODO: Should these constants be part of tpccdb.h? + static const float MIN_PAYMENT_AMOUNT = 1.00; + static const float MAX_PAYMENT_AMOUNT = 5000.00; + static const int32_t MAX_OL_QUANTITY = 10; + +private: + int32_t generateWarehouse(); + int32_t generateDistrict(); + int32_t generateCID(); + int32_t generateItemID(); + + Clock* clock_; + RandomGenerator* generator_; + TPCCDB* db_; + int num_items_; + int num_warehouses_; + int districts_per_warehouse_; + int customers_per_district_; +}; + +#endif Added: ydb/trunk/src/tpcc/tpccdb.cc =================================================================== --- ydb/trunk/src/tpcc/tpccdb.cc (rev 0) +++ ydb/trunk/src/tpcc/tpccdb.cc 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,23 @@ +#include "tpccdb.h" + +// Non-integral constants must be defined in a .cc file. Needed for Mac OS X. 
+// http://www.research.att.com/~bs/bs_faq2.html#in-class +const float Item::MIN_PRICE; +const float Item::MAX_PRICE; +const float Warehouse::MIN_TAX; +const float Warehouse::MAX_TAX; +const float Warehouse::INITIAL_YTD; +const float District::MIN_TAX; +const float District::MAX_TAX; +const float District::INITIAL_YTD; // different from Warehouse +const float Customer::MIN_DISCOUNT; +const float Customer::MAX_DISCOUNT; +const float Customer::INITIAL_BALANCE; +const float Customer::INITIAL_CREDIT_LIM; +const float Customer::INITIAL_YTD_PAYMENT; +const char Customer::GOOD_CREDIT[] = "GC"; +const char Customer::BAD_CREDIT[] = "BC"; +const float OrderLine::MIN_AMOUNT; +const float OrderLine::MAX_AMOUNT; +const char NewOrderOutput::INVALID_ITEM_STATUS[] = "Item number is not valid"; +const float History::INITIAL_AMOUNT; Added: ydb/trunk/src/tpcc/tpccdb.h =================================================================== --- ydb/trunk/src/tpcc/tpccdb.h (rev 0) +++ ydb/trunk/src/tpcc/tpccdb.h 2009-03-12 19:34:50 UTC (rev 1285) @@ -0,0 +1,341 @@ +#ifndef TPCCDB_H__ +#define TPCCDB_H__ + +#include <stdint.h> +#include <vector> + +struct Item { + static const int MIN_IM = 1; + static const int MAX_IM = 10000; + static const float MIN_PRICE = 1.00; + static const float MAX_PRICE = 100.00; + static const int MIN_NAME = 14; + static const int MAX_NAME = 24; + static const int MIN_DATA = 26; + static const int MAX_DATA = 50; + static const int NUM_ITEMS = 100000; + + int32_t i_id; + int32_t i_im_id; + float i_price; + char i_name[MAX_NAME+1]; + char i_data[MAX_DATA+1]; +}; + +struct Warehouse { + static const float MIN_TAX = 0; + static const float MAX_TAX = 0.2000f; + static const float INITIAL_YTD = 300000.00f; + static const int MIN_NAME = 6; + static const int MAX_NAME = 10; + static const int MIN_STREET = 10; + static const int MAX_STREET = 20; + static const int MIN_CITY = 10; + static const int MAX_CITY = 20; + static const int STATE = 2; + static const int ZIP = 9; + // 
TPC-C 1.3.1 (page 11) requires 2*W. This permits testing up to 50 warehouses. This is an + // arbitrary limit created to pack ids into integers. + static const int MAX_WAREHOUSE_ID = 100; + + int32_t w_id; + float w_tax; + float w_ytd; + char w_name[MAX_NAME+1]; + char w_street_1[MAX_STREET+1]; + char w_street_2[MAX_STREET+1]; + char w_city[MAX_CITY+1]; + char w_state[STATE+1]; + char w_zip[ZIP+1]; +}; + +struct District { + static const float MIN_TAX = 0; + static const float MAX_TAX = 0.2000f; + static const float INITIAL_YTD = 30000.00; // different from Warehouse + static const int INITIAL_NEXT_O_ID = 3001; + static const int MIN_NAME = 6; + static const int MAX_NAME = 10; + static const int MIN_STREET = 10; + static const int MAX_STREET = 20; + static const int MIN_CITY = 10; + static const int MAX_CITY = 20; + static const int STATE = 2; + static const int ZIP = 9; + static const int NUM_PER_WAREHOUSE = 10; + + int32_t d_id; + int32_t d_w_id; + float d_tax; + float d_ytd; + int32_t d_next_o_id; + char d_name[MAX_NAME+1]; + char d_street_1[MAX_STREET+1]; + char d_street_2[MAX_STREET+1]; + char d_city[MAX_CITY+1]; + char d_state[STATE+1]; + char d_zip[ZIP+1]; +}; + +struct Stock { + static const int MIN_QUANTITY = 10; + static const int MAX_QUANTITY = 100; + static const int DIST = 24; + static const int MIN_DATA = 26; + static const int MAX_DATA = 50; + static const int NUM_STOCK_PER_WAREHOUSE = 100000; + + int32_t s_i_id; + int32_t s_w_id; + int32_t s_quantity; + int32_t s_ytd; + int32_t s_order_cnt; + int32_t s_remote_cnt; + char s_dist[District::NUM_PER_WAREHOUSE][DIST+1]; + char s_data[MAX_DATA+1]; +}; + +// YYYY-MM-DD HH:MM:SS This is supposed to be a date/time field from Jan 1st 1900 - +// Dec 31st 2100 with a resolution of 1 second. See TPC-C 1.3.1. 
+static const int DATETIME_SIZE = 14; + +struct Customer { + static const float INITIAL_CREDIT_LIM = 50000.00; + static const float MIN_DISCOUNT = 0.0000; + static const float MAX_DISCOUNT = 0.5000; + static const float INITIAL_BALANCE = -10.00; + static const float INITIAL_YTD_PAYMENT = 10.00; + static const int INITIAL_PAYMENT_CNT = 1; + static const int INITIAL_DELIVERY_CNT = 0; + static const int MIN_FIRST = 6; + static const int MAX_FIRST = 10; + static const int MIDDLE = 2; + static const int MAX_LAST = 16; + static const int MIN_STREET = 10; + static const int MAX_STREET = 20; + static const int MIN_CITY = 10; + static const int MAX_CITY = 20; + static const int STATE = 2; + static const int ZIP = 9; + static const int PHONE = 16; + static const int CREDIT = 2; + static const int MIN_DATA = 300; + static const int MAX_DATA = 500; + static const int NUM_PER_DISTRICT = 3000; + static const char GOOD_CREDIT[]; + static const char BAD_CREDIT[]; + + int32_t c_id; + int32_t c_d_id; + int32_t c_w_id; + float c_credit_lim; + float c_discount; + float c_balance; + float c_ytd_payment; + int32_t c_payment_cnt; + int32_t c_delivery_cnt; + char c_first[MAX_FIRST+1]; + char c_middle[MIDDLE+1]; + char c_last[MAX_LAST+1]; + char c_street_1[MAX_STREET+1]; + char c_street_2[MAX_STREET+1]; + char c_city[MAX_CITY+1]; + char c_state[STATE+1]; + char c_zip[ZIP+1]; + char c_phone[PHONE+1]; + char c_since[DATETIME_SIZE+1]; + char c_credit[CREDIT+1]; + char c_data[MAX_DATA+1]; +}; + +struct Order { + static const int MIN_CARRIER_ID = 1; + static const int MAX_CARRIER_ID = 10; + // HACK: This is not strictly correct, but it works + static const int NULL_CARRIER_ID = 0; + // Less than this value, carrier != null, >= -> carrier == null + static const int NULL_CARRIER_LOWER_BOUND = 2101; + static const int MIN_OL_CNT = 5; + static const int MAX_OL_CNT = 15; + static const int INITIAL_ALL_LOCAL = 1; + static const int INITIAL_ORDERS_PER_DISTRICT = 3000; + // See TPC-C 1.3.1 (page 15) + 
static const int MAX_ORDER_ID = 10000000; + + int32_t o_id; + int32_t o_c_id; + int32_t o_d_id; + int32_t o_w_id; + int32_t o_carrier_id; + int32_t o_ol_cnt; + int32_t o_all_local; + char o_entry_d[DATETIME_SIZE+1]; +}; + +struct OrderLine { + static const int MIN_I_ID = 1; + static const int MAX_I_ID = 100000; // Item::NUM_ITEMS + static const int INITIAL_QUANTITY = 5; + static const float MIN_AMOUNT = 0.01f; + static const float MAX_AMOUNT = 9999.99f; + + int32_t ol_o_id; + int32_t ol_d_id; + int32_t ol_w_id; + int32_t ol_number; + int32_t ol_i_id; + int32_t ol_supply_w_id; + int32_t ol_quantity; + float ol_amount; + char ol_delivery_d[DATETIME_SIZE+1]; + char ol_dist_info[Stock::DIST+1]; +}; + +struct NewOrder { + static const int INITIAL_NUM_PER_DISTRICT = 900; + + int32_t no_w_id; + int32_t no_d_id; + int32_t no_o_id; +}; + +struct History { + static const int MIN_DATA = 12; + static const int MAX_DATA = 24; + static const float INITIAL_AMOUNT = 10.00f; + + int32_t h_c_id; + int32_t h_c_d_id; + int32_t h_c_w_id; + int32_t h_d_id; + int32_t h_w_id; + float h_amount; + char h_date[DATETIME_SIZE+1]; + char h_data[MAX_DATA]; +}; + +// Data returned by the "order status" transaction. 
+struct OrderStatusOutput { + // From customer + int32_t c_id; // unclear if this needs to be returned + float c_balance; + + // From order + int32_t o_id; + int32_t o_carrier_id; + + struct OrderLineSubset { + int32_t ol_i_id; + int32_t ol_supply_w_id; + int32_t ol_quantity; + float ol_amount; + char ol_delivery_d[DATETIME_SIZE+1]; + }; + + std::vector<OrderLineSubset> lines; + + // From customer + char c_first[Customer::MAX_FIRST+1]; + char c_middle[Customer::MIDDLE+1]; + char c_last[Customer::MAX_LAST+1]; + + // From order + char o_entry_d[DATETIME_SIZE+1]; +}; + +struct NewOrderItem { + int32_t i_id; + int32_t ol_supply_w_id; + int32_t ol_quantity; +}; + +struct NewOrderOutput { + float w_tax; + float d_tax; + + // From district d_next_o_id + int32_t o_id; + + float c_discount; + + // TODO: Client can compute this from other values. + float total; + + struct ItemInfo { + static const char BRAND = 'B'; + static const char GENERIC = 'G'; + + int32_t s_quantity; + float i_price; + // TODO: Client can compute this from other values. + float ol_amount; + char brand_generic; + char i_name[Item::MAX_NAME+1]; + }; + + std::vector<ItemInfo> items; + char c_last[Customer::MAX_LAST+1]; + char c_credit[Customer::CREDIT+1]; + + static const int MAX_STATUS = 25; + static const char INVALID_ITEM_STATUS[]; + char status[MAX_STATUS+1]; +}; + +struct PaymentOutput { + // Return entire tuples since Payment requires most of the data. This returns more than + // necessary, but is easy. + Warehouse warehouse; + District district; + Customer customer; +}; + +struct DeliveryOrderInfo { + int32_t d_id; + int32_t o_id; +}; + +// Interface to the TPC-C transaction implementation. +class TPCCDB { +public: + virtual ~TPCCDB() {} + + // Executes the TPC-C "slev" transaction. From the last 20 orders, returns the number of rows in + // the STOCK table that have S_QUANTITY < threshold. See TPC-C 2.8 (page 43). 
+ virtual int stockLevel(int32_t warehouse_id, int32_t district_id, int32_t threshold) = 0; + + // Executes the TPC-C order status transaction. Find the customer's last order and check the + // delivery date of each item on the order. See TPC-C 2.6 (page 36). + virtual void orderStatus(int32_t warehouse_id, int32_t district_id, int32_t customer_id, + OrderStatusOutput* output) = 0; + + // Executes the TPC-C order status transaction. Find the customer's last order and check the + // delivery date of each item on the order. See TPC-C 2.6 (page 36). + virtual void orderStatus(int32_t warehouse_id, int32_t district_id, const char* c_last, + OrderStatusOutput* output) = 0; + + // Executes the TPC-C new order transaction. Enter the new order for customer_id into the + // database. See TPC-C 2.4 (page 27). Returns true if the transaction commits. + virtual bool newOrder(int32_t warehouse_id, int32_t district_id, int32_t customer_id, + const std::vector<NewOrderItem>& items, const char* now, + NewOrderOutput* output) = 0; + + // Executes the TPC-C payment transaction. Add h_amount to the customer's account. + // See TPC-C 2.5 (page 32). + virtual void payment(int32_t warehouse_id, int32_t district_id, int32_t c_warehouse_id, + int32_t c_district_id, int32_t customer_id, float h_amount, const char* now, + PaymentOutput* output) = 0; + + // Executes the TPC-C payment transaction. Add h_amount to the customer's account. + // See TPC-C 2.5 (page 32). + virtual void payment(int32_t warehouse_id, int32_t district_id, int32_t c_warehouse_id, + int32_t c_district_id, const char* c_last, float h_amount, const char* now, + PaymentOutput* output) = 0; + + // Executes the TPC-C delivery transaction. Delivers the oldest undelivered transaction in each + // district in warehouse_id. See TPC-C 2.7 (page 39). + virtual void deli... [truncated message content] |
From: <yan...@us...> - 2009-03-12 19:54:53
|
Revision: 1290 http://assorted.svn.sourceforge.net/assorted/?rev=1290&view=rev Author: yangzhang Date: 2009-03-12 19:54:39 +0000 (Thu, 12 Mar 2009) Log Message: ----------- using ASSERT instead of assert; using snap_map instead of fast_map Modified Paths: -------------- ydb/trunk/src/main.lzz.clamp ydb/trunk/src/tpcc/clock.cc ydb/trunk/src/tpcc/randomgenerator.cc ydb/trunk/src/tpcc/tpccclient.cc ydb/trunk/src/tpcc/tpcctables.cc Removed Paths: ------------- ydb/trunk/src/tpcc/assert.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-12 19:54:31 UTC (rev 1289) +++ ydb/trunk/src/main.lzz.clamp 2009-03-12 19:54:39 UTC (rev 1290) @@ -8,10 +8,11 @@ #include <boost/scoped_array.hpp> #include <boost/shared_ptr.hpp> #include <boost/tuple/tuple.hpp> -#include <commons/fast_map.h> +#include <commons/assert.h> #include <commons/memory.h> #include <commons/nullptr.h> #include <commons/rand.h> +#include <commons/snap_map.h> #include <commons/st/st.h> #include <commons/time.h> #include <commons/unique_ptr.h> @@ -40,7 +41,6 @@ #define ref boost::ref #define tuple boost::tuple #define make_tuple boost::make_tuple -#define unused __attribute__((unused)) using namespace boost; using namespace boost::archive; @@ -57,7 +57,7 @@ //#define map_t unordered_map //#define map_t map //#define map_t dense_hash_map -#define map_t fast_map +#define map_t snap_map typedef map_t<int, int> mii; typedef mii::value_type entry; @@ -71,7 +71,7 @@ map.set_empty_key(-1); map.set_deleted_key(-2); } -template<> void init_map(fast_map<int, int> &map) { +template<> void init_map(snap_map<int, int> &map) { map.set_empty_key(-1); map.set_deleted_key(-2); } @@ -758,6 +758,16 @@ } } +#if 0 +template<typename Types, typename RTypes> +void +process_txn_ext(mii &map, const typename Types::Txn &txn, int &seqno, + typename RTypes::Response *res, ext_map ext) +{ + response +} +#endif + /** * Process a transaction: 
update DB state (incl. seqno) and send response to * leader. @@ -947,7 +957,7 @@ ResponseBatch &resbatch = *presbatch; ser_t serbuf; char *first_start = reader.start(); - assert(first_start == rbuf.get()); + ASSERT(first_start == rbuf.get()); const size_t headerlen = sizeof(uint32_t) + sizeof(short) + sizeof(int); while (true) { uint32_t prefix = 0; @@ -1019,16 +1029,16 @@ // Swap the buffers. swap(tmp, reader.buf()); reader.reset_range(reader.buf().get() + headerlen, reader.buf().get() + headerlen + reader.unread()); - assert(tmp.get() <= first_start && first_start < tmp.end()); - assert(tmp.get() < start && start < tmp.end()); - assert(first_start < start); + ASSERT(tmp.get() <= first_start && first_start < tmp.end()); + ASSERT(tmp.get() < start && start < tmp.end()); + ASSERT(first_start < start); backlog.push(make_tuple(tmp, first_start, start)); first_start = reader.buf().get(); first_seqno = first_txn.seqno(); } // Fill up rest of the message - assert(reader.unread() + reader.rem() >= prefix + sizeof(uint32_t) - headerlen); + ASSERT(reader.unread() + reader.rem() >= prefix + sizeof(uint32_t) - headerlen); check0x(reader.accum(prefix + sizeof(uint32_t) - headerlen)); } else { // Regular transaction batch. @@ -1047,6 +1057,13 @@ const Txn &txn = t == 0 ? 
first_txn : batch.txn(t); Response *res = resbatch.add_res(); process_txn<Types, RTypes>(map, txn, seqno, res); +#if 0 + if (!sending_recovery) { + process_txn<Types, RTypes>(map, txn, seqno, res); + } else { + process_txn_ext(map, txn, seqno, res, ext); + } +#endif if (fake_exec && !Types::is_pb()) { reader.skip(txn.op_size() * Op_Size); } @@ -1132,12 +1149,12 @@ template<> recovery_t -make_recovery(const fast_map<int, int> &map, int mypos, int nnodes, int &seqno) +make_recovery(const snap_map<int, int> &map, int mypos, int nnodes, int &seqno) { const commons::array<entry> &src = map.get_table(); pair<size_t, size_t> range = recovery_range(src.size(), mypos, nnodes); size_t begin = range.first, end = range.second; - assert(end > begin); + ASSERT(end > begin); recovery_header hdr = { seqno, end - begin, src.size(), map.size() }; size_t bodylen = sizeof(entry) * hdr.count; cout << "generating recovery of " << hdr.size << " records in " @@ -1637,7 +1654,7 @@ long long before_recv = current_time_millis(); vector<st_thread_t> recovery_builders; - assert(seqno == -1); + ASSERT(seqno == -1); bool first = true; for (int i = 0; i < (multirecover ? 
init.node_size() : 1); ++i) { recovery_builders.push_back(my_spawn(lambda() { @@ -1744,16 +1761,16 @@ while (!backlog.empty()) { chunk chunk = backlog.take(); sized_array<char> &buf = chunk.get<0>(); - assert(buf.get() <= chunk.get<1>() && chunk.get<1>() < buf.end()); - assert(buf.get() < chunk.get<2>() && chunk.get<2>() < buf.end()); - assert(chunk.get<1>() < chunk.get<2>()); + ASSERT(buf.get() <= chunk.get<1>() && chunk.get<1>() < buf.end()); + ASSERT(buf.get() < chunk.get<2>() && chunk.get<2>() < buf.end()); + ASSERT(chunk.get<1>() < chunk.get<2>()); swap(buf, reader.buf()); reader.reset_range(chunk.get<1>(), chunk.get<2>()); while (reader.start() < reader.end()) { - unused char *start = reader.start(); - unused uint32_t prefix = reader.read<uint32_t>(); - assert(prefix < 10000); - assert(start + sizeof(uint32_t) + prefix <= reader.end()); + char *start = reader.start(); + uint32_t prefix = reader.read<uint32_t>(); + ASSERT(prefix < 10000); + ASSERT(start + sizeof(uint32_t) + prefix <= reader.end()); batch.Clear(); for (int t = 0; t < batch.txn_size(); ++t) { const Txn &txn = batch.txn(t); @@ -1771,7 +1788,7 @@ << "; backlog.size = " << backlog.queue().size() << endl; } } - assert(start + sizeof(uint32_t) + prefix == reader.start()); + ASSERT(start + sizeof(uint32_t) + prefix == reader.start()); } } g_caught_up = true; Deleted: ydb/trunk/src/tpcc/assert.h =================================================================== --- ydb/trunk/src/tpcc/assert.h 2009-03-12 19:54:31 UTC (rev 1289) +++ ydb/trunk/src/tpcc/assert.h 2009-03-12 19:54:39 UTC (rev 1290) @@ -1,16 +0,0 @@ -#ifndef ASSERT_H__ -#define ASSERT_H__ - -#include <cassert> - -// Wraps the standard assert macro to avoids "unused variable" warnings when compiled away. -// Inspired by: http://powerof2games.com/node/10 -// This is not the "default" because it does not conform to the requirements of the C standard, -// which requires that the NDEBUG version be ((void) 0). 
-#ifdef NDEBUG -#define ASSERT(x) do { (void)sizeof(x); } while(0) -#else -#define ASSERT(x) assert(x) -#endif - -#endif Modified: ydb/trunk/src/tpcc/clock.cc =================================================================== --- ydb/trunk/src/tpcc/clock.cc 2009-03-12 19:54:31 UTC (rev 1289) +++ ydb/trunk/src/tpcc/clock.cc 2009-03-12 19:54:39 UTC (rev 1290) @@ -5,7 +5,7 @@ #include <cstdio> #include <ctime> -#include "assert.h" +#include <commons/assert.h> // Fills output with the base-10 ASCII representation of value, using digits digits. static char* makeInt(char* output, int value, int digits) { Modified: ydb/trunk/src/tpcc/randomgenerator.cc =================================================================== --- ydb/trunk/src/tpcc/randomgenerator.cc 2009-03-12 19:54:31 UTC (rev 1289) +++ ydb/trunk/src/tpcc/randomgenerator.cc 2009-03-12 19:54:39 UTC (rev 1290) @@ -6,7 +6,7 @@ #include <cstring> #include <ctime> -#include "assert.h" +#include <commons/assert.h> NURandC NURandC::makeRandom(RandomGenerator* generator) { NURandC c; Modified: ydb/trunk/src/tpcc/tpccclient.cc =================================================================== --- ydb/trunk/src/tpcc/tpccclient.cc 2009-03-12 19:54:31 UTC (rev 1289) +++ ydb/trunk/src/tpcc/tpccclient.cc 2009-03-12 19:54:39 UTC (rev 1290) @@ -3,7 +3,7 @@ #include <cstdio> #include <vector> -#include "assert.h" +#include <commons/assert.h> #include "clock.h" #include "randomgenerator.h" #include "tpccdb.h" Modified: ydb/trunk/src/tpcc/tpcctables.cc =================================================================== --- ydb/trunk/src/tpcc/tpcctables.cc 2009-03-12 19:54:31 UTC (rev 1289) +++ ydb/trunk/src/tpcc/tpcctables.cc 2009-03-12 19:54:39 UTC (rev 1290) @@ -4,7 +4,7 @@ #include <limits> #include <vector> -#include "assert.h" +#include <commons/assert.h> #include "stlutil.h" using std::vector; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-18 09:59:32
|
Revision: 1304 http://assorted.svn.sourceforge.net/assorted/?rev=1304&view=rev Author: yangzhang Date: 2009-03-18 09:59:25 +0000 (Wed, 18 Mar 2009) Log Message: ----------- cleaned up macros, cog Modified Paths: -------------- ydb/trunk/src/ser.h ydb/trunk/src/tpcc/tpcctables.cc.cog Modified: ydb/trunk/src/ser.h =================================================================== --- ydb/trunk/src/ser.h 2009-03-18 09:58:41 UTC (rev 1303) +++ ydb/trunk/src/ser.h 2009-03-18 09:59:25 UTC (rev 1304) @@ -10,9 +10,6 @@ #include <iostream> #include "ydb.pb.h" -#define BEGIN_NAMESPACE(ns) namespace ns { -#define END_NAMESPACE } - #define MAKE_START_FIN_HELPER(MsgType, field, action) \ template<typename T> inline void action##_##field(T &msg); \ template<> inline void action##_##field(ydb::pb::MsgType&) {} \ Modified: ydb/trunk/src/tpcc/tpcctables.cc.cog =================================================================== --- ydb/trunk/src/tpcc/tpcctables.cc.cog 2009-03-18 09:58:41 UTC (rev 1303) +++ ydb/trunk/src/tpcc/tpcctables.cc.cog 2009-03-18 09:59:25 UTC (rev 1304) @@ -1,3 +1,13 @@ +//[[[cog +// allfields = 'items warehouses stock districts customers orders orderlines neworders history'.split() +// treepairs = 'warehouses/Warehouse stock/Stock districts/District customers/Customer orders/Order orderlines/OrderLine'.split() +// allpairs = treepairs + ['neworders/NewOrder'] +// def typedefs(): +// for name in allfields: +// cog.outl(r'typedef typeof(%s_) type_%s;' % (name, name)) +//]]] +//[[[end]]] + #include "tpcctables.h" #include <algorithm> @@ -605,7 +615,6 @@ using namespace std; cout //[[[cog - // import cog // for name in 'items warehouses stock districts customers orders orders_by_customer orderlines customers_by_name neworders history'.split(): // cog.outl(r'<< " |%s| = " << %s_.size() << "\n"' % (name, name)) //]]] @@ -625,11 +634,10 @@ bzero(&hdr, sizeof hdr); hdr.seqno = seqno; //[[[cog - // import cog - // for name in 'items warehouses stock districts 
customers orders orderlines neworders history'.split(): - // cog.outl(r'typedef typeof(%s_) type_%s;' % (name, name)) + // typedefs() + // for name in allfields: // cog.outl(r'hdr.n%s = uint32_t(%s_.size());' % (name, name)) - // for pair in 'warehouses/Warehouse stock/Stock districts/District customers/Customer orders/Order orderlines/OrderLine neworders/NewOrder'.split(): + // for pair in allpairs: // name, struct = pair.split('/') // cog.outl(r'hdr.len += uint32_t(hdr.n%s * (sizeof(type_%s::key_type) + sizeof(%s)));' % (name, name, struct)) //]]] @@ -651,7 +659,7 @@ } //[[[cog - // for pair in 'warehouses/Warehouse stock/Stock districts/District customers/Customer orders/Order orderlines/OrderLine'.split(): + // for pair in treepairs: // name, struct = pair.split('/') // cog.outl(r''' // { @@ -691,19 +699,14 @@ raw_reader reader(arr.get()); - //[[[cog - // for name in 'items warehouses stock districts customers orders orderlines neworders history'.split(): - // cog.outl(r'typedef typeof(%s_) type_%s;' % (name, name)) - //]]] - //[[[end]]] - items_.reserve(hdr.nitems); for (uint32_t i = 0; i < hdr.nitems; ++i) { items_.push_back(reader.read<Item>()); } //[[[cog - // for pair in 'warehouses/Warehouse stock/Stock districts/District customers/Customer orders/Order orderlines/OrderLine'.split(): + // typedefs() + // for pair in treepairs: // name, struct = pair.split('/') // # Generate customers_by_name_ // cbn = r'customers_by_name_.insert(reinterpret_cast<Customer*>(reader.ptr()));' if name == 'customers' else '' @@ -730,7 +733,7 @@ history_.reserve(hdr.nhistory); for (uint32_t i = 0; i < hdr.nhistory; ++i) { - history_.push_back(reader.readptr<const History>()); // reinterpret_cast<const History*>(reader.ptr())); + history_.push_back(reader.readptr<const History>()); } serbuf_.reset(arr.get(), arr.size()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-20 07:58:33
|
Revision: 1313 http://assorted.svn.sourceforge.net/assorted/?rev=1313&view=rev Author: yangzhang Date: 2009-03-20 07:58:28 +0000 (Fri, 20 Mar 2009) Log Message: ----------- - broke up the program into multiple modules - using new custom clamp - ser.h -> msg.h - still some renaming to do Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/ser.cc Added Paths: ----------- ydb/trunk/src/main2.lzz.clamp ydb/trunk/src/msg.h ydb/trunk/src/setprefs.h ydb/trunk/src/stxn.lzz.clamp ydb/trunk/src/tpcc.lzz.clamp ydb/trunk/src/unsetprefs.h ydb/trunk/src/util.lzz Removed Paths: ------------- ydb/trunk/src/ser.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-20 05:35:47 UTC (rev 1312) +++ ydb/trunk/src/Makefile 2009-03-20 07:58:28 UTC (rev 1313) @@ -38,6 +38,7 @@ endif # CXX := $(WTF) ag++ -k --Xcompiler # $(CXX) CXX := $(WTF) ccache $(CXX) -pipe +LD := $(CXX) LDFLAGS := -pthread $(GPROF) LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ -lboost_program_options-gcc43-mt -lboost_thread-gcc43-mt \ @@ -93,13 +94,15 @@ all: $(TARGET) -$(TARGET): $(OBJS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ - %.pb.o: %.pb.cc %.pb.h $(CXX) -c $(PBCXXFLAGS) $(OUTPUT_OPTION) $< -main.o: main.cc $(PBHDRS) +stxn.o: main.hh $(PBHDRS) +main.o: util.hh msg.h $(PBHDRS) +util.o: msg.h $(PBHDRS) +main2.o: main.hh stxn.hh tpcc.hh $(PBHDRS) +tpcc.o: main.hh util.hh $(PBHDRS) +ydb: main.o main2.o util.o tpcc.o stxn.o tpcc/%.o: tpcc/%.cc make -C tpcc/ @@ -109,12 +112,8 @@ .SECONDARY: tpcc/tpcctables.cc tpcc/tpcctables.o -%.o: %.cc - $(COMPILE.cc) $(OUTPUT_OPTION) $< - %.cc %.hh: %.lzz lzz -hx hh -sx cc -hl -sl -hd -sd $< - python -c 'pars = file("lambda_impl.clamp_h").read().split("\n\n"); hh = file("main.hh").read(); print >> file("main.cc", "a"), pars[-1]; print >> file("main.hh", "w"), "\n\n".join(pars[:-1] + [hh])' %.pb.cc: %.proto protoc --cpp_out=. 
$< @@ -124,11 +123,12 @@ %.lzz: %.lzz.clamp rm -f $@ - clamp < $< | sed '1d' > $@ + mkdir -p clamp/ + clamp --outdir clamp/ --prefix $(basename $@) < $< | \ + sed "$$( echo -e '1i\\\n\#hdr\n1a\\\n\#end' )" | \ + sed "$$( echo -e '$$i\\\n\#src\n$$a\\\n\#end' )" > $@ chmod -w $@ -main.o: ser.h - all.h: fgrep '#include' main.lzz.clamp > all.h @@ -136,7 +136,9 @@ $(COMPILE.cc) $(PBHDRS) $(OUTPUT_OPTION) $< clean: - rm -f $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) main.lzz *.clamp_h + rm -rf clamp/ $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) \ + main.lzz main2.lzz main.cc main.hh main2.hh main2.cc \ + util.cc util.hh tpcc.lzz tpcc.hh tpcc.cc make -C tpcc/ clean distclean: clean @@ -159,5 +161,5 @@ p2: p2.cc $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) -ser: ser.cc ser.h ydb.pb.o +ser: ser.cc msg.h ydb.pb.o $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-20 05:35:47 UTC (rev 1312) +++ ydb/trunk/src/main.lzz.clamp 2009-03-20 07:58:28 UTC (rev 1313) @@ -1,103 +1,52 @@ #hdr -#define __STDC_FORMAT_MACROS +#include "unsetprefs.h" #include <boost/archive/binary_iarchive.hpp> #include <boost/archive/binary_oarchive.hpp> #include <boost/bind.hpp> #include <boost/foreach.hpp> -#include <boost/program_options.hpp> -#include <boost/range/iterator_range.hpp> -#include <boost/scoped_array.hpp> -#include <boost/shared_ptr.hpp> +//#include <boost/range/iterator_range.hpp> +//#include <boost/shared_ptr.hpp> #include <boost/tuple/tuple.hpp> #include <commons/assert.h> -#include <commons/memory.h> #include <commons/nullptr.h> -#include <commons/rand.h> -#include <commons/snap_map.h> #include <commons/st/st.h> #include <commons/time.h> -#include <commons/unique_ptr.h> -#include <csignal> // sigaction etc. 
-#include <cstdio> -#include <cstring> // strsignal #include <fstream> // ofstream -#include <google/dense_hash_map> -#include <google/protobuf/io/zero_copy_stream_impl.h> -#include <gtest/gtest.h> -#include <inttypes.h> // PRId64 #include <iostream> -#include <malloc.h> -#include <map> -#include <netinet/in.h> // in_addr etc. -#include <set> -#include <sys/socket.h> // getpeername -#include <sys/types.h> // ssize_t -#include <tr1/unordered_map> -#include <unistd.h> // pipe, write, sync #include <vector> -#include "ser.h" -#include "tpcc/clock.h" -#include "tpcc/randomgenerator.h" -#include "tpcc/tpccclient.h" -#include "tpcc/tpccgenerator.h" -#include "tpcc/tpcctables.h" -#include "ydb.pb.h" +#include "msg.h" +#include "util.hh" +#include "setprefs.h" -#define function boost::function -#define foreach BOOST_FOREACH -#define shared_ptr boost::shared_ptr -#define ref boost::ref -#define tuple boost::tuple -#define make_tuple boost::make_tuple - using namespace boost; using namespace boost::archive; using namespace commons; -using namespace google; -using namespace google::protobuf::io; using namespace std; -using namespace std::tr1; -using namespace testing; using namespace ydb; -using namespace ydb::pb; using namespace ydb::msg; #end -//#define map_t unordered_map -//#define map_t map -//#define map_t dense_hash_map -#define map_t snap_map -typedef map_t<int, int> mii; -typedef mii::value_type entry; +#src +#include <unistd.h> // pipe, write, sync +#end typedef tuple<sized_array<char>, char*, char*> chunk; -//typedef unique_ptr<Recovery> recovery_t; typedef commons::array<char> recovery_t; -template<typename T> void init_map(T &map) {} -template<> void init_map(dense_hash_map<int, int> &map) { - map.set_empty_key(-1); - map.set_deleted_key(-2); -} -template<> void init_map(snap_map<int, int> &map) { - map.set_empty_key(-1); - map.set_deleted_key(-2); -} - // Configuration. 
st_utime_t timeout; int yield_interval, accept_joiner_seqno, issuing_interval, min_ops, max_ops, stop_on_seqno, batch_size, handle_responses_display, fail_seqno, catch_up_display, issue_display, nwarehouses, process_display; -size_t accept_joiner_size, buf_size, read_buf_size; +size_t accept_joiner_size, read_buf_size; bool yield_during_build_up, yield_during_catch_up, dump, show_updates, - count_updates, stop_on_recovery, general_txns, profile_threads, - debug_threads, multirecover, disk, debug_memory, use_pwal, use_twal, + count_updates, stop_on_recovery, general_txns, + disk, debug_memory, use_pwal, use_twal, use_pb, use_pb_res, g_caught_up, rec_pwal, rec_twal, do_tpcc, - suppress_txn_msgs, fake_bcast, force_ser, fake_exec, ship_log; -long long timelim, read_thresh, write_thresh; + suppress_txn_msgs, force_ser, fake_exec, ship_log; +long long timelim, read_thresh; // Control. st_intr_bool stop_hub, kill_hub; @@ -112,147 +61,6 @@ int updates; /** - * Convenience function for calculating percentages. - */ -template<typename T> -inline double pct(T sub, T tot) -{ - return 100 * double(sub) / double(tot); -} - -/** - * Convenience class for performing long-jumping break. - */ -class break_exception : public std::exception {}; - -/** - * The list of all threads. Keep track of these so that we may cleanly shut - * down all threads. - */ -set<st_thread_t> threads; - -/** - * RAII for adding/removing the current thread from the global threads set. - */ -class thread_eraser -{ - public: - thread_eraser() { threads.insert(st_thread_self()); } - ~thread_eraser() { threads.erase(st_thread_self()); } -}; - -/** - * For debug/error-printing purposes. - */ -map<st_thread_t, string> threadnames; -st_thread_t last_thread; - -/** - * For profiling. - */ -map<st_thread_t, long long> threadtimes; -long long thread_start_time; - -/** - * Look up thread name, or just show thread ID. 
- */ -inline string -threadname(st_thread_t t = st_thread_self()) { - if (threadnames.find(t) != threadnames.end()) { - return threadnames[t]; - } else { - return lexical_cast<string>(t); - } -} - -/** - * Debug function for thread names. Remember what we're switching from. - */ -inline void -switch_out_cb() -{ - if (debug_threads) last_thread = st_thread_self(); - if (profile_threads) - threadtimes[st_thread_self()] += current_time_millis() - thread_start_time; -} - -/** - * Debug function for thread names. Show what we're switching from/to. - */ -inline void switch_in_cb() -{ - if (debug_threads && last_thread != st_thread_self()) { - cout << "switching"; - if (last_thread != 0) cout << " from " << threadname(last_thread); - cout << " to " << threadname() << endl; - } - if (profile_threads) - thread_start_time = current_time_millis(); -} - -/** - * Print to cerr a thread exception. - */ -ostream& -cerr_thread_ex(const std::exception &ex) -{ - return cerr << "exception in thread " << threadname() - << ": " << ex.what(); -} - -/** - * Delegate for running thread targets. - * \param[in] f The function to execute. - * \param[in] intr Whether to signal stop_hub on an exception. - */ -void -my_spawn_helper(const function0<void> f, bool intr) -{ - thread_eraser eraser; - try { - f(); - } catch (std::exception &ex) { - cerr_thread_ex(ex) << (intr ? "; interrupting!" : "") << endl; - if (intr) stop_hub.set(); - } -} - -/** - * Spawn a thread using ST but wrap it in an exception handler that interrupts - * all other threads (hopefully causing them to unwind). - * \param[in] f The function to execute. - * \param[in] intr Whether to signal stop_hub on an exception. Not actually - * used anywhere. 
- */ -st_thread_t -my_spawn(const function0<void> &f, string name, bool intr = false) -{ - st_thread_t t = st_spawn(bind(my_spawn_helper, f, intr)); - threads.insert(t); - threadnames[t] = name; - return t; -} - -char * -show_sockaddr(st_netfd_t fd) -{ - sockaddr_in sa; - socklen_t salen = sizeof sa; - check0x(getpeername(st_netfd_fileno(fd), - reinterpret_cast<sockaddr*>(&sa), - &salen)); - return inet_ntoa(sa.sin_addr); -} - -map<st_netfd_t, string> nfdnames; - -inline const string& -nfd2name(st_netfd_t fd) -{ - return nfdnames[fd]; -} - -/** * Used by the leader to bookkeep information about replicas. */ class replica_info @@ -309,95 +117,10 @@ const vector<st_netfd_t> &rs_; }; +#if 0 st_channel<pair<st_netfd_t, shared_ptr<string> > > msgs; /** - * Adapter for arrays to look like strings (for PB serialization). - */ -class ser_array -{ - commons::array<char> a_; - size_t size_; -public: - ser_array(size_t size = buf_size) : a_(size), size_(0) {} - char *data() const { return a_.get(); } - size_t size() const { return size_; } - void clear() { size_ = 0; } - void stretch(size_t size) { - if (size > a_.size()) - a_.reset(new char[size], size); - size_ = size; - } -}; - -//typedef string ser_t; -typedef ser_array ser_t; - -template<typename T> -void -ser(writer &w, const T &msg) -{ - uint32_t len = msg.ByteSize(); - w.mark(); - w.reserve(len); - check(msg.SerializeToArray(w.cur(), len)); - w.skip(len); -} - -/** - * Serialization. - * - * TODO: experiment with which method is the fastest: using a string as shown - * here or computing the bytesize then allocating (or grabbing/reserving) the - * array. - */ -template<typename T> -void -ser(string &s, const T &msg) -{ - // Serialize message to a buffer. - uint32_t len; - s.append(sizeof len, '\0'); - check(msg.AppendToString(&s)); - - // Warn if the message is large. - if (s.size() > 1000000) - cout << "serializing large message of " << s.size() << " bytes" << endl; - - // Prefix the message with a four-byte length. 
- len = htonl(static_cast<uint32_t>(s.size() - sizeof len)); - char *plen = reinterpret_cast<char*>(&len); - copy(plen, plen + sizeof len, s.begin()); -} - -template<typename T> -inline void -ser(ser_array &s, const T &msg) -{ - int len = msg.ByteSize(); - - // Grow the array as needed. - s.stretch(len + sizeof(uint32_t)); - - // Serialize message to a buffer with four-byte length prefix. - check(msg.SerializeToArray(s.data() + sizeof(uint32_t), len)); - *reinterpret_cast<uint32_t*>(s.data()) = htonl(uint32_t(len)); -} - -/** - * Serialization. - */ -template<typename T> -inline void -ser(ostream &s, const T &msg) -{ - uint32_t len = htonl(uint32_t(msg.ByteSize())); - s.write(reinterpret_cast<const char*>(&len), sizeof len); - check(msg.SerializeToOstream(&s)); -} - -#if 0 -/** * The worker that performs the actual broadcasting. */ void @@ -431,193 +154,6 @@ } #endif -/** - * Perform an st_write but warn if it took over write_thresh ms. - */ -void -st_timed_write(st_netfd_t dst, const void *buf, size_t len) -{ - long long before_write = -1; - if (write_thresh > 0) { - before_write = current_time_millis(); - } - - checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), - static_cast<ssize_t>(len)); - - if (write_thresh > 0) { - long long write_time = current_time_millis() - before_write; - if (write_time > write_thresh) { - cout << "thread " << threadname() << " write of " << len - << " bytes to dst " << show_sockaddr(dst) << " blocked for " - << write_time << " ms" << endl; - } - } -} - -/** - * Send a message to some destinations. - */ -inline void -bcastbuf(const vector<st_netfd_t> &dsts, const ser_t &msg) -{ - if (!fake_bcast) { - foreach (st_netfd_t dst, dsts) { - st_timed_write(dst, msg.data(), msg.size()); - } - } -} - -/** - * Send a message to some destinations, using whichever method of network IO - * was chosen (sync or async). 
- */ -template<typename T> -inline void -bcastmsg(const vector<st_netfd_t> &dsts, const T &msg) -{ - ser_t s; - ser(s, msg); - bcastbuf(dsts, s); -} - -/** - * Send a message to a single recipient. - */ -inline void -sendbuf(st_netfd_t dst, const ser_t &msg) -{ - if (!fake_bcast) - st_timed_write(dst, msg.data(), msg.size()); -} - -/** - * Send a message to a single recipient. - */ -template<typename T> -inline void -sendmsg(st_netfd_t dst, const T &msg) -{ - ser_t s; - ser(s, msg); - sendbuf(dst, s); -} - -/** - * Read a message. This is done in two steps: first by reading the length - * prefix, then by reading the actual body. This function also provides a way - * to measure how much time is spent actually reading the message from the - * network. Such measurement only makes sense for large messages which take a - * long time to receive. - * - * \param[in] src The socket from which to read. - * - * \param[in] msg The protobuf to read into. - * - * \param[out] start_time If not null, record the time at which we start to - * receive the message (after the length is received). - * - * \param[out] stop_time If not null, record the time at which we finish - * receiving the message (before we deserialize the protobuf). - * - * \param[out] len If not null, record the size of the serialized message - * in bytes. - * - * \param[in] timeout on each of the two read operations (first one is on - * length, second one is on the rest). - * - * \return The length of the serialized message. - */ -template <typename T> -size_t -readmsg(st_netfd_t src, T & msg, long long *start_time = nullptr, long long - *stop_time = nullptr, st_utime_t timeout = ST_UTIME_NO_TIMEOUT) -{ - // Read the message length. - uint32_t len; - checkeqnneg(st_read_fully(src, static_cast<void*>(&len), sizeof len, - timeout), - static_cast<ssize_t>(sizeof len)); - if (start_time != nullptr) - *start_time = current_time_millis(); - len = ntohl(len); - - // Parse the message body. 
Try stack-allocation if possible. - scoped_array<char> sbuf; - char *buf; - if (len <= 4096) buf = reinterpret_cast<char*>(alloca(len)); - else sbuf.reset(buf = new char[len]); - checkeqnneg(st_read_fully(src, buf, len, timeout), int(len)); - if (stop_time != nullptr) - *stop_time = current_time_millis(); - check(msg.ParseFromArray(buf, len)); - - return len; -} - -/** - * Same as the above readmsg(), but returns an internally constructed message. - * This is a "higher-level" readmsg() that relies on return-value optimization - * for avoiding unnecessary copies. - */ -template <typename T> -inline T -readmsg(st_netfd_t src, st_utime_t timeout = ST_UTIME_NO_TIMEOUT) -{ - T msg; - readmsg(src, msg, nullptr, nullptr, timeout); - return msg; -} - -/** - * Same as the above readmsg() but uses an st_reader instead of a raw - * st_netfd_t. - */ -template <typename T> -inline void -readmsg(st_reader &src, T & msg) -{ - managed_array<char> a = src.read(sizeof(uint32_t)); - uint32_t len = ntohl(*reinterpret_cast<const uint32_t*>(a.get())); - check(msg.ParseFromArray(src.read(len), len)); -} - -template<typename T> -inline void -readmsg(anchored_stream_reader &src, T &msg) -{ - uint32_t len = ntohl(src.read<uint32_t>()); - check(msg.ParseFromArray(checkpass(src.read(len)), len)); -} - -template<typename T> -inline void -readmsg(istream &src, T &msg) -{ - uint32_t len; - src.read(reinterpret_cast<char*>(&len), sizeof len); - len = ntohl(len); -#if 0 - IstreamInputStream iis(&src); - LimitingInputStream lis(&iis, len); - check(msg.ParseFromZeroCopyStream(&lis)); -#else - char buf[len]; - src.read(buf, len); - check(msg.ParseFromArray(buf, len)); -#endif -} - -inline uint32_t -readlen(istream &src) -{ - uint32_t len; - src.read(reinterpret_cast<char*>(&len), sizeof len); - len = ntohl(len); - ASSERT(len < 10000); - return len; -} - enum { op_del, op_write, op_commit }; /** @@ -665,971 +201,10 @@ }; // Globals -mii g_map; wal *g_wal; txn_wal *g_twal; //tpcc_wal *g_tpcc_wal; -/** 
- * Keep issuing transactions to the replicas. - */ -template<typename Types> -void -issue_txns(st_channel<replica_info> &newreps, int &seqno, - st_bool &accept_joiner) -{ - typedef typename Types::TxnBatch TxnBatch; - typedef typename Types::Txn Txn; - typedef typename Types::Op Op; - - Op_OpType types[] = {Op::read, Op::write, Op::del}; - vector<st_netfd_t> fds; - long long start_time = current_time_millis(); - - finally f(lambda () { - showtput("issued", current_time_millis(), __ref(start_time), __ref(seqno), - 0); - }); - - commons::array<char> rbuf(read_buf_size), wbuf(buf_size); - reader r(nullptr, rbuf.get(), rbuf.size()); - function<void(const void*, size_t)> fn; - if (use_twal) - fn = bind(&txn_wal::logbuf, g_twal, _1, _2); - else - fn = lambda(const void *buf, size_t len) { - foreach (st_netfd_t dst, __ref(fds)) - st_timed_write(dst, buf, len); - }; - - char *real_wbuf = newreps.empty() ? rbuf.get() : wbuf.get(); - size_t real_wbuf_size = newreps.empty() ? rbuf.size() : wbuf.size(); - writer w(fn, real_wbuf, real_wbuf_size); - stream s(r,w); - scoped_ptr<TxnBatch> pbatch(new_TxnBatch<TxnBatch>(s)); - TxnBatch batch = *pbatch; - if (Types::is_pb()) - for (int t = 0; t < batch_size; ++t) - batch.add_txn(); - - ser_t serbuf; - while (!stop_hub) { - w.mark(); - batch.Clear(); - - // Did we get a new member? If so, notify an arbitrary member (the first - // one) to prepare to send recovery information (by sending an - // empty/default Txn). - // XXX rec_pwal - if (!newreps.empty() && seqno > 0 && !rec_pwal) { - start_txn(batch); - fin_txn(batch); - // TODO: verify that this made the catch-up stream more efficient, - // starting it only at the point necessary - w.mark_and_flush(); - if (Types::is_pb()) { - if (multirecover) bcastmsg(fds, batch); - else sendmsg(fds[0], batch); - } - batch.Clear(); - } - // Bring in any new members. 
- // TODO more efficient: copy/extend/append - while (!newreps.empty()) { - fds.push_back(newreps.take().fd()); - } - - // Generate some random transactions. - start_txn(batch); - for (int t = 0; t < batch_size && !stop_hub; ++t) { - char *txn_start = w.cur(); - Txn &txn = *batch.add_txn(); - txn.set_seqno(seqno); - int count = randint(min_ops, max_ops + 1); - start_op(txn); - for (int o = 0; o < count; ++o) { - Op *op = txn.add_op(); - int rtype = general_txns ? randint(3) : 1, - rkey = randint(), - rvalue = randint(); - op->set_type(types[rtype]); - op->set_key(rkey); - op->set_value(rvalue); - } - fin_op(txn); - - // Process immediately if not bcasting. - if (fds.empty()) { - --seqno; - r.reset_range(txn_start, w.cur()); - if (!Types::is_pb()) txn.Clear(); - process_txn<Types, pb_traits>(g_map, txn, seqno, nullptr); - } - - // Checkpoint. - if (check_interval(seqno, yield_interval)) st_sleep(0); - if (check_interval(seqno, issue_display)) { - cout << "issued txn " << seqno << endl; - if (timelim > 0 && current_time_millis() - start_time > timelim) { - cout << "time's up; issued " << seqno << " txns in " << timelim - << " ms" << endl; - stop_hub.set(); - } - } - - // For debugging purposes. - if (issuing_interval > 0) { - st_sleep(issuing_interval); - } - - // Are we to accept a new joiner? - if (seqno == accept_joiner_seqno) { - accept_joiner.set(); - } - - // Set the stopping seqno. - if (seqno == stop_on_seqno) { - cout << "stopping on issue of seqno " << seqno << endl; - stop_hub.set(); - } - - ++seqno; - } - fin_txn(batch); - - bool do_bcast = !fds.empty() && !suppress_txn_msgs; - if (Types::is_pb()) { - // Broadcast/log/serialize. - if (force_ser || do_bcast || use_twal) { - serbuf.clear(); - ser(serbuf, batch); - if (do_bcast) bcastbuf(fds, serbuf); - if (use_twal) g_twal->logbuf(serbuf); - } - } else { - // Reset if we have nobody to send to (incl. 
disk) or if we actually have - // no txns (possible due to loop structure; want to avoid to avoid - // confusing with the 0-txn message signifying "prepare a recovery msg"). - if (!do_bcast && !use_twal) { - w.reset(); - } - } - - // Pause? - if (do_pause) - do_pause.waitreset(); - } - - // This means "The End." - if (!fds.empty()) { - w.mark(); - batch.Clear(); - start_txn(batch); - Txn &txn = *batch.add_txn(); - txn.set_seqno(-1); - start_op(txn); - fin_op(txn); - fin_txn(batch); - if (Types::is_pb()) bcastmsg(fds, batch); - w.mark_and_flush(); - } -} - -#if 0 -template<typename Types, typename RTypes> -void -process_txn_ext(mii &map, const typename Types::Txn &txn, int &seqno, - typename RTypes::Response *res, ext_map ext) -{ - response -} -#endif - -/** - * Process a transaction: update DB state (incl. seqno) and send response to - * leader. - */ -template<typename Types, typename RTypes> -void -process_txn(mii &map, const typename Types::Txn &txn, int &seqno, - typename RTypes::Response *res) -{ - typedef typename Types::Txn Txn; - typedef typename Types::Op Op; - checkeq(txn.seqno(), seqno + 1); - seqno = txn.seqno(); - if (res != nullptr) { - res->set_seqno(seqno); - res->set_caught_up(true); - start_result(*res); - } - if (!fake_exec) { - for (int o = 0; o < txn.op_size(); ++o) { - const Op &op = txn.op(o); - const char type = op.type(); - const int key = op.key(); - mii::iterator it = map.find(key); - if (show_updates || count_updates) { - if (it != map.end()) { - if (show_updates) cout << "existing key: " << key << endl; - if (count_updates) ++updates; - } - } - switch (type) { - case Op::read: - if (res != nullptr) { - if (it == map.end()) res->add_result(0); - else res->add_result(it->second); - } - break; - case Op::write: - { - int value = op.value(); - if (use_pwal) g_wal->logwrite(key, value); - if (it == map.end()) map[key] = value; - else it->second = value; - break; - } - case Op::del: - if (it != map.end()) { - if (use_pwal) g_wal->logdel(key); 
- map.erase(it); - } - break; - } - } - } - if (res != nullptr) - fin_result(*res); - if (use_pwal) g_wal->logcommit(); -} - -void -showdatarate(const char *action, streamoff len, long long time) -{ - cout << action << " of " << len << " bytes in " << time << " ms (" - << double(len) / double(time) / 1000 << " MB/s)" << endl; -} - -void -showdatarate(const char *action, size_t len, long long time) -{ - cout << action << " of " << len << " bytes in " << time << " ms (" - << double(len) / double(time) / 1000 << " MB/s)" << endl; -} - -void -showtput(const char *action, long long stop_time, long long start_time, - int stop_count, int start_count) -{ - long long time_diff = stop_time - start_time; - int count_diff = stop_count - start_count; - double rate = double(count_diff) * 1000. / double(time_diff); - cout << action << " " << count_diff << " txns [" - << start_count << ".." << stop_count - << "] in " << time_diff << " ms [" - << start_time << ".." << stop_time - << "] (" - << rate << " tps)" << endl; -} - -/** - * Return range * part / nparts, but with proper casting. Assumes that part < - * nparts. 
- */ -inline int -interp(int range, int part, int nparts) { - return static_cast<int>(static_cast<long long>(range) * part / nparts); -} - -#src -TEST(interp_test, basics) { - EXPECT_EQ(0, interp(3, 0, 3)); - EXPECT_EQ(1, interp(3, 1, 3)); - EXPECT_EQ(2, interp(3, 2, 3)); - EXPECT_EQ(3, interp(3, 3, 3)); - - EXPECT_EQ(0, interp(RAND_MAX, 0, 2)); - EXPECT_EQ(RAND_MAX / 2, interp(RAND_MAX, 1, 2)); - EXPECT_EQ(RAND_MAX, interp(RAND_MAX, 2, 2)); -} -#end - -unique_ptr<TPCCTables> g_tables; - -void -mkres(TpccRes *res, const OrderStatusOutput &output) -{ - OrderStatusOutputMsg &msg = *res->mutable_order_status(); - msg.set_c_id(output.c_id); - msg.set_c_balance(output.c_balance); - msg.set_o_id(output.o_id); - msg.set_o_carrier_id(output.o_carrier_id); - foreach (const OrderStatusOutput::OrderLineSubset &src, output.lines) { - OrderLineSubsetMsg &dst = *msg.add_line(); - dst.set_ol_i_id(src.ol_i_id); - dst.set_ol_supply_w_id(src.ol_supply_w_id); - dst.set_ol_quantity(src.ol_quantity); - dst.set_ol_amount(src.ol_amount); - dst.set_ol_delivery_d(src.ol_delivery_d); - } - msg.set_c_first(output.c_first); - msg.set_c_middle(output.c_middle); - msg.set_c_last(output.c_last); - msg.set_o_entry_d(output.o_entry_d); -} - -void -mkres(TpccRes *res, const PaymentOutput &output) -{ - PaymentOutputMsg &msg = *res->mutable_payment(); - - WarehouseMsg &w = *msg.mutable_warehouse(); - w.set_w_id(output.warehouse.w_id); - w.set_w_tax(output.warehouse.w_tax); - w.set_w_ytd(output.warehouse.w_ytd); - w.set_w_name(output.warehouse.w_name); - w.set_w_street_1(output.warehouse.w_street_1); - w.set_w_street_2(output.warehouse.w_street_2); - w.set_w_city(output.warehouse.w_city); - w.set_w_state(output.warehouse.w_state); - w.set_w_zip(output.warehouse.w_zip); - - DistrictMsg &d = *msg.mutable_district(); - d.set_d_id(output.district.d_id); - d.set_d_w_id(output.district.d_w_id); - d.set_d_tax(output.district.d_tax); - d.set_d_ytd(output.district.d_ytd); - 
d.set_d_next_o_id(output.district.d_next_o_id); - d.set_d_name(output.district.d_name); - d.set_d_street_1(output.district.d_street_1); - d.set_d_street_2(output.district.d_street_2); - d.set_d_city(output.district.d_city); - d.set_d_state(output.district.d_state); - d.set_d_zip(output.district.d_zip); - - CustomerMsg &c = *msg.mutable_customer(); - c.set_c_id(output.customer.c_id); - c.set_c_d_id(output.customer.c_d_id); - c.set_c_w_id(output.customer.c_w_id); - c.set_c_credit_lim(output.customer.c_credit_lim); - c.set_c_discount(output.customer.c_discount); - c.set_c_balance(output.customer.c_balance); - c.set_c_ytd_payment(output.customer.c_ytd_payment); - c.set_c_payment_cnt(output.customer.c_payment_cnt); - c.set_c_delivery_cnt(output.customer.c_delivery_cnt); - c.set_c_first(output.customer.c_first); - c.set_c_middle(output.customer.c_middle); - c.set_c_last(output.customer.c_last); - c.set_c_street_1(output.customer.c_street_1); - c.set_c_street_2(output.customer.c_street_2); - c.set_c_city(output.customer.c_city); - c.set_c_state(output.customer.c_state); - c.set_c_zip(output.customer.c_zip); - c.set_c_phone(output.customer.c_phone); - c.set_c_since(output.customer.c_since); - c.set_c_credit(output.customer.c_credit); - c.set_c_data(output.customer.c_data); -} - -void -process_tpcc(const TpccReq &req, int &seqno, TpccRes *res) -{ - checkeq(req.seqno(), seqno + 1); - ++seqno; - if (res != nullptr) { - res->Clear(); - res->set_seqno(seqno); - } - // First three are read-only txns, so doesn't make sense to exec if no res to - // put results. They constitute only 8% of the workload. 
- if (req.has_stock_level()) { - if (res != nullptr) { - const StockLevelMsg &sl = req.stock_level(); - int result = g_tables->stockLevel(sl.warehouse_id(), sl.district_id(), sl.threshold()); - StockLevelOutputMsg &msg = *res->mutable_stock_level(); - msg.set_result(result); - } - } else if (req.has_order_status_1()) { - if (res != nullptr) { - const OrderStatusMsg1 &os = req.order_status_1(); - OrderStatusOutput output; - g_tables->orderStatus(os.warehouse_id(), os.district_id(), os.customer_id(), &output); - mkres(res, output); - } - } else if (req.has_order_status_2()) { - if (res != nullptr) { - const OrderStatusMsg2 &os = req.order_status_2(); - OrderStatusOutput output; - g_tables->orderStatus(os.warehouse_id(), os.district_id(), os.c_last().c_str(), &output); - mkres(res, output); - } - } else if (req.has_new_order()) { - const NewOrderMsg &no = req.new_order(); - vector<NewOrderItem> items(no.item_size()); - for (int i = 0; i < no.item_size(); ++i) { - NewOrderItem &dst = items[i]; - const NewOrderItemMsg &src = no.item(i); - dst.i_id = src.i_id(); - dst.ol_supply_w_id = src.ol_supply_w_id(); - dst.ol_quantity = src.ol_quantity(); - } - NewOrderOutput output; - g_tables->newOrder(no.warehouse_id(), no.district_id(), - no.customer_id(), items, no.now().c_str(), - &output); - if (res != nullptr) { - NewOrderOutputMsg &msg = *res->mutable_new_order(); - msg.set_w_tax(output.w_tax); - msg.set_d_tax(output.d_tax); - msg.set_o_id(output.o_id); - msg.set_c_discount(output.c_discount); - msg.set_total(output.total); - foreach (const NewOrderOutput::ItemInfo &src, output.items) { - ItemInfoMsg &dst = *msg.add_item(); - dst.set_s_quantity(src.s_quantity); - dst.set_i_price(src.i_price); - dst.set_ol_amount(src.ol_amount); - dst.set_brand_generic(src.brand_generic); - dst.set_i_name(src.i_name); - } - msg.set_c_last(output.c_last); - msg.set_c_credit(output.c_credit); - msg.set_status(output.status); - } - } else if (req.has_payment_1()) { - const PaymentMsg1 &p = 
req.payment_1(); - PaymentOutput output; - g_tables->payment(p.warehouse_id(), p.district_id(), p.c_warehouse_id(), - p.c_district_id(), p.customer_id(), p.h_amount(), - p.now().c_str(), &output); - if (res != nullptr) mkres(res, output); - } else if (req.has_payment_2()) { - const PaymentMsg2 &p = req.payment_2(); - PaymentOutput output; - g_tables->payment(p.warehouse_id(), p.district_id(), p.c_warehouse_id(), - p.c_district_id(), p.c_last().c_str(), p.h_amount(), - p.now().c_str(), &output); - if (res != nullptr) mkres(res, output); - } else if (req.has_delivery()) { - const DeliveryMsg &d = req.delivery(); - vector<DeliveryOrderInfo> orders; - g_tables->delivery(d.warehouse_id(), d.carrier_id(), d.now().c_str(), &orders); - if (res != nullptr) { - DeliveryOutputMsg &msg = *res->mutable_delivery(); - foreach (const DeliveryOrderInfo &src, orders) { - DeliveryOrderInfoMsg &dst = *msg.add_order(); - dst.set_d_id(src.d_id); - dst.set_o_id(src.o_id); - } - } - } else { - ASSERT(false); - } -} - -void -process_tpccs(st_netfd_t leader, int &seqno, - st_channel<recovery_t> &send_states, - st_channel<chunk> &backlog, int init_seqno, - int mypos, int nnodes) -{ - bool caught_up = init_seqno == 0; - // Means we're currently ignoring the incoming txns until we see a fail-ack - // from the leader. - bool depleting = false; - long long start_time = current_time_millis(), - time_failed = -1, - time_caught_up = caught_up ? start_time : -1; - int seqno_caught_up = caught_up ? seqno : -1; - // Used by joiner only to tell where we actually started (init_seqno is just - // the seqno reported by the leader in the Init message, but it may have - // issued more since the Init message). 
- int first_seqno = -1; - char *marker = nullptr; - int first_seqno_in_chunk = -1; - TpccReq req; - TpccRes res; - txn_wal &wal = *g_twal; - - function<void(anchored_stream_reader& reader)> overflow_fn = - lambda(anchored_stream_reader &reader) { - if (__ref(caught_up)) { - // Anchor should already be correctly set, so just shift down. - shift_reader(reader); - } else if (__ref(first_seqno_in_chunk) == __ref(seqno) + 1) { - // Has the replayer just caught up to the start of the chunk? - ASSERT(reader.buf().get() == reader.anchor()); - // Replay all messages up to but not included the current unprocessed - // message (which we may be in the middle of receiving, triggering this - // overflow). - process_buf(reader.anchor(), __ref(marker), __ref(req), __ref(seqno)); - // Update the anchor to point to the unprocessed message, so that we - // shift the unprocessed message down. - reader.anchor() = __ref(marker); - shift_reader(reader); - } else { - // Push onto backlog and put in new buffer. 
- ASSERT(reader.buf().get() == reader.anchor()); - __ref(backlog).push(make_tuple(reader.buf(), reader.anchor(), __ref(marker))); - reader.anchor() = __ref(marker); - replace_reader(reader); - cout << "added to backlog, now has " << __ref(backlog).queue().size() - << " chunks" << endl; - } - __ref(marker) = reader.buf().get(); - }; - - sized_array<char> rbuf(new char[read_buf_size], read_buf_size); - commons::array<char> wbuf(buf_size); - anchored_stream_reader reader(st_read_fn(leader), st_read_fully_fn(leader), - overflow_fn, rbuf.get(), rbuf.size()); - writer w(lambda(const void *buf, size_t len) { - st_write(__ref(leader), buf, len); - }, wbuf.get(), wbuf.size()); - - finally f(lambda () { - long long now = current_time_millis(); - showtput("processed", now, __ref(start_time), __ref(seqno), - __ref(init_seqno)); - if (!__ref(caught_up)) { - cout << "live-processing: never entered this phase (never caught up)" << endl; - } else { - showtput("live-processed", now, __ref(time_caught_up), __ref(seqno), - __ref(seqno_caught_up)); - } - __ref(send_states).push(recovery_t()); - __ref(w).mark_and_flush(); - }); - - function<void()> send_failure_msg = lambda() { - TpccRes &res = __ref(res); - writer &w = __ref(w); - res.Clear(); - res.set_seqno(-1); - ser(w, res); - w.mark_and_flush(); - }; - - while (true) - { - marker = reader.start(); - - { - st_intr intr(stop_hub); - readmsg(reader, req); - } - - if (req.seqno() == -1) { - // End of stream. - break; - } else if (req.seqno() == -2) { - // Prepare recovery msg. - send_states.push(make_tpcc_recovery(mypos, nnodes, seqno)); - } else { - - if (depleting) { - if (req.seqno() == -3) { - // Fail-ack. Should not be receiving anything until we resume. - failed.waitreset(); - send_failure_msg(); - // Note that we don't reset depleting; we want the next iteration to - // fall through to the next case in this if-else chain.... 
- - // Adjust reader so that the next xact (the first one after failure) - // will go to the start of the buffer; this is necessary for - // backlogging. - reader.set_anchor(); - shift_reader(reader); - } else if (!failed) { - // This is the first txn after resuming. Tell the recoverer task - // that this is the seqno to build up to (from another replica's - // log). - resume.push(req.seqno()); - depleting = false; - } - // Ignore all other messages. - } - - if (!depleting) { - if (req.seqno() == -3) { - // Ignore the fail-ack. - } else { - if (use_twal) wal.logbuf(marker, reader.start() - marker); - - // Backlog (auto/implicit) or process. - if (!caught_up) { - // If we were at the start of a new buffer (our chunk was recently reset). - if (reader.buf().get() == marker) - first_seqno_in_chunk = req.seqno(); - // If we fully caught up. - if (req.seqno() == seqno + 1) { - time_caught_up = current_time_millis(); - seqno_caught_up = seqno; - showtput("process_tpccs caught up; backlogged", - time_caught_up, start_time, seqno_caught_up, - first_seqno == -1 ? init_seqno - 1 : first_seqno); - caught_up = true; - } - } - if (caught_up) { - // Process. - process_tpcc(req, seqno, &res); - ser(w, res); - reader.set_anchor(); - } - - // Display/yield. - if (check_interval(req.seqno(), process_display)) - cout << (caught_up ? "processed req " : "backlogged req ") - << req.seqno() << endl; - if (check_interval(req.seqno(), yield_interval)) st_sleep(0); - - // Die. 
- if (fail_seqno > 0 && req.seqno() == fail_seqno) { - cout << "process_tpccs failing on seqno " << fail_seqno; - time_failed = current_time_millis(); - showtput("; live-processed ", time_failed, start_time, seqno, 0); - ASSERT(init_seqno == 0); - caught_up = false; - depleting = true; - seqno = -1; - - failed.set(); - send_failure_msg(); - } - } - } - - } - } - -} - -void -process_buf(char *begin, char *end, TpccReq &req, int &seqno) -{ - ASSERT(begin < end); - raw_reader reader(begin); - while (reader.ptr() < end) { - uint32_t len = ntohl(reader.read<uint32_t>()); - ASSERT(len < 10000); - ASSERT(reinterpret_cast<char*>(reader.ptr()) + len <= end); - check(req.ParseFromArray(reader.readptr(len), len)); - process_tpcc(req, seqno, nullptr); - if (check_interval(req.seqno(), yield_interval)) st_sleep(0); - if (check_interval(req.seqno(), process_display)) { - cout << "caught up to req " << req.seqno() << endl; - } - } -} - -recovery_t -make_tpcc_recovery(int mypos, int nnodes, int seqno) -{ - long long start_time = current_time_millis(); - cout << "serializing recovery, db state is now at seqno " - << seqno << ":" << endl; - g_tables->show(); - recovery_t recovery = g_tables->ser(mypos, nnodes, seqno); - showdatarate("serialized recovery", recovery.size(), - current_time_millis() - start_time); - return recovery; -} - -/** - * Actually do the work of executing a transaction and sending back the reply. - * - * \param[in] leader The connection to the leader. - * - * \param[in] map The data store. - * - * \param[in] seqno The sequence number last seen. This always starts at 0, - * but may be bumped up by the recovery procedure. - * - * \param[in] send_states Channel of snapshots of the database state to send to - * recovering nodes (sent to recover_joiner). - * - * \param[in] backlog The backlog of txns that need to be processed. - * - * \param[in] init_seqno The seqno that was sent in the Init message from the - * leader. The first expected seqno. 
- * - * \param[in] mypos This host's position in the Init message list. Used for - * calculating the sub-range of the map for which this node is responsible. - * - * \param[in] nnodes The total number nodes in the Init message list. - * - * \param[in] wal The WAL. - */ -template<typename Types, typename RTypes> -void -process_txns(st_netfd_t leader, mii &map, int &seqno, - st_channel<recovery_t> &send_states, - /* XXX st_channel<shared_ptr<pb::Txn> > &backlog */ - st_channel<chunk> &backlog, int init_seqno, - int mypos, int nnodes) -{ - typedef typename Types::TxnBatch TxnBatch; - typedef typename Types::Txn Txn; - typedef typename Types::Op Op; - typedef typename RTypes::Response Response; - typedef typename RTypes::ResponseBatch ResponseBatch; - - bool caught_up = init_seqno == 0; - long long start_time = current_time_millis(), - time_caught_up = caught_up ? start_time : -1; - int seqno_caught_up = caught_up ? seqno : -1; - // Used by joiner only to tell where we actually started (init_seqno is just - // the seqno reported by the leader in the Init message, but it may have - // issued more since the Init message). 
- int first_seqno = -1; - - sized_array<char> rbuf(new char[read_buf_size], read_buf_size); - commons::array<char> wbuf(buf_size); - st_reader reader(leader, rbuf.get(), rbuf.size()); - writer w(lambda(const void *buf, size_t len) { - checkeqnneg(st_write(__ref(leader), buf, len, ST_UTIME_NO_TIMEOUT), - static_cast<ssize_t>(len)); - }, wbuf.get(), wbuf.size()); - stream s(reader, w); - - finally f(lambda () { - long long now = current_time_millis(); - showtput("processed", now, __ref(start_time), __ref(seqno), - __ref(init_seqno)); - if (!__ref(caught_up)) { - cout << "live-processing: never entered this phase (never caught up)" << endl; - } else { - showtput("live-processed", now, __ref(time_caught_up), __ref(seqno), - __ref(seqno_caught_up)); - } - __ref(send_states).push(recovery_t()); - __ref(w).mark_and_flush(); - }); - - try { - scoped_ptr<TxnBatch> pbatch(new_TxnBatch<TxnBatch>(s)); - TxnBatch &batch = *pbatch; - scoped_ptr<ResponseBatch> presbatch(new_ResponseBatch<ResponseBatch>(s)); - ResponseBatch &resbatch = *presbatch; - ser_t serbuf; - char *first_start = reader.start(); - ASSERT(first_start == rbuf.get()); - const size_t headerlen = sizeof(uint32_t) + sizeof(short) + sizeof(int); - while (true) { - uint32_t prefix = 0; - char *start = reader.start(); - - // Will overflow on next few reads ("header")? 
- if (!caught_up && reader.unread() + reader.rem() < headerlen) { - sized_array<char> buf(new char[read_buf_size], read_buf_size); - memcpy(buf.get(), reader.start(), reader.unread()); - swap(buf, reader.buf()); - reader.reset_range(reader.buf().get(), reader.buf().get() + reader.unread()); - backlog.push(make_tuple(buf, first_start, start)); - first_start = reader.start(); - } - - if (Types::is_pb()) { - long long before_read = -1; - if (read_thresh > 0) { - before_read = current_time_millis(); - } - { - st_intr intr(stop_hub); - readmsg(reader, batch); - } - if (read_thresh > 0) { - long long read_time = current_time_millis() - before_read; - if (read_time > read_thresh) { - cout << "thread " << threadname() - << ": read took " << read_time << " ms" << endl; - } - } - } else { - st_intr intr(stop_hub); - prefix = ntohl(reader.read<uint32_t>()); - check(prefix < 10000); - batch.Clear(); - } - - if (batch.txn_size() > 0) { - const Txn &first_txn = batch.txn(0); - if (first_txn.seqno() < 0) { - break; - } else if (first_txn.seqno() > seqno + 1) { - // In backlogging mode? - - // Skip entire message, pushing it to the thread that's handling - // recovery for later processing once snapshot is received. - // TODO: implement and use anchors instead? - if (first_seqno == -1) - cout << "first seqno: " << (first_seqno = first_txn.seqno()) << endl; - - // Caught up? - if (first_seqno == seqno + 1) { - // Rewind so we process accumulated messages. - reader.reset_range(first_start, reader.end()); - continue; - } - - // About to overflow? - if (reader.unread() + reader.rem() < prefix + sizeof(uint32_t) - headerlen) { - // Move current partial message to new buffer. - sized_array<char> tmp(new char[read_buf_size], read_buf_size); - raw_writer ser(tmp.get()); - ser.write(prefix); - ser.write(short(batch.txn_size())); - ser.write(first_txn.seqno()); - memcpy(tmp.get() + headerlen, reader.start(), reader.unread()); - - // Swap the buffers. 
- swap(tmp, reader.buf()); - reader.reset_range(reader.buf().get() + headerlen, reader.buf().get() + headerlen + reader.unread()); - ASSERT(tmp.get() <= first_start && first_start < tmp.end()); - ASSERT(tmp.get() < start && start < tmp.end()); - ASSERT(first_start < start); - backlog.push(make_tuple(tmp, first_start, start)); - first_start = reader.buf().get(); - first_seqno = first_txn.seqno(); - } - - // Fill up rest of the message - ASSERT(reader.unread() + reader.rem() >= prefix + sizeof(uint32_t) - headerlen); - check0x(reader.accum(prefix + sizeof(uint32_t) - headerlen)); - } else { - // Regular transaction batch. - if (!caught_up) { - time_caught_up = current_time_millis(); - seqno_caught_up = seqno; - showtput("process_txns caught up; backlogged", - time_caught_up, start_time, seqno_caught_up, - first_seqno == -1 ? init_seqno - 1 : first_seqno); - caught_up = true; - } - w.mark(); - resbatch.Clear(); - start_res(resbatch); - for (int t = 0; t < batch.txn_size(); ++t) { - const Txn &txn = t == 0 ? first_txn : batch.txn(t); - Response *res = resbatch.add_res(); - process_txn<Types, RTypes>(map, txn, seqno, res); -#if 0 - if (!sending_recovery) { - process_txn<Types, RTypes>(map, txn, seqno, res); - } else { - process_txn_ext(map, txn, seqno, res, ext); - } -#endif - if (fake_exec && !Types::is_pb()) { - reader.skip(txn.op_size() * Op_Size); - } - - if (check_interval(txn.seqno(), yield_interval)) st_sleep(0); - if (check_interval(txn.seqno(), process_display)) { - cout << "processed txn " << txn.seqno() - << "; db size = " << map.size() - << "; seqno = " << seqno - << "; backlog.size = " << backlog.queue().size() << endl; - } - } - fin_res(resbatch); - if (RTypes::is_pb() && resbatch.res_size() > 0) { - serbuf.clear(); - ser(serbuf, resbatch); - sendbuf(leader, serbuf); - } - } - } else if (multirecover || mypos == 0) { - // Empty (default) TxnBatch means "generate a snapshot." 
- send_states.push(make_recovery(map, mypos, nnodes, seqno)); - } - } - } catch (break_exception &ex) { - } - -} - -#if 0 -template<typename mii> -unique_ptr<Recovery> -make_recovery(const mii &map, int mypos, int nnodes, int seqno) -{ - // TODO make this faster - cout << "generating recovery..." << endl; - unique_ptr<Recovery> recovery(new Recovery); - typedef ::map<int, int> mii_; - mii_ map_(map.begin(), map.end()); - mii_::const_iterator begin = - map_.lower_bound(multirecover ? interp(RAND_MAX, mypos, nnodes) : 0); - mii_::const_iterator end = multirecover && mypos < nnodes - 1 ? - map_.lower_bound(interp(RAND_MAX, mypos + 1, nnodes)) : map_.end(); - cout << "generating recovery over " << begin->first << ".." - << (end == map_.end() ? "end" : lexical_cast<string>(end->first)); - if (multirecover) - cout << " (node " << mypos << " of " << nnodes << ")"; - cout << endl; - long long start_snap = current_time_millis(); - foreach (const pii &p, make_iterator_range(begin, end)) { - Recovery_Pair *pair = recovery->add_pair(); - pair->set_key(p.first); - pair->set_value(p.second); - } - cout << "generating recovery took " - << current_time_millis() - start_snap << " ms" << endl; - recovery->set_seqno(seqno); - return move(recovery); -} -#endif - -template<typename mii> -recovery_t -make_recovery(const mii &map, int mypos, int nnodes, int seqno) -{ - return recovery_t(); -} - -struct recovery_header -{ - int seqno; - size_t count; - size_t total; - size_t size; -}; - -pair<size_t, size_t> -recovery_range(size_t size, int mypos, int nnodes) -{ - return make_pair(multirecover ? size * mypos / size_t(nnodes) : 0, - multirecover ? 
size * (mypos + 1) / size_t(nnodes) : size); -} - -template<> -recovery_t -make_recovery(const snap_map<int, int> &map, int mypos, int nnodes, int seqno) -{ - const commons::array<entry> &src = map.get_table(); - pair<size_t, size_t> range = recovery_range(src.size(), mypos, nnodes); - size_t begin = range.first, end = range.second; - ASSERT(end > begin); - recovery_header hdr = { seqno, end - begin, src.size(), map.size() }; - size_t bodylen = sizeof(entry) * hdr.count; - cout << "generating recovery of " << hdr.size << " records in " - << hdr.count << " slots (" - << bodylen << " bytes); range is [" - << begin << ".." << end << "]; seqno is " << hdr.seqno << endl; - long long start_time = current_time_millis(); - commons::array<char> recovery(sizeof(size_t) + sizeof hdr + bodylen); - raw_writer ser(recovery.begin()); - ser.write(recovery.size()); - ser.write(hdr); - memcpy(ser.ptr(), src.begin() + begin, bodylen); - showdatarate("serialized recovery", recovery.size(), - current_time_millis() - start_time); - return recovery; -} - class response_handler { public: @@ -1804,156 +379,6 @@ h.run<Types>(); } -class tpcc_response_handler -{ -public: - tpcc_response_handler(st_netfd_t replica, const int &seqno, int rid, - st_multichannel<long long> &recover_signals, - st_channel<st_netfd_t> &delreps, bool caught_up) - : - replica(replica), - seqno(seqno), - rid(rid), - recover_signals(recover_signals), - delreps(delreps), - caught_up(caught_up), - sub(recover_signals.subscribe()), - start_time(current_time_millis()), - recovery_start_time(caught_up ? -1 : start_time), - recovery_end_time(-1), - start_seqno(seqno), - recovery_start_seqno(caught_up ? 
-1 : seqno), - recovery_end_seqno(-1), - last_seqno(-1) - {} - - void run() { - finally f(bind(&tpcc_response_handler::cleanup, this)); - - commons::array<char> rbuf(read_buf_size), wbuf(buf_size); - st_reader reader(replica, rbuf.get(), rbuf.size()); - writer w(lambda(const void*, size_t) { - throw not_supported_exception("response handler should not be writing"); - }, wbuf.get(), wbuf.size()); - stream s(reader,w); - - long long last_display_time = current_time_millis(); - - function<void()> loop_cleanup = - bind(&tpcc_response_handler::loop_cleanup, this); - - TpccRes res; - - while (true) { - finally f(loop_cleanup); - - // Read the message, but correctly respond to interrupts so that we can - // cleanly exit (slightly tricky). - if (stopped_issuing && last_seqno + 1 == seqno) { - break; - } else { - st_intr intr(kill_hub); - readmsg(reader, res); - } - - if (res.seqno() == -1) { - st_intr intr(stop_hub); - cout << "got a failed node" << endl; - delreps.push(replica); - readmsg(reader, res); - last_seqno = seqno - 1; - } else { - - if (res.seqno() < last_seqno) - throw msg_exception(string("response seqno decreased from ") + - lexical_cast<string>(last_seqno) + " to " + - lexical_cast<string>(res.seqno())); - - if (!caught_up) { - long long now = current_time_millis(), time_diff = now - start_time; - caught_up = true; - recover_signals.push(now); - cout << rid << ": " << "recovering node caught up; took " - << time_diff << " ms" << endl; - // This will cause the program to exit eventually, but cleanly, such that - // the recovery time will be set first, before the eventual exit (which - // may not even happen in the current iteration). 
- if (stop_on_recovery) { - cout << "stopping on recovery" << endl; - stop_hub.set(); - } - } - - if (check_interval(res.seqno(), handle_responses_display)) { - cout << rid << ": " << "got response " << res.seqno() << " from " - << replica << "; "; - long long display_time = current_time_millis(); - showtput("handling", display_time, last_display_time, res.seqno(), - res.seqno() - handle_responses_display); - last_display_time = display_time; - } - if (check_interval(res.seqno(), yield_interval)) { - st_sleep(0); - } - last_seqno = res.seqno(); - - } - } - } - -private: - void loop_cleanup() { - // The first timestamp that comes down the subscription pipeline is the - // recovery start time, issued by the main thread. The second one is the - // recovery end time, issued by the response handler associated with the - // joiner. - if (recovery_start_time == -1 && !sub.empty()) { - recovery_start_time = sub.take(); - recovery_start_seqno = last_seqno; - cout << rid << ": "; - showtput("before recovery, finished", recovery_start_time, start_time, - recovery_start_seqno, 0); - } else if (recovery_end_time == -1 && !sub.empty()) { - recovery_end_time = sub.take(); - recovery_end_seqno = last_seqno; - cout << rid << ": "; - showtput("during recovery, finished roughly", recovery_end_time, - recovery_start_time, recovery_end_seqno, recovery_start_seqno); - } - } - - void cleanup() { - long long end_time = current_time_millis(); - cout << rid << ": "; - showtput("handled roughly", end_time, start_time, seqno, start_seqno); - if (recovery_end_time > -1) { - cout << rid << ": "; - showtput("after recovery, finished", end_time, recovery_end_time, - seqno, recovery_end_seqno); - } - } - - st_netfd_t replica; - const int &seqno; - int rid; - st_multichannel<long long> &recover_signals; - st_channel<st_netfd_t> &delreps; - bool caught_up; - st_channel<long long> &sub; - long long start_time, recovery_start_time, recovery_end_time; - int start_seqno, recovery_start_seqno,
recovery_end_seqno, last_seqno; -}; - -void -handle_tpcc_responses(st_netfd_t replica, const int &seqno, int rid, - st_multichannel<long long> &recover_signals, - st_channel<st_netfd_t> &delreps, bool caught_up) -{ - tpcc_response_handler h(replica, seqno, rid, recover_signals, delreps, - caught_up); - h.run(); -} - struct recreq { int start_seqno, end_seqno; }; @@ -2048,1305 +473,3 @@ cout << "AAAAAAAAAAAAAAAAAAAAAA" << endl; } } - -/** - * Run the leader. - */ -template<typename Types, typename RTypes> -void -run_leader(int minreps, uint16_t leader_port) -{ - cout << "starting as leader" << endl; - st_multichannel<long long> recover_signals; - - scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); - g_twal = twal.get(); - scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); - g_wal = pwal.get(); - - // Wait until all replicas have joined. - st_netfd_t listener = st_tcp_listen(leader_port); - st_closing close_listener(listener); - vector<replica_info> replicas; - st_closing_all_infos close_replicas(replicas); - cout << "waiting for at least " << minreps << " replicas to join" << endl; - for (int i = 0; i < minreps; ++i) { - st_netfd_t fd; - { - st_intr intr(stop_hub); - fd = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - } - Join join = readmsg<Join>(fd); - replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); - } - cout << "got all " << minreps << " replicas" << endl; - - // Construct the initialization message. - Init init; - init.set_txnseqno(0); - init.set_multirecover(multirecover); - foreach (replica_info r, replicas) { - SockAddr *psa = init.add_node(); - psa->set_host(r.host()); - psa->set_port(r.port()); - } - - // Send init to each initial replica. - foreach (replica_info r, replicas) { - init.set_yourhost(r.host()); - sendmsg(r.fd(), init); - } - - // Start dispatching queries. 
- st_bool accept_joiner; - int seqno = 0; - st_channel<replica_info> newreps; - st_channel<st_netfd_t> delreps; - foreach (const replica_info &r, replicas) newreps.push(r); - function<void()> f; - if (do_tpcc) - f = bind(issue_tpcc, ref(newreps), ref(delreps), ref(seqno), ref(accept_joiner)); - else - f = bind(issue_txns<Types>, ref(newreps), ref(seqno), ref(accept_joiner)); - st_joining join_issue_txns(my_spawn(f, "issue_txns")); - - finally fin(bind(summarize, "LEADER", ref(seqno))); - - try { - // Start handling responses. - st_thread_group handlers; - int rid = 0; - foreach (replica_info r, replicas) { - function<void()> fn; - if (do_tpcc) - fn = bind(handle_tpcc_responses, r.fd(), ref(seqno), rid++, - ref(recover_signals), ref(delreps), true); - else - fn = bind(handle_responses<RTypes>, r.fd(), ref(seqno), rid++, - ref(recover_signals), true); - handlers.insert(my_spawn(fn, "handle_responses")); - } - - // Accept the recovering node, and tell it about the online replicas. - st_netfd_t joiner; - try { - st_intr intr(stop_hub); - joiner = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - accept_joiner.waitset(); - } catch (std::exception &ex) { - string s(ex.what()); - if (s.find("Interrupted system call") == s.npos) - throw; - else - throw break_exception(); - } - Join join = readmsg<Join>(joiner); - replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); - cout << "setting seqno to " << seqno << endl; - init.set_txnseqno(seqno); - init.set_yourhost(replicas.back().host()); - sendmsg(joiner, init); - recover_signals.push(current_time_millis()); - - // Start streaming txns to joiner. - cout << "start streaming txns to joiner" << endl; - function<void()> handle_responses_joiner_fn; - if (do_tpcc) - handle_responses_joiner_fn = - bind(handle_tpcc_responses, joiner, ref(seqno), rid++, - ref(recover_signals), ref(delreps), false); - else - ... [truncated message content] |
From: <yan...@us...> - 2009-03-20 17:45:49
|
Revision: 1314 http://assorted.svn.sourceforge.net/assorted/?rev=1314&view=rev Author: yangzhang Date: 2009-03-20 17:45:38 +0000 (Fri, 20 Mar 2009) Log Message: ----------- - added precompiled headers - renamed main2 to ydb - moved some more code around Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/msg.h ydb/trunk/src/stxn.lzz.clamp Added Paths: ----------- ydb/trunk/src/ydb.lzz.clamp Removed Paths: ------------- ydb/trunk/src/main2.lzz.clamp Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-20 07:58:28 UTC (rev 1313) +++ ydb/trunk/src/Makefile 2009-03-20 17:45:38 UTC (rev 1314) @@ -1,7 +1,9 @@ TARGET := ydb WTF := wtf -LZZS := $(patsubst %.clamp,%,$(wildcard *.lzz.clamp)) +CLAMPS := $(wildcard *.lzz.clamp) +PURELZZS := $(foreach lzz,$(wildcard *.lzz),$(if $(wildcard $(lzz).clamp),,$(lzz))) +LZZS := $(patsubst %.clamp,%,$(CLAMPS)) $(PURELZZS) LZZHDRS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.hh,$(lzz))) LZZSRCS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.cc,$(lzz))) LZZOBJS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.o,$(lzz))) @@ -38,13 +40,13 @@ endif # CXX := $(WTF) ag++ -k --Xcompiler # $(CXX) CXX := $(WTF) ccache $(CXX) -pipe -LD := $(CXX) +CC := $(CXX) # for linking LDFLAGS := -pthread $(GPROF) LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ -lboost_program_options-gcc43-mt -lboost_thread-gcc43-mt \ -lboost_serialization-gcc43-mt $(PPROF) -CXXFLAGS := $(OPT) -pthread $(GPROF) \ +CXXFLAGS0 := $(OPT) -pthread $(GPROF) \ -Wall \ -Werror \ -Wextra \ @@ -77,6 +79,7 @@ -std=gnu++0x \ -march=native \ $(CXXFLAGS) +CXXFLAGS := $(CXXFLAGS0) -include pch.h # \ -Wmissing-noreturn \ @@ -97,12 +100,13 @@ %.pb.o: %.pb.cc %.pb.h $(CXX) -c $(PBCXXFLAGS) $(OUTPUT_OPTION) $< +%.o: pch.h.gch stxn.o: main.hh $(PBHDRS) main.o: util.hh msg.h $(PBHDRS) util.o: msg.h $(PBHDRS) -main2.o: main.hh stxn.hh tpcc.hh $(PBHDRS) +ydb.o: main.hh stxn.hh 
tpcc.hh util.hh $(PBHDRS) tpcc.o: main.hh util.hh $(PBHDRS) -ydb: main.o main2.o util.o tpcc.o stxn.o +ydb: ydb.o tpcc.o main.o util.o stxn.o ydb.pb.o $(TPCC_OBJS) # $(OBJS) tpcc/%.o: tpcc/%.cc make -C tpcc/ @@ -126,23 +130,27 @@ mkdir -p clamp/ clamp --outdir clamp/ --prefix $(basename $@) < $< | \ sed "$$( echo -e '1i\\\n\#hdr\n1a\\\n\#end' )" | \ - sed "$$( echo -e '$$i\\\n\#src\n$$a\\\n\#end' )" > $@ + sed "$$( echo -e '$$i\\\n\#hdr\n$$a\\\n\#end' )" > $@ chmod -w $@ -all.h: - fgrep '#include' main.lzz.clamp > all.h +pch.h: + svn ls -rHEAD -R | \ + grep -v '/$$' | \ + xargs sed 's/.*\binclude\b *<\(.*\)>.*/\#include <\1>/; t succ; d; :succ /commons/ d' | \ + sort -u > $@ -all.h.gch: all.h - $(COMPILE.cc) $(PBHDRS) $(OUTPUT_OPTION) $< +%.h.gch: CXXFLAGS = $(CXXFLAGS0) +%.h.gch: %.h + $(LINK.cc) $(OUTPUT_OPTION) $< clean: rm -rf clamp/ $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) \ - main.lzz main2.lzz main.cc main.hh main2.hh main2.cc \ + main.lzz ydb.lzz main.cc main.hh ydb.hh ydb.cc \ util.cc util.hh tpcc.lzz tpcc.hh tpcc.cc make -C tpcc/ clean distclean: clean - rm -f all.h all.h.gch + rm -f pch.h pch.h.gch doc: $(SRCS) $(HDRS) doxygen Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-20 07:58:28 UTC (rev 1313) +++ ydb/trunk/src/main.lzz.clamp 2009-03-20 17:45:38 UTC (rev 1314) @@ -205,180 +205,6 @@ txn_wal *g_twal; //tpcc_wal *g_tpcc_wal; -class response_handler -{ -public: - response_handler(st_netfd_t replica, const int &seqno, int rid, - st_multichannel<long long> &recover_signals, bool caught_up) - : - replica(replica), - seqno(seqno), - rid(rid), - recover_signals(recover_signals), - caught_up(caught_up), - sub(recover_signals.subscribe()), - start_time(current_time_millis()), - recovery_start_time(caught_up ? -1 : start_time), - recovery_end_time(-1), - start_seqno(seqno), - recovery_start_seqno(caught_up ? 
-1 : seqno), - recovery_end_seqno(-1), - last_seqno(-1) - {} - - template<typename Types> - void run() { - typedef typename Types::Response Response; - typedef typename Types::ResponseBatch ResponseBatch; - - finally f(bind(&response_handler::cleanup, this)); - - commons::array<char> rbuf(read_buf_size), wbuf(buf_size); - st_reader reader(replica, rbuf.get(), rbuf.size()); - writer w(lambda(const void*, size_t) { - throw not_supported_exception("response handler should not be writing"); - }, wbuf.get(), wbuf.size()); - stream s(reader,w); - - scoped_ptr<ResponseBatch> pbatch(new_ResponseBatch<ResponseBatch>(s)); - ResponseBatch &batch = *pbatch; - - long long last_display_time = current_time_millis(); - - function<void()> loop_cleanup = - bind(&response_handler::loop_cleanup, this); - - while (true) { - finally f(loop_cleanup); - uint32_t prefix = 0; - - // Read the message, but correctly respond to interrupts so that we can - // cleanly exit (slightly tricky). - if (last_seqno + 1 == seqno) { - // Stop-interruptible in case we're already caught up. - try { - st_intr intr(stop_hub); - if (Types::is_pb()) readmsg(reader, batch); - else { prefix = ntohl(reader.read<uint32_t>()); batch.Clear(); } - } catch (...) { // TODO: only catch interruptions - // This check on seqnos is OK for termination since the seqno will - // never grow again if stop_hub is set. - if (last_seqno + 1 == seqno) { - cout << rid << ": "; - cout << "clean stop; next expected seqno is " << seqno - << " (last seqno was " << last_seqno << ")" << endl; - break; - } else { - continue; - } - } - } else { - // Only kill-interruptible because we want a clean termination (want - // to get all the acks back). 
- st_intr intr(kill_hub); - if (Types::is_pb()) readmsg(reader, batch); - else { prefix = ntohl(reader.read<uint32_t>()); batch.Clear(); } - } - - for (int i = 0; i < batch.res_size(); ++i) { - const Response &res = batch.res(i); - // Determine if this response handler's host (the only joiner) has finished - // catching up. If it has, then broadcast a signal so that all response - // handlers will know about this event. - int rseqno = res.seqno(); - if (rseqno <= last_seqno) - throw msg_exception(string("response seqno decreased from ") + - lexical_cast<string>(last_seqno) + " to " + - lexical_cast<string>(rseqno)); - bool rcaught_up = res.caught_up(); - for (int r = 0; r < res.result_size(); ++r) { - cout << rseqno << last_seqno << res.result_size() << " " << r << " " << res.result(r) << endl; - } - if (!caught_up && rcaught_up) { - long long now = current_time_millis(), time_diff = now - start_time; - caught_up = true; - recover_signals.push(now); - cout << rid << ": "; - cout << "recovering node caught up; took " - << time_diff << " ms" << endl; - // This will cause the program to exit eventually, but cleanly, such that - // the recovery time will be set first, before the eventual exit (which - // may not even happen in the current iteration). - if (stop_on_recovery) { - cout << "stopping on recovery" << endl; - stop_hub.set(); - } - } - if (check_interval(rseqno, handle_responses_display)) { - cout << rid << ": " << "got response " << rseqno << " from " - << replica << "; "; - long long display_time = current_time_millis(); - showtput("handling", display_time, last_display_time, rseqno, - rseqno - handle_responses_display); - last_display_time = display_time; - } - if (check_interval(rseqno, yield_interval)) { - st_sleep(0); - } - last_seqno = rseqno; - } - } - } - -private: - void loop_cleanup() { - // The first timestamp that comes down the subscription pipeline is the - // recovery start time, issued by the main thread. 
The second one is the - // recovery end time, issued by the response handler associated with the - // joiner. - if (recovery_start_time == -1 && !sub.empty()) { - recovery_start_time = sub.take(); - recovery_start_seqno = last_seqno; - cout << rid << ": "; - showtput("before recovery, finished", recovery_start_time, start_time, - recovery_start_seqno, 0); - } else if (recovery_end_time == -1 && !sub.empty()) { - recovery_end_time = sub.take(); - recovery_end_seqno = last_seqno; - cout << rid << ": "; - showtput("during recovery, finished roughly", recovery_end_time, - recovery_start_time, recovery_end_seqno, recovery_start_seqno); - } - } - - void cleanup() { - long long end_time = current_time_millis(); - cout << rid << ": "; - showtput("handled roughly", end_time, start_time, seqno, start_seqno); - if (recovery_end_time > -1) { - cout << rid << ": "; - showtput("after recovery, finished", end_time, recovery_end_time, - seqno, recovery_end_seqno); - } - } - - st_netfd_t replica; - const int &seqno; - int rid; - st_multichannel<long long> &recover_signals; - bool caught_up; - st_channel<long long> ⊂ - long long start_time, recovery_start_time, recovery_end_time; - int start_seqno, recovery_start_seqno, recovery_end_seqno, last_seqno; -}; - -/** - * Swallow replica responses. 
- */ -template<typename Types> -void -handle_responses(st_netfd_t replica, const int &seqno, int rid, - st_multichannel<long long> &recover_signals, bool caught_up) -{ - response_handler h(replica, seqno, rid, recover_signals, caught_up); - h.run<Types>(); -} - struct recreq { int start_seqno, end_seqno; }; Deleted: ydb/trunk/src/main2.lzz.clamp =================================================================== --- ydb/trunk/src/main2.lzz.clamp 2009-03-20 07:58:28 UTC (rev 1313) +++ ydb/trunk/src/main2.lzz.clamp 2009-03-20 17:45:38 UTC (rev 1314) @@ -1,1073 +0,0 @@ -#hdr -#include "unsetprefs.h" -#include <boost/function.hpp> -#include <boost/scoped_ptr.hpp> -#include <string> -#include <iostream> -#include <st.h> -#include <commons/st/st.h> -#include "tpcc/clock.h" -#include "tpcc/randomgenerator.h" -#include "tpcc/tpccclient.h" -#include "tpcc/tpccgenerator.h" -#include "tpcc/tpcctables.h" -#include "util.hh" -#include "tpcc.hh" -#include "stxn.hh" -#include "main.hh" - -using namespace boost; -using namespace std; -using namespace commons; -#end - -#src -#include "unsetprefs.h" -#include <csignal> // sigaction, etc. -#include <cstring> // strsignal, size_t -#include <boost/program_options.hpp> -#include <gtest/gtest.h> -#include <malloc.h> -#include <string> -#include "setprefs.h" -#end - -using namespace google; -using namespace testing; - -// -// Utilities/system -// - -/** - * Delegate for running thread targets. - * \param[in] f The function to execute. - * \param[in] intr Whether to signal stop_hub on an exception. - */ -void -my_spawn_helper(const function0<void> f, bool intr) -{ - thread_eraser eraser; - try { - f(); - } catch (std::exception &ex) { - cerr_thread_ex(ex) << (intr ? "; interrupting!" : "") << endl; - if (intr) stop_hub.set(); - } -} - -/** - * Spawn a thread using ST but wrap it in an exception handler that interrupts - * all other threads (hopefully causing them to unwind). - * \param[in] f The function to execute. 
- * \param[in] intr Whether to signal stop_hub on an exception. Not actually - * used anywhere. - */ -st_thread_t -my_spawn(const function0<void> &f, string name, bool intr = false) -{ - st_thread_t t = st_spawn(bind(my_spawn_helper, f, intr)); - threads.insert(t); - threadnames[t] = name; - return t; -} - -/** - * Memory monitor. - */ -void -memmon() -{ - while (!stop_hub) { - { - st_intr intr(stop_hub); - st_sleep(1); - } - malloc_stats(); - } -} - -int sig_pipe[2]; - -// -// Signals -// - -/** - * Raw signal handler that triggers the (synchronous) handler. - */ -void handle_sig(int sig) { - int err = errno; - cerr << "got signal: " << strsignal(sig) << " (" << sig << ")" << endl; - checkeqnneg(write(sig_pipe[1], &sig, sizeof sig), - static_cast<ssize_t>(sizeof sig)); - errno = err; -} - -/** - * Synchronous part of the signal handler; cleanly interrrupts any threads that - * have marked themselves as interruptible. - */ -void handle_sig_sync() { - st_closing fd(checkerr(st_netfd_open(sig_pipe[0]))); - while (true) { - int sig; - checkeqnneg(st_read(fd, &sig, sizeof sig, ST_UTIME_NO_TIMEOUT), - static_cast<ssize_t>(sizeof sig)); - if (sig == SIGINT) { - if (!stop_hub) stop_hub.set(); - else kill_hub.set(); - } else if (sig == SIGTERM) { - foreach (st_thread_t t, threads) { - st_thread_interrupt(t); - } - } else if (sig == SIGUSR1) { - toggle(do_pause); - } - } -} - -// -// Main -// - -/** - * Initialization and command-line parsing. - */ -int -main(int argc, char **argv) -{ - namespace po = boost::program_options; - try { - GOOGLE_PROTOBUF_VERIFY_VERSION; - - bool is_leader, use_epoll; - int minreps; - uint16_t leader_port, listen_port; - string leader_host; - - // Parse options. 
- po::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "show this help message") - ("debug-memory,M", po::bool_switch(&debug_memory), - "enable memory monitoring/debug outputs") - ("debug-threads,d",po::bool_switch(&debug_threads), - "enable context switch debug outputs") - ("profile-threads,q",po::bool_switch(&profile_threads), - "enable profiling of threads") - ("epoll,e", po::bool_switch(&use_epoll), - "use epoll (select is used by default)") - ("yield-build-up", po::bool_switch(&yield_during_build_up), - "yield periodically during build-up phase of recovery (for recoverer)") - ("yield-catch-up", po::bool_switch(&yield_during_catch_up), - "yield periodically during catch-up phase of recovery (for recoverer)") - ("multirecover,m", po::bool_switch(&multirecover), - "recover from multiple hosts, instead of just one (specified via leader)") - ("rec-twal", po::bool_switch(&rec_twal), - "recover from twal") - ("rec-pwal", po::bool_switch(&rec_pwal), - "recover from pwal") - ("disk,k", po::bool_switch(&disk), - "use disk-based recovery") - ("ship-log", po::bool_switch(&ship_log), - "ship the log instead of the complete database state") - ("dump,D", po::bool_switch(&dump), - "replicas should finally dump their state to a tmp file for " - "inspection/diffing") - ("suppress-txn-msgs", po::bool_switch(&suppress_txn_msgs), - "suppress txn msgs") - ("fake-exec", po::bool_switch(&fake_exec), - "don't actually execute txns") - ("fake-bcast", po::bool_switch(&fake_bcast), - "when using --bcast-async, don't actually perform the socket write") - ("show-updates,U", po::bool_switch(&show_updates), - "log operations that touch (update/read/delete) an existing key") - ("count-updates,u",po::bool_switch(&count_updates), - "count operations that touch (update/read/delete) an existing key") - ("general-txns,g", po::bool_switch(&general_txns), - "issue read and delete transactions as well as the default of (only) insertion/update transactions (for leader)") 
- ("use-pb", po::bool_switch(&use_pb), - "use protocol buffers instead of raw buffers for txns") - ("use-pb-res", po::bool_switch(&use_pb_res), - "use protocol buffers instead of raw buffers for responses") - ("twal", po::bool_switch(&use_twal), - "enable transactional write-ahead logging") - ("pwal", po::bool_switch(&use_pwal), - "enable physical write-ahead logging") - ("force-ser", po::bool_switch(&force_ser), - "force issue_txns to serialize its Txns") - ("leader,l", po::bool_switch(&is_leader), - "run the leader (run replica by default)") - ("exit-on-recovery,x", po::bool_switch(&stop_on_recovery), - "exit after the joiner fully recovers (for leader)") - ("batch-size,b", po::value<int>(&batch_size)->default_value(100), - "number of txns to batch up in each msg (for leader)") - ("tpcc", po::bool_switch(&do_tpcc), - "run the TPCC workload") - ("exit-on-seqno,X",po::value<int>(&stop_on_seqno)->default_value(-1), - "exit after txn seqno is issued (for leader)") - ("accept-joiner-size,s", - po::value<size_t>(&accept_joiner_size)->default_value(0), - "accept recovering joiner (start recovery) after DB grows to this size " - "(for leader)") - ("handle-responses-display", - po::value<int>(&handle_responses_display)->default_value(0), - "number of responses before printing current handling rate (for leader)") - ("catch-up-display", - po::value<int>(&catch_up_display)->default_value(0), - "number of catch-up txns before printing current recovery rate and queue length (for recoverer)") - ("issue-display", - po::value<int>(&issue_display)->default_value(0), - "number of txns before showing the current issue rate (for leader)") - ("process-display", - po::value<int>(&process_display)->default_value(0), - "number of txns before showing the current issue rate (for worker)") - ("issuing-interval", - po::value<int>(&issuing_interval)->default_value(0), - "seconds to sleep between issuing txns (for leader)") - ("min-ops,o", - po::value<int>(&min_ops)->default_value(5), - "lower 
bound on randomly generated number of operations per txn (for leader)") - ("max-ops,O", - po::value<int>(&max_ops)->default_value(5), - "upper bound on randomly generated number of operations per txn (for leader)") - ("warehouses,w", - po::value<int>(&nwarehouses)->default_value(1), - "number of warehouses to model") - ("fail-seqno", - po::value<int>(&fail_seqno)->default_value(0), - "fail after processing this seqno (for replica only)") - ("accept-joiner-seqno,j", - po::value<int>(&accept_joiner_seqno)->default_value(0), - "accept recovering joiner (start recovery) after this seqno (for leader " - "only)") - ("leader-host,H", - po::value<string>(&leader_host)->default_value(string("localhost")), - "hostname or address of the leader") - ("leader-port,P", - po::value<uint16_t>(&leader_port)->default_value(7654), - "port the leader listens on") - ("read-buf", po::value<size_t>(&read_buf_size)->default_value(1e7), - "size of the incoming (read) buffer in bytes") - ("write-buf", po::value<size_t>(&buf_size)->default_value(1e5), - "size of the outgoing (write) buffer in bytes") - ("yield-interval,y", po::value<int>(&yield_interval)->default_value(1000), - "number of txns before yielding") - ("timelim,T", po::value<long long>(&timelim)->default_value(0), - "general network IO time limit in milliseconds, or 0 for none") - ("write-thresh,w", po::value<long long>(&write_thresh)->default_value(200), - "if positive and any txn write exceeds this, then print a message") - ("read-thresh,r", po::value<long long>(&read_thresh)->default_value(0), - "if positive and any txn read exceeds this, then print a message") - ("listen-port,p", po::value<uint16_t>(&listen_port)->default_value(7654), - "port to listen on (for worker)") - ("timeout,t", po::value<st_utime_t>(&timeout)->default_value(200000), - "timeout for some IO operations that should actually time out (in microseconds)") - ("test", "execute unit tests instead of running the normal system") - ("minreps,n", 
po::value<int>(&minreps)->default_value(2), - "minimum number of replicas the system is willing to process txns on"); - - po::variables_map vm; - try { - po::store(po::parse_command_line(argc, argv, desc), vm); - po::notify(vm); - - if (vm.count("help")) { - cout << desc << endl; - return 0; - } - - // Validate arguments. - check(min_ops > 0); - check(max_ops > 0); - check(max_ops >= min_ops); - } catch (std::exception &ex) { - cerr << ex.what() << endl << endl << desc << endl; - return 1; - } - - // Run unit-tests. - if (vm.count("test")) { - InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - } - - // Initialize support for ST working with asynchronous signals. - check0x(pipe(sig_pipe)); - struct sigaction sa; - sa.sa_handler = handle_sig; - check0x(sigemptyset(&sa.sa_mask)); - sa.sa_flags = 0; - check0x(sigaction(SIGINT, &sa, nullptr)); - check0x(sigaction(SIGTERM, &sa, nullptr)); - check0x(sigaction(SIGUSR1, &sa, nullptr)); - - // Initialize ST. - if (use_epoll) check0x(st_set_eventsys(ST_EVENTSYS_ALT)); - check0x(st_init()); - my_spawn(bind(handle_sig_sync), "handle_sig_sync"); - if (debug_threads || profile_threads) { - st_set_switch_out_cb(switch_out_cb); - st_set_switch_in_cb(switch_in_cb); - } - - // Initialize thread manager for clean shutdown of all threads. - thread_eraser eraser; - threads.insert(st_thread_self()); - threadnames[st_thread_self()] = "main"; - - // Print memory debugging information. - if (debug_memory) { - my_spawn(memmon, "memmon"); - } - - long long start = thread_start_time = current_time_millis(); - - // At the end, cleanly stop the bcaster thread and print thread profiling - // information. 
- finally f(lambda() { - if (profile_threads) { - long long end = current_time_millis(); - long long all = end - __ref(start); - cout << "thread profiling results:" << endl; - long long total = 0; - typedef pair<st_thread_t, long long> entry; - foreach (entry p, threadtimes) { - total += p.second; - } - foreach (entry p, threadtimes) { - cout << "- " << threadname(p.first) << ": " << p.second << " ms (" - << pct(p.second, total) << "% of total, " - << pct(p.second, all) << "% of all)" << endl; - } - cout << "- total: " << total << " ms (" << pct(total, all) - << "% of all)" << endl; - cout << "- unaccounted: " << all - total << " ms (" - << pct(all - total, all) << "% of all)" << endl; - cout << "- all: " << all << " ms" << endl; - } - }); - - // Initialize the map. - init_map(g_map); - - cout << "pid " << getpid() << endl; - - // Which role are we? - if (is_leader) { - if (use_pb) { - if (use_pb_res) { - run_leader<pb_traits, pb_traits>(minreps, leader_port); - } else { - run_leader<pb_traits, rb_traits>(minreps, leader_port); - } - } else { - if (use_pb_res) { - run_leader<rb_traits, pb_traits>(minreps, leader_port); - } else { - run_leader<rb_traits, rb_traits>(minreps, leader_port); - } - } - } else { - if (use_pb) { - if (use_pb_res) { - run_replica<pb_traits, pb_traits>(leader_host, leader_port, listen_port); - } else { - run_replica<pb_traits, rb_traits>(leader_host, leader_port, listen_port); - } - } else { - if (use_pb_res) { - run_replica<rb_traits, pb_traits>(leader_host, leader_port, listen_port); - } else { - run_replica<rb_traits, rb_traits>(leader_host, leader_port, listen_port); - } - } - } - - return 0; - } catch (std::exception &ex) { - // Must catch all exceptions at the top to make the stack unwind. - cerr_thread_ex(ex) << endl; - return 1; - } -} - -/** - * Run the leader. 
- */ -template<typename Types, typename RTypes> -void -run_leader(int minreps, uint16_t leader_port) -{ - cout << "starting as leader" << endl; - st_multichannel<long long> recover_signals; - - scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); - g_twal = twal.get(); - scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); - g_wal = pwal.get(); - - // Wait until all replicas have joined. - st_netfd_t listener = st_tcp_listen(leader_port); - st_closing close_listener(listener); - vector<replica_info> replicas; - st_closing_all_infos close_replicas(replicas); - cout << "waiting for at least " << minreps << " replicas to join" << endl; - for (int i = 0; i < minreps; ++i) { - st_netfd_t fd; - { - st_intr intr(stop_hub); - fd = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - } - Join join = readmsg<Join>(fd); - replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); - } - cout << "got all " << minreps << " replicas" << endl; - - // Construct the initialization message. - Init init; - init.set_txnseqno(0); - init.set_multirecover(multirecover); - foreach (replica_info r, replicas) { - SockAddr *psa = init.add_node(); - psa->set_host(r.host()); - psa->set_port(r.port()); - } - - // Send init to each initial replica. - foreach (replica_info r, replicas) { - init.set_yourhost(r.host()); - sendmsg(r.fd(), init); - } - - // Start dispatching queries. - st_bool accept_joiner; - int seqno = 0; - st_channel<replica_info> newreps; - st_channel<st_netfd_t> delreps; - foreach (const replica_info &r, replicas) newreps.push(r); - function<void()> f; - if (do_tpcc) - f = bind(issue_tpcc, ref(newreps), ref(delreps), ref(seqno), ref(accept_joiner)); - else - f = bind(issue_txns<Types>, ref(newreps), ref(seqno), ref(accept_joiner)); - st_joining join_issue_txns(my_spawn(f, "issue_txns")); - - finally fin(bind(summarize, "LEADER", ref(seqno))); - - try { - // Start handling responses. 
- st_thread_group handlers; - int rid = 0; - foreach (replica_info r, replicas) { - function<void()> fn; - if (do_tpcc) - fn = bind(handle_tpcc_responses, r.fd(), ref(seqno), rid++, - ref(recover_signals), ref(delreps), true); - else - fn = bind(handle_responses<RTypes>, r.fd(), ref(seqno), rid++, - ref(recover_signals), true); - handlers.insert(my_spawn(fn, "handle_responses")); - } - - // Accept the recovering node, and tell it about the online replicas. - st_netfd_t joiner; - try { - st_intr intr(stop_hub); - joiner = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - accept_joiner.waitset(); - } catch (std::exception &ex) { - string s(ex.what()); - if (s.find("Interrupted system call") == s.npos) - throw; - else - throw break_exception(); - } - Join join = readmsg<Join>(joiner); - replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); - cout << "setting seqno to " << seqno << endl; - init.set_txnseqno(seqno); - init.set_yourhost(replicas.back().host()); - sendmsg(joiner, init); - recover_signals.push(current_time_millis()); - - // Start streaming txns to joiner. 
- cout << "start streaming txns to joiner" << endl; - function<void()> handle_responses_joiner_fn; - if (do_tpcc) - handle_responses_joiner_fn = - bind(handle_tpcc_responses, joiner, ref(seqno), rid++, - ref(recover_signals), ref(delreps), false); - else - handle_responses_joiner_fn = - bind(handle_responses<RTypes>, joiner, ref(seqno), rid++, - ref(recover_signals), false); - newreps.push(replicas.back()); - handlers.insert(my_spawn(handle_responses_joiner_fn, - "handle_responses_joiner")); - } catch (break_exception &ex) { - } catch (std::exception &ex) { - // TODO: maybe there's a cleaner way to do this final step before waiting with the join - cerr_thread_ex(ex) << endl; - throw; - } -} - -void -summarize(const char *role, int seqno) -{ - cout << role << " SUMMARY\n"; - if (do_tpcc) { - cout << "seqno: " << seqno << endl; - if (g_tables != nullptr) { - cout << "state:\n"; - g_tables->show(); - string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); - if (dump) { - // XXX iterate & dump - } - } - } else { - cout << "- total updates = " << updates << "\n" - << "- final DB state: seqno = " << seqno << ", size = " - << g_map.size() << endl; - string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); - if (dump) { - cout << "- dumping to " << fname << endl; - ofstream of(fname.c_str()); - of << "seqno: " << seqno << endl; - foreach (const entry &p, g_map) { - of << p.first << ": " << p.second << endl; - } - } - } -} - -/** - * Run a replica. - */ -template<typename Types, typename RTypes> -void -run_replica(string leader_host, uint16_t leader_port, uint16_t listen_port) -{ - if (disk) { - // Disk IO threads. - for (int i = 0; i < 5; ++i) { - //thread somethread(threadfunc); - } - } - - // Initialize database state. 
- int seqno = -1; - mii &map = g_map; - commons::array<char> recarr(0); - if (do_tpcc) { - TPCCTables *tables = new TPCCTables(); - g_tables.reset(tables); - SystemClock* clock = new SystemClock(); - - // Create a generator for filling the database. - RealRandomGenerator* random = new RealRandomGenerator(); - NURandC cLoad = NURandC::makeRandom(random); - random->setC(cLoad); - - // Generate the data - cout << "loading " << nwarehouses << " warehouses" << endl; - char now[Clock::DATETIME_SIZE+1]; - clock->getDateTimestamp(now); - TPCCGenerator generator(random, now, Item::NUM_ITEMS, - District::NUM_PER_WAREHOUSE, - Customer::NUM_PER_DISTRICT, - NewOrder::INITIAL_NUM_PER_DISTRICT); - long long start_time = current_time_millis(); - generator.makeItemsTable(tables); - for (int i = 0; i < nwarehouses; ++i) { - generator.makeWarehouse(tables, i+1); - } - cout << "loaded " << nwarehouses << " warehouses in " - << current_time_millis() - start_time << " ms" << endl; - tables->show(); - } - recovery_t orig = rec_twal ? g_tables->ser(0, 0, seqno) : recovery_t(); - - finally f(bind(summarize, "REPLICA", ref(seqno))); - st_channel<recovery_t> send_states; - - cout << "starting as replica on port " << listen_port << endl; - - // Listen for connections from other replicas. - st_netfd_t listener = st_tcp_listen(listen_port); - - // Connect to the leader and join the system. - st_netfd_t leader = st_tcp_connect(leader_host.c_str(), leader_port, - timeout); - st_closing closing(leader); - Join join; - join.set_port(listen_port); - sendmsg(leader, join); - Init init; - { - st_intr intr(stop_hub); - readmsg(leader, init); - } - uint32_t listen_host = init.yourhost(); - multirecover = init.multirecover(); - - // Display the info. 
- cout << "got init msg with txn seqno " << init.txnseqno() - << " and hosts:" << endl; - vector<st_netfd_t> replicas; - st_closing_all close_replicas(replicas); - int mypos = -1; - for (int i = 0; i < init.node_size(); ++i) { - const SockAddr &sa = init.node(i); - char buf[INET_ADDRSTRLEN]; - in_addr host = { sa.host() }; - bool is_self = sa.host() == listen_host && sa.port() == listen_port; - cout << "- " << checkerr(inet_ntop(AF_INET, &host, buf, - INET_ADDRSTRLEN)) - << ':' << sa.port() << (is_self ? " (self)" : "") << endl; - if (is_self) mypos = i; - if (!is_self && (init.txnseqno() > 0 || rec_twal)) { - replicas.push_back(st_tcp_connect(host, - static_cast<uint16_t>(sa.port()), - timeout)); - } - } - - // Initialize physical or txn log. - scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); - g_twal = twal.get(); - scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); - g_wal = pwal.get(); - - // Process txns. - st_channel<chunk> backlog; - function<void()> process_fn; - if (do_tpcc) - process_fn = bind(process_tpccs, leader, ref(seqno), ref(send_states), - ref(backlog), init.txnseqno(), mypos, init.node_size()); - else - process_fn = bind(process_txns<Types, RTypes>, leader, ref(map), ref(seqno), - ref(send_states), ref(backlog), init.txnseqno(), mypos, - init.node_size()); - st_joining join_proc(my_spawn(process_fn, "process_txns")); - st_joining join_rec(init.txnseqno() == 0 && (multirecover || mypos == 0) ? - my_spawn(bind(recover_joiner, listener, ref(send_states)), - "recover_joiner") : - nullptr); - - try { - // If there's anything to recover. 
- if (init.txnseqno() > 0 || fail_seqno > 0) { - if (do_tpcc) { - - // - // TPCC txns - // - - function<void()> rec_twal_fn = lambda() { - int &seqno = __ref(seqno); - cout << "recovering from twal" << endl; - long long start_time = current_time_millis(); - g_twal->flush(); - sync(); - ifstream inf("twal"); - TpccReq req; - while (inf.peek() != ifstream::traits_type::eof()) { - ASSERT(inf.good()); - readmsg(inf, req); - process_tpcc(req, seqno, nullptr); - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - showdatarate("recovered from twal", inf.tellg(), - current_time_millis() - start_time); - cout << "now at seqno " << seqno << endl; - }; - - function<void()> recv_log_fn = lambda() { - st_netfd_t src = __ref(replicas[0]); - int &seqno = __ref(seqno); - ASSERT(fail_seqno == seqno); - recreq r = { fail_seqno + 1, resume.take() }; - st_write(src, r); - sized_array<char> rbuf(new char[read_buf_size], read_buf_size); - function<void(anchored_stream_reader &reader)> overflow_fn = - lambda(anchored_stream_reader &reader) { - shift_reader(reader); - }; - anchored_stream_reader reader(st_read_fn(src), - st_read_fully_fn(src), - overflow_fn, rbuf.get(), rbuf.size()); - TpccReq req; - while (seqno < r.end_seqno) { - { st_intr intr(stop_hub); readmsg(reader, req); } - process_tpcc(req, seqno, nullptr); - reader.set_anchor(); - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - }; - - if (rec_twal) { - failed.waitset(); - g_tables.reset(new TPCCTables); - tpcc_recovery_header &hdr = *reinterpret_cast<tpcc_recovery_header*>(orig.begin()); - commons::array<char> body(orig.begin() + sizeof(tpcc_recovery_header), - orig.size() - sizeof(tpcc_recovery_header)); - g_tables->deser(mypos, init.node_size(), hdr, body); - body.release(); - rec_twal_fn(); - failed.reset(); - recv_log_fn(); - } - -#if 0 - st_thread_t rec_twal_thread = my_spawn(rec_twal_fn, "rec_twal"); - st_thread_t recv_log_thread = my_spawn(recv_log_fn, "recv_log"); - - st_join(rec_twal_thread); - 
st_join(recv_log_thread); -#endif - - if (rec_pwal) { - // Recover from phy log. - } else if (rec_twal) { - // Recover from txn log. - } else { - - g_tables.reset(new TPCCTables); - - // - // Build-up - // - - if (ship_log) { - } else { - // XXX indent - - cout << "waiting for recovery message" << (multirecover ? "s" : "") - << endl; - long long before_recv = current_time_millis(); - - vector<st_thread_t> recovery_builders; - ASSERT(seqno == -1); - bool first = true; - for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { - recovery_builders.push_back(my_spawn(lambda() { - // Read the recovery message length and header. - tpcc_recovery_header hdr; - checkeqnneg(st_read_fully(__ref(replicas[i]), - &hdr, sizeof hdr, - ST_UTIME_NO_TIMEOUT), - ssize_t(sizeof hdr)); - check(hdr.seqno >= 0); - - cout << "receiving recovery of " << hdr.len << " bytes" << endl; - - long long start_time = current_time_millis(); - __ref(recarr).reset(new char[hdr.len], hdr.len); - checkeqnneg(st_read_fully(__ref(replicas[i]), - __ref(recarr).get(), hdr.len, - ST_UTIME_NO_TIMEOUT), - ssize_t(hdr.len)); - - long long before_deser = current_time_millis(); - showdatarate("received recovery message", size_t(hdr.len), before_deser - start_time); - - if (__ref(seqno) == -1) - __ref(seqno) = hdr.seqno; - else - checkeq(__ref(seqno), hdr.seqno); - - g_tables->deser(__ctx(i), __ref(init).node_size(), hdr, __ref(recarr)); - - long long end_time = current_time_millis(); - showdatarate("deserialized recovery message", size_t(hdr.len), end_time - before_deser); - cout << "receive & deserialize took " << end_time - __ref(before_recv) - << " ms total; now at seqno " << hdr.seqno << endl; - cout << "after deserialize, db state is now at seqno " - << hdr.seqno << ":" << endl; - g_tables->show(); - -#if 0 - // Resize the table if necessary. 
- - commons::array<entry> &table = __ref(map).get_table(); - if (!__ref(first)) { - checkeq(table.size(), hdr.total); - checkeq(__ref(map).size(), hdr.size); - } else { - __ref(first) = false; - if (table.size() != hdr.total) { - table.reset(new entry[hdr.total], hdr.total); - } - } - - // Receive straight into the table. - pair<size_t, size_t> range = - recovery_range(table.size(), __ctx(i), __ref(init).node_size()); - // Check that we agree on the number of entries. - checkeq(range.second - range.first, hdr.count); - // Check that the count is a power of two. - checkeq(hdr.count & (hdr.count - 1), size_t(0)); - size_t rangelen = sizeof(entry) * hdr.count; - // Read an extra char to ensure that we're at the EOF. - checkeqnneg(st_read_fully(__ref(replicas[i]), - table.begin() + range.first, rangelen + 1, - ST_UTIME_NO_TIMEOUT), - ssize_t(rangelen)); -#endif - }, "recovery_builder" + lexical_cast<string>(i))); - } - foreach (st_thread_t t, recovery_builders) { - st_join(t); - } - - } - } - - // - // Catch-up - // - - long long mid_time = current_time_millis(); - int mid_seqno = seqno; - TpccReq req; - while (!backlog.empty()) { - chunk chunk = backlog.take(); - cout << "took from backlog, now has " << backlog.queue().size() - << " chunks" << endl; - sized_array<char> &buf = chunk.get<0>(); - char *begin = chunk.get<1>(), *end = chunk.get<2>(); - ASSERT(buf.get() <= begin && begin < buf.end()); - ASSERT(buf.get() < end && end < buf.end()); - process_buf(begin, end, req, seqno); - } - showtput("replayer caught up; from backlog replayed", - current_time_millis(), mid_time, seqno, mid_seqno); - - } else { - - // - // Simple txns - // - - if (rec_pwal) { - // Recover from physical log. 
- cout << "recovering from pwal" << endl; - long long start_time = current_time_millis(); - ifstream inf("pwal"); - binary_iarchive in(inf); - int rseqno = -1; - while (inf.peek() != ifstream::traits_type::eof()) { - int op; - in & op; - switch (op) { - case op_del: - { - int key; - in & key; - mii::iterator it = map.find(key); - map.erase(it); - break; - } - case op_write: - { - int key, val; - in & key & val; - map[key] = val; - break; - } - case op_commit: - ++rseqno; - break; - } - if (check_interval(rseqno, yield_interval)) st_sleep(0); - } - seqno = init.txnseqno() - 1; - showdatarate("recovered from pwal", inf.tellg(), current_time_millis() - start_time); - cout << "now at seqno " << rseqno << " (really: " << seqno << ")" << endl; - } else { - - // - // Build-up - // - - cout << "waiting for recovery message" << (multirecover ? "s" : "") - << endl; - long long before_recv = current_time_millis(); - - vector<st_thread_t> recovery_builders; - ASSERT(seqno == -1); - bool first = true; - for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { - recovery_builders.push_back(my_spawn(lambda() { - // Read the recovery message length and header. - size_t len; - recovery_header hdr; - char buf[sizeof len + sizeof hdr]; - //try { - checkeqnneg(st_read_fully(__ref(replicas[i]), - buf, sizeof len + sizeof hdr, - ST_UTIME_NO_TIMEOUT), - ssize_t(sizeof len + sizeof hdr)); - //} catch (...) { // TODO just catch "Connection reset by peer" - //return; - //} - raw_reader rdr(buf); - rdr.read(len); - rdr.read(hdr); - check(hdr.seqno >= 0); - - // Resize the table if necessary. - commons::array<entry> &table = __ref(map).get_table(); - if (!__ref(first)) { - checkeq(table.size(), hdr.total); - checkeq(__ref(map).size(), hdr.size); - } else { - __ref(first) = false; - __ref(map).set_size(hdr.size); - if (table.size() != hdr.total) { - table.reset(new entry[hdr.total], hdr.total); - } - } - - // Receive straight into the table. 
- pair<size_t, size_t> range = - recovery_range(table.size(), __ctx(i), __ref(init).node_size()); - // Check that we agree on the number of entries. - checkeq(range.second - range.first, hdr.count); - // Check that the count is a power of two. - checkeq(hdr.count & (hdr.count - 1), size_t(0)); - size_t rangelen = sizeof(entry) * hdr.count; - // Read an extra char to ensure that we're at the EOF. - long long start_time = current_time_millis(); - checkeqnneg(st_read_fully(__ref(replicas[i]), - table.begin() + range.first, rangelen + 1, - ST_UTIME_NO_TIMEOUT), - ssize_t(rangelen)); - long long end_time = current_time_millis(); - - if (__ref(seqno) != -1) - checkeq(__ref(seqno), hdr.seqno); - __ref(seqno) = hdr.seqno; - showdatarate("got recovery message", len, end_time - start_time); - cout << "receive took " << end_time - __ref(before_recv) - << " ms total; now at seqno " << hdr.seqno << endl; -#if 0 - Recovery recovery; - long long receive_start = 0, receive_end = 0; - size_t len = 0; - { - st_intr intr(stop_hub); - len = readmsg(__ref(replicas)[__ctx(i)], recovery, &receive_start, - &receive_end); - } - long long build_start = current_time_millis(); - cout << "got recovery message of " << len << " bytes in " - << build_start - __ref(before_recv) << " ms: xfer took " - << receive_end - receive_start << " ms, deserialization took " - << build_start - receive_end << " ms" << endl; - for (int i = 0; i < recovery.pair_size(); ++i) { - const Recovery_Pair &p = recovery.pair(i); - __ref(map)[p.key()] = p.value(); - if (i % yield_interval == 0) { - if (yield_during_build_up) st_sleep(0); - } - } - check(recovery.seqno() >= 0); - int seqno = __ref(seqno) = recovery.seqno(); - long long build_end = current_time_millis(); - cout << "receive and build-up took " - << build_end - __ref(before_recv) - << " ms; built up map of " << recovery.pair_size() - << " records in " << build_end - build_start - << " ms; now at seqno " << seqno << endl; -#endif - }, "recovery_builder" + 
lexical_cast<string>(i))); - } - foreach (st_thread_t t, recovery_builders) { - st_join(t); - } - } - - // - // Catch-up - // - - long long mid_time = current_time_millis(); - int mid_seqno = seqno; - // XXX - using msg::TxnBatch; - using msg::Txn; - commons::array<char> rbuf(0), wbuf(buf_size); - reader reader(nullptr, rbuf.get(), rbuf.size()); - writer writer(lambda(const void*, size_t) { - throw not_supported_exception("should not be writing responses during catch-up phase"); - }, wbuf.get(), wbuf.size()); - stream s(reader, writer); - TxnBatch batch(s); - while (!backlog.empty()) { - chunk chunk = backlog.take(); - sized_array<char> &buf = chunk.get<0>(); - ASSERT(buf.get() <= chunk.get<1>() && chunk.get<1>() < buf.end()); - ASSERT(buf.get() < chunk.get<2>() && chunk.get<2>() < buf.end()); - ASSERT(chunk.get<1>() < chunk.get<2>()); - swap(buf, reader.buf()); - reader.reset_range(chunk.get<1>(), chunk.get<2>()); - while (reader.start() < reader.end()) { - char *start = reader.start(); - uint32_t prefix = ntohl(reader.read<uint32_t>()); - ASSERT(prefix < 10000); - ASSERT(start + sizeof(uint32_t) + prefix <= reader.end()); - batch.Clear(); - for (int t = 0; t < batch.txn_size(); ++t) { - const Txn &txn = batch.txn(t); - if (rec_pwal) seqno = txn.seqno() - 1; - process_txn<rb_traits, rb_traits>(map, txn, seqno, nullptr); - if (fake_exec && !Types::is_pb()) { - reader.skip(txn.op_size() * Op_Size); - } - - if (check_interval(txn.seqno(), yield_interval)) st_sleep(0); - if (check_interval(txn.seqno(), process_display)) { - cout << "caught up txn " << txn.seqno() - << "; db size = " << map.size() - << "; seqno = " << seqno - << "; backlog.size = " << backlog.queue().size() << endl; - } - } - ASSERT(start + sizeof(uint32_t) + prefix == reader.start()); - } - } - g_caught_up = true; -#if 0 - while (!backlog.empty()) { - using pb::Txn; - shared_ptr<Txn> p = backlog.take(); - process_txn<pb_traits, pb_traits>(map, *p, seqno, nullptr); - if (check_interval(p->seqno(), 
catch_up_display)) { - cout << "processed txn " << p->seqno() << " off the backlog; " - << "backlog.size = " << backlog.queue().size() << endl; - } - if (check_interval(p->seqno(), yield_interval)) { - // Explicitly yield. (Note that yielding does still effectively - // happen anyway because process_txn is a yield point.) - st_sleep(0); - } - } -#endif - showtput("replayer caught up; from backlog replayed", - current_time_millis(), mid_time, seqno, mid_seqno); - } - } - } catch (std::exception &ex) { - cerr_thread_ex(ex) << endl; - throw; - } - - stop_hub.insert(st_thread_self()); -} Modified: ydb/trunk/src/msg.h =================================================================== --- ydb/trunk/src/msg.h 2009-03-20 07:58:28 UTC (rev 1313) +++ ydb/trunk/src/msg.h 2009-03-20 17:45:38 UTC (rev 1314) @@ -55,7 +55,7 @@ using namespace commons; using namespace std; -short unset = -7654; +static const short unset = -7654; using ydb::pb::Op_OpType; Modified: ydb/trunk/src/stxn.lzz.clamp =================================================================== --- ydb/trunk/src/stxn.lzz.clamp 2009-03-20 07:58:28 UTC (rev 1313) +++ ydb/trunk/src/stxn.lzz.clamp 2009-03-20 17:45:38 UTC (rev 1314) @@ -556,3 +556,178 @@ { return recovery_t(); } + +class response_handler +{ +public: + response_handler(st_netfd_t replica, const int &seqno, int rid, + st_multichannel<long long> &recover_signals, bool caught_up) + : + replica(replica), + seqno(seqno), + rid(rid), + recover_signals(recover_signals), + caught_up(caught_up), + sub(recover_signals.subscribe()), + start_time(current_time_millis()), + recovery_start_time(caught_up ? -1 : start_time), + recovery_end_time(-1), + start_seqno(seqno), + recovery_start_seqno(caught_up ? 
-1 : seqno), + recovery_end_seqno(-1), + last_seqno(-1) + {} + + template<typename Types> + void run() { + typedef typename Types::Response Response; + typedef typename Types::ResponseBatch ResponseBatch; + + finally f(bind(&response_handler::cleanup, this)); + + commons::array<char> rbuf(read_buf_size), wbuf(buf_size); + st_reader reader(replica, rbuf.get(), rbuf.size()); + writer w(lambda(const void*, size_t) { + throw not_supported_exception("response handler should not be writing"); + }, wbuf.get(), wbuf.size()); + stream s(reader,w); + + scoped_ptr<ResponseBatch> pbatch(new_ResponseBatch<ResponseBatch>(s)); + ResponseBatch &batch = *pbatch; + + long long last_display_time = current_time_millis(); + + function<void()> loop_cleanup = + bind(&response_handler::loop_cleanup, this); + + while (true) { + finally f(loop_cleanup); + uint32_t prefix = 0; + + // Read the message, but correctly respond to interrupts so that we can + // cleanly exit (slightly tricky). + if (last_seqno + 1 == seqno) { + // Stop-interruptible in case we're already caught up. + try { + st_intr intr(stop_hub); + if (Types::is_pb()) readmsg(reader, batch); + else { prefix = ntohl(reader.read<uint32_t>()); batch.Clear(); } + } catch (...) { // TODO: only catch interruptions + // This check on seqnos is OK for termination since the seqno will + // never grow again if stop_hub is set. + if (last_seqno + 1 == seqno) { + cout << rid << ": "; + cout << "clean stop; next expected seqno is " << seqno + << " (last seqno was " << last_seqno << ")" << endl; + break; + } else { + continue; + } + } + } else { + // Only kill-interruptible because we want a clean termination (want + // to get all the acks back). 
+ st_intr intr(kill_hub); + if (Types::is_pb()) readmsg(reader, batch); + else { prefix = ntohl(reader.read<uint32_t>()); batch.Clear(); } + } + + for (int i = 0; i < batch.res_size(); ++i) { + const Response &res = batch.res(i); + // Determine if this response handler's host (the only joiner) has finished + // catching up. If it has, then broadcast a signal so that all response + // handlers will know about this event. + int rseqno = res.seqno(); + if (rseqno <= last_seqno) + throw msg_exception(string("response seqno decreased from ") + + lexical_cast<string>(last_seqno) + " to " + + lexical_cast<string>(rseqno)); + bool rcaught_up = res.caught_up(); + for (int r = 0; r < res.result_size(); ++r) { + cout << rseqno << last_seqno << res.result_size() << " " << r << " " << res.result(r) << endl; + } + if (!caught_up && rcaught_up) { + long long now = current_time_millis(), time_diff = now - start_time; + caught_up = true; + recover_signals.push(now); + cout << rid << ": "; + cout << "recovering node caught up; took " + << time_diff << " ms" << endl; + // This will cause the program to exit eventually, but cleanly, such that + // the recovery time will be set first, before the eventual exit (which + // may not even happen in the current iteration). + if (stop_on_recovery) { + cout << "stopping on recovery" << endl; + stop_hub.set(); + } + } + if (check_interval(rseqno, handle_responses_display)) { + cout << rid << ": " << "got response " << rseqno << " from " + << replica << "; "; + long long display_time = current_time_millis(); + showtput("handling", display_time, last_display_time, rseqno, + rseqno - handle_responses_display); + last_display_time = display_time; + } + if (check_interval(rseqno, yield_interval)) { + st_sleep(0); + } + last_seqno = rseqno; + } + } + } + +private: + void loop_cleanup() { + // The first timestamp that comes down the subscription pipeline is the + // recovery start time, issued by the main thread. 
The second one is the + // recovery end time, issued by the response handler associated with the + // joiner. + if (recovery_start_time == -1 && !sub.empty()) { + recovery_start_time = sub.take(); + recovery_start_seqno = last_seqno; + cout << rid << ": "; + showtput("before recovery, finished", recovery_start_time, start_time, + recovery_start_seqno, 0); + } else if (recovery_end_time == -1 && !sub.empty()) { + recovery_end_time = sub.take(); + recovery_end_seqno = last_seqno; + cout << rid << ": "; + showtput("during recovery, finished roughly", recovery_end_time, + recovery_start_time, recovery_end_seqno, recovery_start_seqno); + } + } + + void cleanup() { + long long end_time = current_time_millis(); + cout << rid << ": "; + showtput("handled roughly", end_time, start_time, seqno, start_seqno); + if (recovery_end_time > -1) { + cout << rid << ": "; + showtput("after recovery, finished", end_time, recovery_end_time, + seqno, recovery_end_seqno); + } + } + + st_netfd_t replica; + const int &seqno; + int rid; + st_multichannel<long long> &recover_signals; + bool caught_up; + st_channel<long long> &sub; + long long start_time, recovery_start_time, recovery_end_time; + int start_seqno, recovery_start_seqno, recovery_end_seqno, last_seqno; +}; + +/** + * Swallow replica responses. 
+ */ +template<typename Types> +void +handle_responses(st_netfd_t replica, const int &seqno, int rid, + st_multichannel<long long> &recover_signals, bool caught_up) +{ + response_handler h(replica, seqno, rid, recover_signals, caught_up); + h.run<Types>(); +} + Copied: ydb/trunk/src/ydb.lzz.clamp (from rev 1313, ydb/trunk/src/main2.lzz.clamp) =================================================================== --- ydb/trunk/src/ydb.lzz.clamp (rev 0) +++ ydb/trunk/src/ydb.lzz.clamp 2009-03-20 17:45:38 UTC (rev 1314) @@ -0,0 +1,1073 @@ +#hdr +#include "unsetprefs.h" +#include <boost/function.hpp> +#include <boost/scoped_ptr.hpp> +#include <string> +#include <iostream> +#include <st.h> +#include <commons/st/st.h> +#include "tpcc/clock.h" +#include "tpcc/randomgenerator.h" +#include "tpcc/tpccclient.h" +#include "tpcc/tpccgenerator.h" +#include "tpcc/tpcctables.h" +#include "util.hh" +#include "tpcc.hh" +#include "stxn.hh" +#include "main.hh" + +using namespace boost; +using namespace std; +using namespace commons; +#end + +#src +#include "unsetprefs.h" +#include <csignal> // sigaction, etc. +#include <cstring> // strsignal, size_t +#include <boost/program_options.hpp> +#include <gtest/gtest.h> +#include <malloc.h> +#include <string> +#include "setprefs.h" +#end + +using namespace google; +using namespace testing; + +// +// Utilities/system +// + +/** + * Delegate for running thread targets. + * \param[in] f The function to execute. + * \param[in] intr Whether to signal stop_hub on an exception. + */ +void +my_spawn_helper(const function0<void> f, bool intr) +{ + thread_eraser eraser; + try { + f(); + } catch (std::exception &ex) { + cerr_thread_ex(ex) << (intr ? "; interrupting!" : "") << endl; + if (intr) stop_hub.set(); + } +} + +/** + * Spawn a thread using ST but wrap it in an exception handler that interrupts + * all other threads (hopefully causing them to unwind). + * \param[in] f The function to execute. 
+ * \param[in] intr Whether to signal stop_hub on an exception. Not actually + * used anywhere. + */ +st_thread_t +my_spawn(const function0<void> &f, string name, bool intr = false) +{ + st_thread_t t = st_spawn(bind(my_spawn_helper, f, intr)); + threads.insert(t); + threadnames[t] = name; + return t; +} + +/** + * Memory monitor. + */ +void +memmon() +{ + while (!stop_hub) { + { + st_intr intr(stop_hub); + st_sleep(1); + } + malloc_stats(); + } +} + +int sig_pipe[2]; + +// +// Signals +// + +/** + * Raw signal handler that triggers the (synchronous) handler. + */ +void handle_sig(int sig) { + int err = errno; + cerr << "got signal: " << strsignal(sig) << " (" << sig << ")" << endl; + checkeqnneg(write(sig_pipe[1], &sig, sizeof sig), + static_cast<ssize_t>(sizeof sig)); + errno = err; +} + +/** + * Synchronous part of the signal handler; cleanly interrrupts any threads that + * have marked themselves as interruptible. + */ +void handle_sig_sync() { + st_closing fd(checkerr(st_netfd_open(sig_pipe[0]))); + while (true) { + int sig; + checkeqnneg(st_read(fd, &sig, sizeof sig, ST_UTIME_NO_TIMEOUT), + static_cast<ssize_t>(sizeof sig)); + if (sig == SIGINT) { + ... [truncated message content] |
From: <yan...@us...> - 2009-03-20 20:40:36
|
Revision: 1316 http://assorted.svn.sourceforge.net/assorted/?rev=1316&view=rev Author: yangzhang Date: 2009-03-20 20:40:27 +0000 (Fri, 20 Mar 2009) Log Message: ----------- - simplified Makefile a bit for aux programs - made ccache, pch usage optional - updated, fixed warnings in aux programs Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/serperf.cc ydb/trunk/src/stxn.lzz.clamp ydb/trunk/src/tpcc/Makefile ydb/trunk/src/tpcc/tpcctables.h ydb/trunk/src/unsetprefs.h ydb/trunk/src/util.lzz ydb/trunk/src/ydb.lzz.clamp Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/Makefile 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,5 +1,6 @@ TARGET := ydb WTF := wtf +CCACHE := ccache CLAMPS := $(wildcard *.lzz.clamp) PURELZZS := $(foreach lzz,$(wildcard *.lzz),$(if $(wildcard $(lzz).clamp),,$(lzz))) @@ -39,7 +40,7 @@ OPT := -g3 endif # CXX := $(WTF) ag++ -k --Xcompiler # $(CXX) -CXX := $(WTF) ccache $(CXX) -pipe +CXX := $(WTF) $(CCACHE) $(CXX) -pipe CC := $(CXX) # for linking LDFLAGS := -pthread $(GPROF) LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ @@ -79,7 +80,6 @@ -std=gnu++0x \ -march=native \ $(CXXFLAGS) -CXXFLAGS := $(CXXFLAGS0) -include pch.h # \ -Wmissing-noreturn \ @@ -93,6 +93,13 @@ -Wstrict-overflow \ -Winline \ +ifeq ($(NPCH),) + CXXFLAGS := $(CXXFLAGS0) -include pch.h +%.o: pch.h.gch +else + CXXFLAGS := $(CXXFLAGS0) +endif + PBCXXFLAGS := $(OPT) -Wall -Werror $(GPROF) all: $(TARGET) @@ -100,13 +107,12 @@ %.pb.o: %.pb.cc %.pb.h $(CXX) -c $(PBCXXFLAGS) $(OUTPUT_OPTION) $< -%.o: pch.h.gch stxn.o: main.hh $(PBHDRS) main.o: util.hh msg.h $(PBHDRS) util.o: msg.h $(PBHDRS) ydb.o: main.hh stxn.hh tpcc.hh util.hh $(PBHDRS) tpcc.o: main.hh util.hh $(PBHDRS) -ydb: ydb.o tpcc.o main.o util.o stxn.o ydb.pb.o $(TPCC_OBJS) # $(OBJS) +ydb: $(OBJS) tpcc/%.o: tpcc/%.cc make -C tpcc/ @@ -135,7 +141,7 @@ 
pch.h: svn ls -rHEAD -R | \ - grep -v '/$$' | \ + egrep -v '/$$|Makefile' | \ xargs sed 's/.*\binclude\b *<\(.*\)>.*/\#include <\1>/; t succ; d; :succ /commons/ d' | \ sort -u > $@ @@ -157,17 +163,10 @@ .PHONY: clean -.SECONDARY: $(SRCS) $(HDRS) $(OBJS) main.lzz +.SECONDARY: $(SRCS) $(HDRS) $(OBJS) main.lzz pch.h.gch ### -serperf: serperf.o ydb.pb.o - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) - -# serperf.cc ydb.pb.h - -p2: p2.cc - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) - -ser: ser.cc msg.h ydb.pb.o - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) $(OUTPUT_OPTION) +serperf: ydb.pb.o +ser: ydb.pb.o +ser.o: msg.h Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/main.lzz.clamp 2009-03-20 20:40:27 UTC (rev 1316) @@ -2,17 +2,9 @@ #include "unsetprefs.h" #include <boost/archive/binary_iarchive.hpp> #include <boost/archive/binary_oarchive.hpp> -#include <boost/bind.hpp> -#include <boost/foreach.hpp> -//#include <boost/range/iterator_range.hpp> -//#include <boost/shared_ptr.hpp> #include <boost/tuple/tuple.hpp> -#include <commons/assert.h> -#include <commons/nullptr.h> #include <commons/st/st.h> -#include <commons/time.h> #include <fstream> // ofstream -#include <iostream> #include <vector> #include "msg.h" #include "util.hh" @@ -27,7 +19,14 @@ #end #src +#include "unsetprefs.h" +#include <boost/foreach.hpp> +#include <commons/assert.h> +#include <commons/nullptr.h> +#include <commons/time.h> +#include <iostream> #include <unistd.h> // pipe, write, sync +#include "setprefs.h" #end typedef tuple<sized_array<char>, char*, char*> chunk; Modified: ydb/trunk/src/serperf.cc =================================================================== --- ydb/trunk/src/serperf.cc 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/serperf.cc 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,16 +1,18 @@ #include <iostream> #include <sstream> 
#include <commons/time.h> +#include <commons/utility.h> #include "ydb.pb.h" #include <boost/archive/binary_oarchive.hpp> using namespace boost::archive; using namespace std; using namespace commons; +using namespace ydb::pb; -int main(int argc, char **argv) { +int main(UNUSED int argc, char **argv) { const int count = atoi(argv[1]), batchsize = atoi(argv[2]); - + TxnBatch batch; for (int i = 0; i < batchsize; ++i) { Txn &txn = *batch.add_txn(); @@ -30,7 +32,7 @@ batch.SerializeToOstream(&ss); } long long time = current_time_millis() - start; - double tps = 1000 * static_cast<double>(count * batchsize) / time; + double tps = 1000 * count * batchsize / double(time); cout << "protobuf: " << time << " ms, " << tps << " tps" << endl; } @@ -51,7 +53,7 @@ } } long long time = current_time_millis() - start; - double tps = 1000 * static_cast<double>(count * batchsize) / time; + double tps = 1000 * count * batchsize / double(time); cout << "boost: " << time << " ms, " << tps << " tps" << endl; } @@ -61,7 +63,7 @@ stringbuf sb; for (int j = 0; j < batchsize; ++j) { const Txn &txn = batch.txn(j); -#define write(x) { typeof(x) __x = x; sb.sputn((char*)(&__x), sizeof __x); } +#define write(x) { typeof(x) __x = x; sb.sputn(reinterpret_cast<char*>(&__x), sizeof __x); } write(txn.seqno()); for (int k = 0; k < 5; ++k) { const Op &op = txn.op(k); @@ -72,7 +74,7 @@ } } long long time = current_time_millis() - start; - double tps = 1000 * static_cast<double>(count * batchsize) / time; + double tps = 1000 * count * batchsize / double(time); cout << "streambuf.sputn: " << time << " ms, " << tps << " tps" << endl; } Modified: ydb/trunk/src/stxn.lzz.clamp =================================================================== --- ydb/trunk/src/stxn.lzz.clamp 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/stxn.lzz.clamp 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,5 +1,6 @@ #hdr #include "unsetprefs.h" +#include <boost/bind.hpp> #include <commons/memory.h> #include <boost/foreach.hpp> 
#include <commons/snap_map.h> Modified: ydb/trunk/src/tpcc/Makefile =================================================================== --- ydb/trunk/src/tpcc/Makefile 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/tpcc/Makefile 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,6 +1,7 @@ WARNINGS = -Werror -Wall -Wextra -Wconversion -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare -Wno-unused-parameter -CXX := ccache $(CXX) +CCACHE := ccache +CXX := $(CCACHE) $(CXX) # Debug flags ifeq ($(OPT),) Modified: ydb/trunk/src/tpcc/tpcctables.h =================================================================== --- ydb/trunk/src/tpcc/tpcctables.h 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/tpcc/tpcctables.h 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,6 +1,7 @@ #ifndef TPCCTABLES_H__ #define TPCCTABLES_H__ +#include <limits> #include <map> #include <set> #include <vector> Modified: ydb/trunk/src/unsetprefs.h =================================================================== --- ydb/trunk/src/unsetprefs.h 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/unsetprefs.h 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,5 +0,0 @@ -#undef function -#undef shared_ptr -#undef ref -#undef tuple -#undef make_tuple Modified: ydb/trunk/src/util.lzz =================================================================== --- ydb/trunk/src/util.lzz 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/util.lzz 2009-03-20 20:40:27 UTC (rev 1316) @@ -7,12 +7,10 @@ #include <map> #include <set> #include <utility> -#include <boost/function.hpp> #include <boost/scoped_array.hpp> #include <commons/array.h> #include <commons/nullptr.h> #include <commons/time.h> -#include <sys/socket.h> // getpeername #include <google/protobuf/io/zero_copy_stream_impl.h> #include "msg.h" using namespace std; @@ -21,6 +19,7 @@ using namespace google::protobuf::io; #end #src +#include <sys/socket.h> // getpeername #include <gtest/gtest.h> #include <netinet/in.h> // in_addr 
etc. #end Modified: ydb/trunk/src/ydb.lzz.clamp =================================================================== --- ydb/trunk/src/ydb.lzz.clamp 2009-03-20 20:20:25 UTC (rev 1315) +++ ydb/trunk/src/ydb.lzz.clamp 2009-03-20 20:40:27 UTC (rev 1316) @@ -1,5 +1,6 @@ #hdr #include "unsetprefs.h" +#include <boost/bind.hpp> #include <boost/function.hpp> #include <boost/scoped_ptr.hpp> #include <string> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-22 09:06:41
|
Revision: 1321 http://assorted.svn.sourceforge.net/assorted/?rev=1321&view=rev Author: yangzhang Date: 2009-03-22 09:06:24 +0000 (Sun, 22 Mar 2009) Log Message: ----------- - cleaned up build, fixed distcc arch issues - restored unsetprefs.h; somehow that got clobbered Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/unsetprefs.h Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-21 23:23:39 UTC (rev 1320) +++ ydb/trunk/src/Makefile 2009-03-22 09:06:24 UTC (rev 1321) @@ -1,10 +1,18 @@ TARGET := ydb WTF := wtf CCACHE := ccache +DISTCC := distcc +CCACHE_PREFIX := $(DISTCC) +ifeq ($(CCACHE),) +ACCEL := $(DISTCC) +else +ACCEL := $(CCACHE) +endif CLAMPS := $(wildcard *.lzz.clamp) +CLAMPLZZS:= $(patsubst %.clamp,%,$(CLAMPS)) PURELZZS := $(foreach lzz,$(wildcard *.lzz),$(if $(wildcard $(lzz).clamp),,$(lzz))) -LZZS := $(patsubst %.clamp,%,$(CLAMPS)) $(PURELZZS) +LZZS := $(CLAMPLZZS) $(PURELZZS) LZZHDRS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.hh,$(lzz))) LZZSRCS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.cc,$(lzz))) LZZOBJS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.o,$(lzz))) @@ -40,7 +48,7 @@ OPT := -g3 endif # CXX := $(WTF) ag++ -k --Xcompiler # $(CXX) -CXX := $(WTF) $(CCACHE) $(CXX) -pipe +CXX := $(WTF) $(ACCEL) $(CXX) -pipe CC := $(CXX) # for linking LDFLAGS := -pthread $(GPROF) LDLIBS := -lstx -lst -lresolv -lprotobuf -lgtest \ @@ -78,7 +86,8 @@ -Wlong-long \ -Wvolatile-register-var \ -std=gnu++0x \ - -march=native \ + -m64 \ + -march=$(shell gcc-config march) \ $(CXXFLAGS) # \ @@ -93,9 +102,9 @@ -Wstrict-overflow \ -Winline \ -ifeq ($(NPCH),) +ifneq ($(PCH),) CXXFLAGS := $(CXXFLAGS0) -include pch.h -%.o: pch.h.gch +$(LZZOBJS): pch.h.gch else CXXFLAGS := $(CXXFLAGS0) endif @@ -140,7 +149,7 @@ chmod -w $@ pch.h: - svn ls -rHEAD -R | \ + svn ls -rHEAD -R https://assorted.svn.sourceforge.net/svnroot/assorted/ydb/trunk/src | \ egrep -v '/$$|Makefile' | \ xargs sed 
's/.*\binclude\b *<\(.*\)>.*/\#include <\1>/; t succ; d; :succ /commons/ d' | \ sort -u > $@ @@ -151,8 +160,7 @@ clean: rm -rf clamp/ $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) \ - main.lzz ydb.lzz main.cc main.hh ydb.hh ydb.cc \ - util.cc util.hh tpcc.lzz tpcc.hh tpcc.cc + $(CLAMPLZZS) $(LZZHDRS) $(LZZSRCS) make -C tpcc/ clean distclean: clean Modified: ydb/trunk/src/unsetprefs.h =================================================================== --- ydb/trunk/src/unsetprefs.h 2009-03-21 23:23:39 UTC (rev 1320) +++ ydb/trunk/src/unsetprefs.h 2009-03-22 09:06:24 UTC (rev 1321) @@ -0,0 +1,5 @@ +#undef function +#undef shared_ptr +#undef ref +#undef tuple +#undef make_tuple This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-24 00:10:25
|
Revision: 1327 http://assorted.svn.sourceforge.net/assorted/?rev=1327&view=rev Author: yangzhang Date: 2009-03-24 00:10:13 +0000 (Tue, 24 Mar 2009) Log Message: ----------- - Makefile updates: - using bash as shell - using TARGET_ARCH appropriately - using mkdeps.py for automatic dependency generation Modified Paths: -------------- ydb/trunk/src/Makefile Added Paths: ----------- ydb/trunk/src/mkdeps.py Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-23 04:50:10 UTC (rev 1326) +++ ydb/trunk/src/Makefile 2009-03-24 00:10:13 UTC (rev 1327) @@ -2,14 +2,16 @@ # Tool configurations # +SHELL := bash WTF := wtf -# CXX := $(WTF) ag++ -k --Xcompiler # $(CXX) ORIGCXX := $(CXX) CCACHE := ccache export CCACHE_PREFIX := distcc CXX := $(WTF) $(CCACHE) $(CXX) -pipe -# CC := $(CXX) # for linking +TARGET_ARCH := $(shell [[ "$$(uname -m)" == x86_64 ]] && echo -m64 || echo -m32 ) \ + -march=$(shell gcc-config march) + WARNINGS = \ -Wall \ -Werror \ @@ -70,13 +72,8 @@ PPROF := -lprofiler endif -ARCH := $(shell gcc-config march) -CXXFLAGS0 = $(OPT) -pthread $(GPROF) \ - $(WARNINGS) \ - -std=gnu++0x \ - -m64 \ - -march=$(ARCH) \ - $(ORIGCXXFLAGS) +CXXFLAGS0 = $(OPT) -MD -pthread $(GPROF) $(WARNINGS) -std=gnu++0x \ + $(ORIGCXXFLAGS) ifneq ($(PCH),) CXXFLAGS = $(CXXFLAGS0) -include pch.h @@ -171,11 +168,8 @@ # Project-specific rules # -stxn.o: main.hh $(PBHDRS) -main.o: util.hh msg.h $(PBHDRS) -util.o: msg.h $(PBHDRS) -ydb.o: main.hh stxn.hh tpcc.hh util.hh $(PBHDRS) -tpcc.o: main.hh util.hh $(PBHDRS) +include $(shell ./mkdeps.py > deps.mk; echo deps.mk) + ydb: $(OBJS) tpcc/%.o: WARNINGS = \ @@ -201,4 +195,5 @@ serperf: ydb.pb.o ser: ydb.pb.o -ser.o: msg.h + +-include *.d Added: ydb/trunk/src/mkdeps.py =================================================================== --- ydb/trunk/src/mkdeps.py (rev 0) +++ ydb/trunk/src/mkdeps.py 2009-03-24 00:10:13 UTC (rev 1327) @@ -0,0 +1,50 @@ +#!/usr/bin/env 
python + +from __future__ import with_statement +from subprocess import * +from re import * +from path import path + +pwd = path('.') + +def memoized(f): + cache = {} + return lambda *args: cache[args] if args in cache else cache.setdefault(args, f(*args)) + +def settify(f): return lambda *args: set(f(*args)) + +@memoized +@settify +def hdrs(i): + with file(i) as f: + for line in f: + if search(r'# *include +"', line): + yield i.dirname() / sub(r'.*"(.*)".*', r'\1', line.strip()) + +@memoized +def src(i): + if i.endswith('.hh'): + clamp = path(i[:-3] + '.lzz.clamp') + lzz = path(i[:-2] + '.lzz') + if clamp.isfile(): return clamp + if lzz.isfile(): return lzz + return i + +@memoized +@settify +def deps(i): + for hdr in hdrs(i): yield hdr + for hdr in hdrs(i): + if src(hdr).isfile(): + for dep in deps(src(hdr)): + yield dep + +for i in pwd.glob('*.lzz') + pwd.glob('*.lzz.clamp'): + print sub(r'\.lzz(\.clamp)?', '.o', i), ':', ' '.join(deps(i)) + +for i in pwd.glob('*.d'): + with file(i) as f: + for line in f: + for word in line.split(): + if '.clamp' in word: + print sub(r'(\.clamp/(.+)_lambda_.+\.clamp_h)', r'\1: \2.lzz.clamp', word) Property changes on: ydb/trunk/src/mkdeps.py ___________________________________________________________________ Added: svn:executable + * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-24 06:11:05
|
Revision: 1330 http://assorted.svn.sourceforge.net/assorted/?rev=1330&view=rev Author: yangzhang Date: 2009-03-24 06:10:53 +0000 (Tue, 24 Mar 2009) Log Message: ----------- - updated build system to first lzz then clamp, fixing the issue of where to place lambdas (header or source) - bunch of physical refactoring, particularly trying to reduce the number of includes in headers Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/main.lzz.clamp ydb/trunk/src/mkdeps.py ydb/trunk/src/msg.h ydb/trunk/src/stxn.lzz.clamp ydb/trunk/src/tpcc.lzz.clamp ydb/trunk/src/ydb.lzz.clamp Added Paths: ----------- ydb/trunk/src/util.lzz.clamp Removed Paths: ------------- ydb/trunk/src/util.lzz Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/Makefile 2009-03-24 06:10:53 UTC (rev 1330) @@ -140,20 +140,41 @@ %.cc: %.cc.cog cog.py $< > $@ -%.cc %.hh: %.lzz - lzz -hx hh -sx cc -hl -sl -hd -sd $< - %.pb.h %.pb.cc: %.proto protoc --cpp_out=. 
$< -%.lzz: %.lzz.clamp +# ORIG +# +#%.cc %.hh: %.lzz +# lzz -hx hh -sx cc -hl -sl -hd -sd $< +# +#%.lzz: %.lzz.clamp +# rm -f $@ +# mkdir -p .clamp/ +# clamp --outdir .clamp/ --prefix $(basename $@) < $< | \ +# sed "$$( echo -e '1i\\\n\#hdr\n1a\\\n\#end' )" | \ +# sed "$$( echo -e '$$i\\\n\#hdr\n$$a\\\n\#end' )" > $@ +# chmod -w $@ + +%.cc.clamp %.hh.clamp: %.lzz.clamp + ln -sf $< $(basename $<) + rm -f $(basename $(basename $<)).{hh,cc}.clamp + lzz -hx hh.clamp -sx cc.clamp -hd -sd $(basename $<) + chmod -w $(basename $(basename $<)).{hh.clamp,cc.clamp} + +%.cc: %.cc.clamp rm -f $@ mkdir -p .clamp/ - clamp --outdir .clamp/ --prefix $(basename $@) < $< | \ - sed "$$( echo -e '1i\\\n\#hdr\n1a\\\n\#end' )" | \ - sed "$$( echo -e '$$i\\\n\#hdr\n$$a\\\n\#end' )" > $@ + clamp --outdir .clamp/ --prefix $(basename $@)_cc < $< | \ + sed 's/"$(basename $@).hh.clamp"/"$(basename $@).hh"/' > $@ chmod -w $@ +%.hh: %.hh.clamp + rm -f $@ + mkdir -p .clamp/ + clamp --outdir .clamp/ --prefix $(basename $@)_hh < $< > $@ + chmod -w $@ + pch.h: svn ls -rHEAD -R $(SVNURL) | \ egrep -v '/$$|Makefile' | \ @@ -186,7 +207,7 @@ -Wno-unused-parameter clean: - rm -rf .clamp/ $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) $(CLAMPLZZS) + rm -rf .clamp/ $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) $(CLAMPLZZS) *.d *.hh.clamp *.cc.clamp distclean: clean rm -f pch.h pch.h.gch Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/main.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) @@ -1,15 +1,16 @@ #hdr #include "unsetprefs.h" -#include <boost/archive/binary_iarchive.hpp> -#include <boost/archive/binary_oarchive.hpp> #include <boost/tuple/tuple.hpp> -#include <commons/st/st.h> +#include <commons/st/intr.h> +#include <commons/st/sync.h> +#include <commons/st/channel.h> #include <fstream> // ofstream #include <vector> -#include "msg.h" #include "util.hh" #include "setprefs.h" 
+namespace boost { namespace archive { class binary_oarchive; } } + using namespace boost; using namespace boost::archive; using namespace commons; @@ -21,11 +22,14 @@ #src #include "unsetprefs.h" #include <boost/foreach.hpp> +#include <boost/archive/binary_oarchive.hpp> #include <commons/assert.h> -#include <commons/nullptr.h> #include <commons/time.h> +#include <commons/st/io.h> +#include <commons/st/sockets.h> #include <iostream> #include <unistd.h> // pipe, write, sync +#include "msg.h" #include "setprefs.h" #end @@ -33,6 +37,7 @@ typedef commons::array<char> recovery_t; + // Configuration. st_utime_t timeout; int yield_interval, accept_joiner_seqno, issuing_interval, min_ops, max_ops, @@ -161,29 +166,36 @@ class wal { public: - wal(const string &fname) : of(fname.c_str()), out(of) {} + wal(const string &fname) : + of_(fname.c_str()), + ar_(new binary_oarchive(of())) + {} + ~wal() { delete ar_; } template <typename T> - void log(const T &msg) { ser(of, msg); } + void log(const T &msg) { ser(of(), msg); } void logbuf(const ser_t &s) { logbuf(s.data(), s.size()); } void logbuf(const void *buf, size_t len) { - of.write(reinterpret_cast<const char*>(buf), len); + of().write(reinterpret_cast<const char*>(buf), len); } void logdel(int key) { int op = op_del; // TODO: is this really necessary? - out & op & key; + ar() & op & key; } void logwrite(int key, int val) { int op = op_write; - out & op & key & val; + ar() & op & key & val; } void logcommit() { int op = op_commit; - out & op; + ar() & op; } - void flush() { of.flush(); } + void flush() { of().flush(); } private: - ofstream of; - binary_oarchive out; + ofstream of_; + //unique_ptr<binary_oarchive> ar_; + binary_oarchive *ar_; + ofstream &of() { return of_; } + binary_oarchive &ar() { return *ar_; }; }; // TODO? 
Modified: ydb/trunk/src/mkdeps.py =================================================================== --- ydb/trunk/src/mkdeps.py 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/mkdeps.py 2009-03-24 06:10:53 UTC (rev 1330) @@ -39,12 +39,17 @@ for dep in deps(src(hdr)): yield dep -for i in pwd.glob('*.lzz') + pwd.glob('*.lzz.clamp'): - print sub(r'\.lzz(\.clamp)?', '.o', i), ':', ' '.join(deps(i)) +for i in pwd.glob('*.lzz.clamp'): + print sub(r'\.lzz\.clamp', '.o', i), ':', sub(r'\.lzz\.clamp', '.hh', i), ' '.join(deps(i)) for i in pwd.glob('*.d'): with file(i) as f: for line in f: for word in line.split(): - if '.clamp' in word: - print sub(r'(\.clamp/(.+)_lambda_.+\.clamp_h)', r'\1: \2.lzz.clamp', word) + if '.clamp/' in word: + if '_hh_lambda_' in word: + print sub(r'(\.clamp/(.+)_hh_lambda_.+\.clamp_h)', r'\1: \2.hh.clamp', word) + elif '_cc_lambda_' in word: + print sub(r'(\.clamp/(.+)_cc_lambda_.+\.clamp_h)', r'\1: \2.cc.clamp', word) + else: + print sub(r'(\.clamp/(.+)_lambda_.+\.clamp_h)', r'\1: \2.lzz.clamp', word) Modified: ydb/trunk/src/msg.h =================================================================== --- ydb/trunk/src/msg.h 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/msg.h 2009-03-24 06:10:53 UTC (rev 1330) @@ -3,7 +3,7 @@ #include <commons/array.h> #include <commons/exceptions.h> -#include <commons/st/st.h> +#include <commons/st/reader.h> #include <commons/streamwriter.h> #include <commons/utility.h> #include <iomanip> Modified: ydb/trunk/src/stxn.lzz.clamp =================================================================== --- ydb/trunk/src/stxn.lzz.clamp 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/stxn.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) @@ -1,13 +1,15 @@ #hdr #include "unsetprefs.h" #include <boost/bind.hpp> +#include <boost/foreach.hpp> +#include <boost/tuple/tuple.hpp> +#include <commons/array.h> #include <commons/memory.h> -#include <boost/foreach.hpp> +#include <commons/rand.h> #include 
<commons/snap_map.h> -#include <commons/rand.h> -#include <commons/array.h> +#include <commons/time.h> #include <google/dense_hash_map> -#include <boost/tuple/tuple.hpp> +#include "msg.h" #include "util.hh" #include "main.hh" #include "setprefs.h" @@ -89,6 +91,9 @@ issue_txns(st_channel<replica_info> &newreps, int &seqno, st_bool &accept_joiner) { + USE(newreps); + USE(seqno); + USE(accept_joiner); typedef typename Types::TxnBatch TxnBatch; typedef typename Types::Txn Txn; typedef typename Types::Op Op; @@ -255,6 +260,10 @@ process_txn(mii &map, const typename Types::Txn &txn, int &seqno, typename RTypes::Response *res) { + USE(map); + USE(txn); + USE(seqno); + USE(res); typedef typename Types::Txn Txn; typedef typename Types::Op Op; checkeq(txn.seqno(), seqno + 1); @@ -338,6 +347,14 @@ st_channel<chunk> &backlog, int init_seqno, int mypos, int nnodes) { + USE(leader); + USE(map); + USE(seqno); + USE(send_states); + USE(backlog); + USE(init_seqno); + USE(mypos); + USE(nnodes); typedef typename Types::TxnBatch TxnBatch; typedef typename Types::Txn Txn; typedef typename Types::Op Op; Modified: ydb/trunk/src/tpcc.lzz.clamp =================================================================== --- ydb/trunk/src/tpcc.lzz.clamp 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/tpcc.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) @@ -11,6 +11,8 @@ #src #include "unsetprefs.h" #include <commons/memory.h> +#include <commons/st/io.h> +#include <commons/time.h> #include <string> #include "tpcc/clock.h" #include "tpcc/randomgenerator.h" @@ -18,6 +20,7 @@ #include "tpcc/tpccdb.h" #include "tpcc/tpccgenerator.h" #include "tpcc/tpcctables.h" +#include "msg.h" #include "setprefs.h" #end @@ -32,6 +35,16 @@ unique_ptr<TPCCTables> g_tables; namespace { +class st_bcast { + const vector<st_netfd_t> &fds_; +public: + st_bcast(const vector<st_netfd_t> &fds) : fds_(fds) {} + void operator()(const void *buf, size_t len) { + foreach (st_netfd_t dst, fds_) + st_timed_write(dst, buf, len); + } 
+}; + class st_tpcc : public TPCCDB { private: @@ -43,10 +56,7 @@ public: st_tpcc(const vector<st_netfd_t> &fds) : a_(buf_size), - writer_(lambda(const void *buf, size_t len) { - foreach (st_netfd_t dst, __ref(fds)) - st_timed_write(dst, buf, len); - }, a_, buf_size) {} + writer_(st_bcast(fds), a_, buf_size) {} void flush() { writer_.mark_and_flush(); } void set_seqno(int seqno) { seqno_ = seqno; } @@ -821,4 +831,3 @@ ASSERT(false); } } - Deleted: ydb/trunk/src/util.lzz =================================================================== --- ydb/trunk/src/util.lzz 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/util.lzz 2009-03-24 06:10:53 UTC (rev 1330) @@ -1,503 +0,0 @@ -#hdr -#include "unsetprefs.h" -#include <cstring> // size_t -#include <ios> // streamoff -#include <st.h> -#include <string> -#include <map> -#include <set> -#include <utility> -#include <boost/scoped_array.hpp> -#include <commons/array.h> -#include <commons/nullptr.h> -#include <commons/time.h> -#include <google/protobuf/io/zero_copy_stream_impl.h> -#include "msg.h" -using namespace std; -using namespace boost; -using namespace ydb::msg; -using namespace google::protobuf::io; -#end -#src -#include <sys/socket.h> // getpeername -#include <gtest/gtest.h> -#include <netinet/in.h> // in_addr etc. 
-#end - -using namespace testing; - -// -// Globals -// - -bool fake_bcast, profile_threads, multirecover, debug_threads; -size_t buf_size; -long long write_thresh; - -// -// Display -// - -void -showdatarate(const char *action, streamoff len, long long time) -{ - cout << action << " of " << len << " bytes in " << time << " ms (" - << double(len) / double(time) / 1000 << " MB/s)" << endl; -} - -void -showdatarate(const char *action, size_t len, long long time) -{ - cout << action << " of " << len << " bytes in " << time << " ms (" - << double(len) / double(time) / 1000 << " MB/s)" << endl; -} - -void -showtput(const char *action, long long stop_time, long long start_time, - int stop_count, int start_count) -{ - long long time_diff = stop_time - start_time; - int count_diff = stop_count - start_count; - double rate = double(count_diff) * 1000. / double(time_diff); - cout << action << " " << count_diff << " txns [" - << start_count << ".." << stop_count - << "] in " << time_diff << " ms [" - << start_time << ".." << stop_time - << "] (" - << rate << " tps)" << endl; -} - -// -// Calculations -// - -pair<size_t, size_t> -recovery_range(size_t size, int mypos, int nnodes) -{ - return make_pair(multirecover ? size * mypos / size_t(nnodes) : 0, - multirecover ? size * (mypos + 1) / size_t(nnodes) : size); -} - -inline bool -check_interval(int seqno, int interval) -{ - return interval > 0 && seqno % interval == interval - 1; -} - -/** - * Return range * part / nparts, but with proper casting. Assumes that part < - * nparts. 
- */ -inline int -interp(int range, int part, int nparts) { - return static_cast<int>(static_cast<long long>(range) * part / nparts); -} - -#src -TEST(interp_test, basics) { - EXPECT_EQ(0, interp(3, 0, 3)); - EXPECT_EQ(1, interp(3, 1, 3)); - EXPECT_EQ(2, interp(3, 2, 3)); - EXPECT_EQ(3, interp(3, 3, 3)); - - EXPECT_EQ(0, interp(RAND_MAX, 0, 2)); - EXPECT_EQ(RAND_MAX / 2, interp(RAND_MAX, 1, 2)); - EXPECT_EQ(RAND_MAX, interp(RAND_MAX, 2, 2)); -} -#end - -/** - * Convenience function for calculating percentages. - */ -template<typename T> -inline double pct(T sub, T tot) -{ - return 100 * double(sub) / double(tot); -} - -// -// ST IO -// - -/** - * Perform an st_write but warn if it took over write_thresh ms. - */ -void -st_timed_write(st_netfd_t dst, const void *buf, size_t len) -{ - long long before_write = -1; - if (write_thresh > 0) { - before_write = current_time_millis(); - } - - checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), - static_cast<ssize_t>(len)); - - if (write_thresh > 0) { - long long write_time = current_time_millis() - before_write; - if (write_time > write_thresh) { - cout << "thread " << threadname() << " write of " << len - << " bytes to dst " << show_sockaddr(dst) << " blocked for " - << write_time << " ms" << endl; - } - } -} - -// -// ST Sockets -// - -char * -show_sockaddr(st_netfd_t fd) -{ - sockaddr_in sa; - socklen_t salen = sizeof sa; - check0x(getpeername(st_netfd_fileno(fd), - reinterpret_cast<sockaddr*>(&sa), - &salen)); - return inet_ntoa(sa.sin_addr); -} - -inline const string& -nfd2name(st_netfd_t fd) -{ - return nfdnames[fd]; -} - -map<st_netfd_t, string> nfdnames; - -// -// ST Threads -// - -/** - * The list of all threads. Keep track of these so that we may cleanly shut - * down all threads. - */ -set<st_thread_t> threads; - -/** - * RAII for adding/removing the current thread from the global threads set. 
- */ -class thread_eraser -{ - public: - thread_eraser() { threads.insert(st_thread_self()); } - ~thread_eraser() { threads.erase(st_thread_self()); } -}; - -/** - * For debug/error-printing purposes. - */ -map<st_thread_t, string> threadnames; -st_thread_t last_thread; - -/** - * For profiling. - */ -map<st_thread_t, long long> threadtimes; -long long thread_start_time; - -/** - * Look up thread name, or just show thread ID. - */ -inline string -threadname(st_thread_t t = st_thread_self()) { - if (threadnames.find(t) != threadnames.end()) { - return threadnames[t]; - } else { - return lexical_cast<string>(t); - } -} - -/** - * Debug function for thread names. Remember what we're switching from. - */ -inline void -switch_out_cb() -{ - if (debug_threads) last_thread = st_thread_self(); - if (profile_threads) - threadtimes[st_thread_self()] += current_time_millis() - thread_start_time; -} - -/** - * Debug function for thread names. Show what we're switching from/to. - */ -inline void switch_in_cb() -{ - if (debug_threads && last_thread != st_thread_self()) { - cout << "switching"; - if (last_thread != 0) cout << " from " << threadname(last_thread); - cout << " to " << threadname() << endl; - } - if (profile_threads) - thread_start_time = current_time_millis(); -} - -/** - * Print to cerr a thread exception. - */ -ostream& -cerr_thread_ex(const std::exception &ex) -{ - return cerr << "exception in thread " << threadname() - << ": " << ex.what(); -} - -// -// Serialization -// - -/** - * Adapter for arrays to look like strings (for PB serialization). 
- */ -class ser_array -{ - commons::array<char> a_; - size_t size_; -public: - ser_array(size_t size = buf_size) : a_(size), size_(0) {} - char *data() const { return a_.get(); } - size_t size() const { return size_; } - void clear() { size_ = 0; } - void stretch(size_t size) { - if (size > a_.size()) - a_.reset(new char[size], size); - size_ = size; - } -}; - -//typedef string ser_t; -typedef ser_array ser_t; - -template<typename T> -void -ser(writer &w, const T &msg) -{ - uint32_t len = msg.ByteSize(); - w.mark(); - w.reserve(len); - check(msg.SerializeToArray(w.cur(), len)); - w.skip(len); -} - -/** - * Serialization. - * - * TODO: experiment with which method is the fastest: using a string as shown - * here or computing the bytesize then allocating (or grabbing/reserving) the - * array. - */ -template<typename T> -void -ser(string &s, const T &msg) -{ - // Serialize message to a buffer. - uint32_t len; - s.append(sizeof len, '\0'); - check(msg.AppendToString(&s)); - - // Warn if the message is large. - if (s.size() > 1000000) - cout << "serializing large message of " << s.size() << " bytes" << endl; - - // Prefix the message with a four-byte length. - len = htonl(static_cast<uint32_t>(s.size() - sizeof len)); - char *plen = reinterpret_cast<char*>(&len); - copy(plen, plen + sizeof len, s.begin()); -} - -template<typename T> -inline void -ser(ser_array &s, const T &msg) -{ - int len = msg.ByteSize(); - - // Grow the array as needed. - s.stretch(len + sizeof(uint32_t)); - - // Serialize message to a buffer with four-byte length prefix. - check(msg.SerializeToArray(s.data() + sizeof(uint32_t), len)); - *reinterpret_cast<uint32_t*>(s.data()) = htonl(uint32_t(len)); -} - -/** - * Serialization. 
- */ -template<typename T> -inline void -ser(ostream &s, const T &msg) -{ - uint32_t len = htonl(uint32_t(msg.ByteSize())); - s.write(reinterpret_cast<const char*>(&len), sizeof len); - check(msg.SerializeToOstream(&s)); -} - -// -// Messaging -// - -/** - * Send a message to some destinations. - */ -inline void -bcastbuf(const vector<st_netfd_t> &dsts, const ser_t &msg) -{ - if (!fake_bcast) { - foreach (st_netfd_t dst, dsts) { - st_timed_write(dst, msg.data(), msg.size()); - } - } -} - -/** - * Send a message to some destinations, using whichever method of network IO - * was chosen (sync or async). - */ -template<typename T> -inline void -bcastmsg(const vector<st_netfd_t> &dsts, const T &msg) -{ - ser_t s; - ser(s, msg); - bcastbuf(dsts, s); -} - -/** - * Send a message to a single recipient. - */ -inline void -sendbuf(st_netfd_t dst, const ser_t &msg) -{ - if (!fake_bcast) - st_timed_write(dst, msg.data(), msg.size()); -} - -/** - * Send a message to a single recipient. - */ -template<typename T> -inline void -sendmsg(st_netfd_t dst, const T &msg) -{ - ser_t s; - ser(s, msg); - sendbuf(dst, s); -} - -/** - * Read a message. This is done in two steps: first by reading the length - * prefix, then by reading the actual body. This function also provides a way - * to measure how much time is spent actually reading the message from the - * network. Such measurement only makes sense for large messages which take a - * long time to receive. - * - * \param[in] src The socket from which to read. - * - * \param[in] msg The protobuf to read into. - * - * \param[out] start_time If not null, record the time at which we start to - * receive the message (after the length is received). - * - * \param[out] stop_time If not null, record the time at which we finish - * receiving the message (before we deserialize the protobuf). - * - * \param[out] len If not null, record the size of the serialized message - * in bytes. 
- * - * \param[in] timeout on each of the two read operations (first one is on - * length, second one is on the rest). - * - * \return The length of the serialized message. - */ -template <typename T> -size_t -readmsg(st_netfd_t src, T & msg, long long *start_time = nullptr, long long - *stop_time = nullptr, st_utime_t timeout = ST_UTIME_NO_TIMEOUT) -{ - // Read the message length. - uint32_t len; - checkeqnneg(st_read_fully(src, static_cast<void*>(&len), sizeof len, - timeout), - static_cast<ssize_t>(sizeof len)); - if (start_time != nullptr) - *start_time = current_time_millis(); - len = ntohl(len); - - // Parse the message body. Try stack-allocation if possible. - scoped_array<char> sbuf; - char *buf; - if (len <= 4096) buf = reinterpret_cast<char*>(alloca(len)); - else sbuf.reset(buf = new char[len]); - checkeqnneg(st_read_fully(src, buf, len, timeout), int(len)); - if (stop_time != nullptr) - *stop_time = current_time_millis(); - check(msg.ParseFromArray(buf, len)); - - return len; -} - -/** - * Same as the above readmsg(), but returns an internally constructed message. - * This is a "higher-level" readmsg() that relies on return-value optimization - * for avoiding unnecessary copies. - */ -template <typename T> -inline T -readmsg(st_netfd_t src, st_utime_t timeout = ST_UTIME_NO_TIMEOUT) -{ - T msg; - readmsg(src, msg, nullptr, nullptr, timeout); - return msg; -} - -/** - * Same as the above readmsg() but uses an st_reader instead of a raw - * st_netfd_t. 
- */ -template <typename T> -inline void -readmsg(st_reader &src, T & msg) -{ - managed_array<char> a = src.read(sizeof(uint32_t)); - uint32_t len = ntohl(*reinterpret_cast<const uint32_t*>(a.get())); - check(msg.ParseFromArray(src.read(len), len)); -} - -template<typename T> -inline void -readmsg(anchored_stream_reader &src, T &msg) -{ - uint32_t len = ntohl(src.read<uint32_t>()); - check(msg.ParseFromArray(checkpass(src.read(len)), len)); -} - -template<typename T> -inline void -readmsg(istream &src, T &msg) -{ - uint32_t len; - src.read(reinterpret_cast<char*>(&len), sizeof len); - len = ntohl(len); -#if 0 - IstreamInputStream iis(&src); - LimitingInputStream lis(&iis, len); - check(msg.ParseFromZeroCopyStream(&lis)); -#else - char buf[len]; - src.read(buf, len); - check(msg.ParseFromArray(buf, len)); -#endif -} - -inline uint32_t -readlen(istream &src) -{ - uint32_t len; - src.read(reinterpret_cast<char*>(&len), sizeof len); - len = ntohl(len); - ASSERT(len < 10000); - return len; -} - Added: ydb/trunk/src/util.lzz.clamp =================================================================== --- ydb/trunk/src/util.lzz.clamp (rev 0) +++ ydb/trunk/src/util.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) @@ -0,0 +1,534 @@ +#hdr +#include "unsetprefs.h" +#include <cstring> // size_t +#include <iosfwd> // streamoff +#include <st.h> +#include <string> +#include <map> +#include <set> +#include <utility> // pair +#include <vector> +#include <commons/array.h> +#include <commons/delegates.h> +#include <commons/nullptr.h> +#include <arpa/inet.h> // htonl, ntohl +//#include <commons/st/st.h> + +using namespace std; +using namespace boost; +using namespace commons; + +namespace commons { + class st_reader; + class stream_writer; + class anchored_stream_reader; +} +namespace ydb { namespace msg { typedef stream_writer writer; } } +using namespace ydb::msg; +namespace google { namespace protobuf { class Message; } } +using google::protobuf::Message; +#end + +#src +#include 
"unsetprefs.h" +#include <boost/foreach.hpp> +#include <boost/lexical_cast.hpp> +#include <boost/scoped_array.hpp> +#include <commons/st/reader.h> +#include <commons/st/threads.h> +#include <commons/streamreader.h> +#include <commons/streamwriter.h> +#include <commons/time.h> +#include <sys/socket.h> // getpeername +#include <gtest/gtest.h> +#include <netinet/in.h> // in_addr etc. +#include <google/protobuf/message.h> +//#include <google/protobuf/io/zero_copy_stream_impl.h> +//using namespace google::protobuf::io; +#include "setprefs.h" +#end + +#if 1 + +// +// Globals +// + +bool fake_bcast, profile_threads, multirecover, debug_threads; +size_t buf_size; +long long write_thresh; + +// +// Display +// + +void +showdatarate(const char *action, streamoff len, long long time) +{ + cout << action << " of " << len << " bytes in " << time << " ms (" + << double(len) / double(time) / 1000 << " MB/s)" << endl; +} + +void +showdatarate(const char *action, size_t len, long long time) +{ + cout << action << " of " << len << " bytes in " << time << " ms (" + << double(len) / double(time) / 1000 << " MB/s)" << endl; +} + +void +showtput(const char *action, long long stop_time, long long start_time, + int stop_count, int start_count) +{ + long long time_diff = stop_time - start_time; + int count_diff = stop_count - start_count; + double rate = double(count_diff) * 1000. / double(time_diff); + cout << action << " " << count_diff << " txns [" + << start_count << ".." << stop_count + << "] in " << time_diff << " ms [" + << start_time << ".." << stop_time + << "] (" + << rate << " tps)" << endl; +} + +// +// Calculations +// + +pair<size_t, size_t> +recovery_range(size_t size, int mypos, int nnodes) +{ + return make_pair(multirecover ? size * mypos / size_t(nnodes) : 0, + multirecover ? 
size * (mypos + 1) / size_t(nnodes) : size); +} + +inline bool +check_interval(int seqno, int interval) +{ + return interval > 0 && seqno % interval == interval - 1; +} + +/** + * Return range * part / nparts, but with proper casting. Assumes that part < + * nparts. + */ +inline int +interp(int range, int part, int nparts) { + return static_cast<int>(static_cast<long long>(range) * part / nparts); +} + +#src +TEST(interp_test, basics) { + EXPECT_EQ(0, interp(3, 0, 3)); + EXPECT_EQ(1, interp(3, 1, 3)); + EXPECT_EQ(2, interp(3, 2, 3)); + EXPECT_EQ(3, interp(3, 3, 3)); + + EXPECT_EQ(0, interp(RAND_MAX, 0, 2)); + EXPECT_EQ(RAND_MAX / 2, interp(RAND_MAX, 1, 2)); + EXPECT_EQ(RAND_MAX, interp(RAND_MAX, 2, 2)); +} +#end + +/** + * Convenience function for calculating percentages. + */ +template<typename T> +inline double pct(T sub, T tot) +{ + return 100 * double(sub) / double(tot); +} + +// +// ST IO +// + +/** + * Perform an st_write but warn if it took over write_thresh ms. + */ +void +st_timed_write(st_netfd_t dst, const void *buf, size_t len) +{ + long long before_write = -1; + if (write_thresh > 0) { + before_write = current_time_millis(); + } + + checkeqnneg(st_write(dst, buf, len, ST_UTIME_NO_TIMEOUT), + static_cast<ssize_t>(len)); + + if (write_thresh > 0) { + long long write_time = current_time_millis() - before_write; + if (write_time > write_thresh) { + cout << "thread " << threadname() << " write of " << len + << " bytes to dst " << show_sockaddr(dst) << " blocked for " + << write_time << " ms" << endl; + } + } +} + +// +// ST Sockets +// + +char * +show_sockaddr(st_netfd_t fd) +{ + sockaddr_in sa; + socklen_t salen = sizeof sa; + check0x(getpeername(st_netfd_fileno(fd), + reinterpret_cast<sockaddr*>(&sa), + &salen)); + return inet_ntoa(sa.sin_addr); +} + +inline const string& +nfd2name(st_netfd_t fd) +{ + return nfdnames[fd]; +} + +map<st_netfd_t, string> nfdnames; + +// +// ST Threads +// + +/** + * The list of all threads. 
Keep track of these so that we may cleanly shut + * down all threads. + */ +set<st_thread_t> threads; + +/** + * RAII for adding/removing the current thread from the global threads set. + */ +class thread_eraser +{ + public: + thread_eraser() { threads.insert(st_thread_self()); } + ~thread_eraser() { threads.erase(st_thread_self()); } +}; + +/** + * For debug/error-printing purposes. + */ +typedef map<st_thread_t, string> threadnames_t; +threadnames_t threadnames; +st_thread_t last_thread; + +/** + * For profiling. + */ +map<st_thread_t, long long> threadtimes; +long long thread_start_time; + +/** + * Look up thread name, or just show thread ID. + */ +const string & +threadname(st_thread_t t = st_thread_self()) { + threadnames_t::iterator it = threadnames.find(t); + if (it == threadnames.end()) { + return threadnames[t] = lexical_cast<string>(t); + } else { + return it->second; + } +} + +/** + * Debug function for thread names. Remember what we're switching from. + */ +void +switch_out_cb() +{ + if (debug_threads) last_thread = st_thread_self(); + if (profile_threads) + threadtimes[st_thread_self()] += current_time_millis() - thread_start_time; +} + +/** + * Debug function for thread names. Show what we're switching from/to. + */ +void +switch_in_cb() +{ + if (debug_threads && last_thread != st_thread_self()) { + cout << "switching"; + if (last_thread != 0) cout << " from " << threadname(last_thread); + cout << " to " << threadname() << endl; + } + if (profile_threads) + thread_start_time = current_time_millis(); +} + +/** + * Print to cerr a thread exception. + */ +ostream& +cerr_thread_ex(const std::exception &ex) +{ + return cerr << "exception in thread " << threadname() + << ": " << ex.what(); +} + +/** + * Delegate for running thread targets. + * \param[in] f The function to execute. 
+ */ +void +my_spawn_helper(const fn f) +{ + thread_eraser eraser; + try { f(); } + catch (std::exception &ex) { cerr_thread_ex(ex) << endl; } +} + +/** + * Spawn a thread using ST but wrap it in an exception handler that interrupts + * all other threads (hopefully causing them to unwind). + * \param[in] f The function to execute. + */ +st_thread_t +my_spawn(const fn &f, string name) +{ + st_thread_t t = st_spawn(bind(my_spawn_helper, f)); + threads.insert(t); + threadnames[t] = name; + return t; +} + + +// +// Serialization +// + +/** + * Adapter for arrays to look like strings (for PB serialization). + */ +class ser_array +{ + commons::array<char> a_; + size_t size_; +public: + ser_array(size_t size = buf_size) : a_(size), size_(0) {} + char *data() const { return a_.get(); } + size_t size() const { return size_; } + void clear() { size_ = 0; } + void stretch(size_t size) { + if (size > a_.size()) + a_.reset(new char[size], size); + size_ = size; + } +}; + +//typedef string ser_t; +typedef ser_array ser_t; + +void +ser(writer &w, const Message &msg) +{ + uint32_t len = msg.ByteSize(); + w.mark(); + w.reserve(len); + check(msg.SerializeToArray(w.cur(), len)); + w.skip(len); +} + +/** + * Serialization. + * + * TODO: experiment with which method is the fastest: using a string as shown + * here or computing the bytesize then allocating (or grabbing/reserving) the + * array. + */ +void +ser(string &s, const Message &msg) +{ + // Serialize message to a buffer. + uint32_t len; + s.append(sizeof len, '\0'); + check(msg.AppendToString(&s)); + + // Warn if the message is large. + if (s.size() > 1000000) + cout << "serializing large message of " << s.size() << " bytes" << endl; + + // Prefix the message with a four-byte length. 
+ len = htonl(static_cast<uint32_t>(s.size() - sizeof len)); + char *plen = reinterpret_cast<char*>(&len); + copy(plen, plen + sizeof len, s.begin()); +} + +void +ser(ser_array &s, const Message &msg) +{ + int len = msg.ByteSize(); + + // Grow the array as needed. + s.stretch(len + sizeof(uint32_t)); + + // Serialize message to a buffer with four-byte length prefix. + check(msg.SerializeToArray(s.data() + sizeof(uint32_t), len)); + *reinterpret_cast<uint32_t*>(s.data()) = htonl(uint32_t(len)); +} + +/** + * Serialization. + */ +void +ser(ostream &s, const Message &msg) +{ + uint32_t len = htonl(uint32_t(msg.ByteSize())); + s.write(reinterpret_cast<const char*>(&len), sizeof len); + check(msg.SerializeToOstream(&s)); +} + +// +// Messaging +// + +/** + * Send a message to some destinations. + */ +void +bcastbuf(const vector<st_netfd_t> &dsts, const ser_t &msg) +{ + if (!fake_bcast) { + foreach (st_netfd_t dst, dsts) { + st_timed_write(dst, msg.data(), msg.size()); + } + } +} + +/** + * Send a message to some destinations, using whichever method of network IO + * was chosen (sync or async). + */ +void +bcastmsg(const vector<st_netfd_t> &dsts, const Message &msg) +{ + ser_t s; + ser(s, msg); + bcastbuf(dsts, s); +} + +/** + * Send a message to a single recipient. + */ +void +sendbuf(st_netfd_t dst, const ser_t &msg) +{ + if (!fake_bcast) + st_timed_write(dst, msg.data(), msg.size()); +} + +/** + * Send a message to a single recipient. + */ +void +sendmsg(st_netfd_t dst, const Message &msg) +{ + ser_t s; + ser(s, msg); + sendbuf(dst, s); +} + +/** + * Read a message. This is done in two steps: first by reading the length + * prefix, then by reading the actual body. This function also provides a way + * to measure how much time is spent actually reading the message from the + * network. Such measurement only makes sense for large messages which take a + * long time to receive. + * + * \param[in] src The socket from which to read. 
+ * + * \param[in] msg The protobuf to read into. + * + * \param[out] start_time If not null, record the time at which we start to + * receive the message (after the length is received). + * + * \param[out] stop_time If not null, record the time at which we finish + * receiving the message (before we deserialize the protobuf). + * + * \param[out] len If not null, record the size of the serialized message + * in bytes. + * + * \param[in] timeout on each of the two read operations (first one is on + * length, second one is on the rest). + * + * \return The length of the serialized message. + */ +size_t +readmsg(st_netfd_t src, Message &msg, long long *start_time = nullptr, long long + *stop_time = nullptr, st_utime_t timeout = ST_UTIME_NO_TIMEOUT) +{ + // Read the message length. + uint32_t len; + checkeqnneg(st_read_fully(src, static_cast<void*>(&len), sizeof len, + timeout), + static_cast<ssize_t>(sizeof len)); + if (start_time != nullptr) + *start_time = current_time_millis(); + len = ntohl(len); + + // Parse the message body. Try stack-allocation if possible. + scoped_array<char> sbuf; + char *buf; + if (len <= 4096) buf = reinterpret_cast<char*>(alloca(len)); + else sbuf.reset(buf = new char[len]); + checkeqnneg(st_read_fully(src, buf, len, timeout), int(len)); + if (stop_time != nullptr) + *stop_time = current_time_millis(); + check(msg.ParseFromArray(buf, len)); + + return len; +} + +/** + * Same as the above readmsg() but uses an st_reader instead of a raw + * st_netfd_t. 
+ */ +void +readmsg(st_reader &src, Message &msg) +{ + managed_array<char> a = src.read(sizeof(uint32_t)); + uint32_t len = ntohl(*reinterpret_cast<const uint32_t*>(a.get())); + check(msg.ParseFromArray(src.read(len), len)); +} + +void +readmsg(anchored_stream_reader &src, Message &msg) +{ + uint32_t len = ntohl(src.read<uint32_t>()); + check(msg.ParseFromArray(checkpass(src.read(len)), len)); +} + +void +readmsg(istream &src, Message &msg) +{ + uint32_t len; + src.read(reinterpret_cast<char*>(&len), sizeof len); + len = ntohl(len); +#if 0 + IstreamInputStream iis(&src); + LimitingInputStream lis(&iis, len); + check(msg.ParseFromZeroCopyStream(&lis)); +#else + char buf[len]; + src.read(buf, len); + check(msg.ParseFromArray(buf, len)); +#endif +} + +inline uint32_t +readlen(istream &src) +{ + uint32_t len; + src.read(reinterpret_cast<char*>(&len), sizeof len); + len = ntohl(len); + ASSERT(len < 10000); + return len; +} + +#endif Modified: ydb/trunk/src/ydb.lzz.clamp =================================================================== --- ydb/trunk/src/ydb.lzz.clamp 2009-03-24 06:10:43 UTC (rev 1329) +++ ydb/trunk/src/ydb.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) @@ -6,7 +6,6 @@ #include <string> #include <iostream> #include <st.h> -#include <commons/st/st.h> #include "tpcc/clock.h" #include "tpcc/randomgenerator.h" #include "tpcc/tpccclient.h" @@ -26,54 +25,23 @@ #include "unsetprefs.h" #include <csignal> // sigaction, etc. #include <cstring> // strsignal, size_t +#include <boost/archive/binary_iarchive.hpp> #include <boost/program_options.hpp> #include <gtest/gtest.h> #include <malloc.h> #include <string> +#include <commons/st/io.h> +#include <commons/st/sockets.h> +#include <commons/st/threads.h> #include "setprefs.h" #end using namespace google; using namespace testing; +using namespace boost::archive; -// -// Utilities/system -// - +namespace { /** - * Delegate for running thread targets. - * \param[in] f The function to execute. 
- * \param[in] intr Whether to signal stop_hub on an exception. - */ -void -my_spawn_helper(const function0<void> f, bool intr) -{ - thread_eraser eraser; - try { - f(); - } catch (std::exception &ex) { - cerr_thread_ex(ex) << (intr ? "; interrupting!" : "") << endl; - if (intr) stop_hub.set(); - } -} - -/** - * Spawn a thread using ST but wrap it in an exception handler that interrupts - * all other threads (hopefully causing them to unwind). - * \param[in] f The function to execute. - * \param[in] intr Whether to signal stop_hub on an exception. Not actually - * used anywhere. - */ -st_thread_t -my_spawn(const function0<void> &f, string name, bool intr = false) -{ - st_thread_t t = st_spawn(bind(my_spawn_helper, f, intr)); - threads.insert(t); - threadnames[t] = name; - return t; -} - -/** * Memory monitor. */ void @@ -127,6 +95,7 @@ } } } +} // // Main @@ -361,13 +330,13 @@ if (use_pb_res) { run_leader<pb_traits, pb_traits>(minreps, leader_port); } else { - run_leader<pb_traits, rb_traits>(minreps, leader_port); + //run_leader<pb_traits, rb_traits>(minreps, leader_port); } } else { if (use_pb_res) { - run_leader<rb_traits, pb_traits>(minreps, leader_port); + //run_leader<rb_traits, pb_traits>(minreps, leader_port); } else { - run_leader<rb_traits, rb_traits>(minreps, leader_port); + //run_leader<rb_traits, rb_traits>(minreps, leader_port); } } } else { @@ -375,13 +344,13 @@ if (use_pb_res) { run_replica<pb_traits, pb_traits>(leader_host, leader_port, listen_port); } else { - run_replica<pb_traits, rb_traits>(leader_host, leader_port, listen_port); + //run_replica<pb_traits, rb_traits>(leader_host, leader_port, listen_port); } } else { if (use_pb_res) { - run_replica<rb_traits, pb_traits>(leader_host, leader_port, listen_port); + //run_replica<rb_traits, pb_traits>(leader_host, leader_port, listen_port); } else { - run_replica<rb_traits, rb_traits>(leader_host, leader_port, listen_port); + //run_replica<rb_traits, rb_traits>(leader_host, leader_port, 
listen_port); } } } @@ -394,6 +363,17 @@ } } +#if 0 +template<typename Types, typename RTypes> +void +run_leader(int minreps, uint16_t leader_port); +template<typename Types, typename RTypes> +void +run_replica(string leader_host, uint16_t leader_port, uint16_t listen_port); +#endif + +#if 1 +namespace { /** * Run the leader. */ @@ -415,6 +395,7 @@ vector<replica_info> replicas; st_closing_all_infos close_replicas(replicas); cout << "waiting for at least " << minreps << " replicas to join" << endl; + Join join; for (int i = 0; i < minreps; ++i) { st_netfd_t fd; { @@ -422,7 +403,7 @@ fd = checkerr(st_accept(listener, nullptr, nullptr, ST_UTIME_NO_TIMEOUT)); } - Join join = readmsg<Join>(fd); + readmsg(fd, join); replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); } cout << "got all " << minreps << " replicas" << endl; @@ -487,7 +468,8 @@ else throw break_exception(); } - Join join = readmsg<Join>(joiner); + Join join; + readmsg(joiner, join); replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); cout << "setting seqno to " << seqno << endl; init.set_txnseqno(seqno); @@ -1072,3 +1054,5 @@ stop_hub.insert(st_thread_self()); } +} +#endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-24 07:54:52
|
Revision: 1331 http://assorted.svn.sourceforge.net/assorted/?rev=1331&view=rev Author: yangzhang Date: 2009-03-24 07:54:47 +0000 (Tue, 24 Mar 2009) Log Message: ----------- major physical refactoring; substantially reduced critical-path build times by breaking down files into smaller files and introducing decoupling, particularly with templated entities Modified Paths: -------------- ydb/trunk/src/main.lzz.clamp ydb/trunk/src/stxn.lzz.clamp ydb/trunk/src/tpcc.lzz.clamp ydb/trunk/src/ydb.lzz.clamp Added Paths: ----------- ydb/trunk/src/leader.lzz.clamp ydb/trunk/src/rectpcc.lzz.clamp ydb/trunk/src/replica.lzz.clamp ydb/trunk/src/run.lzz.clamp Added: ydb/trunk/src/leader.lzz.clamp =================================================================== --- ydb/trunk/src/leader.lzz.clamp (rev 0) +++ ydb/trunk/src/leader.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -0,0 +1,137 @@ +#hdr +#include <stdint.h> +#end + +#src +#include "unsetprefs.h" +#include <commons/st/sockets.h> +#include <commons/st/threads.h> +#include "run.hh" +#include "stxn.hh" +#include "tpcc.hh" +#include "setprefs.h" +#end + +/** + * Run the leader. + */ +void +run_leader(int minreps, uint16_t leader_port) +{ + cout << "starting as leader" << endl; + st_multichannel<long long> recover_signals; + + scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); + g_twal = twal.get(); + scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); + g_wal = pwal.get(); + + // Wait until all replicas have joined. 
+ st_netfd_t listener = st_tcp_listen(leader_port); + st_closing close_listener(listener); + vector<replica_info> replicas; + st_closing_all_infos close_replicas(replicas); + cout << "waiting for at least " << minreps << " replicas to join" << endl; + Join join; + for (int i = 0; i < minreps; ++i) { + st_netfd_t fd; + { + st_intr intr(stop_hub); + fd = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + } + readmsg(fd, join); + replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); + } + cout << "got all " << minreps << " replicas" << endl; + + // Construct the initialization message. + Init init; + init.set_txnseqno(0); + init.set_multirecover(multirecover); + foreach (replica_info r, replicas) { + SockAddr *psa = init.add_node(); + psa->set_host(r.host()); + psa->set_port(r.port()); + } + + // Send init to each initial replica. + foreach (replica_info r, replicas) { + init.set_yourhost(r.host()); + sendmsg(r.fd(), init); + } + + // Start dispatching queries. + st_bool accept_joiner; + int seqno = 0; + st_channel<replica_info> newreps; + st_channel<st_netfd_t> delreps; + foreach (const replica_info &r, replicas) newreps.push(r); + function<void()> f; + if (do_tpcc) + f = bind(issue_tpcc, ref(newreps), ref(delreps), ref(seqno), ref(accept_joiner)); + else + f = bind(issue_txns, ref(newreps), ref(seqno), ref(accept_joiner)); + st_joining join_issue_txns(my_spawn(f, "issue_txns")); + + finally fin(bind(summarize, "LEADER", ref(seqno))); + + try { + // Start handling responses. + st_thread_group handlers; + int rid = 0; + foreach (replica_info r, replicas) { + function<void()> fn; + if (do_tpcc) + fn = bind(handle_tpcc_responses, r.fd(), ref(seqno), rid++, + ref(recover_signals), ref(delreps), true); + else + fn = bind(handle_responses, r.fd(), ref(seqno), rid++, + ref(recover_signals), true); + handlers.insert(my_spawn(fn, "handle_responses")); + } + + // Accept the recovering node, and tell it about the online replicas. 
+ st_netfd_t joiner; + try { + st_intr intr(stop_hub); + joiner = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + accept_joiner.waitset(); + } catch (std::exception &ex) { + string s(ex.what()); + if (s.find("Interrupted system call") == s.npos) + throw; + else + throw break_exception(); + } + Join join; + readmsg(joiner, join); + replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); + cout << "setting seqno to " << seqno << endl; + init.set_txnseqno(seqno); + init.set_yourhost(replicas.back().host()); + sendmsg(joiner, init); + recover_signals.push(current_time_millis()); + + // Start streaming txns to joiner. + cout << "start streaming txns to joiner" << endl; + function<void()> handle_responses_joiner_fn; + if (do_tpcc) + handle_responses_joiner_fn = + bind(handle_tpcc_responses, joiner, ref(seqno), rid++, + ref(recover_signals), ref(delreps), false); + else + handle_responses_joiner_fn = + bind(handle_responses, joiner, ref(seqno), rid++, + ref(recover_signals), false); + newreps.push(replicas.back()); + handlers.insert(my_spawn(handle_responses_joiner_fn, + "handle_responses_joiner")); + } catch (break_exception &ex) { + } catch (std::exception &ex) { + // TODO: maybe there's a cleaner way to do this final step before waiting with the join + cerr_thread_ex(ex) << endl; + throw; + } +} Modified: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) +++ ydb/trunk/src/main.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -29,6 +29,7 @@ #include <commons/st/sockets.h> #include <iostream> #include <unistd.h> // pipe, write, sync +#include "tpcc/tpcctables.h" #include "msg.h" #include "setprefs.h" #end Added: ydb/trunk/src/rectpcc.lzz.clamp =================================================================== --- ydb/trunk/src/rectpcc.lzz.clamp (rev 0) +++ ydb/trunk/src/rectpcc.lzz.clamp 2009-03-24 07:54:47 UTC 
(rev 1331) @@ -0,0 +1,175 @@ +#hdr +#include "tpcc.hh" +namespace ydb { namespace pb { class Init; } } +using namespace ydb::pb; +#end + +#src +#include "unsetprefs.h" +#include <commons/time.h> +#include <commons/st/io.h> +#include <commons/st/threads.h> +#include <commons/st/reader.h> +#include "tpcc/tpcctables.h" +#include "ydb.pb.h" +#include "setprefs.h" +#end + +void +rec_tpcc(int &seqno, int mypos, const Init &init, + const vector<st_netfd_t> &replicas, recovery_t &orig, + st_channel<chunk> &backlog) +{ + commons::array<char> recarr(0); + + function<void()> rec_twal_fn = lambda() { + int &seqno = __ref(seqno); + cout << "recovering from twal" << endl; + long long start_time = current_time_millis(); + g_twal->flush(); + sync(); + ifstream inf("twal"); + TpccReq req; + while (inf.peek() != ifstream::traits_type::eof()) { + ASSERT(inf.good()); + readmsg(inf, req); + process_tpcc(req, seqno, nullptr); + if (check_interval(seqno, yield_interval)) st_sleep(0); + } + showdatarate("recovered from twal", inf.tellg(), + current_time_millis() - start_time); + cout << "now at seqno " << seqno << endl; + }; + + function<void()> recv_log_fn = lambda() { + st_netfd_t src = __ref(replicas[0]); + int &seqno = __ref(seqno); + ASSERT(fail_seqno == seqno); + recreq r = { fail_seqno + 1, resume.take() }; + st_write(src, r); + sized_array<char> rbuf(new char[read_buf_size], read_buf_size); + function<void(anchored_stream_reader &reader)> overflow_fn = + lambda(anchored_stream_reader &reader) { + shift_reader(reader); + }; + anchored_stream_reader reader(st_read_fn(src), + st_read_fully_fn(src), + overflow_fn, rbuf.get(), rbuf.size()); + TpccReq req; + while (seqno < r.end_seqno) { + { st_intr intr(stop_hub); readmsg(reader, req); } + process_tpcc(req, seqno, nullptr); + reader.set_anchor(); + if (check_interval(seqno, yield_interval)) st_sleep(0); + } + }; + + if (rec_twal) { + failed.waitset(); + g_tables.reset(new TPCCTables); + tpcc_recovery_header &hdr = 
*reinterpret_cast<tpcc_recovery_header*>(orig.begin()); + commons::array<char> body(orig.begin() + sizeof(tpcc_recovery_header), + orig.size() - sizeof(tpcc_recovery_header)); + g_tables->deser(mypos, init.node_size(), hdr, body); + body.release(); + rec_twal_fn(); + failed.reset(); + recv_log_fn(); + } + +#if 0 + st_thread_t rec_twal_thread = my_spawn(rec_twal_fn, "rec_twal"); + st_thread_t recv_log_thread = my_spawn(recv_log_fn, "recv_log"); + + st_join(rec_twal_thread); + st_join(recv_log_thread); +#endif + + if (rec_pwal) { + // Recover from phy log. + } else if (rec_twal) { + // Recover from txn log. + } else { + + g_tables.reset(new TPCCTables); + + // + // Build-up + // + + if (ship_log) { + } else { + // XXX indent + + cout << "waiting for recovery message" << (multirecover ? "s" : "") + << endl; + long long before_recv = current_time_millis(); + + vector<st_thread_t> recovery_builders; + ASSERT(seqno == -1); + for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { + recovery_builders.push_back(my_spawn(lambda() { + // Read the recovery message length and header. 
+ tpcc_recovery_header hdr; + checkeqnneg(st_read_fully(__ref(replicas[i]), + &hdr, sizeof hdr, + ST_UTIME_NO_TIMEOUT), + ssize_t(sizeof hdr)); + check(hdr.seqno >= 0); + + cout << "receiving recovery of " << hdr.len << " bytes" << endl; + + long long start_time = current_time_millis(); + __ref(recarr).reset(new char[hdr.len], hdr.len); + checkeqnneg(st_read_fully(__ref(replicas[i]), + __ref(recarr).get(), hdr.len, + ST_UTIME_NO_TIMEOUT), + ssize_t(hdr.len)); + + long long before_deser = current_time_millis(); + showdatarate("received recovery message", size_t(hdr.len), before_deser - start_time); + + if (__ref(seqno) == -1) + __ref(seqno) = hdr.seqno; + else + checkeq(__ref(seqno), hdr.seqno); + + g_tables->deser(__ctx(i), __ref(init).node_size(), hdr, __ref(recarr)); + + long long end_time = current_time_millis(); + showdatarate("deserialized recovery message", size_t(hdr.len), end_time - before_deser); + cout << "receive & deserialize took " << end_time - __ref(before_recv) + << " ms total; now at seqno " << hdr.seqno << endl; + cout << "after deserialize, db state is now at seqno " + << hdr.seqno << ":" << endl; + g_tables->show(); + + }, "recovery_builder" + lexical_cast<string>(i))); + } + foreach (st_thread_t t, recovery_builders) { + st_join(t); + } + + } + } + + // + // Catch-up + // + + long long mid_time = current_time_millis(); + int mid_seqno = seqno; + TpccReq req; + while (!backlog.empty()) { + chunk chunk = backlog.take(); + cout << "took from backlog, now has " << backlog.queue().size() + << " chunks" << endl; + sized_array<char> &buf = chunk.get<0>(); + char *begin = chunk.get<1>(), *end = chunk.get<2>(); + ASSERT(buf.get() <= begin && begin < buf.end()); + ASSERT(buf.get() < end && end < buf.end()); + process_buf(begin, end, req, seqno); + } + showtput("replayer caught up; from backlog replayed", + current_time_millis(), mid_time, seqno, mid_seqno); +} Added: ydb/trunk/src/replica.lzz.clamp 
=================================================================== --- ydb/trunk/src/replica.lzz.clamp (rev 0) +++ ydb/trunk/src/replica.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -0,0 +1,362 @@ +#hdr +#include "unsetprefs.h" +#include <string> +#end + +#src +#include "unsetprefs.h" +#include <boost/archive/binary_iarchive.hpp> +#include <commons/st/sockets.h> +#include <commons/st/threads.h> +#include "tpcc/clock.h" +#include "tpcc/randomgenerator.h" +#include "tpcc/tpccclient.h" +#include "tpcc/tpccgenerator.h" +#include "tpcc/tpcctables.h" +#include "rectpcc.hh" +#include "run.hh" +#include "stxn.hh" +#include "tpcc.hh" +#end + +/** + * Run a replica. + */ +void +run_replica(std::string leader_host, uint16_t leader_port, uint16_t listen_port) +{ + if (disk) { + // Disk IO threads. + for (int i = 0; i < 5; ++i) { + //thread somethread(threadfunc); + } + } + + // Initialize database state. + int seqno = -1; + mii &map = g_map; + if (do_tpcc) { + TPCCTables *tables = new TPCCTables(); + g_tables.reset(tables); + SystemClock* clock = new SystemClock(); + + // Create a generator for filling the database. + RealRandomGenerator* random = new RealRandomGenerator(); + NURandC cLoad = NURandC::makeRandom(random); + random->setC(cLoad); + + // Generate the data + cout << "loading " << nwarehouses << " warehouses" << endl; + char now[Clock::DATETIME_SIZE+1]; + clock->getDateTimestamp(now); + TPCCGenerator generator(random, now, Item::NUM_ITEMS, + District::NUM_PER_WAREHOUSE, + Customer::NUM_PER_DISTRICT, + NewOrder::INITIAL_NUM_PER_DISTRICT); + long long start_time = current_time_millis(); + generator.makeItemsTable(tables); + for (int i = 0; i < nwarehouses; ++i) { + generator.makeWarehouse(tables, i+1); + } + cout << "loaded " << nwarehouses << " warehouses in " + << current_time_millis() - start_time << " ms" << endl; + tables->show(); + } + recovery_t orig = rec_twal ? 
g_tables->ser(0, 0, seqno) : recovery_t(); + + finally f(bind(summarize, "REPLICA", ref(seqno))); + st_channel<recovery_t> send_states; + + cout << "starting as replica on port " << listen_port << endl; + + // Listen for connections from other replicas. + st_netfd_t listener = st_tcp_listen(listen_port); + + // Connect to the leader and join the system. + st_netfd_t leader = st_tcp_connect(leader_host.c_str(), leader_port, + timeout); + st_closing closing(leader); + Join join; + join.set_port(listen_port); + sendmsg(leader, join); + Init init; + { + st_intr intr(stop_hub); + readmsg(leader, init); + } + uint32_t listen_host = init.yourhost(); + multirecover = init.multirecover(); + + // Display the info. + cout << "got init msg with txn seqno " << init.txnseqno() + << " and hosts:" << endl; + vector<st_netfd_t> replicas; + st_closing_all close_replicas(replicas); + int mypos = -1; + for (int i = 0; i < init.node_size(); ++i) { + const SockAddr &sa = init.node(i); + char buf[INET_ADDRSTRLEN]; + in_addr host = { sa.host() }; + bool is_self = sa.host() == listen_host && sa.port() == listen_port; + cout << "- " << checkerr(inet_ntop(AF_INET, &host, buf, + INET_ADDRSTRLEN)) + << ':' << sa.port() << (is_self ? " (self)" : "") << endl; + if (is_self) mypos = i; + if (!is_self && (init.txnseqno() > 0 || rec_twal)) { + replicas.push_back(st_tcp_connect(host, + static_cast<uint16_t>(sa.port()), + timeout)); + } + } + + // Initialize physical or txn log. + scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); + g_twal = twal.get(); + scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); + g_wal = pwal.get(); + + // Process txns. 
+ st_channel<chunk> backlog; + function<void()> process_fn; + if (do_tpcc) + process_fn = bind(process_tpccs, leader, ref(seqno), ref(send_states), + ref(backlog), init.txnseqno(), mypos, init.node_size()); + else + process_fn = bind(process_txns, leader, ref(map), ref(seqno), + ref(send_states), ref(backlog), init.txnseqno(), mypos, + init.node_size()); + st_joining join_proc(my_spawn(process_fn, "process_txns")); + st_joining join_rec(init.txnseqno() == 0 && (multirecover || mypos == 0) ? + my_spawn(bind(recover_joiner, listener, ref(send_states)), + "recover_joiner") : + nullptr); + + try { + // If there's anything to recover. + if (init.txnseqno() > 0 || fail_seqno > 0) { + if (do_tpcc) { + + rec_tpcc(seqno, mypos, init, replicas, orig, backlog); + + } else { + + // + // Simple txns + // + + if (rec_pwal) { + // Recover from physical log. + cout << "recovering from pwal" << endl; + long long start_time = current_time_millis(); + ifstream inf("pwal"); + binary_iarchive in(inf); + int rseqno = -1; + while (inf.peek() != ifstream::traits_type::eof()) { + int op; + in & op; + switch (op) { + case op_del: + { + int key; + in & key; + mii::iterator it = map.find(key); + map.erase(it); + break; + } + case op_write: + { + int key, val; + in & key & val; + map[key] = val; + break; + } + case op_commit: + ++rseqno; + break; + } + if (check_interval(rseqno, yield_interval)) st_sleep(0); + } + seqno = init.txnseqno() - 1; + showdatarate("recovered from pwal", inf.tellg(), current_time_millis() - start_time); + cout << "now at seqno " << rseqno << " (really: " << seqno << ")" << endl; + } else { + + // + // Build-up + // + + cout << "waiting for recovery message" << (multirecover ? "s" : "") + << endl; + long long before_recv = current_time_millis(); + + vector<st_thread_t> recovery_builders; + ASSERT(seqno == -1); + bool first = true; + for (int i = 0; i < (multirecover ? 
init.node_size() : 1); ++i) { + recovery_builders.push_back(my_spawn(lambda() { + // Read the recovery message length and header. + size_t len; + recovery_header hdr; + char buf[sizeof len + sizeof hdr]; + //try { + checkeqnneg(st_read_fully(__ref(replicas[i]), + buf, sizeof len + sizeof hdr, + ST_UTIME_NO_TIMEOUT), + ssize_t(sizeof len + sizeof hdr)); + //} catch (...) { // TODO just catch "Connection reset by peer" + //return; + //} + raw_reader rdr(buf); + rdr.read(len); + rdr.read(hdr); + check(hdr.seqno >= 0); + + // Resize the table if necessary. + commons::array<entry> &table = __ref(map).get_table(); + if (!__ref(first)) { + checkeq(table.size(), hdr.total); + checkeq(__ref(map).size(), hdr.size); + } else { + __ref(first) = false; + __ref(map).set_size(hdr.size); + if (table.size() != hdr.total) { + table.reset(new entry[hdr.total], hdr.total); + } + } + + // Receive straight into the table. + pair<size_t, size_t> range = + recovery_range(table.size(), __ctx(i), __ref(init).node_size()); + // Check that we agree on the number of entries. + checkeq(range.second - range.first, hdr.count); + // Check that the count is a power of two. + checkeq(hdr.count & (hdr.count - 1), size_t(0)); + size_t rangelen = sizeof(entry) * hdr.count; + // Read an extra char to ensure that we're at the EOF. 
+ long long start_time = current_time_millis(); + checkeqnneg(st_read_fully(__ref(replicas[i]), + table.begin() + range.first, rangelen + 1, + ST_UTIME_NO_TIMEOUT), + ssize_t(rangelen)); + long long end_time = current_time_millis(); + + if (__ref(seqno) != -1) + checkeq(__ref(seqno), hdr.seqno); + __ref(seqno) = hdr.seqno; + showdatarate("got recovery message", len, end_time - start_time); + cout << "receive took " << end_time - __ref(before_recv) + << " ms total; now at seqno " << hdr.seqno << endl; +#if 0 + Recovery recovery; + long long receive_start = 0, receive_end = 0; + size_t len = 0; + { + st_intr intr(stop_hub); + len = readmsg(__ref(replicas)[__ctx(i)], recovery, &receive_start, + &receive_end); + } + long long build_start = current_time_millis(); + cout << "got recovery message of " << len << " bytes in " + << build_start - __ref(before_recv) << " ms: xfer took " + << receive_end - receive_start << " ms, deserialization took " + << build_start - receive_end << " ms" << endl; + for (int i = 0; i < recovery.pair_size(); ++i) { + const Recovery_Pair &p = recovery.pair(i); + __ref(map)[p.key()] = p.value(); + if (i % yield_interval == 0) { + if (yield_during_build_up) st_sleep(0); + } + } + check(recovery.seqno() >= 0); + int seqno = __ref(seqno) = recovery.seqno(); + long long build_end = current_time_millis(); + cout << "receive and build-up took " + << build_end - __ref(before_recv) + << " ms; built up map of " << recovery.pair_size() + << " records in " << build_end - build_start + << " ms; now at seqno " << seqno << endl; +#endif + }, "recovery_builder" + lexical_cast<string>(i))); + } + foreach (st_thread_t t, recovery_builders) { + st_join(t); + } + } + + // + // Catch-up + // + + long long mid_time = current_time_millis(); + int mid_seqno = seqno; + // XXX + using msg::TxnBatch; + using msg::Txn; + commons::array<char> rbuf(0), wbuf(buf_size); + reader reader(nullptr, rbuf.get(), rbuf.size()); + writer writer(lambda(const void*, size_t) { + throw 
not_supported_exception("should not be writing responses during catch-up phase"); + }, wbuf.get(), wbuf.size()); + stream s(reader, writer); + TxnBatch batch(s); + while (!backlog.empty()) { + chunk chunk = backlog.take(); + sized_array<char> &buf = chunk.get<0>(); + ASSERT(buf.get() <= chunk.get<1>() && chunk.get<1>() < buf.end()); + ASSERT(buf.get() < chunk.get<2>() && chunk.get<2>() < buf.end()); + ASSERT(chunk.get<1>() < chunk.get<2>()); + swap(buf, reader.buf()); + reader.reset_range(chunk.get<1>(), chunk.get<2>()); + while (reader.start() < reader.end()) { + char *start = reader.start(); + uint32_t prefix = ntohl(reader.read<uint32_t>()); + ASSERT(prefix < 10000); + ASSERT(start + sizeof(uint32_t) + prefix <= reader.end()); + batch.Clear(); + for (int t = 0; t < batch.txn_size(); ++t) { + const Txn &txn = batch.txn(t); + if (rec_pwal) seqno = txn.seqno() - 1; + process_txn(map, txn, seqno); + if (fake_exec && !use_pb) { + reader.skip(txn.op_size() * Op_Size); + } + + if (check_interval(txn.seqno(), yield_interval)) st_sleep(0); + if (check_interval(txn.seqno(), process_display)) { + cout << "caught up txn " << txn.seqno() + << "; db size = " << map.size() + << "; seqno = " << seqno + << "; backlog.size = " << backlog.queue().size() << endl; + } + } + ASSERT(start + sizeof(uint32_t) + prefix == reader.start()); + } + } + g_caught_up = true; +#if 0 + while (!backlog.empty()) { + using pb::Txn; + shared_ptr<Txn> p = backlog.take(); + process_txn<pb_traits, pb_traits>(map, *p, seqno, nullptr); + if (check_interval(p->seqno(), catch_up_display)) { + cout << "processed txn " << p->seqno() << " off the backlog; " + << "backlog.size = " << backlog.queue().size() << endl; + } + if (check_interval(p->seqno(), yield_interval)) { + // Explicitly yield. (Note that yielding does still effectively + // happen anyway because process_txn is a yield point.) 
+ st_sleep(0); + } + } +#endif + showtput("replayer caught up; from backlog replayed", + current_time_millis(), mid_time, seqno, mid_seqno); + } + } + } catch (std::exception &ex) { + cerr_thread_ex(ex) << endl; + throw; + } + + stop_hub.insert(st_thread_self()); +} Added: ydb/trunk/src/run.lzz.clamp =================================================================== --- ydb/trunk/src/run.lzz.clamp (rev 0) +++ ydb/trunk/src/run.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -0,0 +1,12 @@ +#src +#include "tpcc.hh" +#include "stxn.hh" +#end + +void +summarize(const char *role, int seqno) +{ + cout << role << " SUMMARY\n"; + if (do_tpcc) summarize_tpcc(seqno); + else summarize_stxn(seqno); +} Modified: ydb/trunk/src/stxn.lzz.clamp =================================================================== --- ydb/trunk/src/stxn.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) +++ ydb/trunk/src/stxn.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -56,6 +56,8 @@ map.set_deleted_key(-2); } +namespace { + template<typename T> recovery_t make_recovery(const T &map, int mypos, int nnodes, int seqno); template<> @@ -88,8 +90,8 @@ */ template<typename Types> void -issue_txns(st_channel<replica_info> &newreps, int &seqno, - st_bool &accept_joiner) +issue_txns0(st_channel<replica_info> &newreps, int &seqno, + st_bool &accept_joiner) { USE(newreps); USE(seqno); @@ -341,11 +343,11 @@ */ template<typename Types, typename RTypes> void -process_txns(st_netfd_t leader, mii &map, int &seqno, - st_channel<recovery_t> &send_states, - /* XXX st_channel<shared_ptr<pb::Txn> > &backlog */ - st_channel<chunk> &backlog, int init_seqno, - int mypos, int nnodes) +process_txns0(st_netfd_t leader, mii &map, int &seqno, + st_channel<recovery_t> &send_states, + /* XXX st_channel<shared_ptr<pb::Txn> > &backlog */ + st_channel<chunk> &backlog, int init_seqno, + int mypos, int nnodes) { USE(leader); USE(map); @@ -737,15 +739,113 @@ int start_seqno, recovery_start_seqno, recovery_end_seqno, last_seqno; }; 
+template<typename T> +void +ser(ser_array &s, const T &msg) +{ + int len = msg.ByteSize(); + + // Grow the array as needed. + s.stretch(len + sizeof(uint32_t)); + + // Serialize message to a buffer with four-byte length prefix. + check(msg.SerializeToArray(s.data() + sizeof(uint32_t), len)); + *reinterpret_cast<uint32_t*>(s.data()) = htonl(uint32_t(len)); +} + +template<typename T> +void +bcastmsg(const vector<st_netfd_t> &dsts, const T &msg) +{ + ser_t s; + ser(s, msg); + bcastbuf(dsts, s); +} + +template<typename T> +void +sendmsg(st_netfd_t dst, const T &msg) +{ + ser_t s; + ser(s, msg); + sendbuf(dst, s); +} + +template<typename T> +void +readmsg(st_reader &src, T &msg) +{ + managed_array<char> a = src.read(sizeof(uint32_t)); + uint32_t len = ntohl(*reinterpret_cast<const uint32_t*>(a.get())); + check(msg.ParseFromArray(src.read(len), len)); +} + +} + /** * Swallow replica responses. */ -template<typename Types> void handle_responses(st_netfd_t replica, const int &seqno, int rid, st_multichannel<long long> &recover_signals, bool caught_up) { response_handler h(replica, seqno, rid, recover_signals, caught_up); - h.run<Types>(); + if (use_pb_res) + h.run<pb_traits>(); + else + h.run<rb_traits>(); } +void +issue_txns(st_channel<replica_info> &newreps, int &seqno, + st_bool &accept_joiner) +{ + if (use_pb) + issue_txns0<pb_traits>(newreps, seqno, accept_joiner); + else + issue_txns0<rb_traits>(newreps, seqno, accept_joiner); +} + +void +process_txns(st_netfd_t leader, mii &map, int &seqno, + st_channel<recovery_t> &send_states, + /* XXX st_channel<shared_ptr<pb::Txn> > &backlog */ + st_channel<chunk> &backlog, int init_seqno, + int mypos, int nnodes) +{ + if (use_pb && use_pb_res) + process_txns0<pb_traits, pb_traits> + (leader, map, seqno, send_states, backlog, init_seqno, mypos, nnodes); + else if (use_pb && !use_pb_res) + process_txns0<pb_traits, rb_traits> + (leader, map, seqno, send_states, backlog, init_seqno, mypos, nnodes); + else if (!use_pb && 
use_pb_res) + process_txns0<rb_traits, pb_traits> + (leader, map, seqno, send_states, backlog, init_seqno, mypos, nnodes); + else if (!use_pb && !use_pb_res) + process_txns0<rb_traits, rb_traits> + (leader, map, seqno, send_states, backlog, init_seqno, mypos, nnodes); +} + +void +process_txn(mii &map, const ydb::msg::Txn &txn, int &seqno) +{ + process_txn<rb_traits, rb_traits>(map, txn, seqno, nullptr); +} + +void +summarize_stxn(int seqno) +{ + cout << "- total updates = " << updates << "\n" + << "- final DB state: seqno = " << seqno << ", size = " + << g_map.size() << endl; + string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); + if (dump) { + cout << "- dumping to " << fname << endl; + ofstream of(fname.c_str()); + of << "seqno: " << seqno << endl; + foreach (const entry &p, g_map) { + of << p.first << ": " << p.second << endl; + } + } +} Modified: ydb/trunk/src/tpcc.lzz.clamp =================================================================== --- ydb/trunk/src/tpcc.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) +++ ydb/trunk/src/tpcc.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -831,3 +831,16 @@ ASSERT(false); } } + +void summarize_tpcc(int seqno) +{ + cout << "seqno: " << seqno << endl; + if (g_tables != nullptr) { + cout << "state:\n"; + g_tables->show(); + string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); + if (dump) { + // XXX iterate & dump + } + } +} Modified: ydb/trunk/src/ydb.lzz.clamp =================================================================== --- ydb/trunk/src/ydb.lzz.clamp 2009-03-24 06:10:53 UTC (rev 1330) +++ ydb/trunk/src/ydb.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) @@ -1,38 +1,17 @@ -#hdr -#include "unsetprefs.h" -#include <boost/bind.hpp> -#include <boost/function.hpp> -#include <boost/scoped_ptr.hpp> -#include <string> -#include <iostream> -#include <st.h> -#include "tpcc/clock.h" -#include "tpcc/randomgenerator.h" -#include "tpcc/tpccclient.h" -#include "tpcc/tpccgenerator.h" -#include 
"tpcc/tpcctables.h" -#include "util.hh" -#include "tpcc.hh" -#include "stxn.hh" -#include "main.hh" - -using namespace boost; -using namespace std; -using namespace commons; -#end - #src #include "unsetprefs.h" +#include <boost/program_options.hpp> +#include <commons/st/sockets.h> +#include <commons/st/threads.h> #include <csignal> // sigaction, etc. #include <cstring> // strsignal, size_t -#include <boost/archive/binary_iarchive.hpp> -#include <boost/program_options.hpp> #include <gtest/gtest.h> #include <malloc.h> #include <string> -#include <commons/st/io.h> -#include <commons/st/sockets.h> -#include <commons/st/threads.h> +#include "tpcc.hh" +#include "stxn.hh" +#include "replica.hh" +#include "leader.hh" #include "setprefs.h" #end @@ -41,6 +20,7 @@ using namespace boost::archive; namespace { + /** * Memory monitor. */ @@ -95,6 +75,7 @@ } } } + } // @@ -325,35 +306,10 @@ cout << "pid " << getpid() << endl; // Which role are we? - if (is_leader) { - if (use_pb) { - if (use_pb_res) { - run_leader<pb_traits, pb_traits>(minreps, leader_port); - } else { - //run_leader<pb_traits, rb_traits>(minreps, leader_port); - } - } else { - if (use_pb_res) { - //run_leader<rb_traits, pb_traits>(minreps, leader_port); - } else { - //run_leader<rb_traits, rb_traits>(minreps, leader_port); - } - } - } else { - if (use_pb) { - if (use_pb_res) { - run_replica<pb_traits, pb_traits>(leader_host, leader_port, listen_port); - } else { - //run_replica<pb_traits, rb_traits>(leader_host, leader_port, listen_port); - } - } else { - if (use_pb_res) { - //run_replica<rb_traits, pb_traits>(leader_host, leader_port, listen_port); - } else { - //run_replica<rb_traits, rb_traits>(leader_host, leader_port, listen_port); - } - } - } + if (is_leader) + run_leader(minreps, leader_port); + else + run_replica(leader_host, leader_port, listen_port); return 0; } catch (std::exception &ex) { @@ -362,697 +318,3 @@ return 1; } } - -#if 0 -template<typename Types, typename RTypes> -void -run_leader(int 
minreps, uint16_t leader_port); -template<typename Types, typename RTypes> -void -run_replica(string leader_host, uint16_t leader_port, uint16_t listen_port); -#endif - -#if 1 -namespace { -/** - * Run the leader. - */ -template<typename Types, typename RTypes> -void -run_leader(int minreps, uint16_t leader_port) -{ - cout << "starting as leader" << endl; - st_multichannel<long long> recover_signals; - - scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); - g_twal = twal.get(); - scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); - g_wal = pwal.get(); - - // Wait until all replicas have joined. - st_netfd_t listener = st_tcp_listen(leader_port); - st_closing close_listener(listener); - vector<replica_info> replicas; - st_closing_all_infos close_replicas(replicas); - cout << "waiting for at least " << minreps << " replicas to join" << endl; - Join join; - for (int i = 0; i < minreps; ++i) { - st_netfd_t fd; - { - st_intr intr(stop_hub); - fd = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - } - readmsg(fd, join); - replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); - } - cout << "got all " << minreps << " replicas" << endl; - - // Construct the initialization message. - Init init; - init.set_txnseqno(0); - init.set_multirecover(multirecover); - foreach (replica_info r, replicas) { - SockAddr *psa = init.add_node(); - psa->set_host(r.host()); - psa->set_port(r.port()); - } - - // Send init to each initial replica. - foreach (replica_info r, replicas) { - init.set_yourhost(r.host()); - sendmsg(r.fd(), init); - } - - // Start dispatching queries. 
- st_bool accept_joiner; - int seqno = 0; - st_channel<replica_info> newreps; - st_channel<st_netfd_t> delreps; - foreach (const replica_info &r, replicas) newreps.push(r); - function<void()> f; - if (do_tpcc) - f = bind(issue_tpcc, ref(newreps), ref(delreps), ref(seqno), ref(accept_joiner)); - else - f = bind(issue_txns<Types>, ref(newreps), ref(seqno), ref(accept_joiner)); - st_joining join_issue_txns(my_spawn(f, "issue_txns")); - - finally fin(bind(summarize, "LEADER", ref(seqno))); - - try { - // Start handling responses. - st_thread_group handlers; - int rid = 0; - foreach (replica_info r, replicas) { - function<void()> fn; - if (do_tpcc) - fn = bind(handle_tpcc_responses, r.fd(), ref(seqno), rid++, - ref(recover_signals), ref(delreps), true); - else - fn = bind(handle_responses<RTypes>, r.fd(), ref(seqno), rid++, - ref(recover_signals), true); - handlers.insert(my_spawn(fn, "handle_responses")); - } - - // Accept the recovering node, and tell it about the online replicas. - st_netfd_t joiner; - try { - st_intr intr(stop_hub); - joiner = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - accept_joiner.waitset(); - } catch (std::exception &ex) { - string s(ex.what()); - if (s.find("Interrupted system call") == s.npos) - throw; - else - throw break_exception(); - } - Join join; - readmsg(joiner, join); - replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); - cout << "setting seqno to " << seqno << endl; - init.set_txnseqno(seqno); - init.set_yourhost(replicas.back().host()); - sendmsg(joiner, init); - recover_signals.push(current_time_millis()); - - // Start streaming txns to joiner. 
- cout << "start streaming txns to joiner" << endl; - function<void()> handle_responses_joiner_fn; - if (do_tpcc) - handle_responses_joiner_fn = - bind(handle_tpcc_responses, joiner, ref(seqno), rid++, - ref(recover_signals), ref(delreps), false); - else - handle_responses_joiner_fn = - bind(handle_responses<RTypes>, joiner, ref(seqno), rid++, - ref(recover_signals), false); - newreps.push(replicas.back()); - handlers.insert(my_spawn(handle_responses_joiner_fn, - "handle_responses_joiner")); - } catch (break_exception &ex) { - } catch (std::exception &ex) { - // TODO: maybe there's a cleaner way to do this final step before waiting with the join - cerr_thread_ex(ex) << endl; - throw; - } -} - -void -summarize(const char *role, int seqno) -{ - cout << role << " SUMMARY\n"; - if (do_tpcc) { - cout << "seqno: " << seqno << endl; - if (g_tables != nullptr) { - cout << "state:\n"; - g_tables->show(); - string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); - if (dump) { - // XXX iterate & dump - } - } - } else { - cout << "- total updates = " << updates << "\n" - << "- final DB state: seqno = " << seqno << ", size = " - << g_map.size() << endl; - string fname = string("/tmp/ydb") + lexical_cast<string>(getpid()); - if (dump) { - cout << "- dumping to " << fname << endl; - ofstream of(fname.c_str()); - of << "seqno: " << seqno << endl; - foreach (const entry &p, g_map) { - of << p.first << ": " << p.second << endl; - } - } - } -} - -/** - * Run a replica. - */ -template<typename Types, typename RTypes> -void -run_replica(string leader_host, uint16_t leader_port, uint16_t listen_port) -{ - if (disk) { - // Disk IO threads. - for (int i = 0; i < 5; ++i) { - //thread somethread(threadfunc); - } - } - - // Initialize database state. 
- int seqno = -1; - mii &map = g_map; - commons::array<char> recarr(0); - if (do_tpcc) { - TPCCTables *tables = new TPCCTables(); - g_tables.reset(tables); - SystemClock* clock = new SystemClock(); - - // Create a generator for filling the database. - RealRandomGenerator* random = new RealRandomGenerator(); - NURandC cLoad = NURandC::makeRandom(random); - random->setC(cLoad); - - // Generate the data - cout << "loading " << nwarehouses << " warehouses" << endl; - char now[Clock::DATETIME_SIZE+1]; - clock->getDateTimestamp(now); - TPCCGenerator generator(random, now, Item::NUM_ITEMS, - District::NUM_PER_WAREHOUSE, - Customer::NUM_PER_DISTRICT, - NewOrder::INITIAL_NUM_PER_DISTRICT); - long long start_time = current_time_millis(); - generator.makeItemsTable(tables); - for (int i = 0; i < nwarehouses; ++i) { - generator.makeWarehouse(tables, i+1); - } - cout << "loaded " << nwarehouses << " warehouses in " - << current_time_millis() - start_time << " ms" << endl; - tables->show(); - } - recovery_t orig = rec_twal ? g_tables->ser(0, 0, seqno) : recovery_t(); - - finally f(bind(summarize, "REPLICA", ref(seqno))); - st_channel<recovery_t> send_states; - - cout << "starting as replica on port " << listen_port << endl; - - // Listen for connections from other replicas. - st_netfd_t listener = st_tcp_listen(listen_port); - - // Connect to the leader and join the system. - st_netfd_t leader = st_tcp_connect(leader_host.c_str(), leader_port, - timeout); - st_closing closing(leader); - Join join; - join.set_port(listen_port); - sendmsg(leader, join); - Init init; - { - st_intr intr(stop_hub); - readmsg(leader, init); - } - uint32_t listen_host = init.yourhost(); - multirecover = init.multirecover(); - - // Display the info. 
- cout << "got init msg with txn seqno " << init.txnseqno() - << " and hosts:" << endl; - vector<st_netfd_t> replicas; - st_closing_all close_replicas(replicas); - int mypos = -1; - for (int i = 0; i < init.node_size(); ++i) { - const SockAddr &sa = init.node(i); - char buf[INET_ADDRSTRLEN]; - in_addr host = { sa.host() }; - bool is_self = sa.host() == listen_host && sa.port() == listen_port; - cout << "- " << checkerr(inet_ntop(AF_INET, &host, buf, - INET_ADDRSTRLEN)) - << ':' << sa.port() << (is_self ? " (self)" : "") << endl; - if (is_self) mypos = i; - if (!is_self && (init.txnseqno() > 0 || rec_twal)) { - replicas.push_back(st_tcp_connect(host, - static_cast<uint16_t>(sa.port()), - timeout)); - } - } - - // Initialize physical or txn log. - scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); - g_twal = twal.get(); - scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); - g_wal = pwal.get(); - - // Process txns. - st_channel<chunk> backlog; - function<void()> process_fn; - if (do_tpcc) - process_fn = bind(process_tpccs, leader, ref(seqno), ref(send_states), - ref(backlog), init.txnseqno(), mypos, init.node_size()); - else - process_fn = bind(process_txns<Types, RTypes>, leader, ref(map), ref(seqno), - ref(send_states), ref(backlog), init.txnseqno(), mypos, - init.node_size()); - st_joining join_proc(my_spawn(process_fn, "process_txns")); - st_joining join_rec(init.txnseqno() == 0 && (multirecover || mypos == 0) ? - my_spawn(bind(recover_joiner, listener, ref(send_states)), - "recover_joiner") : - nullptr); - - try { - // If there's anything to recover. 
- if (init.txnseqno() > 0 || fail_seqno > 0) { - if (do_tpcc) { - - // - // TPCC txns - // - - function<void()> rec_twal_fn = lambda() { - int &seqno = __ref(seqno); - cout << "recovering from twal" << endl; - long long start_time = current_time_millis(); - g_twal->flush(); - sync(); - ifstream inf("twal"); - TpccReq req; - while (inf.peek() != ifstream::traits_type::eof()) { - ASSERT(inf.good()); - readmsg(inf, req); - process_tpcc(req, seqno, nullptr); - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - showdatarate("recovered from twal", inf.tellg(), - current_time_millis() - start_time); - cout << "now at seqno " << seqno << endl; - }; - - function<void()> recv_log_fn = lambda() { - st_netfd_t src = __ref(replicas[0]); - int &seqno = __ref(seqno); - ASSERT(fail_seqno == seqno); - recreq r = { fail_seqno + 1, resume.take() }; - st_write(src, r); - sized_array<char> rbuf(new char[read_buf_size], read_buf_size); - function<void(anchored_stream_reader &reader)> overflow_fn = - lambda(anchored_stream_reader &reader) { - shift_reader(reader); - }; - anchored_stream_reader reader(st_read_fn(src), - st_read_fully_fn(src), - overflow_fn, rbuf.get(), rbuf.size()); - TpccReq req; - while (seqno < r.end_seqno) { - { st_intr intr(stop_hub); readmsg(reader, req); } - process_tpcc(req, seqno, nullptr); - reader.set_anchor(); - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - }; - - if (rec_twal) { - failed.waitset(); - g_tables.reset(new TPCCTables); - tpcc_recovery_header &hdr = *reinterpret_cast<tpcc_recovery_header*>(orig.begin()); - commons::array<char> body(orig.begin() + sizeof(tpcc_recovery_header), - orig.size() - sizeof(tpcc_recovery_header)); - g_tables->deser(mypos, init.node_size(), hdr, body); - body.release(); - rec_twal_fn(); - failed.reset(); - recv_log_fn(); - } - -#if 0 - st_thread_t rec_twal_thread = my_spawn(rec_twal_fn, "rec_twal"); - st_thread_t recv_log_thread = my_spawn(recv_log_fn, "recv_log"); - - st_join(rec_twal_thread); - 
st_join(recv_log_thread); -#endif - - if (rec_pwal) { - // Recover from phy log. - } else if (rec_twal) { - // Recover from txn log. - } else { - - g_tables.reset(new TPCCTables); - - // - // Build-up - // - - if (ship_log) { - } else { - // XXX indent - - cout << "waiting for recovery message" << (multirecover ? "s" : "") - << endl; - long long before_recv = current_time_millis(); - - vector<st_thread_t> recovery_builders; - ASSERT(seqno == -1); - bool first = true; - for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { - recovery_builders.push_back(my_spawn(lambda() { - // Read the recovery message length and header. - tpcc_recovery_header hdr; - checkeqnneg(st_read_fully(__ref(replicas[i]), - &hdr, sizeof hdr, - ST_UTIME_NO_TIMEOUT), - ssize_t(sizeof hdr)); - check(hdr.seqno >= 0); - - cout << "receiving recovery of " << hdr.len << " bytes" << endl; - - long long start_time = current_time_millis(); - __ref(recarr).reset(new char[hdr.len], hdr.len); - checkeqnneg(st_read_fully(__ref(replicas[i]), - __ref(recarr).get(), hdr.len, - ST_UTIME_NO_TIMEOUT), - ssize_t(hdr.len)); - - long long before_deser = current_time_millis(); - showdatarate("received recovery message", size_t(hdr.len), before_deser - start_time); - - if (__ref(seqno) == -1) - __ref(seqno) = hdr.seqno; - else - checkeq(__ref(seqno), hdr.seqno); - - g_tables->deser(__ctx(i), __ref(init).node_size(), hdr, __ref(recarr)); - - long long end_time = current_time_millis(); - showdatarate("deserialized recovery message", size_t(hdr.len), end_time - before_deser); - cout << "receive & deserialize took " << end_time - __ref(before_recv) - << " ms total; now at seqno " << hdr.seqno << endl; - cout << "after deserialize, db state is now at seqno " - << hdr.seqno << ":" << endl; - g_tables->show(); - -#if 0 - // Resize the table if necessary. 
- - commons::array<entry> &table = __ref(map).get_table(); - if (!__ref(first)) { - checkeq(table.size(), hdr.total); - checkeq(__ref(map).size(), hdr.size); - } else { - __ref(first) = false; - if (table.size() != hdr.total) { - table.reset(new entry[hdr.total], hdr.total); - } - } - - // Receive straight into the table. - pair<size_t, size_t> range = - recovery_range(table.size(), __ctx(i), __ref(init).node_size()); - // Check that we agree on the number of entries. - checkeq(range.second - range.first, hdr.count); - // Check that the count is a power of two. - checkeq(hdr.count & (hdr.count - 1), size_t(0)); - size_t rangelen = sizeof(entry) * hdr.count; - // Read an extra char to ensure that we're at the EOF. - checkeqnneg(st_read_fully(__ref(replicas[i]), - table.begin() + range.first, rangelen + 1, - ST_UTIME_NO_TIMEOUT), - ssize_t(rangelen)); -#endif - }, "recovery_builder" + lexical_cast<string>(i))); - } - foreach (st_thread_t t, recovery_builders) { - st_join(t); - } - - } - } - - // - // Catch-up - // - - long long mid_time = current_time_millis(); - int mid_seqno = seqno; - TpccReq req; - while (!backlog.empty()) { - chunk chunk = backlog.take(); - cout << "took from backlog, now has " << backlog.queue().size() - << " chunks" << endl; - sized_array<char> &buf = chunk.get<0>(); - char *begin = chunk.get<1>(), *end = chunk.get<2>(); - ASSERT(buf.get() <= begin && begin < buf.end()); - ASSERT(buf.get() < end && end < buf.end()); - process_buf(begin, end, req, seqno); - } - showtput("replayer caught up; from backlog replayed", - current_time_millis(), mid_time, seqno, mid_seqno); - - } else { - - // - // Simple txns - // - - if (rec_pwal) { - // Recover from physical log. 
- cout << "recovering from pwal" << endl; - long long start_time = current_time_millis(); - ifstream inf("pwal"); - binary_iarchive in(inf); - int rseqno = -1; - while (inf.peek() != ifstream::traits_type::eof()) { - int op; - in & op; - switch (op) { - case op_del: - { - int key; - in & key; - mii::iterator it = map.find(key); - map.erase(it); - break; - } - case op_write: - { - int key, val; - in & key & val; - map[key] = val; - break; - } - case op_commit: - ++rseqno; - break; - } - if (check_interval(rseqno, yield_interval)) st_sleep(0); - } - seqno = init.txnseqno() - 1; - showdatarate("recovered from pwal", inf.tellg(), current_time_millis() - start_time); - cout << "now at seqno " << rseqno << " (really: " << seqno << ")" << endl; - } else { - - // - // Build-up - // - - cout << "waiting for recovery message" << (multirecover ? "s" : "") - << endl; - long long before_recv = current_time_millis(); - - vector<st_thread_t> recovery_builders; - ASSERT(seqno == -1); - bool first = true; - for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { - recovery_builders.push_back(my_spawn(lambda() { - // Read the recovery message length and header. - size_t len; - recovery_header hdr; - char buf[sizeof len + sizeof hdr]; - //try { - checkeqnneg(st_read_fully(__ref(replicas[i]), - buf, sizeof len + sizeof hdr, - ST_UTIME_NO_TIMEOUT), - ssize_t(sizeof len + sizeof hdr)); - //} catch (...) { // TODO just catch "Connection reset by peer" - //return; - //} - raw_reader rdr(buf); - rdr.read(len); - rdr.read(hdr); - check(hdr.seqno >= 0); - - // Resize the table if necessary. - commons::array<entry> &table = __ref(map).get_table(); - if (!__ref(first)) { - checkeq(table.size(), hdr.total); - checkeq(__ref(map).size(), hdr.size); - } else { - __ref(first) = false; - __ref(map).set_size(hdr.size); - if (table.size() != hdr.total) { - table.reset(new entry[hdr.total], hdr.total); - } - } - - // Receive straight into the table. 
- pair<size_t, size_t> range = - recovery_range(table.size(), __ctx(i), __ref(init).node_size()); - // Check that we agree on the number of entries. - checkeq(range.second - range.first, hdr.count); - // Check that the count is a power of two. - checkeq(hdr.count & (hdr.count - 1), size_t(0)); - size_t rangelen = sizeof(entry) * hdr.count; - // Read an extra char to ensure that we're at the EOF. - long long start_time = current_time_millis(); - checkeqnneg(st_read_fully(__ref(replicas[i]), - table.begin() + range.first, rangelen + 1, - ST_UTIME_NO_TIMEOUT), - ssize_t(rangelen)); - long long end_time = current_time_millis(); - - if (__ref(seqno) != -1) - checkeq(__ref(seqno), hdr.seqno); - __ref(seqno) = hdr.seqno; - showdatarate("got recovery message", len, end_time - start_time); - cout << "receive took " << end_time - __ref(before_recv) - << " ms total; now at seqno " << hdr.seqno << endl; -#if 0 - Recovery recovery; - long long receive_start = 0, receive_end = 0; - size_t len = 0; - { - st_intr intr(stop_hub); - len = readmsg(__ref(replicas)[__ctx(i)], recovery, &receive_start, - &receive_end); - } - long long build_start = current_time_millis(); - cout << "got recovery message of " << len << " bytes in " - << build_start - __ref(before_recv) << " ms: xfer took " - << receive_end - receive_start << " ms, deserialization took " - << build_start - receive_end << " ms" << endl; - for (int i = 0; i < recovery.pair_size(); ++i) { - const Recovery_Pair &p = recovery.pair(i); - __ref(map)[p.key()] = p.value(); - if (i % yield_interval == 0) { - if (yield_during_build_up) st_sleep(0); - } - } - check(recovery.seqno() >= 0); - int seqno = __ref(seqno) = recovery.seqno(); - long long build_end = current_time_millis(); - cout << "receive and build-up took " - << build_end - __ref(before_recv) - << " ms; built up map of " << recovery.pair_size() - << " records in " << build_end - build_start - << " ms; now at seqno " << seqno << endl; -#endif - }, "recovery_builder" + 
lexical_cast<string>(i))); - } - foreach (st_thread_t t, recovery_builders) { - st_join(t); - } - } - - // - // Catch-up - // - - long long mid_time = current_time_millis(); - int mid_seqno = seqno; - // XXX - using msg::TxnBatch; - using msg::Txn; - commons::array<char> rbuf(0), wbuf(buf_size); - reader reader(nullptr, rbuf.get(), rbuf.size()); - writer writer(lambda(const void*, size_t) { - throw not_supported_exception("should not be writing responses during catch-up phase"); - }, wbuf.get(), wbuf.size()); - stream s(reader, writer); - TxnBatch batch(s); - while (!backlog.empty()) { - chunk chunk = backlog.take(); - sized_array<char> &buf = chunk.get<0>(); - ASSERT(buf.get() <= chunk.get<1>() && chunk.get<1>() < buf.end()); - ASSERT(buf.get() < chunk.get<2>() && chunk.get<2>() < buf.end()); - ASSERT(chunk.get<1>() < chunk.get<2>()); - swap(buf, reader.buf()); - reader.reset_range(chunk.get<1>(), chunk.get<2>()); - while (reader.start() < reader.end()) { - char *start = reader.start(); - uint32_t prefix = ntohl(reader.read<uint32_t>()); - ASSERT(prefix < 10000); - ASSERT(start + sizeof(uint32_t) + prefix <= reader.end()); - batch.Clear(); - for (int t = 0; t < batch.txn_size(); ++t) { - const Txn &txn = batch.txn(t); - if (rec_pwal) seqno = txn.seqno() - 1; - process_txn<rb_traits, rb_traits>(map, txn, seqno, nullptr); - if (fake_exec && !Types::is_pb()) { - reader.skip(txn.op_size() * Op_Size); - } - - if (check_interval(txn.seqno(), yield_interval)) st_sleep(0); - if (check_interval(txn.seqno(), process_display)) { - cout << "caught up txn " << txn.seqno() - << "; db size = " << map.size() - << "; seqno = " << seqno - << "; backlog.size = " << backlog.queue().size() << endl; - } - } - ASSERT(start + sizeof(uint32_t) + prefix == reader.start()); - } - } - g_caught_up = true; -#if 0 - while (!backlog.empty()) { - using pb::Txn; - shared_ptr<Txn> p = backlog.take(); - process_txn<pb_traits, pb_traits>(map, *p, seqno, nullptr); - if (check_interval(p->seqno(), 
catch_up_display)) { - cout << "processed txn " << p->seqno() << " off the backlog; " - << "backlog.size = " << backlog.queue().size() << endl; - } - if (check_interval(p->seqno(), yield_interval)) { - // Explicitly yield. (Note that yielding does still effectively - // happen anyway because process_txn is a yield point.) - st_sleep(0); - } - } -#endif - showtput("replayer caught up; from backlog replayed", - current_time_millis(), mid_time, seqno, mid_seqno); - } - } - } catch (std::exception &ex) { - cerr_thread_ex(ex) << endl; - throw; - } - - stop_hub.insert(st_thread_self()); -} -} -#endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2009-03-24 09:11:34
|
Revision: 1332 http://assorted.svn.sourceforge.net/assorted/?rev=1332&view=rev Author: yangzhang Date: 2009-03-24 09:11:22 +0000 (Tue, 24 Mar 2009) Log Message: ----------- .lzz.clamp -> .clamp.lzz; cleaned up makefile Modified Paths: -------------- ydb/trunk/src/Makefile ydb/trunk/src/mkdeps.py Added Paths: ----------- ydb/trunk/src/leader.clamp.lzz ydb/trunk/src/main.clamp.lzz ydb/trunk/src/rectpcc.clamp.lzz ydb/trunk/src/replica.clamp.lzz ydb/trunk/src/run.clamp.lzz ydb/trunk/src/stxn.clamp.lzz ydb/trunk/src/tpcc.clamp.lzz ydb/trunk/src/util.clamp.lzz ydb/trunk/src/ydb.clamp.lzz Removed Paths: ------------- ydb/trunk/src/leader.lzz.clamp ydb/trunk/src/main.lzz.clamp ydb/trunk/src/rectpcc.lzz.clamp ydb/trunk/src/replica.lzz.clamp ydb/trunk/src/run.lzz.clamp ydb/trunk/src/stxn.lzz.clamp ydb/trunk/src/tpcc.lzz.clamp ydb/trunk/src/util.lzz.clamp ydb/trunk/src/ydb.lzz.clamp Modified: ydb/trunk/src/Makefile =================================================================== --- ydb/trunk/src/Makefile 2009-03-24 07:54:47 UTC (rev 1331) +++ ydb/trunk/src/Makefile 2009-03-24 09:11:22 UTC (rev 1332) @@ -3,10 +3,11 @@ # SHELL := bash -WTF := wtf -ORIGCXX := $(CXX) CCACHE := ccache export CCACHE_PREFIX := distcc +ifneq ($(WTF),) +WTF := wtf +endif CXX := $(WTF) $(CCACHE) $(CXX) -pipe TARGET_ARCH := $(shell [[ "$$(uname -m)" == x86_64 ]] && echo -m64 || echo -m32 ) \ @@ -95,35 +96,28 @@ # SVNURL := https://assorted.svn.sourceforge.net/svnroot/assorted/ydb/trunk/src - TARGET := ydb -CLAMPS := $(wildcard *.lzz.clamp) -CLAMPLZZS:= $(patsubst %.clamp,%,$(CLAMPS)) -PURELZZS := $(foreach lzz,$(wildcard *.lzz),$(if $(wildcard $(lzz).clamp),,$(lzz))) -LZZS := $(CLAMPLZZS) $(PURELZZS) -LZZHDRS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.hh,$(lzz))) -LZZSRCS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.cc,$(lzz))) -LZZOBJS := $(foreach lzz,$(LZZS),$(patsubst %.lzz,%.o,$(lzz))) +CLAMPLZZS:= $(wildcard *.clamp.lzz) +CLAMPS := $(foreach lzz,$(CLAMPLZZS),$(patsubst 
%.clamp.lzz,%.hh.clamp,$(lzz))) \ + $(foreach lzz,$(CLAMPLZZS),$(patsubst %.clamp.lzz,%.cc.clamp,$(lzz))) +CLAMPOUTS:= $(foreach clamp,$(CLAMPS),$(patsubst %.clamp,%,$(clamp))) PBS := $(wildcard *.proto) -PBHDRS := $(foreach pb,$(PBS),$(patsubst %.proto,%.pb.h,$(pb))) -PBSRCS := $(foreach pb,$(PBS),$(patsubst %.proto,%.pb.cc,$(pb))) -PBOBJS := $(foreach pb,$(PBS),$(patsubst %.proto,%.pb.o,$(pb))) +PBOUTS := $(foreach pb,$(PBS),$(patsubst %.proto,%.pb.h,$(pb))) \ + $(foreach pb,$(PBS),$(patsubst %.proto,%.pb.cc,$(pb))) COGS := $(wildcard tpcc/*.cog) -COGSRCS := $(foreach cog,$(COGS),$(patsubst %.cog,%,$(cog))) +COGOUTS := $(foreach cog,$(COGS),$(patsubst %.cog,%,$(cog))) -GENHDRS := $(LZZHDRS) $(PBHDRS) $(COGHDRS) -GENSRCS := $(LZZSRCS) $(PBSRCS) $(COGSRCS) GENOBJS := $(LZZOBJS) $(PBOBJS) $(COGOBJS) +GENOUTS := $(CLAMPOUTS) $(PBOUTS) $(COGOUTS) -TPCC_OBJS:= clock randomgenerator tpccclient tpccdb tpccgenerator tpcctables -TPCC_OBJS:= $(foreach i,$(TPCC_OBJS),tpcc/$(i).o) +TPCCOBJS := clock randomgenerator tpccclient tpccdb tpccgenerator tpcctables +TPCCOBJS := $(foreach i,$(TPCCOBJS),tpcc/$(i).o) -HDRS := $(GENHDRS) -SRCS := $(GENSRCS) -OBJS := $(GENOBJS) $(TPCC_OBJS) +SRCS := $(GENOUTS) msg.h +OBJS := $(patsubst %.cc,%.o,$(filter %.cc,$(SRCS))) $(TPCCOBJS) # # Rules @@ -131,7 +125,7 @@ all: $(TARGET) -doc: $(SRCS) $(HDRS) +doc: $(SRCS) doxygen %.pb.o: WARNINGS = -Wall -Werror @@ -143,24 +137,11 @@ %.pb.h %.pb.cc: %.proto protoc --cpp_out=. 
$< -# ORIG -# -#%.cc %.hh: %.lzz -# lzz -hx hh -sx cc -hl -sl -hd -sd $< -# -#%.lzz: %.lzz.clamp -# rm -f $@ -# mkdir -p .clamp/ -# clamp --outdir .clamp/ --prefix $(basename $@) < $< | \ -# sed "$$( echo -e '1i\\\n\#hdr\n1a\\\n\#end' )" | \ -# sed "$$( echo -e '$$i\\\n\#hdr\n$$a\\\n\#end' )" > $@ -# chmod -w $@ - -%.cc.clamp %.hh.clamp: %.lzz.clamp +%.cc.clamp %.hh.clamp: %.clamp.lzz ln -sf $< $(basename $<) rm -f $(basename $(basename $<)).{hh,cc}.clamp lzz -hx hh.clamp -sx cc.clamp -hd -sd $(basename $<) - chmod -w $(basename $(basename $<)).{hh.clamp,cc.clamp} + chmod -w $(basename $(basename $<)).{hh,cc}.clamp %.cc: %.cc.clamp rm -f $@ @@ -207,12 +188,12 @@ -Wno-unused-parameter clean: - rm -rf .clamp/ $(GENSRCS) $(GENHDRS) $(OBJS) $(TARGET) $(CLAMPLZZS) *.d *.hh.clamp *.cc.clamp + rm -rf $(GENOUTS) $(OBJS) $(TARGET) $(CLAMPLZZS) .clamp/ *.d distclean: clean rm -f pch.h pch.h.gch -.SECONDARY: $(GENSRCS) $(GENHDRS) $(OBJS) main.lzz pch.h.gch +.SECONDARY: $(GENOUTS) $(OBJS) pch.h.gch serperf: ydb.pb.o ser: ydb.pb.o Copied: ydb/trunk/src/leader.clamp.lzz (from rev 1331, ydb/trunk/src/leader.lzz.clamp) =================================================================== --- ydb/trunk/src/leader.clamp.lzz (rev 0) +++ ydb/trunk/src/leader.clamp.lzz 2009-03-24 09:11:22 UTC (rev 1332) @@ -0,0 +1,137 @@ +#hdr +#include <stdint.h> +#end + +#src +#include "unsetprefs.h" +#include <commons/st/sockets.h> +#include <commons/st/threads.h> +#include "run.hh" +#include "stxn.hh" +#include "tpcc.hh" +#include "setprefs.h" +#end + +/** + * Run the leader. + */ +void +run_leader(int minreps, uint16_t leader_port) +{ + cout << "starting as leader" << endl; + st_multichannel<long long> recover_signals; + + scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); + g_twal = twal.get(); + scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); + g_wal = pwal.get(); + + // Wait until all replicas have joined. 
+ st_netfd_t listener = st_tcp_listen(leader_port); + st_closing close_listener(listener); + vector<replica_info> replicas; + st_closing_all_infos close_replicas(replicas); + cout << "waiting for at least " << minreps << " replicas to join" << endl; + Join join; + for (int i = 0; i < minreps; ++i) { + st_netfd_t fd; + { + st_intr intr(stop_hub); + fd = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + } + readmsg(fd, join); + replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); + } + cout << "got all " << minreps << " replicas" << endl; + + // Construct the initialization message. + Init init; + init.set_txnseqno(0); + init.set_multirecover(multirecover); + foreach (replica_info r, replicas) { + SockAddr *psa = init.add_node(); + psa->set_host(r.host()); + psa->set_port(r.port()); + } + + // Send init to each initial replica. + foreach (replica_info r, replicas) { + init.set_yourhost(r.host()); + sendmsg(r.fd(), init); + } + + // Start dispatching queries. + st_bool accept_joiner; + int seqno = 0; + st_channel<replica_info> newreps; + st_channel<st_netfd_t> delreps; + foreach (const replica_info &r, replicas) newreps.push(r); + function<void()> f; + if (do_tpcc) + f = bind(issue_tpcc, ref(newreps), ref(delreps), ref(seqno), ref(accept_joiner)); + else + f = bind(issue_txns, ref(newreps), ref(seqno), ref(accept_joiner)); + st_joining join_issue_txns(my_spawn(f, "issue_txns")); + + finally fin(bind(summarize, "LEADER", ref(seqno))); + + try { + // Start handling responses. + st_thread_group handlers; + int rid = 0; + foreach (replica_info r, replicas) { + function<void()> fn; + if (do_tpcc) + fn = bind(handle_tpcc_responses, r.fd(), ref(seqno), rid++, + ref(recover_signals), ref(delreps), true); + else + fn = bind(handle_responses, r.fd(), ref(seqno), rid++, + ref(recover_signals), true); + handlers.insert(my_spawn(fn, "handle_responses")); + } + + // Accept the recovering node, and tell it about the online replicas. 
+ st_netfd_t joiner; + try { + st_intr intr(stop_hub); + joiner = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + accept_joiner.waitset(); + } catch (std::exception &ex) { + string s(ex.what()); + if (s.find("Interrupted system call") == s.npos) + throw; + else + throw break_exception(); + } + Join join; + readmsg(joiner, join); + replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); + cout << "setting seqno to " << seqno << endl; + init.set_txnseqno(seqno); + init.set_yourhost(replicas.back().host()); + sendmsg(joiner, init); + recover_signals.push(current_time_millis()); + + // Start streaming txns to joiner. + cout << "start streaming txns to joiner" << endl; + function<void()> handle_responses_joiner_fn; + if (do_tpcc) + handle_responses_joiner_fn = + bind(handle_tpcc_responses, joiner, ref(seqno), rid++, + ref(recover_signals), ref(delreps), false); + else + handle_responses_joiner_fn = + bind(handle_responses, joiner, ref(seqno), rid++, + ref(recover_signals), false); + newreps.push(replicas.back()); + handlers.insert(my_spawn(handle_responses_joiner_fn, + "handle_responses_joiner")); + } catch (break_exception &ex) { + } catch (std::exception &ex) { + // TODO: maybe there's a cleaner way to do this final step before waiting with the join + cerr_thread_ex(ex) << endl; + throw; + } +} Property changes on: ydb/trunk/src/leader.clamp.lzz ___________________________________________________________________ Added: svn:mergeinfo + Deleted: ydb/trunk/src/leader.lzz.clamp =================================================================== --- ydb/trunk/src/leader.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) +++ ydb/trunk/src/leader.lzz.clamp 2009-03-24 09:11:22 UTC (rev 1332) @@ -1,137 +0,0 @@ -#hdr -#include <stdint.h> -#end - -#src -#include "unsetprefs.h" -#include <commons/st/sockets.h> -#include <commons/st/threads.h> -#include "run.hh" -#include "stxn.hh" -#include "tpcc.hh" -#include "setprefs.h" -#end - -/** - 
* Run the leader. - */ -void -run_leader(int minreps, uint16_t leader_port) -{ - cout << "starting as leader" << endl; - st_multichannel<long long> recover_signals; - - scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); - g_twal = twal.get(); - scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); - g_wal = pwal.get(); - - // Wait until all replicas have joined. - st_netfd_t listener = st_tcp_listen(leader_port); - st_closing close_listener(listener); - vector<replica_info> replicas; - st_closing_all_infos close_replicas(replicas); - cout << "waiting for at least " << minreps << " replicas to join" << endl; - Join join; - for (int i = 0; i < minreps; ++i) { - st_netfd_t fd; - { - st_intr intr(stop_hub); - fd = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - } - readmsg(fd, join); - replicas.push_back(replica_info(fd, static_cast<uint16_t>(join.port()))); - } - cout << "got all " << minreps << " replicas" << endl; - - // Construct the initialization message. - Init init; - init.set_txnseqno(0); - init.set_multirecover(multirecover); - foreach (replica_info r, replicas) { - SockAddr *psa = init.add_node(); - psa->set_host(r.host()); - psa->set_port(r.port()); - } - - // Send init to each initial replica. - foreach (replica_info r, replicas) { - init.set_yourhost(r.host()); - sendmsg(r.fd(), init); - } - - // Start dispatching queries. - st_bool accept_joiner; - int seqno = 0; - st_channel<replica_info> newreps; - st_channel<st_netfd_t> delreps; - foreach (const replica_info &r, replicas) newreps.push(r); - function<void()> f; - if (do_tpcc) - f = bind(issue_tpcc, ref(newreps), ref(delreps), ref(seqno), ref(accept_joiner)); - else - f = bind(issue_txns, ref(newreps), ref(seqno), ref(accept_joiner)); - st_joining join_issue_txns(my_spawn(f, "issue_txns")); - - finally fin(bind(summarize, "LEADER", ref(seqno))); - - try { - // Start handling responses. 
- st_thread_group handlers; - int rid = 0; - foreach (replica_info r, replicas) { - function<void()> fn; - if (do_tpcc) - fn = bind(handle_tpcc_responses, r.fd(), ref(seqno), rid++, - ref(recover_signals), ref(delreps), true); - else - fn = bind(handle_responses, r.fd(), ref(seqno), rid++, - ref(recover_signals), true); - handlers.insert(my_spawn(fn, "handle_responses")); - } - - // Accept the recovering node, and tell it about the online replicas. - st_netfd_t joiner; - try { - st_intr intr(stop_hub); - joiner = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - accept_joiner.waitset(); - } catch (std::exception &ex) { - string s(ex.what()); - if (s.find("Interrupted system call") == s.npos) - throw; - else - throw break_exception(); - } - Join join; - readmsg(joiner, join); - replicas.push_back(replica_info(joiner, static_cast<uint16_t>(join.port()))); - cout << "setting seqno to " << seqno << endl; - init.set_txnseqno(seqno); - init.set_yourhost(replicas.back().host()); - sendmsg(joiner, init); - recover_signals.push(current_time_millis()); - - // Start streaming txns to joiner. 
- cout << "start streaming txns to joiner" << endl; - function<void()> handle_responses_joiner_fn; - if (do_tpcc) - handle_responses_joiner_fn = - bind(handle_tpcc_responses, joiner, ref(seqno), rid++, - ref(recover_signals), ref(delreps), false); - else - handle_responses_joiner_fn = - bind(handle_responses, joiner, ref(seqno), rid++, - ref(recover_signals), false); - newreps.push(replicas.back()); - handlers.insert(my_spawn(handle_responses_joiner_fn, - "handle_responses_joiner")); - } catch (break_exception &ex) { - } catch (std::exception &ex) { - // TODO: maybe there's a cleaner way to do this final step before waiting with the join - cerr_thread_ex(ex) << endl; - throw; - } -} Copied: ydb/trunk/src/main.clamp.lzz (from rev 1331, ydb/trunk/src/main.lzz.clamp) =================================================================== --- ydb/trunk/src/main.clamp.lzz (rev 0) +++ ydb/trunk/src/main.clamp.lzz 2009-03-24 09:11:22 UTC (rev 1332) @@ -0,0 +1,313 @@ +#hdr +#include "unsetprefs.h" +#include <boost/tuple/tuple.hpp> +#include <commons/st/intr.h> +#include <commons/st/sync.h> +#include <commons/st/channel.h> +#include <fstream> // ofstream +#include <vector> +#include "util.hh" +#include "setprefs.h" + +namespace boost { namespace archive { class binary_oarchive; } } + +using namespace boost; +using namespace boost::archive; +using namespace commons; +using namespace std; +using namespace ydb; +using namespace ydb::msg; +#end + +#src +#include "unsetprefs.h" +#include <boost/foreach.hpp> +#include <boost/archive/binary_oarchive.hpp> +#include <commons/assert.h> +#include <commons/time.h> +#include <commons/st/io.h> +#include <commons/st/sockets.h> +#include <iostream> +#include <unistd.h> // pipe, write, sync +#include "tpcc/tpcctables.h" +#include "msg.h" +#include "setprefs.h" +#end + +typedef tuple<sized_array<char>, char*, char*> chunk; + +typedef commons::array<char> recovery_t; + + +// Configuration. 
+st_utime_t timeout; +int yield_interval, accept_joiner_seqno, issuing_interval, min_ops, max_ops, + stop_on_seqno, batch_size, handle_responses_display, fail_seqno, + catch_up_display, issue_display, nwarehouses, + process_display; +size_t accept_joiner_size, read_buf_size; +bool yield_during_build_up, yield_during_catch_up, dump, show_updates, + count_updates, stop_on_recovery, general_txns, + disk, debug_memory, use_pwal, use_twal, + use_pb, use_pb_res, g_caught_up, rec_pwal, rec_twal, do_tpcc, + suppress_txn_msgs, force_ser, fake_exec, ship_log; +long long timelim, read_thresh; + +// Control. +st_intr_bool stop_hub, kill_hub; +st_bool do_pause; +// On leader, signifies that a node is in fail mode. On replica, signifies that a node is in fail mode/recovering from the twal. +st_bool failed; +// The seqno on which we should resume. +st_channel<int> resume; +bool stopped_issuing; + +// Statistics. +int updates; + +/** + * Used by the leader to bookkeep information about replicas. + */ +class replica_info +{ + public: + /** port is the replica's listen port, not the port bound to the fd socket. */ + replica_info(st_netfd_t fd, uint16_t port) : fd_(fd), port_(port) {} + st_netfd_t fd() const { return fd_; } + /** The port on which the replica is listening. */ + uint16_t port() const { return port_; } + /** The port on which the replica connected to us. */ + uint16_t local_port() const { return sockaddr().sin_port; } + uint32_t host() const { return sockaddr().sin_addr.s_addr; } + sockaddr_in sockaddr() const { sockaddr_in sa; sockaddr(sa); return sa; } + void sockaddr(sockaddr_in &sa) const { + socklen_t salen = sizeof sa; + check0x(getpeername(st_netfd_fileno(fd_), + reinterpret_cast<struct sockaddr*>(&sa), + &salen)); + } + private: + st_netfd_t fd_; + uint16_t port_; +}; + +/** + * RAII to close all contained netfds. 
+ */ +class st_closing_all_infos +{ + public: + st_closing_all_infos(const vector<replica_info>& rs) : rs_(rs) {} + ~st_closing_all_infos() { + cout << "closing all conns to replicas (replica_infos)" << endl; + foreach (replica_info r, rs_) + check0x(st_netfd_close(r.fd())); + } + private: + const vector<replica_info> &rs_; +}; + +/** + * RAII to close all contained netfds. + */ +class st_closing_all +{ + public: + st_closing_all(const vector<st_netfd_t>& rs) : rs_(rs) {} + ~st_closing_all() { + foreach (st_netfd_t r, rs_) + check0x(st_netfd_close(r)); + } + private: + const vector<st_netfd_t> &rs_; +}; + +#if 0 +st_channel<pair<st_netfd_t, shared_ptr<string> > > msgs; + +/** + * The worker that performs the actual broadcasting. + */ +void +bcaster() +{ + while (!kill_hub) { + pair<st_netfd_t, shared_ptr<string> > pr; + { + st_intr intr(kill_hub); + pr = msgs.take(); + } + st_netfd_t dst = pr.first; + shared_ptr<string> &p = pr.second; + if (p.get() == nullptr) break; + string &s = *p.get(); + + if (!fake_bcast) + st_timed_write(dst, s.data(), s.size()); + } +} + +/** + * Asynchronous version of the broadcaster. + */ +void +bcastbuf_async(const vector<st_netfd_t> &dsts, const ser_t &msg) +{ + shared_ptr<string> p(new string); + ser(*p.get(), msg); + foreach (st_netfd_t dst, dsts) msgs.push(make_pair(dst, p)); +} +#endif + +enum { op_del, op_write, op_commit }; + +/** + * ARIES write-ahead log. No undo logging necessary (no steal). + */ +class wal +{ +public: + wal(const string &fname) : + of_(fname.c_str()), + ar_(new binary_oarchive(of())) + {} + ~wal() { delete ar_; } + template <typename T> + void log(const T &msg) { ser(of(), msg); } + void logbuf(const ser_t &s) { logbuf(s.data(), s.size()); } + void logbuf(const void *buf, size_t len) { + of().write(reinterpret_cast<const char*>(buf), len); + } + void logdel(int key) { + int op = op_del; // TODO: is this really necessary? 
+ ar() & op & key; + } + void logwrite(int key, int val) { + int op = op_write; + ar() & op & key & val; + } + void logcommit() { + int op = op_commit; + ar() & op; + } + void flush() { of().flush(); } +private: + ofstream of_; + //unique_ptr<binary_oarchive> ar_; + binary_oarchive *ar_; + ofstream &of() { return of_; } + binary_oarchive &ar() { return *ar_; }; +}; + +// TODO? +class txn_wal { +public: + txn_wal(const string &fname) : of(fname.c_str()) {} + void logbuf(const ser_t &s) { logbuf(s.data(), s.size()); } + void logbuf(const void *buf, size_t len) { + of.write(reinterpret_cast<const char*>(buf), len); + } + void flush() { of.flush(); } +private: + ofstream of; +}; + +// Globals +wal *g_wal; +txn_wal *g_twal; +//tpcc_wal *g_tpcc_wal; + +struct recreq { + int start_seqno, end_seqno; +}; + +/** + * Help the recovering node. + * + * \param[in] listener The connection on which we're listening for connections + * from recovering joiners. + * + * \param[in] map The database state. + * + * \param[in] seqno The sequence number. Always starts at 0. + * + * \param[in] send_states Channel of snapshots of the database state to receive + * from process_txns. 
+ */ +void +recover_joiner(st_netfd_t listener, + st_channel<recovery_t> &send_states) +{ + cout << "waiting for joiner" << endl; + recovery_t recovery; + st_netfd_t joiner; + if (ship_log) { + { + st_intr intr(stop_hub); + joiner = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + } + st_closing closing(joiner); + recreq r; + st_read(joiner, r); + commons::array<char> wbuf(buf_size); + writer writer(lambda(const void *buf, size_t len) { + st_write(__ref(joiner), buf, len); + }, wbuf.get(), wbuf.size()); + cout << "got joiner's connection, sending log from seqnos " + << r.start_seqno << " to " << r.end_seqno << endl; + + g_twal->flush(); + sync(); + ifstream inf("twal"); + long long start_time = current_time_millis(); + for (int seqno = 0; seqno < r.start_seqno; ++seqno) { + ASSERT(inf.good()); + inf.seekg(readlen(inf), ios::cur); + } + long long mid_time = current_time_millis(); + streamoff mid_off = inf.tellg(); + showdatarate("scanned log", mid_off, mid_time - start_time); + for (int seqno = r.start_seqno; seqno < r.end_seqno; ++seqno) { + ASSERT(inf.good()); + uint32_t len = readlen(inf); + inf.read(writer.reserve(len), len); + writer.mark(); + cout << seqno << ' ' << len << endl; + if (check_interval(seqno, yield_interval)) st_sleep(0); + } + writer.mark_and_flush(); + long long end_time = current_time_millis(); + streamoff end_off = inf.tellg(); + showdatarate("shipped log", end_off - mid_off, end_time - mid_time); + } else { + { + st_intr intr(stop_hub); + // Wait for the snapshot. + recovery = send_states.take(); + if (recovery == nullptr) { + return; + } + // Wait for the new joiner. 
+ joiner = checkerr(st_accept(listener, nullptr, nullptr, + ST_UTIME_NO_TIMEOUT)); + } + + st_closing closing(joiner); + cout << "got joiner's connection, sending recovery of " + << recovery.size() << " bytes" << endl; + long long start_time = current_time_millis(); + st_write(joiner, recovery.get(), recovery.size()); + long long diff = current_time_millis() - start_time; + showdatarate("sent recovery", recovery.size(), diff); + } +} + +void +threadfunc() +{ + while (true) { + sleep(3); + cout << "AAAAAAAAAAAAAAAAAAAAAA" << endl; + } +} Deleted: ydb/trunk/src/main.lzz.clamp =================================================================== --- ydb/trunk/src/main.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) +++ ydb/trunk/src/main.lzz.clamp 2009-03-24 09:11:22 UTC (rev 1332) @@ -1,313 +0,0 @@ -#hdr -#include "unsetprefs.h" -#include <boost/tuple/tuple.hpp> -#include <commons/st/intr.h> -#include <commons/st/sync.h> -#include <commons/st/channel.h> -#include <fstream> // ofstream -#include <vector> -#include "util.hh" -#include "setprefs.h" - -namespace boost { namespace archive { class binary_oarchive; } } - -using namespace boost; -using namespace boost::archive; -using namespace commons; -using namespace std; -using namespace ydb; -using namespace ydb::msg; -#end - -#src -#include "unsetprefs.h" -#include <boost/foreach.hpp> -#include <boost/archive/binary_oarchive.hpp> -#include <commons/assert.h> -#include <commons/time.h> -#include <commons/st/io.h> -#include <commons/st/sockets.h> -#include <iostream> -#include <unistd.h> // pipe, write, sync -#include "tpcc/tpcctables.h" -#include "msg.h" -#include "setprefs.h" -#end - -typedef tuple<sized_array<char>, char*, char*> chunk; - -typedef commons::array<char> recovery_t; - - -// Configuration. 
-st_utime_t timeout; -int yield_interval, accept_joiner_seqno, issuing_interval, min_ops, max_ops, - stop_on_seqno, batch_size, handle_responses_display, fail_seqno, - catch_up_display, issue_display, nwarehouses, - process_display; -size_t accept_joiner_size, read_buf_size; -bool yield_during_build_up, yield_during_catch_up, dump, show_updates, - count_updates, stop_on_recovery, general_txns, - disk, debug_memory, use_pwal, use_twal, - use_pb, use_pb_res, g_caught_up, rec_pwal, rec_twal, do_tpcc, - suppress_txn_msgs, force_ser, fake_exec, ship_log; -long long timelim, read_thresh; - -// Control. -st_intr_bool stop_hub, kill_hub; -st_bool do_pause; -// On leader, signifies that a node is in fail mode. On replica, signifies that a node is in fail mode/recovering from the twal. -st_bool failed; -// The seqno on which we should resume. -st_channel<int> resume; -bool stopped_issuing; - -// Statistics. -int updates; - -/** - * Used by the leader to bookkeep information about replicas. - */ -class replica_info -{ - public: - /** port is the replica's listen port, not the port bound to the fd socket. */ - replica_info(st_netfd_t fd, uint16_t port) : fd_(fd), port_(port) {} - st_netfd_t fd() const { return fd_; } - /** The port on which the replica is listening. */ - uint16_t port() const { return port_; } - /** The port on which the replica connected to us. */ - uint16_t local_port() const { return sockaddr().sin_port; } - uint32_t host() const { return sockaddr().sin_addr.s_addr; } - sockaddr_in sockaddr() const { sockaddr_in sa; sockaddr(sa); return sa; } - void sockaddr(sockaddr_in &sa) const { - socklen_t salen = sizeof sa; - check0x(getpeername(st_netfd_fileno(fd_), - reinterpret_cast<struct sockaddr*>(&sa), - &salen)); - } - private: - st_netfd_t fd_; - uint16_t port_; -}; - -/** - * RAII to close all contained netfds. 
- */ -class st_closing_all_infos -{ - public: - st_closing_all_infos(const vector<replica_info>& rs) : rs_(rs) {} - ~st_closing_all_infos() { - cout << "closing all conns to replicas (replica_infos)" << endl; - foreach (replica_info r, rs_) - check0x(st_netfd_close(r.fd())); - } - private: - const vector<replica_info> &rs_; -}; - -/** - * RAII to close all contained netfds. - */ -class st_closing_all -{ - public: - st_closing_all(const vector<st_netfd_t>& rs) : rs_(rs) {} - ~st_closing_all() { - foreach (st_netfd_t r, rs_) - check0x(st_netfd_close(r)); - } - private: - const vector<st_netfd_t> &rs_; -}; - -#if 0 -st_channel<pair<st_netfd_t, shared_ptr<string> > > msgs; - -/** - * The worker that performs the actual broadcasting. - */ -void -bcaster() -{ - while (!kill_hub) { - pair<st_netfd_t, shared_ptr<string> > pr; - { - st_intr intr(kill_hub); - pr = msgs.take(); - } - st_netfd_t dst = pr.first; - shared_ptr<string> &p = pr.second; - if (p.get() == nullptr) break; - string &s = *p.get(); - - if (!fake_bcast) - st_timed_write(dst, s.data(), s.size()); - } -} - -/** - * Asynchronous version of the broadcaster. - */ -void -bcastbuf_async(const vector<st_netfd_t> &dsts, const ser_t &msg) -{ - shared_ptr<string> p(new string); - ser(*p.get(), msg); - foreach (st_netfd_t dst, dsts) msgs.push(make_pair(dst, p)); -} -#endif - -enum { op_del, op_write, op_commit }; - -/** - * ARIES write-ahead log. No undo logging necessary (no steal). - */ -class wal -{ -public: - wal(const string &fname) : - of_(fname.c_str()), - ar_(new binary_oarchive(of())) - {} - ~wal() { delete ar_; } - template <typename T> - void log(const T &msg) { ser(of(), msg); } - void logbuf(const ser_t &s) { logbuf(s.data(), s.size()); } - void logbuf(const void *buf, size_t len) { - of().write(reinterpret_cast<const char*>(buf), len); - } - void logdel(int key) { - int op = op_del; // TODO: is this really necessary? 
- ar() & op & key; - } - void logwrite(int key, int val) { - int op = op_write; - ar() & op & key & val; - } - void logcommit() { - int op = op_commit; - ar() & op; - } - void flush() { of().flush(); } -private: - ofstream of_; - //unique_ptr<binary_oarchive> ar_; - binary_oarchive *ar_; - ofstream &of() { return of_; } - binary_oarchive &ar() { return *ar_; }; -}; - -// TODO? -class txn_wal { -public: - txn_wal(const string &fname) : of(fname.c_str()) {} - void logbuf(const ser_t &s) { logbuf(s.data(), s.size()); } - void logbuf(const void *buf, size_t len) { - of.write(reinterpret_cast<const char*>(buf), len); - } - void flush() { of.flush(); } -private: - ofstream of; -}; - -// Globals -wal *g_wal; -txn_wal *g_twal; -//tpcc_wal *g_tpcc_wal; - -struct recreq { - int start_seqno, end_seqno; -}; - -/** - * Help the recovering node. - * - * \param[in] listener The connection on which we're listening for connections - * from recovering joiners. - * - * \param[in] map The database state. - * - * \param[in] seqno The sequence number. Always starts at 0. - * - * \param[in] send_states Channel of snapshots of the database state to receive - * from process_txns. 
- */ -void -recover_joiner(st_netfd_t listener, - st_channel<recovery_t> &send_states) -{ - cout << "waiting for joiner" << endl; - recovery_t recovery; - st_netfd_t joiner; - if (ship_log) { - { - st_intr intr(stop_hub); - joiner = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - } - st_closing closing(joiner); - recreq r; - st_read(joiner, r); - commons::array<char> wbuf(buf_size); - writer writer(lambda(const void *buf, size_t len) { - st_write(__ref(joiner), buf, len); - }, wbuf.get(), wbuf.size()); - cout << "got joiner's connection, sending log from seqnos " - << r.start_seqno << " to " << r.end_seqno << endl; - - g_twal->flush(); - sync(); - ifstream inf("twal"); - long long start_time = current_time_millis(); - for (int seqno = 0; seqno < r.start_seqno; ++seqno) { - ASSERT(inf.good()); - inf.seekg(readlen(inf), ios::cur); - } - long long mid_time = current_time_millis(); - streamoff mid_off = inf.tellg(); - showdatarate("scanned log", mid_off, mid_time - start_time); - for (int seqno = r.start_seqno; seqno < r.end_seqno; ++seqno) { - ASSERT(inf.good()); - uint32_t len = readlen(inf); - inf.read(writer.reserve(len), len); - writer.mark(); - cout << seqno << ' ' << len << endl; - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - writer.mark_and_flush(); - long long end_time = current_time_millis(); - streamoff end_off = inf.tellg(); - showdatarate("shipped log", end_off - mid_off, end_time - mid_time); - } else { - { - st_intr intr(stop_hub); - // Wait for the snapshot. - recovery = send_states.take(); - if (recovery == nullptr) { - return; - } - // Wait for the new joiner. 
- joiner = checkerr(st_accept(listener, nullptr, nullptr, - ST_UTIME_NO_TIMEOUT)); - } - - st_closing closing(joiner); - cout << "got joiner's connection, sending recovery of " - << recovery.size() << " bytes" << endl; - long long start_time = current_time_millis(); - st_write(joiner, recovery.get(), recovery.size()); - long long diff = current_time_millis() - start_time; - showdatarate("sent recovery", recovery.size(), diff); - } -} - -void -threadfunc() -{ - while (true) { - sleep(3); - cout << "AAAAAAAAAAAAAAAAAAAAAA" << endl; - } -} Modified: ydb/trunk/src/mkdeps.py =================================================================== --- ydb/trunk/src/mkdeps.py 2009-03-24 07:54:47 UTC (rev 1331) +++ ydb/trunk/src/mkdeps.py 2009-03-24 09:11:22 UTC (rev 1332) @@ -24,7 +24,7 @@ @memoized def src(i): if i.endswith('.hh'): - clamp = path(i[:-3] + '.lzz.clamp') + clamp = path(i[:-3] + '.clamp.lzz') lzz = path(i[:-2] + '.lzz') if clamp.isfile(): return clamp if lzz.isfile(): return lzz @@ -39,8 +39,8 @@ for dep in deps(src(hdr)): yield dep -for i in pwd.glob('*.lzz.clamp'): - print sub(r'\.lzz\.clamp', '.o', i), ':', sub(r'\.lzz\.clamp', '.hh', i), ' '.join(deps(i)) +for i in pwd.glob('*.clamp.lzz'): + print sub(r'\.clamp\.lzz', '.o', i), ':', sub(r'\.clamp\.lzz', '.hh', i), ' '.join(deps(i)) for i in pwd.glob('*.d'): with file(i) as f: @@ -52,4 +52,4 @@ elif '_cc_lambda_' in word: print sub(r'(\.clamp/(.+)_cc_lambda_.+\.clamp_h)', r'\1: \2.cc.clamp', word) else: - print sub(r'(\.clamp/(.+)_lambda_.+\.clamp_h)', r'\1: \2.lzz.clamp', word) + print sub(r'(\.clamp/(.+)_lambda_.+\.clamp_h)', r'\1: \2.clamp.lzz', word) Copied: ydb/trunk/src/rectpcc.clamp.lzz (from rev 1331, ydb/trunk/src/rectpcc.lzz.clamp) =================================================================== --- ydb/trunk/src/rectpcc.clamp.lzz (rev 0) +++ ydb/trunk/src/rectpcc.clamp.lzz 2009-03-24 09:11:22 UTC (rev 1332) @@ -0,0 +1,175 @@ +#hdr +#include "tpcc.hh" +namespace ydb { namespace pb { class Init; } 
} +using namespace ydb::pb; +#end + +#src +#include "unsetprefs.h" +#include <commons/time.h> +#include <commons/st/io.h> +#include <commons/st/threads.h> +#include <commons/st/reader.h> +#include "tpcc/tpcctables.h" +#include "ydb.pb.h" +#include "setprefs.h" +#end + +void +rec_tpcc(int &seqno, int mypos, const Init &init, + const vector<st_netfd_t> &replicas, recovery_t &orig, + st_channel<chunk> &backlog) +{ + commons::array<char> recarr(0); + + function<void()> rec_twal_fn = lambda() { + int &seqno = __ref(seqno); + cout << "recovering from twal" << endl; + long long start_time = current_time_millis(); + g_twal->flush(); + sync(); + ifstream inf("twal"); + TpccReq req; + while (inf.peek() != ifstream::traits_type::eof()) { + ASSERT(inf.good()); + readmsg(inf, req); + process_tpcc(req, seqno, nullptr); + if (check_interval(seqno, yield_interval)) st_sleep(0); + } + showdatarate("recovered from twal", inf.tellg(), + current_time_millis() - start_time); + cout << "now at seqno " << seqno << endl; + }; + + function<void()> recv_log_fn = lambda() { + st_netfd_t src = __ref(replicas[0]); + int &seqno = __ref(seqno); + ASSERT(fail_seqno == seqno); + recreq r = { fail_seqno + 1, resume.take() }; + st_write(src, r); + sized_array<char> rbuf(new char[read_buf_size], read_buf_size); + function<void(anchored_stream_reader &reader)> overflow_fn = + lambda(anchored_stream_reader &reader) { + shift_reader(reader); + }; + anchored_stream_reader reader(st_read_fn(src), + st_read_fully_fn(src), + overflow_fn, rbuf.get(), rbuf.size()); + TpccReq req; + while (seqno < r.end_seqno) { + { st_intr intr(stop_hub); readmsg(reader, req); } + process_tpcc(req, seqno, nullptr); + reader.set_anchor(); + if (check_interval(seqno, yield_interval)) st_sleep(0); + } + }; + + if (rec_twal) { + failed.waitset(); + g_tables.reset(new TPCCTables); + tpcc_recovery_header &hdr = *reinterpret_cast<tpcc_recovery_header*>(orig.begin()); + commons::array<char> body(orig.begin() + 
sizeof(tpcc_recovery_header), + orig.size() - sizeof(tpcc_recovery_header)); + g_tables->deser(mypos, init.node_size(), hdr, body); + body.release(); + rec_twal_fn(); + failed.reset(); + recv_log_fn(); + } + +#if 0 + st_thread_t rec_twal_thread = my_spawn(rec_twal_fn, "rec_twal"); + st_thread_t recv_log_thread = my_spawn(recv_log_fn, "recv_log"); + + st_join(rec_twal_thread); + st_join(recv_log_thread); +#endif + + if (rec_pwal) { + // Recover from phy log. + } else if (rec_twal) { + // Recover from txn log. + } else { + + g_tables.reset(new TPCCTables); + + // + // Build-up + // + + if (ship_log) { + } else { + // XXX indent + + cout << "waiting for recovery message" << (multirecover ? "s" : "") + << endl; + long long before_recv = current_time_millis(); + + vector<st_thread_t> recovery_builders; + ASSERT(seqno == -1); + for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { + recovery_builders.push_back(my_spawn(lambda() { + // Read the recovery message length and header. + tpcc_recovery_header hdr; + checkeqnneg(st_read_fully(__ref(replicas[i]), + &hdr, sizeof hdr, + ST_UTIME_NO_TIMEOUT), + ssize_t(sizeof hdr)); + check(hdr.seqno >= 0); + + cout << "receiving recovery of " << hdr.len << " bytes" << endl; + + long long start_time = current_time_millis(); + __ref(recarr).reset(new char[hdr.len], hdr.len); + checkeqnneg(st_read_fully(__ref(replicas[i]), + __ref(recarr).get(), hdr.len, + ST_UTIME_NO_TIMEOUT), + ssize_t(hdr.len)); + + long long before_deser = current_time_millis(); + showdatarate("received recovery message", size_t(hdr.len), before_deser - start_time); + + if (__ref(seqno) == -1) + __ref(seqno) = hdr.seqno; + else + checkeq(__ref(seqno), hdr.seqno); + + g_tables->deser(__ctx(i), __ref(init).node_size(), hdr, __ref(recarr)); + + long long end_time = current_time_millis(); + showdatarate("deserialized recovery message", size_t(hdr.len), end_time - before_deser); + cout << "receive & deserialize took " << end_time - __ref(before_recv) + << " 
ms total; now at seqno " << hdr.seqno << endl; + cout << "after deserialize, db state is now at seqno " + << hdr.seqno << ":" << endl; + g_tables->show(); + + }, "recovery_builder" + lexical_cast<string>(i))); + } + foreach (st_thread_t t, recovery_builders) { + st_join(t); + } + + } + } + + // + // Catch-up + // + + long long mid_time = current_time_millis(); + int mid_seqno = seqno; + TpccReq req; + while (!backlog.empty()) { + chunk chunk = backlog.take(); + cout << "took from backlog, now has " << backlog.queue().size() + << " chunks" << endl; + sized_array<char> &buf = chunk.get<0>(); + char *begin = chunk.get<1>(), *end = chunk.get<2>(); + ASSERT(buf.get() <= begin && begin < buf.end()); + ASSERT(buf.get() < end && end < buf.end()); + process_buf(begin, end, req, seqno); + } + showtput("replayer caught up; from backlog replayed", + current_time_millis(), mid_time, seqno, mid_seqno); +} Property changes on: ydb/trunk/src/rectpcc.clamp.lzz ___________________________________________________________________ Added: svn:mergeinfo + Deleted: ydb/trunk/src/rectpcc.lzz.clamp =================================================================== --- ydb/trunk/src/rectpcc.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) +++ ydb/trunk/src/rectpcc.lzz.clamp 2009-03-24 09:11:22 UTC (rev 1332) @@ -1,175 +0,0 @@ -#hdr -#include "tpcc.hh" -namespace ydb { namespace pb { class Init; } } -using namespace ydb::pb; -#end - -#src -#include "unsetprefs.h" -#include <commons/time.h> -#include <commons/st/io.h> -#include <commons/st/threads.h> -#include <commons/st/reader.h> -#include "tpcc/tpcctables.h" -#include "ydb.pb.h" -#include "setprefs.h" -#end - -void -rec_tpcc(int &seqno, int mypos, const Init &init, - const vector<st_netfd_t> &replicas, recovery_t &orig, - st_channel<chunk> &backlog) -{ - commons::array<char> recarr(0); - - function<void()> rec_twal_fn = lambda() { - int &seqno = __ref(seqno); - cout << "recovering from twal" << endl; - long long start_time = 
current_time_millis(); - g_twal->flush(); - sync(); - ifstream inf("twal"); - TpccReq req; - while (inf.peek() != ifstream::traits_type::eof()) { - ASSERT(inf.good()); - readmsg(inf, req); - process_tpcc(req, seqno, nullptr); - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - showdatarate("recovered from twal", inf.tellg(), - current_time_millis() - start_time); - cout << "now at seqno " << seqno << endl; - }; - - function<void()> recv_log_fn = lambda() { - st_netfd_t src = __ref(replicas[0]); - int &seqno = __ref(seqno); - ASSERT(fail_seqno == seqno); - recreq r = { fail_seqno + 1, resume.take() }; - st_write(src, r); - sized_array<char> rbuf(new char[read_buf_size], read_buf_size); - function<void(anchored_stream_reader &reader)> overflow_fn = - lambda(anchored_stream_reader &reader) { - shift_reader(reader); - }; - anchored_stream_reader reader(st_read_fn(src), - st_read_fully_fn(src), - overflow_fn, rbuf.get(), rbuf.size()); - TpccReq req; - while (seqno < r.end_seqno) { - { st_intr intr(stop_hub); readmsg(reader, req); } - process_tpcc(req, seqno, nullptr); - reader.set_anchor(); - if (check_interval(seqno, yield_interval)) st_sleep(0); - } - }; - - if (rec_twal) { - failed.waitset(); - g_tables.reset(new TPCCTables); - tpcc_recovery_header &hdr = *reinterpret_cast<tpcc_recovery_header*>(orig.begin()); - commons::array<char> body(orig.begin() + sizeof(tpcc_recovery_header), - orig.size() - sizeof(tpcc_recovery_header)); - g_tables->deser(mypos, init.node_size(), hdr, body); - body.release(); - rec_twal_fn(); - failed.reset(); - recv_log_fn(); - } - -#if 0 - st_thread_t rec_twal_thread = my_spawn(rec_twal_fn, "rec_twal"); - st_thread_t recv_log_thread = my_spawn(recv_log_fn, "recv_log"); - - st_join(rec_twal_thread); - st_join(recv_log_thread); -#endif - - if (rec_pwal) { - // Recover from phy log. - } else if (rec_twal) { - // Recover from txn log. 
- } else { - - g_tables.reset(new TPCCTables); - - // - // Build-up - // - - if (ship_log) { - } else { - // XXX indent - - cout << "waiting for recovery message" << (multirecover ? "s" : "") - << endl; - long long before_recv = current_time_millis(); - - vector<st_thread_t> recovery_builders; - ASSERT(seqno == -1); - for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { - recovery_builders.push_back(my_spawn(lambda() { - // Read the recovery message length and header. - tpcc_recovery_header hdr; - checkeqnneg(st_read_fully(__ref(replicas[i]), - &hdr, sizeof hdr, - ST_UTIME_NO_TIMEOUT), - ssize_t(sizeof hdr)); - check(hdr.seqno >= 0); - - cout << "receiving recovery of " << hdr.len << " bytes" << endl; - - long long start_time = current_time_millis(); - __ref(recarr).reset(new char[hdr.len], hdr.len); - checkeqnneg(st_read_fully(__ref(replicas[i]), - __ref(recarr).get(), hdr.len, - ST_UTIME_NO_TIMEOUT), - ssize_t(hdr.len)); - - long long before_deser = current_time_millis(); - showdatarate("received recovery message", size_t(hdr.len), before_deser - start_time); - - if (__ref(seqno) == -1) - __ref(seqno) = hdr.seqno; - else - checkeq(__ref(seqno), hdr.seqno); - - g_tables->deser(__ctx(i), __ref(init).node_size(), hdr, __ref(recarr)); - - long long end_time = current_time_millis(); - showdatarate("deserialized recovery message", size_t(hdr.len), end_time - before_deser); - cout << "receive & deserialize took " << end_time - __ref(before_recv) - << " ms total; now at seqno " << hdr.seqno << endl; - cout << "after deserialize, db state is now at seqno " - << hdr.seqno << ":" << endl; - g_tables->show(); - - }, "recovery_builder" + lexical_cast<string>(i))); - } - foreach (st_thread_t t, recovery_builders) { - st_join(t); - } - - } - } - - // - // Catch-up - // - - long long mid_time = current_time_millis(); - int mid_seqno = seqno; - TpccReq req; - while (!backlog.empty()) { - chunk chunk = backlog.take(); - cout << "took from backlog, now has " << 
backlog.queue().size() - << " chunks" << endl; - sized_array<char> &buf = chunk.get<0>(); - char *begin = chunk.get<1>(), *end = chunk.get<2>(); - ASSERT(buf.get() <= begin && begin < buf.end()); - ASSERT(buf.get() < end && end < buf.end()); - process_buf(begin, end, req, seqno); - } - showtput("replayer caught up; from backlog replayed", - current_time_millis(), mid_time, seqno, mid_seqno); -} Copied: ydb/trunk/src/replica.clamp.lzz (from rev 1331, ydb/trunk/src/replica.lzz.clamp) =================================================================== --- ydb/trunk/src/replica.clamp.lzz (rev 0) +++ ydb/trunk/src/replica.clamp.lzz 2009-03-24 09:11:22 UTC (rev 1332) @@ -0,0 +1,362 @@ +#hdr +#include "unsetprefs.h" +#include <string> +#end + +#src +#include "unsetprefs.h" +#include <boost/archive/binary_iarchive.hpp> +#include <commons/st/sockets.h> +#include <commons/st/threads.h> +#include "tpcc/clock.h" +#include "tpcc/randomgenerator.h" +#include "tpcc/tpccclient.h" +#include "tpcc/tpccgenerator.h" +#include "tpcc/tpcctables.h" +#include "rectpcc.hh" +#include "run.hh" +#include "stxn.hh" +#include "tpcc.hh" +#end + +/** + * Run a replica. + */ +void +run_replica(std::string leader_host, uint16_t leader_port, uint16_t listen_port) +{ + if (disk) { + // Disk IO threads. + for (int i = 0; i < 5; ++i) { + //thread somethread(threadfunc); + } + } + + // Initialize database state. + int seqno = -1; + mii &map = g_map; + if (do_tpcc) { + TPCCTables *tables = new TPCCTables(); + g_tables.reset(tables); + SystemClock* clock = new SystemClock(); + + // Create a generator for filling the database. 
+ RealRandomGenerator* random = new RealRandomGenerator(); + NURandC cLoad = NURandC::makeRandom(random); + random->setC(cLoad); + + // Generate the data + cout << "loading " << nwarehouses << " warehouses" << endl; + char now[Clock::DATETIME_SIZE+1]; + clock->getDateTimestamp(now); + TPCCGenerator generator(random, now, Item::NUM_ITEMS, + District::NUM_PER_WAREHOUSE, + Customer::NUM_PER_DISTRICT, + NewOrder::INITIAL_NUM_PER_DISTRICT); + long long start_time = current_time_millis(); + generator.makeItemsTable(tables); + for (int i = 0; i < nwarehouses; ++i) { + generator.makeWarehouse(tables, i+1); + } + cout << "loaded " << nwarehouses << " warehouses in " + << current_time_millis() - start_time << " ms" << endl; + tables->show(); + } + recovery_t orig = rec_twal ? g_tables->ser(0, 0, seqno) : recovery_t(); + + finally f(bind(summarize, "REPLICA", ref(seqno))); + st_channel<recovery_t> send_states; + + cout << "starting as replica on port " << listen_port << endl; + + // Listen for connections from other replicas. + st_netfd_t listener = st_tcp_listen(listen_port); + + // Connect to the leader and join the system. + st_netfd_t leader = st_tcp_connect(leader_host.c_str(), leader_port, + timeout); + st_closing closing(leader); + Join join; + join.set_port(listen_port); + sendmsg(leader, join); + Init init; + { + st_intr intr(stop_hub); + readmsg(leader, init); + } + uint32_t listen_host = init.yourhost(); + multirecover = init.multirecover(); + + // Display the info. 
+ cout << "got init msg with txn seqno " << init.txnseqno() + << " and hosts:" << endl; + vector<st_netfd_t> replicas; + st_closing_all close_replicas(replicas); + int mypos = -1; + for (int i = 0; i < init.node_size(); ++i) { + const SockAddr &sa = init.node(i); + char buf[INET_ADDRSTRLEN]; + in_addr host = { sa.host() }; + bool is_self = sa.host() == listen_host && sa.port() == listen_port; + cout << "- " << checkerr(inet_ntop(AF_INET, &host, buf, + INET_ADDRSTRLEN)) + << ':' << sa.port() << (is_self ? " (self)" : "") << endl; + if (is_self) mypos = i; + if (!is_self && (init.txnseqno() > 0 || rec_twal)) { + replicas.push_back(st_tcp_connect(host, + static_cast<uint16_t>(sa.port()), + timeout)); + } + } + + // Initialize physical or txn log. + scoped_ptr<txn_wal> twal(new txn_wal(use_twal ? "twal" : "/dev/null")); + g_twal = twal.get(); + scoped_ptr<wal> pwal(new wal(use_pwal ? "pwal" : "/dev/null")); + g_wal = pwal.get(); + + // Process txns. + st_channel<chunk> backlog; + function<void()> process_fn; + if (do_tpcc) + process_fn = bind(process_tpccs, leader, ref(seqno), ref(send_states), + ref(backlog), init.txnseqno(), mypos, init.node_size()); + else + process_fn = bind(process_txns, leader, ref(map), ref(seqno), + ref(send_states), ref(backlog), init.txnseqno(), mypos, + init.node_size()); + st_joining join_proc(my_spawn(process_fn, "process_txns")); + st_joining join_rec(init.txnseqno() == 0 && (multirecover || mypos == 0) ? + my_spawn(bind(recover_joiner, listener, ref(send_states)), + "recover_joiner") : + nullptr); + + try { + // If there's anything to recover. + if (init.txnseqno() > 0 || fail_seqno > 0) { + if (do_tpcc) { + + rec_tpcc(seqno, mypos, init, replicas, orig, backlog); + + } else { + + // + // Simple txns + // + + if (rec_pwal) { + // Recover from physical log. 
+ cout << "recovering from pwal" << endl; + long long start_time = current_time_millis(); + ifstream inf("pwal"); + binary_iarchive in(inf); + int rseqno = -1; + while (inf.peek() != ifstream::traits_type::eof()) { + int op; + in & op; + switch (op) { + case op_del: + { + int key; + in & key; + mii::iterator it = map.find(key); + map.erase(it); + break; + } + case op_write: + { + int key, val; + in & key & val; + map[key] = val; + break; + } + case op_commit: + ++rseqno; + break; + } + if (check_interval(rseqno, yield_interval)) st_sleep(0); + } + seqno = init.txnseqno() - 1; + showdatarate("recovered from pwal", inf.tellg(), current_time_millis() - start_time); + cout << "now at seqno " << rseqno << " (really: " << seqno << ")" << endl; + } else { + + // + // Build-up + // + + cout << "waiting for recovery message" << (multirecover ? "s" : "") + << endl; + long long before_recv = current_time_millis(); + + vector<st_thread_t> recovery_builders; + ASSERT(seqno == -1); + bool first = true; + for (int i = 0; i < (multirecover ? init.node_size() : 1); ++i) { + recovery_builders.push_back(my_spawn(lambda() { + // Read the recovery message length and header. + size_t len; + recovery_header hdr; + char buf[sizeof len + sizeof hdr]; + //try { + checkeqnneg(st_read_fully(__ref(replicas[i]), + buf, sizeof len + sizeof hdr, + ST_UTIME_NO_TIMEOUT), + ssize_t(sizeof len + sizeof hdr)); + //} catch (...) { // TODO just catch "Connection reset by peer" + //return; + //} + raw_reader rdr(buf); + rdr.read(len); + rdr.read(hdr); + check(hdr.seqno >= 0); + + // Resize the table if necessary. + commons::array<entry> &table = __ref(map).get_table(); + if (!__ref(first)) { + checkeq(table.size(), hdr.total); + checkeq(__ref(map).size(), hdr.size); + } else { + __ref(first) = false; + __ref(map).set_size(hdr.size); + if (table.size() != hdr.total) { + table.reset(new entry[hdr.total], hdr.total); + } + } + + // Receive straight into the table. 
+ pair<size_t, size_t> range = + recovery_range(table.size(), __ctx(i), __ref(init).node_size()); + // Check that we agree on the number of entries. + checkeq(range.second - range.first, hdr.count); + // Check that the count is a power of two. + checkeq(hdr.count & (hdr.count - 1), size_t(0)); + size_t rangelen = sizeof(entry) * hdr.count; + // Read an extra char to ensure that we're at the EOF. + long long start_time = current_time_millis(); + checkeqnneg(st_read_fully(__ref(replicas[i]), + table.begin() + range.first, rangelen + 1, + ST_UTIME_NO_TIMEOUT), + ssize_t(rangelen)); + long long end_time = current_time_millis(); + + if (__ref(seqno) != -1) + checkeq(__ref(seqno), hdr.seqno); + __ref(seqno) = hdr.seqno; + showdatarate("got recovery message", len, end_time - start_time); + cout << "receive took " << end_time - __ref(before_recv) + << " ms total; now at seqno " << hdr.seqno << endl; +#if 0 + Recovery recovery; + long long receive_start = 0, receive_end = 0; + size_t len = 0; + { + st_intr intr(stop_hub); + len = readmsg(__ref(replicas)[__ctx(i)], recovery, &receive_start, + &receive_end); + } + long long build_start = current_time_millis(); + cout << "got recovery message of " << len << " bytes in " + << build_start - __ref(before_recv) << " ms: xfer took " + << receive_end - receive_start << " ms, deserialization took " + << build_start - receive_end << " ms" << endl; + for (int i = 0; i < recovery.pair_size(); ++i) { + const Recovery_Pair &p = recovery.pair(i); + __ref(map)[p.key()] = p.value(); + if (i % yield_interval == 0) { + if (yield_during_build_up) st_sleep(0); + } + } + check(recovery.seqno() >= 0); + int seqno = __ref(seqno) = recovery.seqno(); + long long build_end = current_time_millis(); + cout << "receive and build-up took " + << build_end - __ref(before_recv) + << " ms; built up map of " << recovery.pair_size() + << " records in " << build_end - build_start + << " ms; now at seqno " << seqno << endl; +#endif + }, "recovery_builder" + 
lexical_cast<string>(i))); + } + foreach (st_thread_t t, recovery_builders) { + st_join(t); + } + } + + // + // Catch-up + // + + long long mid_time = current_time_millis(); + int mid_seqno = seqno; + // XXX + using msg::TxnBatch; + using msg::Txn; + commons::array<char> rbuf(0), wbuf(buf_size); + reader reader(nullptr, rbuf.get(), rbuf.size()); + writer writer(lambda(const void*, size_t) { + throw not_supported_exception("should not be writing responses during catch-up phase"); + }, wbuf.get(), wbuf.size()); + stream s(reader, writer); + TxnBatch batch(s); + while (!backlog.empty()) { + chunk chunk = backlog.take(); + sized_array<char> &buf = chunk.get<0>(); + ASSERT(buf.get() <= chunk.get<1>() && chunk.get<1>() < buf.end()); + ASSERT(buf.get() < chunk.get<2>() && chunk.get<2>() < buf.end()); + ASSERT(chunk.get<1>() < chunk.get<2>()); + swap(buf, reader.buf()); + reader.reset_range(chunk.get<1>(), chunk.get<2>()); + while (reader.start() < reader.end()) { + char *start = reader.start(); + uint32_t prefix = ntohl(reader.read<uint32_t>()); + ASSERT(prefix < 10000); + ASSERT(start + sizeof(uint32_t) + prefix <= reader.end()); + batch.Clear(); + for (int t = 0; t < batch.txn_size(); ++t) { + const Txn &txn = batch.txn(t); + if (rec_pwal) seqno = txn.seqno() - 1; + process_txn(map, txn, seqno); + if (fake_exec && !use_pb) { + reader.skip(txn.op_size() * Op_Size); + } + + if (check_interval(txn.seqno(), yield_interval)) st_sleep(0); + if (check_interval(txn.seqno(), process_display)) { + cout << "caught up txn " << txn.seqno() + << "; db size = " << map.size() + << "; seqno = " << seqno + << "; backlog.size = " << backlog.queue().size() << endl; + } + } + ASSERT(start + sizeof(uint32_t) + prefix == reader.start()); + } + } + g_caught_up = true; +#if 0 + while (!backlog.empty()) { + using pb::Txn; + shared_ptr<Txn> p = backlog.take(); + process_txn<pb_traits, pb_traits>(map, *p, seqno, nullptr); + if (check_interval(p->seqno(), catch_up_display)) { + cout << "processed 
txn " << p->seqno() << " off the backlog; " + << "backlog.size = " << backlog.queue().size() << endl; + } + if (check_interval(p->seqno(), yield_interval)) { + // Explicitly yield. (Note that yielding does still effectively + // happen anyway because process_txn is a yield point.) + st_sleep(0); + } + } +#endif + showtput("replayer caught up; from backlog replayed", + current_time_millis(), mid_time, seqno, mid_seqno); + } + } + } catch (std::exception &ex) { + cerr_thread_ex(ex) << endl; + throw; + } + + stop_hub.insert(st_thread_self()); +} Property changes on: ydb/trunk/src/replica.clamp.lzz ___________________________________________________________________ Added: svn:mergeinfo + Deleted: ydb/trunk/src/replica.lzz.clamp =================================================================== --- ydb/trunk/src/replica.lzz.clamp 2009-03-24 07:54:47 UTC (rev 1331) +++ ydb/trunk/src/replica.lzz.clamp 2009-03-24 09:11:22 UTC (rev 1332) @@ -1,362 +0,0 @@ -#hdr -#include "unsetprefs.h" -#include <string> -#end - -#src -#include "unsetprefs.h" -#include <boost/archive/binary_iarchive.hpp> -#include <commons/st/sockets.h> -#include <commons/st/threads.h> -#include "tpcc/clock.h" -#include "tpcc/randomgenerator.h" -#include "tpcc/tpccclient.h" -#include "tpcc/tpccgenerator.h" -#include "tpcc/tpcctables.h" -#include "rectpcc.hh" -#include "run.hh" -#include "stxn.hh" -#include "tpcc.hh" -#end - -/** - * Run a replica. - */ -void -run_replica(std::string leader_host, uint16_t leader_port, uint16_t listen_port) -{ - if (disk) { - // Disk IO threads. - ... [truncated message content] |