commit 9b495ce0d5e710198d4f062fc13fd5aab4ca6b45 Author: fan4w Date: Mon Jun 3 14:51:43 2024 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2101505 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.*/ +main/mr-tmp/ +mrtmp.* +824-mrinput-*.txt +/main/diff.out +/mapreduce/x.txt +/pbservice/x.txt +/kvpaxos/x.txt +*.so +/main/mrcoordinator +/main/mrsequential +/main/mrworker diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6e00474 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module 6.5840 + +go 1.15 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29 diff --git a/kvraft/client.go b/kvraft/client.go new file mode 100644 index 0000000..c3efa00 --- /dev/null +++ b/kvraft/client.go @@ -0,0 +1,60 @@ +package kvraft + +import "6.5840/labrpc" +import "crypto/rand" +import "math/big" + + +type Clerk struct { + servers []*labrpc.ClientEnd + // You will have to modify this struct. +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.servers = servers + // You'll have to add code here. + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + + // You will have to modify this function. + return "" +} + +// shared by Put and Append. 
+// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) PutAppend(key string, value string, op string) { + // You will have to modify this function. +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} +func (ck *Clerk) Append(key string, value string) { + ck.PutAppend(key, value, "Append") +} diff --git a/kvraft/common.go b/kvraft/common.go new file mode 100644 index 0000000..bca7ed1 --- /dev/null +++ b/kvraft/common.go @@ -0,0 +1,32 @@ +package kvraft + +const ( + OK = "OK" + ErrNoKey = "ErrNoKey" + ErrWrongLeader = "ErrWrongLeader" +) + +type Err string + +// Put or Append +type PutAppendArgs struct { + Key string + Value string + // You'll have to add definitions here. + // Field names must start with capital letters, + // otherwise RPC will break. +} + +type PutAppendReply struct { + Err Err +} + +type GetArgs struct { + Key string + // You'll have to add definitions here. 
+} + +type GetReply struct { + Err Err + Value string +} diff --git a/kvraft/config.go b/kvraft/config.go new file mode 100644 index 0000000..1c62b67 --- /dev/null +++ b/kvraft/config.go @@ -0,0 +1,425 @@ +package kvraft + +import "6.5840/labrpc" +import "testing" +import "os" + +// import "log" +import crand "crypto/rand" +import "math/big" +import "math/rand" +import "encoding/base64" +import "sync" +import "runtime" +import "6.5840/raft" +import "fmt" +import "time" +import "sync/atomic" + +func randstring(n int) string { + b := make([]byte, 2*n) + crand.Read(b) + s := base64.URLEncoding.EncodeToString(b) + return s[0:n] +} + +func makeSeed() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := crand.Int(crand.Reader, max) + x := bigx.Int64() + return x +} + +// Randomize server handles +func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { + sa := make([]*labrpc.ClientEnd, len(kvh)) + copy(sa, kvh) + for i := range sa { + j := rand.Intn(i + 1) + sa[i], sa[j] = sa[j], sa[i] + } + return sa +} + +type config struct { + mu sync.Mutex + t *testing.T + net *labrpc.Network + n int + kvservers []*KVServer + saved []*raft.Persister + endnames [][]string // names of each server's sending ClientEnds + clerks map[*Clerk][]string + nextClientId int + maxraftstate int + start time.Time // time at which make_config() was called + // begin()/end() statistics + t0 time.Time // time at which test_test.go called cfg.begin() + rpcs0 int // rpcTotal() at start of test + ops int32 // number of clerk get/put/append method calls +} + +func (cfg *config) checkTimeout() { + // enforce a two minute real-time limit on each test + if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { + cfg.t.Fatal("test took longer than 120 seconds") + } +} + +func (cfg *config) cleanup() { + cfg.mu.Lock() + defer cfg.mu.Unlock() + for i := 0; i < len(cfg.kvservers); i++ { + if cfg.kvservers[i] != nil { + cfg.kvservers[i].Kill() + } + } + cfg.net.Cleanup() + cfg.checkTimeout() 
+}
+
+// LogSize returns the maximum log size across all servers, i.e. the
+// largest RaftStateSize() reported by any persister in cfg.saved.
+func (cfg *config) LogSize() int {
+	logsize := 0
+	for i := 0; i < cfg.n; i++ {
+		n := cfg.saved[i].RaftStateSize()
+		if n > logsize {
+			logsize = n
+		}
+	}
+	return logsize
+}
+
+// SnapshotSize returns the maximum snapshot size across all servers, i.e.
+// the largest SnapshotSize() reported by any persister in cfg.saved.
+func (cfg *config) SnapshotSize() int {
+	snapshotsize := 0
+	for i := 0; i < cfg.n; i++ {
+		n := cfg.saved[i].SnapshotSize()
+		if n > snapshotsize {
+			snapshotsize = n
+		}
+	}
+	return snapshotsize
+}
+
+// connectUnlocked attaches server i to the servers listed in to by enabling
+// the named ClientEnds in both directions.
+// caller must hold cfg.mu
+func (cfg *config) connectUnlocked(i int, to []int) {
+	// log.Printf("connect peer %d to %v\n", i, to)
+
+	// outgoing socket files
+	for j := 0; j < len(to); j++ {
+		endname := cfg.endnames[i][to[j]]
+		cfg.net.Enable(endname, true)
+	}
+
+	// incoming socket files
+	for j := 0; j < len(to); j++ {
+		endname := cfg.endnames[to[j]][i]
+		cfg.net.Enable(endname, true)
+	}
+}
+
+// connect is connectUnlocked with cfg.mu held for the duration of the call.
+func (cfg *config) connect(i int, to []int) {
+	cfg.mu.Lock()
+	defer cfg.mu.Unlock()
+	cfg.connectUnlocked(i, to)
+}
+
+// disconnectUnlocked detaches server i from the servers listed in from by
+// disabling the named ClientEnds in both directions. Servers whose endnames
+// row is still nil are skipped.
+// caller must hold cfg.mu
+func (cfg *config) disconnectUnlocked(i int, from []int) {
+	// log.Printf("disconnect peer %d from %v\n", i, from)
+
+	// outgoing socket files
+	for j := 0; j < len(from); j++ {
+		if cfg.endnames[i] != nil {
+			endname := cfg.endnames[i][from[j]]
+			cfg.net.Enable(endname, false)
+		}
+	}
+
+	// incoming socket files
+	for j := 0; j < len(from); j++ {
+		// guard the row that is actually indexed below: j is only a
+		// position within from, the server index is from[j].
+		if cfg.endnames[from[j]] != nil {
+			endname := cfg.endnames[from[j]][i]
+			cfg.net.Enable(endname, false)
+		}
+	}
+}
+
+// disconnect is disconnectUnlocked with cfg.mu held for the duration of the call.
+func (cfg *config) disconnect(i int, from []int) {
+	cfg.mu.Lock()
+	defer cfg.mu.Unlock()
+	cfg.disconnectUnlocked(i, from)
+}
+
+// All returns the server indices 0..cfg.n-1 in a freshly allocated slice.
+func (cfg *config) All() []int {
+	all := make([]int, cfg.n)
+	for i := 0; i < cfg.n; i++ {
+		all[i] = i
+	}
+	return all
+}
+
+// ConnectAll connects every server to every server.
+func (cfg *config) ConnectAll() {
+	cfg.mu.Lock()
+	defer cfg.mu.Unlock()
+	for i := 0; i < cfg.n; i++ {
+		cfg.connectUnlocked(i, cfg.All())
+	}
+} + +// Sets up 2 partitions with connectivity between servers in each partition. +func (cfg *config) partition(p1 []int, p2 []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + // log.Printf("partition servers into: %v %v\n", p1, p2) + for i := 0; i < len(p1); i++ { + cfg.disconnectUnlocked(p1[i], p2) + cfg.connectUnlocked(p1[i], p1) + } + for i := 0; i < len(p2); i++ { + cfg.disconnectUnlocked(p2[i], p1) + cfg.connectUnlocked(p2[i], p2) + } +} + +// Create a clerk with clerk specific server names. +// Give it connections to all of the servers, but for +// now enable only connections to servers in to[]. +func (cfg *config) makeClient(to []int) *Clerk { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + // a fresh set of ClientEnds. + ends := make([]*labrpc.ClientEnd, cfg.n) + endnames := make([]string, cfg.n) + for j := 0; j < cfg.n; j++ { + endnames[j] = randstring(20) + ends[j] = cfg.net.MakeEnd(endnames[j]) + cfg.net.Connect(endnames[j], j) + } + + ck := MakeClerk(random_handles(ends)) + cfg.clerks[ck] = endnames + cfg.nextClientId++ + cfg.ConnectClientUnlocked(ck, to) + return ck +} + +func (cfg *config) deleteClient(ck *Clerk) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + v := cfg.clerks[ck] + for i := 0; i < len(v); i++ { + os.Remove(v[i]) + } + delete(cfg.clerks, ck) +} + +// caller should hold cfg.mu +func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { + // log.Printf("ConnectClient %v to %v\n", ck, to) + endnames := cfg.clerks[ck] + for j := 0; j < len(to); j++ { + s := endnames[to[j]] + cfg.net.Enable(s, true) + } +} + +func (cfg *config) ConnectClient(ck *Clerk, to []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.ConnectClientUnlocked(ck, to) +} + +// caller should hold cfg.mu +func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { + // log.Printf("DisconnectClient %v from %v\n", ck, from) + endnames := cfg.clerks[ck] + for j := 0; j < len(from); j++ { + s := endnames[from[j]] + cfg.net.Enable(s, false) + } +} + +func (cfg 
*config) DisconnectClient(ck *Clerk, from []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.DisconnectClientUnlocked(ck, from) +} + +// Shutdown a server by isolating it +func (cfg *config) ShutdownServer(i int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + cfg.disconnectUnlocked(i, cfg.All()) + + // disable client connections to the server. + // it's important to do this before creating + // the new Persister in saved[i], to avoid + // the possibility of the server returning a + // positive reply to an Append but persisting + // the result in the superseded Persister. + cfg.net.DeleteServer(i) + + // a fresh persister, in case old instance + // continues to update the Persister. + // but copy old persister's content so that we always + // pass Make() the last persisted state. + if cfg.saved[i] != nil { + cfg.saved[i] = cfg.saved[i].Copy() + } + + kv := cfg.kvservers[i] + if kv != nil { + cfg.mu.Unlock() + kv.Kill() + cfg.mu.Lock() + cfg.kvservers[i] = nil + } +} + +// If restart servers, first call ShutdownServer +func (cfg *config) StartServer(i int) { + cfg.mu.Lock() + + // a fresh set of outgoing ClientEnd names. + cfg.endnames[i] = make([]string, cfg.n) + for j := 0; j < cfg.n; j++ { + cfg.endnames[i][j] = randstring(20) + } + + // a fresh set of ClientEnds. + ends := make([]*labrpc.ClientEnd, cfg.n) + for j := 0; j < cfg.n; j++ { + ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) + cfg.net.Connect(cfg.endnames[i][j], j) + } + + // a fresh persister, so old instance doesn't overwrite + // new instance's persisted state. + // give the fresh persister a copy of the old persister's + // state, so that the spec is that we pass StartKVServer() + // the last persisted state. 
+	if cfg.saved[i] != nil {
+		cfg.saved[i] = cfg.saved[i].Copy()
+	} else {
+		cfg.saved[i] = raft.MakePersister()
+	}
+	cfg.mu.Unlock()
+
+	cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate)
+
+	kvsvc := labrpc.MakeService(cfg.kvservers[i])
+	rfsvc := labrpc.MakeService(cfg.kvservers[i].rf)
+	srv := labrpc.MakeServer()
+	srv.AddService(kvsvc)
+	srv.AddService(rfsvc)
+	cfg.net.AddServer(i, srv)
+}
+
+// Leader scans the servers in index order and returns (true, i) for the
+// first one whose Raft peer reports itself leader via GetState(), or
+// (false, 0) if none does.
+func (cfg *config) Leader() (bool, int) {
+	cfg.mu.Lock()
+	defer cfg.mu.Unlock()
+
+	for i := 0; i < cfg.n; i++ {
+		// ShutdownServer leaves a nil slot behind; a server that is
+		// not running cannot be the leader.
+		if cfg.kvservers[i] == nil {
+			continue
+		}
+		_, is_leader := cfg.kvservers[i].rf.GetState()
+		if is_leader {
+			return true, i
+		}
+	}
+	return false, 0
+}
+
+// Partition servers into 2 groups and put current leader in minority.
+// The first result has cfg.n/2+1 entries, the second cfg.n/2 entries with
+// the leader stored last.
+// The "found" result of Leader() is discarded, so when no leader is found
+// server 0 is treated as the leader.
+// NOTE(review): the two sizes add up to cfg.n only when cfg.n is odd; the
+// caller visible in this file uses 5 servers.
+func (cfg *config) make_partition() ([]int, []int) {
+	_, l := cfg.Leader()
+	p1 := make([]int, cfg.n/2+1)
+	p2 := make([]int, cfg.n/2)
+	j := 0
+	for i := 0; i < cfg.n; i++ {
+		if i != l {
+			if j < len(p1) {
+				p1[j] = i
+			} else {
+				p2[j-len(p1)] = i
+			}
+			j++
+		}
+	}
+	p2[len(p2)-1] = l
+	return p1, p2
+}
+
+// ncpu_once guards the one-time CPU-count warning and math/rand seeding
+// performed by make_config.
+var ncpu_once sync.Once
+
+// make_config builds a test configuration with n KV servers on a fresh
+// labrpc network, starts and fully connects them, and then sets the
+// network's reliability to !unreliable. maxraftstate is passed through to
+// every StartKVServer call.
+func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
+	ncpu_once.Do(func() {
+		if runtime.NumCPU() < 2 {
+			fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
+		}
+		rand.Seed(makeSeed())
+	})
+	runtime.GOMAXPROCS(4)
+	cfg := &config{}
+	cfg.t = t
+	cfg.net = labrpc.MakeNetwork()
+	cfg.n = n
+	cfg.kvservers = make([]*KVServer, cfg.n)
+	cfg.saved = make([]*raft.Persister, cfg.n)
+	cfg.endnames = make([][]string, cfg.n)
+	cfg.clerks = make(map[*Clerk][]string)
+	cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
+	cfg.maxraftstate = maxraftstate
+	cfg.start = time.Now()
+
+	// create a full set of KV servers.
+	for i := 0; i < cfg.n; i++ {
+		cfg.StartServer(i)
+	}
+
+	cfg.ConnectAll()
+
+	cfg.net.Reliable(!unreliable)
+
+	return cfg
+}
+
+// rpcTotal returns the network's total RPC count (cfg.net.GetTotalCount()).
+func (cfg *config) rpcTotal() int {
+	return cfg.net.GetTotalCount()
+}
+
+// start a Test.
+// print the Test message. +// e.g. cfg.begin("Test (2B): RPC counts aren't too high") +func (cfg *config) begin(description string) { + fmt.Printf("%s ...\n", description) + cfg.t0 = time.Now() + cfg.rpcs0 = cfg.rpcTotal() + atomic.StoreInt32(&cfg.ops, 0) +} + +func (cfg *config) op() { + atomic.AddInt32(&cfg.ops, 1) +} + +// end a Test -- the fact that we got here means there +// was no failure. +// print the Passed message, +// and some performance numbers. +func (cfg *config) end() { + cfg.checkTimeout() + if cfg.t.Failed() == false { + t := time.Since(cfg.t0).Seconds() // real time + npeers := cfg.n // number of Raft peers + nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends + ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls + + fmt.Printf(" ... Passed --") + fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops) + } +} diff --git a/kvraft/server.go b/kvraft/server.go new file mode 100644 index 0000000..dc770f5 --- /dev/null +++ b/kvraft/server.go @@ -0,0 +1,101 @@ +package kvraft + +import ( + "6.5840/labgob" + "6.5840/labrpc" + "6.5840/raft" + "log" + "sync" + "sync/atomic" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + + +type Op struct { + // Your definitions here. + // Field names must start with capital letters, + // otherwise RPC will break. +} + +type KVServer struct { + mu sync.Mutex + me int + rf *raft.Raft + applyCh chan raft.ApplyMsg + dead int32 // set by Kill() + + maxraftstate int // snapshot if log grows this big + + // Your definitions here. +} + + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + // Your code here. +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + // Your code here. +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + // Your code here. +} + +// the tester calls Kill() when a KVServer instance won't +// be needed again. 
for your convenience, we supply +// code to set rf.dead (without needing a lock), +// and a killed() method to test rf.dead in +// long-running loops. you can also add your own +// code to Kill(). you're not required to do anything +// about this, but it may be convenient (for example) +// to suppress debug output from a Kill()ed instance. +func (kv *KVServer) Kill() { + atomic.StoreInt32(&kv.dead, 1) + kv.rf.Kill() + // Your code here, if desired. +} + +func (kv *KVServer) killed() bool { + z := atomic.LoadInt32(&kv.dead) + return z == 1 +} + +// servers[] contains the ports of the set of +// servers that will cooperate via Raft to +// form the fault-tolerant key/value service. +// me is the index of the current server in servers[]. +// the k/v server should store snapshots through the underlying Raft +// implementation, which should call persister.SaveStateAndSnapshot() to +// atomically save the Raft state along with the snapshot. +// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, +// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, +// you don't need to snapshot. +// StartKVServer() must return quickly, so it should start goroutines +// for any long-running work. +func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { + // call labgob.Register on structures you want + // Go's RPC library to marshall/unmarshall. + labgob.Register(Op{}) + + kv := new(KVServer) + kv.me = me + kv.maxraftstate = maxraftstate + + // You may need initialization code here. + + kv.applyCh = make(chan raft.ApplyMsg) + kv.rf = raft.Make(servers, me, persister, kv.applyCh) + + // You may need initialization code here. 
+ + return kv +} diff --git a/kvraft/test_test.go b/kvraft/test_test.go new file mode 100644 index 0000000..e6fc996 --- /dev/null +++ b/kvraft/test_test.go @@ -0,0 +1,720 @@ +package kvraft + +import "6.5840/porcupine" +import "6.5840/models" +import "testing" +import "strconv" +import "time" +import "math/rand" +import "strings" +import "sync" +import "sync/atomic" +import "fmt" +import "io/ioutil" + +// The tester generously allows solutions to complete elections in one second +// (much more than the paper's range of timeouts). +const electionTimeout = 1 * time.Second + +const linearizabilityCheckTimeout = 1 * time.Second + +type OpLog struct { + operations []porcupine.Operation + sync.Mutex +} + +func (log *OpLog) Append(op porcupine.Operation) { + log.Lock() + defer log.Unlock() + log.operations = append(log.operations, op) +} + +func (log *OpLog) Read() []porcupine.Operation { + log.Lock() + defer log.Unlock() + ops := make([]porcupine.Operation, len(log.operations)) + copy(ops, log.operations) + return ops +} + +// to make sure timestamps use the monotonic clock, instead of computing +// absolute timestamps with `time.Now().UnixNano()` (which uses the wall +// clock), we measure time relative to `t0` using `time.Since(t0)`, which uses +// the monotonic clock +var t0 = time.Now() + +// get/put/putappend that keep counts +func Get(cfg *config, ck *Clerk, key string, log *OpLog, cli int) string { + start := int64(time.Since(t0)) + v := ck.Get(key) + end := int64(time.Since(t0)) + cfg.op() + if log != nil { + log.Append(porcupine.Operation{ + Input: models.KvInput{Op: 0, Key: key}, + Output: models.KvOutput{Value: v}, + Call: start, + Return: end, + ClientId: cli, + }) + } + + return v +} + +func Put(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) { + start := int64(time.Since(t0)) + ck.Put(key, value) + end := int64(time.Since(t0)) + cfg.op() + if log != nil { + log.Append(porcupine.Operation{ + Input: models.KvInput{Op: 1, Key: key, 
Value: value}, + Output: models.KvOutput{}, + Call: start, + Return: end, + ClientId: cli, + }) + } +} + +func Append(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) { + start := int64(time.Since(t0)) + ck.Append(key, value) + end := int64(time.Since(t0)) + cfg.op() + if log != nil { + log.Append(porcupine.Operation{ + Input: models.KvInput{Op: 2, Key: key, Value: value}, + Output: models.KvOutput{}, + Call: start, + Return: end, + ClientId: cli, + }) + } +} + +func check(cfg *config, t *testing.T, ck *Clerk, key string, value string) { + v := Get(cfg, ck, key, nil, -1) + if v != value { + t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v) + } +} + +// a client runs the function f and then signals it is done +func run_client(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) { + ok := false + defer func() { ca <- ok }() + ck := cfg.makeClient(cfg.All()) + fn(me, ck, t) + ok = true + cfg.deleteClient(ck) +} + +// spawn ncli clients and wait until they are all done +func spawn_clients_and_wait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) { + ca := make([]chan bool, ncli) + for cli := 0; cli < ncli; cli++ { + ca[cli] = make(chan bool) + go run_client(t, cfg, cli, ca[cli], fn) + } + // log.Printf("spawn_clients_and_wait: waiting for clients") + for cli := 0; cli < ncli; cli++ { + ok := <-ca[cli] + // log.Printf("spawn_clients_and_wait: client %d is done\n", cli) + if ok == false { + t.Fatalf("failure") + } + } +} + +// predict effect of Append(k, val) if old value is prev. 
+func NextValue(prev string, val string) string {
+	return prev + val
+}
+
+// check that for a specific client all known appends are present in a value,
+// and in order.
+// For j in [0, count) the element "x <clnt> <j> y" must occur in v exactly
+// once, and each element must start after the previous one; any violation
+// fails the test via t.Fatalf.
+func checkClntAppends(t *testing.T, clnt int, v string, count int) {
+	lastoff := -1
+	for j := 0; j < count; j++ {
+		wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y"
+		off := strings.Index(v, wanted)
+		if off < 0 {
+			t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v)
+		}
+		// first and last occurrence differ only if the element is duplicated
+		off1 := strings.LastIndex(v, wanted)
+		if off1 != off {
+			t.Fatalf("duplicate element %v in Append result", wanted)
+		}
+		if off <= lastoff {
+			t.Fatalf("wrong order for element %v in Append result", wanted)
+		}
+		lastoff = off
+	}
+}
+
+// check that all known appends are present in a value,
+// and are in order for each concurrent client.
+// counts[i] is the number of appends expected from client i; the per-client
+// check is exactly checkClntAppends.
+func checkConcurrentAppends(t *testing.T, v string, counts []int) {
+	for i, count := range counts {
+		checkClntAppends(t, i, v, count)
+	}
+}
+
+// repartition the servers periodically.
+// Until *done becomes non-zero, each round assigns every server to one of
+// two groups at random, applies that partition, and sleeps for
+// electionTimeout plus up to 199ms. A value is sent on ch when the loop exits.
+func partitioner(t *testing.T, cfg *config, ch chan bool, done *int32) {
+	defer func() { ch <- true }()
+	for atomic.LoadInt32(done) == 0 {
+		a := make([]int, cfg.n)
+		for i := 0; i < cfg.n; i++ {
+			a[i] = (rand.Int() % 2)
+		}
+		pa := make([][]int, 2)
+		for i := 0; i < 2; i++ {
+			pa[i] = make([]int, 0)
+			for j := 0; j < cfg.n; j++ {
+				if a[j] == i {
+					pa[i] = append(pa[i], j)
+				}
+			}
+		}
+		cfg.partition(pa[0], pa[1])
+		time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
+	}
+}
+
+// 
Basic test is as follows: one or more clients submitting Append/Get +// operations to set of servers for some period of time. After the period is +// over, test checks that all appended values are present and in order for a +// particular key. If unreliable is set, RPCs may fail. If crash is set, the +// servers crash after the period is over and restart. If partitions is set, +// the test repartitions the network concurrently with the clients and servers. If +// maxraftstate is a positive number, the size of the state for Raft (i.e., log +// size) shouldn't exceed 8*maxraftstate. If maxraftstate is negative, +// snapshots shouldn't be used. +func GenericTest(t *testing.T, part string, nclients int, nservers int, unreliable bool, crash bool, partitions bool, maxraftstate int, randomkeys bool) { + + title := "Test: " + if unreliable { + // the network drops RPC requests and replies. + title = title + "unreliable net, " + } + if crash { + // peers re-start, and thus persistence must work. 
+ title = title + "restarts, " + } + if partitions { + // the network may partition + title = title + "partitions, " + } + if maxraftstate != -1 { + title = title + "snapshots, " + } + if randomkeys { + title = title + "random keys, " + } + if nclients > 1 { + title = title + "many clients" + } else { + title = title + "one client" + } + title = title + " (" + part + ")" // 4A or 4B + + cfg := make_config(t, nservers, unreliable, maxraftstate) + defer cfg.cleanup() + + cfg.begin(title) + opLog := &OpLog{} + + ck := cfg.makeClient(cfg.All()) + + done_partitioner := int32(0) + done_clients := int32(0) + ch_partitioner := make(chan bool) + clnts := make([]chan int, nclients) + for i := 0; i < nclients; i++ { + clnts[i] = make(chan int) + } + for i := 0; i < 3; i++ { + // log.Printf("Iteration %v\n", i) + atomic.StoreInt32(&done_clients, 0) + atomic.StoreInt32(&done_partitioner, 0) + go spawn_clients_and_wait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) { + j := 0 + defer func() { + clnts[cli] <- j + }() + last := "" // only used when not randomkeys + if !randomkeys { + Put(cfg, myck, strconv.Itoa(cli), last, opLog, cli) + } + for atomic.LoadInt32(&done_clients) == 0 { + var key string + if randomkeys { + key = strconv.Itoa(rand.Intn(nclients)) + } else { + key = strconv.Itoa(cli) + } + nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y" + if (rand.Int() % 1000) < 500 { + // log.Printf("%d: client new append %v\n", cli, nv) + Append(cfg, myck, key, nv, opLog, cli) + if !randomkeys { + last = NextValue(last, nv) + } + j++ + } else if randomkeys && (rand.Int()%1000) < 100 { + // we only do this when using random keys, because it would break the + // check done after Get() operations + Put(cfg, myck, key, nv, opLog, cli) + j++ + } else { + // log.Printf("%d: client new get %v\n", cli, key) + v := Get(cfg, myck, key, opLog, cli) + // the following check only makes sense when we're not using random keys + if !randomkeys && v != last { + t.Fatalf("get 
wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v) + } + } + } + }) + + if partitions { + // Allow the clients to perform some operations without interruption + time.Sleep(1 * time.Second) + go partitioner(t, cfg, ch_partitioner, &done_partitioner) + } + time.Sleep(5 * time.Second) + + atomic.StoreInt32(&done_clients, 1) // tell clients to quit + atomic.StoreInt32(&done_partitioner, 1) // tell partitioner to quit + + if partitions { + // log.Printf("wait for partitioner\n") + <-ch_partitioner + // reconnect network and submit a request. A client may + // have submitted a request in a minority. That request + // won't return until that server discovers a new term + // has started. + cfg.ConnectAll() + // wait for a while so that we have a new term + time.Sleep(electionTimeout) + } + + if crash { + // log.Printf("shutdown servers\n") + for i := 0; i < nservers; i++ { + cfg.ShutdownServer(i) + } + // Wait for a while for servers to shutdown, since + // shutdown isn't a real crash and isn't instantaneous + time.Sleep(electionTimeout) + // log.Printf("restart servers\n") + // crash and re-start all + for i := 0; i < nservers; i++ { + cfg.StartServer(i) + } + cfg.ConnectAll() + } + + // log.Printf("wait for clients\n") + for i := 0; i < nclients; i++ { + // log.Printf("read from clients %d\n", i) + j := <-clnts[i] + // if j < 10 { + // log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j) + // } + key := strconv.Itoa(i) + // log.Printf("Check %v for client %d\n", j, i) + v := Get(cfg, ck, key, opLog, 0) + if !randomkeys { + checkClntAppends(t, i, v, j) + } + } + + if maxraftstate > 0 { + // Check maximum after the servers have processed all client + // requests and had time to checkpoint. 
+ sz := cfg.LogSize() + if sz > 8*maxraftstate { + t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate) + } + } + if maxraftstate < 0 { + // Check that snapshots are not used + ssz := cfg.SnapshotSize() + if ssz > 0 { + t.Fatalf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, maxraftstate) + } + } + } + + res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), linearizabilityCheckTimeout) + if res == porcupine.Illegal { + file, err := ioutil.TempFile("", "*.html") + if err != nil { + fmt.Printf("info: failed to create temp file for visualization") + } else { + err = porcupine.Visualize(models.KvModel, info, file) + if err != nil { + fmt.Printf("info: failed to write history visualization to %s\n", file.Name()) + } else { + fmt.Printf("info: wrote history visualization to %s\n", file.Name()) + } + } + t.Fatal("history is not linearizable") + } else if res == porcupine.Unknown { + fmt.Println("info: linearizability check timed out, assuming history is ok") + } + + cfg.end() +} + +// Check that ops are committed fast enough, better than 1 per heartbeat interval +func GenericTestSpeed(t *testing.T, part string, maxraftstate int) { + const nservers = 3 + const numOps = 1000 + cfg := make_config(t, nservers, false, maxraftstate) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + + cfg.begin(fmt.Sprintf("Test: ops complete fast enough (%s)", part)) + + // wait until first op completes, so we know a leader is elected + // and KV servers are ready to process client requests + ck.Get("x") + + start := time.Now() + for i := 0; i < numOps; i++ { + ck.Append("x", "x 0 "+strconv.Itoa(i)+" y") + } + dur := time.Since(start) + + v := ck.Get("x") + checkClntAppends(t, 0, v, numOps) + + // heartbeat interval should be ~ 100 ms; require at least 3 ops per + const heartbeatInterval = 100 * time.Millisecond + const opsPerInterval = 3 + const timePerOp = heartbeatInterval / opsPerInterval + if dur > numOps*timePerOp { + 
t.Fatalf("Operations completed too slowly %v/op > %v/op\n", dur/numOps, timePerOp) + } + + cfg.end() +} + +func TestBasic4A(t *testing.T) { + // Test: one client (4A) ... + GenericTest(t, "4A", 1, 5, false, false, false, -1, false) +} + +func TestSpeed4A(t *testing.T) { + GenericTestSpeed(t, "4A", -1) +} + +func TestConcurrent4A(t *testing.T) { + // Test: many clients (4A) ... + GenericTest(t, "4A", 5, 5, false, false, false, -1, false) +} + +func TestUnreliable4A(t *testing.T) { + // Test: unreliable net, many clients (4A) ... + GenericTest(t, "4A", 5, 5, true, false, false, -1, false) +} + +func TestUnreliableOneKey4A(t *testing.T) { + const nservers = 3 + cfg := make_config(t, nservers, true, -1) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + + cfg.begin("Test: concurrent append to same key, unreliable (4A)") + + Put(cfg, ck, "k", "", nil, -1) + + const nclient = 5 + const upto = 10 + spawn_clients_and_wait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) { + n := 0 + for n < upto { + Append(cfg, myck, "k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y", nil, -1) + n++ + } + }) + + var counts []int + for i := 0; i < nclient; i++ { + counts = append(counts, upto) + } + + vx := Get(cfg, ck, "k", nil, -1) + checkConcurrentAppends(t, vx, counts) + + cfg.end() +} + +// Submit a request in the minority partition and check that the requests +// doesn't go through until the partition heals. The leader in the original +// network ends up in the minority partition. 
+func TestOnePartition4A(t *testing.T) {
+	const nservers = 5
+	cfg := make_config(t, nservers, false, -1)
+	defer cfg.cleanup()
+	ck := cfg.makeClient(cfg.All())
+
+	Put(cfg, ck, "1", "13", nil, -1)
+
+	cfg.begin("Test: progress in majority (4A)")
+
+	p1, p2 := cfg.make_partition()
+	cfg.partition(p1, p2)
+
+	ckp1 := cfg.makeClient(p1)  // connect ckp1 to p1
+	ckp2a := cfg.makeClient(p2) // connect ckp2a to p2
+	ckp2b := cfg.makeClient(p2) // connect ckp2b to p2
+
+	Put(cfg, ckp1, "1", "14", nil, -1)
+	check(cfg, t, ckp1, "1", "14")
+
+	cfg.end()
+
+	// done0/done1 are signalled when the minority-side Put/Get return.
+	done0 := make(chan bool)
+	done1 := make(chan bool)
+
+	cfg.begin("Test: no progress in minority (4A)")
+	go func() {
+		Put(cfg, ckp2a, "1", "15", nil, -1)
+		done0 <- true
+	}()
+	go func() {
+		Get(cfg, ckp2b, "1", nil, -1) // different clerk in p2
+		done1 <- true
+	}()
+
+	// neither minority operation may complete within one second
+	select {
+	case <-done0:
+		t.Fatalf("Put in minority completed")
+	case <-done1:
+		t.Fatalf("Get in minority completed")
+	case <-time.After(time.Second):
+	}
+
+	check(cfg, t, ckp1, "1", "14")
+	Put(cfg, ckp1, "1", "16", nil, -1)
+	check(cfg, t, ckp1, "1", "16")
+
+	cfg.end()
+
+	cfg.begin("Test: completion after heal (4A)")
+
+	cfg.ConnectAll()
+	cfg.ConnectClient(ckp2a, cfg.All())
+	cfg.ConnectClient(ckp2b, cfg.All())
+
+	time.Sleep(electionTimeout)
+
+	select {
+	case <-done0:
+	case <-time.After(30 * 100 * time.Millisecond):
+		t.Fatalf("Put did not complete")
+	}
+
+	// no default arm here: the select has to block until the Get returns
+	// or the timeout fires, otherwise the timeout case can never run and
+	// the Get goroutine may stay blocked on done1.
+	select {
+	case <-done1:
+	case <-time.After(30 * 100 * time.Millisecond):
+		t.Fatalf("Get did not complete")
+	}
+
+	check(cfg, t, ck, "1", "15")
+
+	cfg.end()
+}
+
+func TestManyPartitionsOneClient4A(t *testing.T) {
+	// Test: partitions, one client (4A) ...
+	GenericTest(t, "4A", 1, 5, false, false, true, -1, false)
+}
+
+func TestManyPartitionsManyClients4A(t *testing.T) {
+	// Test: partitions, many clients (4A) ...
+	GenericTest(t, "4A", 5, 5, false, false, true, -1, false)
+}
+
+func TestPersistOneClient4A(t *testing.T) {
+	// Test: restarts, one client (4A) ...
+ GenericTest(t, "4A", 1, 5, false, true, false, -1, false) +} + +func TestPersistConcurrent4A(t *testing.T) { + // Test: restarts, many clients (4A) ... + GenericTest(t, "4A", 5, 5, false, true, false, -1, false) +} + +func TestPersistConcurrentUnreliable4A(t *testing.T) { + // Test: unreliable net, restarts, many clients (4A) ... + GenericTest(t, "4A", 5, 5, true, true, false, -1, false) +} + +func TestPersistPartition4A(t *testing.T) { + // Test: restarts, partitions, many clients (4A) ... + GenericTest(t, "4A", 5, 5, false, true, true, -1, false) +} + +func TestPersistPartitionUnreliable4A(t *testing.T) { + // Test: unreliable net, restarts, partitions, many clients (4A) ... + GenericTest(t, "4A", 5, 5, true, true, true, -1, false) +} + +func TestPersistPartitionUnreliableLinearizable4A(t *testing.T) { + // Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... + GenericTest(t, "4A", 15, 7, true, true, true, -1, true) +} + +// if one server falls behind, then rejoins, does it +// recover by using the InstallSnapshot RPC? +// also checks that majority discards committed log entries +// even if minority doesn't respond. +func TestSnapshotRPC4B(t *testing.T) { + const nservers = 3 + maxraftstate := 1000 + cfg := make_config(t, nservers, false, maxraftstate) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + + cfg.begin("Test: InstallSnapshot RPC (4B)") + + Put(cfg, ck, "a", "A", nil, -1) + check(cfg, t, ck, "a", "A") + + // a bunch of puts into the majority partition. + cfg.partition([]int{0, 1}, []int{2}) + { + ck1 := cfg.makeClient([]int{0, 1}) + for i := 0; i < 50; i++ { + Put(cfg, ck1, strconv.Itoa(i), strconv.Itoa(i), nil, -1) + } + time.Sleep(electionTimeout) + Put(cfg, ck1, "b", "B", nil, -1) + } + + // check that the majority partition has thrown away + // most of its log entries. 
+ sz := cfg.LogSize() + if sz > 8*maxraftstate { + t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate) + } + + // now make group that requires participation of + // lagging server, so that it has to catch up. + cfg.partition([]int{0, 2}, []int{1}) + { + ck1 := cfg.makeClient([]int{0, 2}) + Put(cfg, ck1, "c", "C", nil, -1) + Put(cfg, ck1, "d", "D", nil, -1) + check(cfg, t, ck1, "a", "A") + check(cfg, t, ck1, "b", "B") + check(cfg, t, ck1, "1", "1") + check(cfg, t, ck1, "49", "49") + } + + // now everybody + cfg.partition([]int{0, 1, 2}, []int{}) + + Put(cfg, ck, "e", "E", nil, -1) + check(cfg, t, ck, "c", "C") + check(cfg, t, ck, "e", "E") + check(cfg, t, ck, "1", "1") + + cfg.end() +} + +// are the snapshots not too huge? 500 bytes is a generous bound for the +// operations we're doing here. +func TestSnapshotSize4B(t *testing.T) { + const nservers = 3 + maxraftstate := 1000 + maxsnapshotstate := 500 + cfg := make_config(t, nservers, false, maxraftstate) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + + cfg.begin("Test: snapshot size is reasonable (4B)") + + for i := 0; i < 200; i++ { + Put(cfg, ck, "x", "0", nil, -1) + check(cfg, t, ck, "x", "0") + Put(cfg, ck, "x", "1", nil, -1) + check(cfg, t, ck, "x", "1") + } + + // check that servers have thrown away most of their log entries + sz := cfg.LogSize() + if sz > 8*maxraftstate { + t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate) + } + + // check that the snapshots are not unreasonably large + ssz := cfg.SnapshotSize() + if ssz > maxsnapshotstate { + t.Fatalf("snapshot too large (%v > %v)", ssz, maxsnapshotstate) + } + + cfg.end() +} + +func TestSpeed4B(t *testing.T) { + GenericTestSpeed(t, "4B", 1000) +} + +func TestSnapshotRecover4B(t *testing.T) { + // Test: restarts, snapshots, one client (4B) ... + GenericTest(t, "4B", 1, 5, false, true, false, 1000, false) +} + +func TestSnapshotRecoverManyClients4B(t *testing.T) { + // Test: restarts, snapshots, many clients (4B) ... 
+ GenericTest(t, "4B", 20, 5, false, true, false, 1000, false) +} + +func TestSnapshotUnreliable4B(t *testing.T) { + // Test: unreliable net, snapshots, many clients (4B) ... + GenericTest(t, "4B", 5, 5, true, false, false, 1000, false) +} + +func TestSnapshotUnreliableRecover4B(t *testing.T) { + // Test: unreliable net, restarts, snapshots, many clients (4B) ... + GenericTest(t, "4B", 5, 5, true, true, false, 1000, false) +} + +func TestSnapshotUnreliableRecoverConcurrentPartition4B(t *testing.T) { + // Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... + GenericTest(t, "4B", 5, 5, true, true, true, 1000, false) +} + +func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4B(t *testing.T) { + // Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... + GenericTest(t, "4B", 15, 7, true, true, true, 1000, true) +} diff --git a/kvsrv/client.go b/kvsrv/client.go new file mode 100644 index 0000000..9e11c27 --- /dev/null +++ b/kvsrv/client.go @@ -0,0 +1,63 @@ +package kvsrv + +import "6.5840/labrpc" +import "crypto/rand" +import "math/big" + + +type Clerk struct { + server *labrpc.ClientEnd + // You will have to modify this struct. +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + // You'll have to add code here. + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) Get(key string) string { + + // You will have to modify this function. + return "" +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) PutAppend(key string, value string, op string) string { + // You will have to modify this function. + return "" +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} diff --git a/kvsrv/common.go b/kvsrv/common.go new file mode 100644 index 0000000..8081d93 --- /dev/null +++ b/kvsrv/common.go @@ -0,0 +1,23 @@ +package kvsrv + +// Put or Append +type PutAppendArgs struct { + Key string + Value string + // You'll have to add definitions here. + // Field names must start with capital letters, + // otherwise RPC will break. +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + // You'll have to add definitions here. 
+} + +type GetReply struct { + Value string +} diff --git a/kvsrv/config.go b/kvsrv/config.go new file mode 100644 index 0000000..6e82f5e --- /dev/null +++ b/kvsrv/config.go @@ -0,0 +1,170 @@ +package kvsrv + +import "6.5840/labrpc" +import "testing" +import "os" + +//import "log" +import crand "crypto/rand" +import "math/big" +import "math/rand" +import "encoding/base64" +import "sync" +import "runtime" +import "fmt" +import "time" +import "sync/atomic" + +const SERVERID = 0 + +func randstring(n int) string { + b := make([]byte, 2*n) + crand.Read(b) + s := base64.URLEncoding.EncodeToString(b) + return s[0:n] +} + +func makeSeed() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := crand.Int(crand.Reader, max) + x := bigx.Int64() + return x +} + +type config struct { + mu sync.Mutex + t *testing.T + net *labrpc.Network + kvserver *KVServer + endname string // name of the server's sending ClientEnd + clerks map[*Clerk]string + nextClientId int + start time.Time // time at which make_config() was called + // begin()/end() statistics + t0 time.Time // time at which test_test.go called cfg.begin() + rpcs0 int // rpcTotal() at start of test + ops int32 // number of clerk get/put/append method calls +} + +func (cfg *config) checkTimeout() { + // enforce a two minute real-time limit on each test + if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { + cfg.t.Fatal("test took longer than 120 seconds") + } +} + +func (cfg *config) cleanup() { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.net.Cleanup() + cfg.checkTimeout() +} + +// Create a clerk with clerk specific server name. 
+func (cfg *config) makeClient() *Clerk { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + // a fresh ClientEnds + endname := randstring(20) + end := cfg.net.MakeEnd(endname) + cfg.net.Connect(endname, SERVERID) + + ck := MakeClerk(end) + cfg.clerks[ck] = endname + cfg.nextClientId++ + cfg.ConnectClientUnlocked(ck) + return ck +} + +func (cfg *config) deleteClient(ck *Clerk) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + v := cfg.clerks[ck] + for i := 0; i < len(v); i++ { + os.Remove(v) + } + cfg.net.DeleteEnd(v) + delete(cfg.clerks, ck) +} + +// caller should hold cfg.mu +func (cfg *config) ConnectClientUnlocked(ck *Clerk) { + //log.Printf("ConnectClient %v\n", ck) + endname := cfg.clerks[ck] + cfg.net.Enable(endname, true) +} + +func (cfg *config) ConnectClient(ck *Clerk) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.ConnectClientUnlocked(ck) +} + +func (cfg *config) StartServer() { + cfg.kvserver = StartKVServer() + + kvsvc := labrpc.MakeService(cfg.kvserver) + srv := labrpc.MakeServer() + srv.AddService(kvsvc) + cfg.net.AddServer(0, srv) +} + +var ncpu_once sync.Once + +func make_config(t *testing.T, unreliable bool) *config { + ncpu_once.Do(func() { + if runtime.NumCPU() < 2 { + fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") + } + rand.Seed(makeSeed()) + }) + runtime.GOMAXPROCS(4) + cfg := &config{} + cfg.t = t + cfg.net = labrpc.MakeNetwork() + cfg.clerks = make(map[*Clerk]string) + cfg.nextClientId = SERVERID + 1 + cfg.start = time.Now() + + cfg.StartServer() + + cfg.net.Reliable(!unreliable) + + return cfg +} + +func (cfg *config) rpcTotal() int { + return cfg.net.GetTotalCount() +} + +// start a Test. +// print the Test message. +// e.g. 
cfg.begin("Test (2B): RPC counts aren't too high") +func (cfg *config) begin(description string) { + fmt.Printf("%s ...\n", description) + cfg.t0 = time.Now() + cfg.rpcs0 = cfg.rpcTotal() + atomic.StoreInt32(&cfg.ops, 0) +} + +func (cfg *config) op() { + atomic.AddInt32(&cfg.ops, 1) +} + +// end a Test -- the fact that we got here means there +// was no failure. +// print the Passed message, +// and some performance numbers. +func (cfg *config) end() { + cfg.checkTimeout() + if cfg.t.Failed() == false { + t := time.Since(cfg.t0).Seconds() // real time + nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends + ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls + + fmt.Printf(" ... Passed --") + fmt.Printf(" t %4.1f nrpc %5d ops %4d\n", t, nrpc, ops) + } +} diff --git a/kvsrv/server.go b/kvsrv/server.go new file mode 100644 index 0000000..6d0841e --- /dev/null +++ b/kvsrv/server.go @@ -0,0 +1,43 @@ +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + + +type KVServer struct { + mu sync.Mutex + + // Your definitions here. +} + + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + // Your code here. +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + // Your code here. +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + // Your code here. +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + + // You may need initialization code here. 
+ + return kv +} diff --git a/kvsrv/test_test.go b/kvsrv/test_test.go new file mode 100644 index 0000000..b46304c --- /dev/null +++ b/kvsrv/test_test.go @@ -0,0 +1,606 @@ +package kvsrv + +import ( + "6.5840/models" + "6.5840/porcupine" + + "fmt" + "io/ioutil" + "log" + "math/rand" + "runtime" + "strconv" + "strings" + "sync" + "sync/atomic" + "testing" + "time" +) + +const linearizabilityCheckTimeout = 1 * time.Second + +type OpLog struct { + operations []porcupine.Operation + sync.Mutex +} + +func (log *OpLog) Append(op porcupine.Operation) { + log.Lock() + defer log.Unlock() + log.operations = append(log.operations, op) +} + +func (log *OpLog) Read() []porcupine.Operation { + log.Lock() + defer log.Unlock() + ops := make([]porcupine.Operation, len(log.operations)) + copy(ops, log.operations) + return ops +} + +// to make sure timestamps use the monotonic clock, instead of computing +// absolute timestamps with `time.Now().UnixNano()` (which uses the wall +// clock), we measure time relative to `t0` using `time.Since(t0)`, which uses +// the monotonic clock +var t0 = time.Now() + +// get/put/putappend that keep counts +func Get(cfg *config, ck *Clerk, key string, log *OpLog, cli int) string { + start := int64(time.Since(t0)) + v := ck.Get(key) + end := int64(time.Since(t0)) + cfg.op() + if log != nil { + log.Append(porcupine.Operation{ + Input: models.KvInput{Op: 0, Key: key}, + Output: models.KvOutput{Value: v}, + Call: start, + Return: end, + ClientId: cli, + }) + } + + return v +} + +func Put(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) { + start := int64(time.Since(t0)) + ck.Put(key, value) + end := int64(time.Since(t0)) + cfg.op() + if log != nil { + log.Append(porcupine.Operation{ + Input: models.KvInput{Op: 1, Key: key, Value: value}, + Output: models.KvOutput{}, + Call: start, + Return: end, + ClientId: cli, + }) + } +} + +func Append(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) string { + start := 
int64(time.Since(t0)) + last := ck.Append(key, value) + end := int64(time.Since(t0)) + cfg.op() + if log != nil { + log.Append(porcupine.Operation{ + Input: models.KvInput{Op: 3, Key: key, Value: value}, + Output: models.KvOutput{Value: last}, + Call: start, + Return: end, + ClientId: cli, + }) + } + return last +} + +// a client runs the function f and then signals it is done +func run_client(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) { + ok := false + defer func() { ca <- ok }() + ck := cfg.makeClient() + fn(me, ck, t) + ok = true + cfg.deleteClient(ck) +} + +// spawn ncli clients and wait until they are all done +func spawn_clients_and_wait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) { + ca := make([]chan bool, ncli) + for cli := 0; cli < ncli; cli++ { + ca[cli] = make(chan bool) + go run_client(t, cfg, cli, ca[cli], fn) + } + //log.Printf("spawn_clients_and_wait: waiting for clients") + for cli := 0; cli < ncli; cli++ { + ok := <-ca[cli] + //log.Printf("spawn_clients_and_wait: client %d is done\n", cli) + if ok == false { + t.Fatalf("failure") + } + } +} + +// predict effect of Append(k, val) if old value is prev. 
+func NextValue(prev string, val string) string { + return prev + val +} + +// check that for a specific client all known appends are present in a value, +// and in order +func checkClntAppends(t *testing.T, clnt int, v string, count int) { + lastoff := -1 + for j := 0; j < count; j++ { + wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y" + off := strings.Index(v, wanted) + if off < 0 { + t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v) + } + off1 := strings.LastIndex(v, wanted) + if off1 != off { + t.Fatalf("duplicate element %v in Append result", wanted) + } + if off <= lastoff { + t.Fatalf("wrong order for element %v in Append result", wanted) + } + lastoff = off + } +} + +// check that all known appends are present in a value, +// and are in order for each concurrent client. +func checkConcurrentAppends(t *testing.T, v string, counts []int) { + nclients := len(counts) + for i := 0; i < nclients; i++ { + lastoff := -1 + for j := 0; j < counts[i]; j++ { + wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y" + off := strings.Index(v, wanted) + if off < 0 { + t.Fatalf("%v missing element %v in Append result %v", i, wanted, v) + } + off1 := strings.LastIndex(v, wanted) + if off1 != off { + t.Fatalf("duplicate element %v in Append result", wanted) + } + if off <= lastoff { + t.Fatalf("wrong order for element %v in Append result", wanted) + } + lastoff = off + } + } +} + +// is ov in nv? +func inHistory(ov, nv string) bool { + return strings.Index(nv, ov) != -1 +} + +func randValue(n int) string { + const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + b := make([]byte, n) + for i := range b { + b[i] = letterBytes[rand.Int63()%int64(len(letterBytes))] + } + return string(b) +} + +// Basic test is as follows: one or more clients submitting Append/Get +// operations to the server for some period of time. 
After the period +// is over, test checks that all appended values are present and in +// order for a particular key. If unreliable is set, RPCs may fail. +func GenericTest(t *testing.T, nclients int, unreliable bool, randomkeys bool) { + const ( + NITER = 3 + TIME = 1 + ) + + title := "Test: " + if unreliable { + // the network drops RPC requests and replies. + title = title + "unreliable net, " + } + if randomkeys { + title = title + "random keys, " + } + if nclients > 1 { + title = title + "many clients" + } else { + title = title + "one client" + } + cfg := make_config(t, unreliable) + defer cfg.cleanup() + + cfg.begin(title) + opLog := &OpLog{} + + ck := cfg.makeClient() + + done_clients := int32(0) + clnts := make([]chan int, nclients) + for i := 0; i < nclients; i++ { + clnts[i] = make(chan int) + } + for i := 0; i < NITER; i++ { + //log.Printf("Iteration %v\n", i) + atomic.StoreInt32(&done_clients, 0) + go spawn_clients_and_wait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) { + j := 0 + defer func() { + clnts[cli] <- j + }() + last := "" // only used when not randomkeys + if !randomkeys { + Put(cfg, myck, strconv.Itoa(cli), last, opLog, cli) + } + for atomic.LoadInt32(&done_clients) == 0 { + var key string + if randomkeys { + key = strconv.Itoa(rand.Intn(nclients)) + } else { + key = strconv.Itoa(cli) + } + nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y" + if (rand.Int() % 1000) < 500 { + //log.Printf("%d: client new append %v\n", cli, nv) + l := Append(cfg, myck, key, nv, opLog, cli) + if !randomkeys { + if j > 0 { + o := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j-1) + " y" + if !inHistory(o, l) { + t.Fatalf("error: old %v not in return\n%v\n", o, l) + } + } + if inHistory(nv, l) { + t.Fatalf("error: new value %v in returned values\n%v\n", nv, l) + } + last = NextValue(last, nv) + } + j++ + } else if randomkeys && (rand.Int()%1000) < 100 { + // we only do this when using random keys, because it would break the + // check 
done after Get() operations + Put(cfg, myck, key, nv, opLog, cli) + j++ + } else { + //log.Printf("%d: client new get %v\n", cli, key) + v := Get(cfg, myck, key, opLog, cli) + // the following check only makes sense when we're not using random keys + if !randomkeys && v != last { + t.Fatalf("get wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v) + } + } + } + }) + + time.Sleep(TIME * time.Second) + + atomic.StoreInt32(&done_clients, 1) // tell clients to quit + + for i := 0; i < nclients; i++ { + j := <-clnts[i] + // if j < 10 { + // log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j) + // } + key := strconv.Itoa(i) + //log.Printf("Check %v for client %d\n", j, i) + v := Get(cfg, ck, key, opLog, 0) + if !randomkeys { + checkClntAppends(t, i, v, j) + } + } + + } + + res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), linearizabilityCheckTimeout) + if res == porcupine.Illegal { + file, err := ioutil.TempFile("", "*.html") + if err != nil { + fmt.Printf("info: failed to create temp file for visualization") + } else { + err = porcupine.Visualize(models.KvModel, info, file) + if err != nil { + fmt.Printf("info: failed to write history visualization to %s\n", file.Name()) + } else { + fmt.Printf("info: wrote history visualization to %s\n", file.Name()) + } + } + t.Fatal("history is not linearizable") + } else if res == porcupine.Unknown { + fmt.Println("info: linearizability check timed out, assuming history is ok") + } + + cfg.end() +} + +// Test one client +func TestBasic2(t *testing.T) { + GenericTest(t, 1, false, false) +} + +// Test many clients +func TestConcurrent2(t *testing.T) { + GenericTest(t, 5, false, false) +} + +// Test: unreliable net, many clients +func TestUnreliable2(t *testing.T) { + GenericTest(t, 5, true, false) +} + +// Test: unreliable net, many clients, one key +func TestUnreliableOneKey2(t *testing.T) { + cfg := make_config(t, true) + defer cfg.cleanup() + + ck := 
cfg.makeClient() + + cfg.begin("Test: concurrent append to same key, unreliable") + + Put(cfg, ck, "k", "", nil, -1) + + const nclient = 5 + const upto = 10 + spawn_clients_and_wait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) { + n := 0 + for n < upto { + nv := "x " + strconv.Itoa(me) + " " + strconv.Itoa(n) + " y" + ov := Append(cfg, myck, "k", nv, nil, -1) + n++ + // log.Printf("%d: append nv %v ov %v\n", me, nv, ov) + if inHistory(nv, ov) { + t.Fatalf("error: nv %v in returned values\n%v\n", nv, ov) + } + } + }) + + var counts []int + for i := 0; i < nclient; i++ { + counts = append(counts, upto) + } + + vx := Get(cfg, ck, "k", nil, -1) + checkConcurrentAppends(t, vx, counts) + + cfg.end() +} + +const ( + MiB = 1 << 20 +) + +func TestMemGet2(t *testing.T) { + const MEM = 10 // in MiB + + cfg := make_config(t, true) + defer cfg.cleanup() + + ck0 := cfg.makeClient() + ck1 := cfg.makeClient() + + cfg.begin("Test: memory use get") + + rdVal := randValue(MiB * MEM) + ck0.Put("k", rdVal) + + if v := ck0.Get("k"); len(v) != len(rdVal) { + t.Fatalf("error: incorrect len %d\n", len(v)) + } + if v := ck1.Get("k"); len(v) != len(rdVal) { + t.Fatalf("error: incorrect len %d\n", len(v)) + } + + ck0.Put("k", "0") + + runtime.GC() + var st runtime.MemStats + + runtime.ReadMemStats(&st) + m := st.HeapAlloc / MiB + if m >= MEM { + t.Fatalf("error: server using too much memory %d\n", m) + } + + cfg.end() +} + +func TestMemPut2(t *testing.T) { + const MEM = 10 // in MiB + + cfg := make_config(t, false) + defer cfg.cleanup() + + cfg.begin("Test: memory use put") + + ck0 := cfg.makeClient() + ck1 := cfg.makeClient() + + rdVal := randValue(MiB * MEM) + ck0.Put("k", rdVal) + ck1.Put("k", "") + + runtime.GC() + + var st runtime.MemStats + runtime.ReadMemStats(&st) + m := st.HeapAlloc / MiB + if m >= MEM { + t.Fatalf("error: server using too much memory %d\n", m) + } + cfg.end() +} + +func TestMemAppend2(t *testing.T) { + const MEM = 10 // in MiB + + cfg := make_config(t, 
false) + defer cfg.cleanup() + + cfg.begin("Test: memory use append") + + ck0 := cfg.makeClient() + ck1 := cfg.makeClient() + + rdVal0 := randValue(MiB * MEM) + ck0.Append("k", rdVal0) + rdVal1 := randValue(MiB * MEM) + ck1.Append("k", rdVal1) + + runtime.GC() + var st runtime.MemStats + runtime.ReadMemStats(&st) + m := st.HeapAlloc / MiB + if m > 3*MEM { + t.Fatalf("error: server using too much memory %d\n", m) + } + cfg.end() +} + +func TestMemPutManyClients(t *testing.T) { + const ( + NCLIENT = 100_000 + MEM = 1000 + ) + + cfg := make_config(t, false) + defer cfg.cleanup() + + v := randValue(MEM) + + cks := make([]*Clerk, NCLIENT) + for i, _ := range cks { + cks[i] = cfg.makeClient() + } + + // allow threads started by labrpc to start + time.Sleep(1 * time.Second) + + cfg.begin("Test: memory use many put clients") + + runtime.GC() + runtime.GC() + + var st runtime.MemStats + runtime.ReadMemStats(&st) + m0 := st.HeapAlloc + + for i := 0; i < NCLIENT; i++ { + cks[i].Put("k", v) + } + + runtime.GC() + time.Sleep(1 * time.Second) + runtime.GC() + + runtime.ReadMemStats(&st) + m1 := st.HeapAlloc + f := (float64(m1) - float64(m0)) / NCLIENT + if m1 > m0+(NCLIENT*200) { + t.Fatalf("error: server using too much memory %d %d (%.2f per client)\n", m0, m1, f) + } + + for _, ck := range cks { + cfg.deleteClient(ck) + } + + cfg.end() +} + +func TestMemGetManyClients(t *testing.T) { + const ( + NCLIENT = 100_000 + ) + + cfg := make_config(t, false) + defer cfg.cleanup() + + cfg.begin("Test: memory use many get client") + + ck := cfg.makeClient() + ck.Put("0", "") + cfg.deleteClient(ck) + + cks := make([]*Clerk, NCLIENT) + for i, _ := range cks { + cks[i] = cfg.makeClient() + } + + // allow threads started by labrpc to start + time.Sleep(1 * time.Second) + + runtime.GC() + runtime.GC() + + var st runtime.MemStats + runtime.ReadMemStats(&st) + m0 := st.HeapAlloc + + for i := 0; i < NCLIENT; i++ { + cks[i].Get("0") + } + + runtime.GC() + + time.Sleep(1 * time.Second) + + 
runtime.GC() + + runtime.ReadMemStats(&st) + m1 := st.HeapAlloc + f := (float64(m1) - float64(m0)) / NCLIENT + if m1 >= m0+NCLIENT*10 { + t.Fatalf("error: server using too much memory m0 %d m1 %d (%.2f per client)\n", m0, m1, f) + } + + for _, ck := range cks { + cfg.deleteClient(ck) + } + + cfg.end() +} + +func TestMemManyAppends(t *testing.T) { + const ( + N = 1000 + MEM = 1000 + ) + + cfg := make_config(t, false) + defer cfg.cleanup() + + cfg.begin("Test: memory use many appends") + + ck := cfg.makeClient() + rdVal := randValue(MEM) + + runtime.GC() + runtime.GC() + + var st runtime.MemStats + runtime.ReadMemStats(&st) + m0 := st.HeapAlloc + + for i := 0; i < N; i++ { + ck.Append("k", rdVal) + } + + runtime.GC() + + time.Sleep(1 * time.Second) + + runtime.GC() + + runtime.ReadMemStats(&st) + m1 := st.HeapAlloc + if m1 >= 3*MEM*N { + t.Fatalf("error: server using too much memory m0 %d m1 %d\n", m0, m1) + } + + log.Printf("m0 %d m1 %d\n", m0, m1) + + cfg.deleteClient(ck) + cfg.end() +} diff --git a/labgob/labgob.go b/labgob/labgob.go new file mode 100644 index 0000000..22cb91a --- /dev/null +++ b/labgob/labgob.go @@ -0,0 +1,177 @@ +package labgob + +// +// trying to send non-capitalized fields over RPC produces a range of +// misbehavior, including both mysterious incorrect computation and +// outright crashes. so this wrapper around Go's encoding/gob warns +// about non-capitalized field names. 
+// + +import "encoding/gob" +import "io" +import "reflect" +import "fmt" +import "sync" +import "unicode" +import "unicode/utf8" + +var mu sync.Mutex +var errorCount int // for TestCapital +var checked map[reflect.Type]bool + +type LabEncoder struct { + gob *gob.Encoder +} + +func NewEncoder(w io.Writer) *LabEncoder { + enc := &LabEncoder{} + enc.gob = gob.NewEncoder(w) + return enc +} + +func (enc *LabEncoder) Encode(e interface{}) error { + checkValue(e) + return enc.gob.Encode(e) +} + +func (enc *LabEncoder) EncodeValue(value reflect.Value) error { + checkValue(value.Interface()) + return enc.gob.EncodeValue(value) +} + +type LabDecoder struct { + gob *gob.Decoder +} + +func NewDecoder(r io.Reader) *LabDecoder { + dec := &LabDecoder{} + dec.gob = gob.NewDecoder(r) + return dec +} + +func (dec *LabDecoder) Decode(e interface{}) error { + checkValue(e) + checkDefault(e) + return dec.gob.Decode(e) +} + +func Register(value interface{}) { + checkValue(value) + gob.Register(value) +} + +func RegisterName(name string, value interface{}) { + checkValue(value) + gob.RegisterName(name, value) +} + +func checkValue(value interface{}) { + checkType(reflect.TypeOf(value)) +} + +func checkType(t reflect.Type) { + k := t.Kind() + + mu.Lock() + // only complain once, and avoid recursion. 
+ if checked == nil { + checked = map[reflect.Type]bool{} + } + if checked[t] { + mu.Unlock() + return + } + checked[t] = true + mu.Unlock() + + switch k { + case reflect.Struct: + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + rune, _ := utf8.DecodeRuneInString(f.Name) + if unicode.IsUpper(rune) == false { + // ta da + fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", + f.Name, t.Name()) + mu.Lock() + errorCount += 1 + mu.Unlock() + } + checkType(f.Type) + } + return + case reflect.Slice, reflect.Array, reflect.Ptr: + checkType(t.Elem()) + return + case reflect.Map: + checkType(t.Elem()) + checkType(t.Key()) + return + default: + return + } +} + +// +// warn if the value contains non-default values, +// as it would if one sent an RPC but the reply +// struct was already modified. if the RPC reply +// contains default values, GOB won't overwrite +// the non-default value. +// +func checkDefault(value interface{}) { + if value == nil { + return + } + checkDefault1(reflect.ValueOf(value), 1, "") +} + +func checkDefault1(value reflect.Value, depth int, name string) { + if depth > 3 { + return + } + + t := value.Type() + k := t.Kind() + + switch k { + case reflect.Struct: + for i := 0; i < t.NumField(); i++ { + vv := value.Field(i) + name1 := t.Field(i).Name + if name != "" { + name1 = name + "." 
+ name1 + } + checkDefault1(vv, depth+1, name1) + } + return + case reflect.Ptr: + if value.IsNil() { + return + } + checkDefault1(value.Elem(), depth+1, name) + return + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Uintptr, reflect.Float32, reflect.Float64, + reflect.String: + if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { + mu.Lock() + if errorCount < 1 { + what := name + if what == "" { + what = t.Name() + } + // this warning typically arises if code re-uses the same RPC reply + // variable for multiple RPC calls, or if code restores persisted + // state into variable that already have non-default values. + fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", + what) + } + errorCount += 1 + mu.Unlock() + } + return + } +} diff --git a/labgob/test_test.go b/labgob/test_test.go new file mode 100644 index 0000000..3bf2b9e --- /dev/null +++ b/labgob/test_test.go @@ -0,0 +1,166 @@ +package labgob + +import "testing" + +import "bytes" + +type T1 struct { + T1int0 int + T1int1 int + T1string0 string + T1string1 string +} + +type T2 struct { + T2slice []T1 + T2map map[int]*T1 + T2t3 interface{} +} + +type T3 struct { + T3int999 int +} + +// test that we didn't break GOB. 
+func TestGOB(t *testing.T) { + e0 := errorCount + + w := new(bytes.Buffer) + + Register(T3{}) + + { + x0 := 0 + x1 := 1 + t1 := T1{} + t1.T1int1 = 1 + t1.T1string1 = "6.5840" + t2 := T2{} + t2.T2slice = []T1{T1{}, t1} + t2.T2map = map[int]*T1{} + t2.T2map[99] = &T1{1, 2, "x", "y"} + t2.T2t3 = T3{999} + + e := NewEncoder(w) + e.Encode(x0) + e.Encode(x1) + e.Encode(t1) + e.Encode(t2) + } + data := w.Bytes() + + { + var x0 int + var x1 int + var t1 T1 + var t2 T2 + + r := bytes.NewBuffer(data) + d := NewDecoder(r) + if d.Decode(&x0) != nil || + d.Decode(&x1) != nil || + d.Decode(&t1) != nil || + d.Decode(&t2) != nil { + t.Fatalf("Decode failed") + } + + if x0 != 0 { + t.Fatalf("wrong x0 %v\n", x0) + } + if x1 != 1 { + t.Fatalf("wrong x1 %v\n", x1) + } + if t1.T1int0 != 0 { + t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) + } + if t1.T1int1 != 1 { + t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) + } + if t1.T1string0 != "" { + t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) + } + if t1.T1string1 != "6.5840" { + t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) + } + if len(t2.T2slice) != 2 { + t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) + } + if t2.T2slice[1].T1int1 != 1 { + t.Fatalf("wrong slice value\n") + } + if len(t2.T2map) != 1 { + t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) + } + if t2.T2map[99].T1string1 != "y" { + t.Fatalf("wrong map value\n") + } + t3 := (t2.T2t3).(T3) + if t3.T3int999 != 999 { + t.Fatalf("wrong t2.T2t3.T3int999\n") + } + } + + if errorCount != e0 { + t.Fatalf("there were errors, but should not have been") + } +} + +type T4 struct { + Yes int + no int +} + +// make sure we check capitalization +// labgob prints one warning during this test. 
+func TestCapital(t *testing.T) { + e0 := errorCount + + v := []map[*T4]int{} + + w := new(bytes.Buffer) + e := NewEncoder(w) + e.Encode(v) + data := w.Bytes() + + var v1 []map[T4]int + r := bytes.NewBuffer(data) + d := NewDecoder(r) + d.Decode(&v1) + + if errorCount != e0+1 { + t.Fatalf("failed to warn about lower-case field") + } +} + +// check that we warn when someone sends a default value over +// RPC but the target into which we're decoding holds a non-default +// value, which GOB seems not to overwrite as you'd expect. +// +// labgob does not print a warning. +func TestDefault(t *testing.T) { + e0 := errorCount + + type DD struct { + X int + } + + // send a default value... + dd1 := DD{} + + w := new(bytes.Buffer) + e := NewEncoder(w) + e.Encode(dd1) + data := w.Bytes() + + // and receive it into memory that already + // holds non-default values. + reply := DD{99} + + r := bytes.NewBuffer(data) + d := NewDecoder(r) + d.Decode(&reply) + + if errorCount != e0+1 { + t.Fatalf("failed to warn about decoding into non-default value") + } +} diff --git a/labrpc/labrpc.go b/labrpc/labrpc.go new file mode 100644 index 0000000..4ac60c2 --- /dev/null +++ b/labrpc/labrpc.go @@ -0,0 +1,523 @@ +package labrpc + +// +// channel-based RPC, for 6.5840 labs. +// +// simulates a network that can lose requests, lose replies, +// delay messages, and entirely disconnect particular hosts. +// +// we will use the original labrpc.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test against the original before submitting. +// +// adapted from Go net/rpc/server.go. +// +// sends labgob-encoded values to ensure that RPCs +// don't include references to program objects. +// +// net := MakeNetwork() -- holds network, clients, servers. +// end := net.MakeEnd(endname) -- create a client end-point, to talk to one server. +// net.AddServer(servername, server) -- adds a named server to network. 
+// net.DeleteServer(servername) -- eliminate the named server. +// net.Connect(endname, servername) -- connect a client to a server. +// net.Enable(endname, enabled) -- enable/disable a client. +// net.Reliable(bool) -- false means drop/delay messages +// +// end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply. +// the "Raft" is the name of the server struct to be called. +// the "AppendEntries" is the name of the method to be called. +// Call() returns true to indicate that the server executed the request +// and the reply is valid. +// Call() returns false if the network lost the request or reply +// or the server is down. +// It is OK to have multiple Call()s in progress at the same time on the +// same ClientEnd. +// Concurrent calls to Call() may be delivered to the server out of order, +// since the network may re-order messages. +// Call() is guaranteed to return (perhaps after a delay) *except* if the +// handler function on the server side does not return. +// the server RPC handler function must declare its args and reply arguments +// as pointers, so that their types exactly match the types of the arguments +// to Call(). +// +// srv := MakeServer() +// srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v +// pass srv to net.AddServer() +// +// svc := MakeService(receiverObject) -- obj's methods will handle RPCs +// much like Go's rpcs.Register() +// pass svc to srv.AddService() +// + +import "6.5840/labgob" +import "bytes" +import "reflect" +import "sync" +import "log" +import "strings" +import "math/rand" +import "time" +import "sync/atomic" + +type reqMsg struct { + endname interface{} // name of sending ClientEnd + svcMeth string // e.g. 
"Raft.AppendEntries" + argsType reflect.Type + args []byte + replyCh chan replyMsg +} + +type replyMsg struct { + ok bool + reply []byte +} + +type ClientEnd struct { + endname interface{} // this end-point's name + ch chan reqMsg // copy of Network.endCh + done chan struct{} // closed when Network is cleaned up +} + +// send an RPC, wait for the reply. +// the return value indicates success; false means that +// no reply was received from the server. +func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool { + req := reqMsg{} + req.endname = e.endname + req.svcMeth = svcMeth + req.argsType = reflect.TypeOf(args) + req.replyCh = make(chan replyMsg) + + qb := new(bytes.Buffer) + qe := labgob.NewEncoder(qb) + if err := qe.Encode(args); err != nil { + panic(err) + } + req.args = qb.Bytes() + + // + // send the request. + // + select { + case e.ch <- req: + // the request has been sent. + case <-e.done: + // entire Network has been destroyed. + return false + } + + // + // wait for the reply. 
+ // + rep := <-req.replyCh + if rep.ok { + rb := bytes.NewBuffer(rep.reply) + rd := labgob.NewDecoder(rb) + if err := rd.Decode(reply); err != nil { + log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err) + } + return true + } else { + return false + } +} + +type Network struct { + mu sync.Mutex + reliable bool + longDelays bool // pause a long time on send on disabled connection + longReordering bool // sometimes delay replies a long time + ends map[interface{}]*ClientEnd // ends, by name + enabled map[interface{}]bool // by end name + servers map[interface{}]*Server // servers, by name + connections map[interface{}]interface{} // endname -> servername + endCh chan reqMsg + done chan struct{} // closed when Network is cleaned up + count int32 // total RPC count, for statistics + bytes int64 // total bytes send, for statistics +} + +func MakeNetwork() *Network { + rn := &Network{} + rn.reliable = true + rn.ends = map[interface{}]*ClientEnd{} + rn.enabled = map[interface{}]bool{} + rn.servers = map[interface{}]*Server{} + rn.connections = map[interface{}](interface{}){} + rn.endCh = make(chan reqMsg) + rn.done = make(chan struct{}) + + // single goroutine to handle all ClientEnd.Call()s + go func() { + for { + select { + case xreq := <-rn.endCh: + atomic.AddInt32(&rn.count, 1) + atomic.AddInt64(&rn.bytes, int64(len(xreq.args))) + go rn.processReq(xreq) + case <-rn.done: + return + } + } + }() + + return rn +} + +func (rn *Network) Cleanup() { + close(rn.done) +} + +func (rn *Network) Reliable(yes bool) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.reliable = yes +} + +func (rn *Network) LongReordering(yes bool) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.longReordering = yes +} + +func (rn *Network) LongDelays(yes bool) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.longDelays = yes +} + +func (rn *Network) readEndnameInfo(endname interface{}) (enabled bool, + servername interface{}, server *Server, reliable bool, longreordering bool, +) { + rn.mu.Lock() + 
defer rn.mu.Unlock() + + enabled = rn.enabled[endname] + servername = rn.connections[endname] + if servername != nil { + server = rn.servers[servername] + } + reliable = rn.reliable + longreordering = rn.longReordering + return +} + +func (rn *Network) isServerDead(endname interface{}, servername interface{}, server *Server) bool { + rn.mu.Lock() + defer rn.mu.Unlock() + + if rn.enabled[endname] == false || rn.servers[servername] != server { + return true + } + return false +} + +func (rn *Network) processReq(req reqMsg) { + enabled, servername, server, reliable, longreordering := rn.readEndnameInfo(req.endname) + + if enabled && servername != nil && server != nil { + if reliable == false { + // short delay + ms := (rand.Int() % 27) + time.Sleep(time.Duration(ms) * time.Millisecond) + } + + if reliable == false && (rand.Int()%1000) < 100 { + // drop the request, return as if timeout + req.replyCh <- replyMsg{false, nil} + return + } + + // execute the request (call the RPC handler). + // in a separate thread so that we can periodically check + // if the server has been killed and the RPC should get a + // failure reply. + ech := make(chan replyMsg) + go func() { + r := server.dispatch(req) + ech <- r + }() + + // wait for handler to return, + // but stop waiting if DeleteServer() has been called, + // and return an error. + var reply replyMsg + replyOK := false + serverDead := false + for replyOK == false && serverDead == false { + select { + case reply = <-ech: + replyOK = true + case <-time.After(100 * time.Millisecond): + serverDead = rn.isServerDead(req.endname, servername, server) + if serverDead { + go func() { + <-ech // drain channel to let the goroutine created earlier terminate + }() + } + } + } + + // do not reply if DeleteServer() has been called, i.e. + // the server has been killed. this is needed to avoid + // situation in which a client gets a positive reply + // to an Append, but the server persisted the update + // into the old Persister. 
config.go is careful to call + // DeleteServer() before superseding the Persister. + serverDead = rn.isServerDead(req.endname, servername, server) + + if replyOK == false || serverDead == true { + // server was killed while we were waiting; return error. + req.replyCh <- replyMsg{false, nil} + } else if reliable == false && (rand.Int()%1000) < 100 { + // drop the reply, return as if timeout + req.replyCh <- replyMsg{false, nil} + } else if longreordering == true && rand.Intn(900) < 600 { + // delay the response for a while + ms := 200 + rand.Intn(1+rand.Intn(2000)) + // Russ points out that this timer arrangement will decrease + // the number of goroutines, so that the race + // detector is less likely to get upset. + time.AfterFunc(time.Duration(ms)*time.Millisecond, func() { + atomic.AddInt64(&rn.bytes, int64(len(reply.reply))) + req.replyCh <- reply + }) + } else { + atomic.AddInt64(&rn.bytes, int64(len(reply.reply))) + req.replyCh <- reply + } + } else { + // simulate no reply and eventual timeout. + ms := 0 + if rn.longDelays { + // let Raft tests check that leader doesn't send + // RPCs synchronously. + ms = (rand.Int() % 7000) + } else { + // many kv tests require the client to try each + // server in fairly rapid succession. + ms = (rand.Int() % 100) + } + time.AfterFunc(time.Duration(ms)*time.Millisecond, func() { + req.replyCh <- replyMsg{false, nil} + }) + } + +} + +// create a client end-point. +// start the thread that listens and delivers. 
+func (rn *Network) MakeEnd(endname interface{}) *ClientEnd { + rn.mu.Lock() + defer rn.mu.Unlock() + + if _, ok := rn.ends[endname]; ok { + log.Fatalf("MakeEnd: %v already exists\n", endname) + } + + e := &ClientEnd{} + e.endname = endname + e.ch = rn.endCh + e.done = rn.done + rn.ends[endname] = e + rn.enabled[endname] = false + rn.connections[endname] = nil + + return e +} + +func (rn *Network) DeleteEnd(endname interface{}) { + rn.mu.Lock() + defer rn.mu.Unlock() + + if _, ok := rn.ends[endname]; !ok { + log.Fatalf("MakeEnd: %v doesn't exists\n", endname) + } + delete(rn.ends, endname) + delete(rn.enabled, endname) + delete(rn.connections, endname) +} + +func (rn *Network) AddServer(servername interface{}, rs *Server) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.servers[servername] = rs +} + +func (rn *Network) DeleteServer(servername interface{}) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.servers[servername] = nil +} + +// connect a ClientEnd to a server. +// a ClientEnd can only be connected once in its lifetime. +func (rn *Network) Connect(endname interface{}, servername interface{}) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.connections[endname] = servername +} + +// enable/disable a ClientEnd. +func (rn *Network) Enable(endname interface{}, enabled bool) { + rn.mu.Lock() + defer rn.mu.Unlock() + + rn.enabled[endname] = enabled +} + +// get a server's count of incoming RPCs. +func (rn *Network) GetCount(servername interface{}) int { + rn.mu.Lock() + defer rn.mu.Unlock() + + svr := rn.servers[servername] + return svr.GetCount() +} + +func (rn *Network) GetTotalCount() int { + x := atomic.LoadInt32(&rn.count) + return int(x) +} + +func (rn *Network) GetTotalBytes() int64 { + x := atomic.LoadInt64(&rn.bytes) + return x +} + +// a server is a collection of services, all sharing +// the same rpc dispatcher. so that e.g. both a Raft +// and a k/v server can listen to the same rpc endpoint. 
+type Server struct { + mu sync.Mutex + services map[string]*Service + count int // incoming RPCs +} + +func MakeServer() *Server { + rs := &Server{} + rs.services = map[string]*Service{} + return rs +} + +func (rs *Server) AddService(svc *Service) { + rs.mu.Lock() + defer rs.mu.Unlock() + rs.services[svc.name] = svc +} + +func (rs *Server) dispatch(req reqMsg) replyMsg { + rs.mu.Lock() + + rs.count += 1 + + // split Raft.AppendEntries into service and method + dot := strings.LastIndex(req.svcMeth, ".") + serviceName := req.svcMeth[:dot] + methodName := req.svcMeth[dot+1:] + + service, ok := rs.services[serviceName] + + rs.mu.Unlock() + + if ok { + return service.dispatch(methodName, req) + } else { + choices := []string{} + for k, _ := range rs.services { + choices = append(choices, k) + } + log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n", + serviceName, serviceName, methodName, choices) + return replyMsg{false, nil} + } +} + +func (rs *Server) GetCount() int { + rs.mu.Lock() + defer rs.mu.Unlock() + return rs.count +} + +// an object with methods that can be called via RPC. +// a single server may have more than one Service. +type Service struct { + name string + rcvr reflect.Value + typ reflect.Type + methods map[string]reflect.Method +} + +func MakeService(rcvr interface{}) *Service { + svc := &Service{} + svc.typ = reflect.TypeOf(rcvr) + svc.rcvr = reflect.ValueOf(rcvr) + svc.name = reflect.Indirect(svc.rcvr).Type().Name() + svc.methods = map[string]reflect.Method{} + + for m := 0; m < svc.typ.NumMethod(); m++ { + method := svc.typ.Method(m) + mtype := method.Type + mname := method.Name + + //fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n", + // mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut()) + + if method.PkgPath != "" || // capitalized? 
+ mtype.NumIn() != 3 || + //mtype.In(1).Kind() != reflect.Ptr || + mtype.In(2).Kind() != reflect.Ptr || + mtype.NumOut() != 0 { + // the method is not suitable for a handler + //fmt.Printf("bad method: %v\n", mname) + } else { + // the method looks like a handler + svc.methods[mname] = method + } + } + + return svc +} + +func (svc *Service) dispatch(methname string, req reqMsg) replyMsg { + if method, ok := svc.methods[methname]; ok { + // prepare space into which to read the argument. + // the Value's type will be a pointer to req.argsType. + args := reflect.New(req.argsType) + + // decode the argument. + ab := bytes.NewBuffer(req.args) + ad := labgob.NewDecoder(ab) + ad.Decode(args.Interface()) + + // allocate space for the reply. + replyType := method.Type.In(2) + replyType = replyType.Elem() + replyv := reflect.New(replyType) + + // call the method. + function := method.Func + function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv}) + + // encode the reply. + rb := new(bytes.Buffer) + re := labgob.NewEncoder(rb) + re.EncodeValue(replyv) + + return replyMsg{true, rb.Bytes()} + } else { + choices := []string{} + for k, _ := range svc.methods { + choices = append(choices, k) + } + log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n", + methname, req.svcMeth, choices) + return replyMsg{false, nil} + } +} diff --git a/labrpc/test_test.go b/labrpc/test_test.go new file mode 100644 index 0000000..1ec3e65 --- /dev/null +++ b/labrpc/test_test.go @@ -0,0 +1,597 @@ +package labrpc + +import "testing" +import "strconv" +import "sync" +import "runtime" +import "time" +import "fmt" + +type JunkArgs struct { + X int +} +type JunkReply struct { + X string +} + +type JunkServer struct { + mu sync.Mutex + log1 []string + log2 []int +} + +func (js *JunkServer) Handler1(args string, reply *int) { + js.mu.Lock() + defer js.mu.Unlock() + js.log1 = append(js.log1, args) + *reply, _ = strconv.Atoi(args) +} + +func (js *JunkServer) Handler2(args 
int, reply *string) { + js.mu.Lock() + defer js.mu.Unlock() + js.log2 = append(js.log2, args) + *reply = "handler2-" + strconv.Itoa(args) +} + +func (js *JunkServer) Handler3(args int, reply *int) { + js.mu.Lock() + defer js.mu.Unlock() + time.Sleep(20 * time.Second) + *reply = -args +} + +// args is a pointer +func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { + reply.X = "pointer" +} + +// args is a not pointer +func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { + reply.X = "no pointer" +} + +func (js *JunkServer) Handler6(args string, reply *int) { + js.mu.Lock() + defer js.mu.Unlock() + *reply = len(args) +} + +func (js *JunkServer) Handler7(args int, reply *string) { + js.mu.Lock() + defer js.mu.Unlock() + *reply = "" + for i := 0; i < args; i++ { + *reply = *reply + "y" + } +} + +func TestBasic(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer("server99", rs) + + rn.Connect("end1-99", "server99") + rn.Enable("end1-99", true) + + { + reply := "" + e.Call("JunkServer.Handler2", 111, &reply) + if reply != "handler2-111" { + t.Fatalf("wrong reply from Handler2") + } + } + + { + reply := 0 + e.Call("JunkServer.Handler1", "9099", &reply) + if reply != 9099 { + t.Fatalf("wrong reply from Handler1") + } + } +} + +func TestTypes(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer("server99", rs) + + rn.Connect("end1-99", "server99") + rn.Enable("end1-99", true) + + { + var args JunkArgs + var reply JunkReply + // args must match type (pointer or not) of handler. 
+ e.Call("JunkServer.Handler4", &args, &reply) + if reply.X != "pointer" { + t.Fatalf("wrong reply from Handler4") + } + } + + { + var args JunkArgs + var reply JunkReply + // args must match type (pointer or not) of handler. + e.Call("JunkServer.Handler5", args, &reply) + if reply.X != "no pointer" { + t.Fatalf("wrong reply from Handler5") + } + } +} + +// +// does net.Enable(endname, false) really disconnect a client? +// +func TestDisconnect(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer("server99", rs) + + rn.Connect("end1-99", "server99") + + { + reply := "" + e.Call("JunkServer.Handler2", 111, &reply) + if reply != "" { + t.Fatalf("unexpected reply from Handler2") + } + } + + rn.Enable("end1-99", true) + + { + reply := 0 + e.Call("JunkServer.Handler1", "9099", &reply) + if reply != 9099 { + t.Fatalf("wrong reply from Handler1") + } + } +} + +// +// test net.GetCount() +// +func TestCounts(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer(99, rs) + + rn.Connect("end1-99", 99) + rn.Enable("end1-99", true) + + for i := 0; i < 17; i++ { + reply := "" + e.Call("JunkServer.Handler2", i, &reply) + wanted := "handler2-" + strconv.Itoa(i) + if reply != wanted { + t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) + } + } + + n := rn.GetCount(99) + if n != 17 { + t.Fatalf("wrong GetCount() %v, expected 17\n", n) + } +} + +// +// test net.GetTotalBytes() +// +func TestBytes(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer(99, rs) + + 
rn.Connect("end1-99", 99) + rn.Enable("end1-99", true) + + for i := 0; i < 17; i++ { + args := "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + args = args + args + args = args + args + reply := 0 + e.Call("JunkServer.Handler6", args, &reply) + wanted := len(args) + if reply != wanted { + t.Fatalf("wrong reply %v from Handler6, expecting %v", reply, wanted) + } + } + + n := rn.GetTotalBytes() + if n < 4828 || n > 6000 { + t.Fatalf("wrong GetTotalBytes() %v, expected about 5000\n", n) + } + + for i := 0; i < 17; i++ { + args := 107 + reply := "" + e.Call("JunkServer.Handler7", args, &reply) + wanted := args + if len(reply) != wanted { + t.Fatalf("wrong reply len=%v from Handler6, expecting %v", len(reply), wanted) + } + } + + nn := rn.GetTotalBytes() - n + if nn < 1800 || nn > 2500 { + t.Fatalf("wrong GetTotalBytes() %v, expected about 2000\n", nn) + } +} + +// +// test RPCs from concurrent ClientEnds +// +func TestConcurrentMany(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer(1000, rs) + + ch := make(chan int) + + nclients := 20 + nrpcs := 10 + for ii := 0; ii < nclients; ii++ { + go func(i int) { + n := 0 + defer func() { ch <- n }() + + e := rn.MakeEnd(i) + rn.Connect(i, 1000) + rn.Enable(i, true) + + for j := 0; j < nrpcs; j++ { + arg := i*100 + j + reply := "" + e.Call("JunkServer.Handler2", arg, &reply) + wanted := "handler2-" + strconv.Itoa(arg) + if reply != wanted { + t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) + } + n += 1 + } + }(ii) + } + + total := 0 + for ii := 0; ii < nclients; ii++ { + x := <-ch + total += x + } + + if total != nclients*nrpcs { + t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) + } + + n := rn.GetCount(1000) + if n != total { + t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) + } +} + +// +// 
test unreliable +// +func TestUnreliable(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + rn.Reliable(false) + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer(1000, rs) + + ch := make(chan int) + + nclients := 300 + for ii := 0; ii < nclients; ii++ { + go func(i int) { + n := 0 + defer func() { ch <- n }() + + e := rn.MakeEnd(i) + rn.Connect(i, 1000) + rn.Enable(i, true) + + arg := i * 100 + reply := "" + ok := e.Call("JunkServer.Handler2", arg, &reply) + if ok { + wanted := "handler2-" + strconv.Itoa(arg) + if reply != wanted { + t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) + } + n += 1 + } + }(ii) + } + + total := 0 + for ii := 0; ii < nclients; ii++ { + x := <-ch + total += x + } + + if total == nclients || total == 0 { + t.Fatalf("all RPCs succeeded despite unreliable") + } +} + +// +// test concurrent RPCs from a single ClientEnd +// +func TestConcurrentOne(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer(1000, rs) + + e := rn.MakeEnd("c") + rn.Connect("c", 1000) + rn.Enable("c", true) + + ch := make(chan int) + + nrpcs := 20 + for ii := 0; ii < nrpcs; ii++ { + go func(i int) { + n := 0 + defer func() { ch <- n }() + + arg := 100 + i + reply := "" + e.Call("JunkServer.Handler2", arg, &reply) + wanted := "handler2-" + strconv.Itoa(arg) + if reply != wanted { + t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) + } + n += 1 + }(ii) + } + + total := 0 + for ii := 0; ii < nrpcs; ii++ { + x := <-ch + total += x + } + + if total != nrpcs { + t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) + } + + js.mu.Lock() + defer js.mu.Unlock() + if len(js.log2) != nrpcs { + t.Fatalf("wrong number of RPCs delivered") + } + + n := rn.GetCount(1000) + if n != total { + 
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) + } +} + +// +// regression: an RPC that's delayed during Enabled=false +// should not delay subsequent RPCs (e.g. after Enabled=true). +// +func TestRegression1(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer(1000, rs) + + e := rn.MakeEnd("c") + rn.Connect("c", 1000) + + // start some RPCs while the ClientEnd is disabled. + // they'll be delayed. + rn.Enable("c", false) + ch := make(chan bool) + nrpcs := 20 + for ii := 0; ii < nrpcs; ii++ { + go func(i int) { + ok := false + defer func() { ch <- ok }() + + arg := 100 + i + reply := "" + // this call ought to return false. + e.Call("JunkServer.Handler2", arg, &reply) + ok = true + }(ii) + } + + time.Sleep(100 * time.Millisecond) + + // now enable the ClientEnd and check that an RPC completes quickly. + t0 := time.Now() + rn.Enable("c", true) + { + arg := 99 + reply := "" + e.Call("JunkServer.Handler2", arg, &reply) + wanted := "handler2-" + strconv.Itoa(arg) + if reply != wanted { + t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) + } + } + dur := time.Since(t0).Seconds() + + if dur > 0.03 { + t.Fatalf("RPC took too long (%v) after Enable", dur) + } + + for ii := 0; ii < nrpcs; ii++ { + <-ch + } + + js.mu.Lock() + defer js.mu.Unlock() + if len(js.log2) != 1 { + t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) + } + + n := rn.GetCount(1000) + if n != 1 { + t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) + } +} + +// +// if an RPC is stuck in a server, and the server +// is killed with DeleteServer(), does the RPC +// get un-stuck? 
+// +func TestKilled(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer("server99", rs) + + rn.Connect("end1-99", "server99") + rn.Enable("end1-99", true) + + doneCh := make(chan bool) + go func() { + reply := 0 + ok := e.Call("JunkServer.Handler3", 99, &reply) + doneCh <- ok + }() + + time.Sleep(1000 * time.Millisecond) + + select { + case <-doneCh: + t.Fatalf("Handler3 should not have returned yet") + case <-time.After(100 * time.Millisecond): + } + + rn.DeleteServer("server99") + + select { + case x := <-doneCh: + if x != false { + t.Fatalf("Handler3 returned successfully despite DeleteServer()") + } + case <-time.After(100 * time.Millisecond): + t.Fatalf("Handler3 should return after DeleteServer()") + } +} + +func TestBenchmark(t *testing.T) { + runtime.GOMAXPROCS(4) + + rn := MakeNetwork() + defer rn.Cleanup() + + e := rn.MakeEnd("end1-99") + + js := &JunkServer{} + svc := MakeService(js) + + rs := MakeServer() + rs.AddService(svc) + rn.AddServer("server99", rs) + + rn.Connect("end1-99", "server99") + rn.Enable("end1-99", true) + + t0 := time.Now() + n := 100000 + for iters := 0; iters < n; iters++ { + reply := "" + e.Call("JunkServer.Handler2", 111, &reply) + if reply != "handler2-111" { + t.Fatalf("wrong reply from Handler2") + } + } + fmt.Printf("%v for %v\n", time.Since(t0), n) + // march 2016, rtm laptop, 22 microseconds per RPC +} diff --git a/main/diskvd.go b/main/diskvd.go new file mode 100644 index 0000000..606f88c --- /dev/null +++ b/main/diskvd.go @@ -0,0 +1,74 @@ +package main + +// +// start a diskvd server. it's a member of some replica +// group, which has other members, and it needs to know +// how to talk to the members of the shardmaster service. +// used by ../diskv/test_test.go +// +// arguments: +// -g groupid +// -m masterport1 -m masterport2 ... 
+// -s replicaport1 -s replicaport2 ... +// -i my-index-in-server-port-list +// -u unreliable +// -d directory +// -r restart + +import "time" +import "6.5840/diskv" +import "os" +import "fmt" +import "strconv" +import "runtime" + +func usage() { + fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n") + os.Exit(1) +} + +func main() { + var gid int64 = -1 // my replica group ID + masters := []string{} // ports of shardmasters + replicas := []string{} // ports of servers in my replica group + me := -1 // my index in replicas[] + unreliable := false + dir := "" // store persistent data here + restart := false + + for i := 1; i+1 < len(os.Args); i += 2 { + a0 := os.Args[i] + a1 := os.Args[i+1] + if a0 == "-g" { + gid, _ = strconv.ParseInt(a1, 10, 64) + } else if a0 == "-m" { + masters = append(masters, a1) + } else if a0 == "-s" { + replicas = append(replicas, a1) + } else if a0 == "-i" { + me, _ = strconv.Atoi(a1) + } else if a0 == "-u" { + unreliable, _ = strconv.ParseBool(a1) + } else if a0 == "-d" { + dir = a1 + } else if a0 == "-r" { + restart, _ = strconv.ParseBool(a1) + } else { + usage() + } + } + + if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { + usage() + } + + runtime.GOMAXPROCS(4) + + srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) + srv.Setunreliable(unreliable) + + // for safety, force quit after 10 minutes. 
+ time.Sleep(10 * 60 * time.Second) + mep, _ := os.FindProcess(os.Getpid()) + mep.Kill() +} diff --git a/main/lockc.go b/main/lockc.go new file mode 100644 index 0000000..b69c9e5 --- /dev/null +++ b/main/lockc.go @@ -0,0 +1,31 @@ +package main + +// +// see comments in lockd.go +// + +import "6.5840/lockservice" +import "os" +import "fmt" + +func usage() { + fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") + os.Exit(1) +} + +func main() { + if len(os.Args) == 5 { + ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) + var ok bool + if os.Args[1] == "-l" { + ok = ck.Lock(os.Args[4]) + } else if os.Args[1] == "-u" { + ok = ck.Unlock(os.Args[4]) + } else { + usage() + } + fmt.Printf("reply: %v\n", ok) + } else { + usage() + } +} diff --git a/main/lockd.go b/main/lockd.go new file mode 100644 index 0000000..bdae2bf --- /dev/null +++ b/main/lockd.go @@ -0,0 +1,31 @@ +package main + +// export GOPATH=~/6.5840 +// go build lockd.go +// go build lockc.go +// ./lockd -p a b & +// ./lockd -b a b & +// ./lockc -l a b lx +// ./lockc -u a b lx +// +// on Athena, use /tmp/myname-a and /tmp/myname-b +// instead of a and b. + +import "time" +import "6.5840/lockservice" +import "os" +import "fmt" + +func main() { + if len(os.Args) == 4 && os.Args[1] == "-p" { + lockservice.StartServer(os.Args[2], os.Args[3], true) + } else if len(os.Args) == 4 && os.Args[1] == "-b" { + lockservice.StartServer(os.Args[2], os.Args[3], false) + } else { + fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") + os.Exit(1) + } + for { + time.Sleep(100 * time.Second) + } +} diff --git a/main/mrcoordinator.go b/main/mrcoordinator.go new file mode 100644 index 0000000..be0a0fd --- /dev/null +++ b/main/mrcoordinator.go @@ -0,0 +1,29 @@ +package main + +// +// start the coordinator process, which is implemented +// in ../mr/coordinator.go +// +// go run mrcoordinator.go pg*.txt +// +// Please do not change this file. 
+// + +import "6.5840/mr" +import "time" +import "os" +import "fmt" + +func main() { + if len(os.Args) < 2 { + fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n") + os.Exit(1) + } + + m := mr.MakeCoordinator(os.Args[1:], 10) + for m.Done() == false { + time.Sleep(time.Second) + } + + time.Sleep(time.Second) +} diff --git a/main/mrsequential.go b/main/mrsequential.go new file mode 100644 index 0000000..38de498 --- /dev/null +++ b/main/mrsequential.go @@ -0,0 +1,108 @@ +package main + +// +// simple sequential MapReduce. +// +// go run mrsequential.go wc.so pg*.txt +// + +import "fmt" +import "6.5840/mr" +import "plugin" +import "os" +import "log" +import "io/ioutil" +import "sort" + +// for sorting by key. +type ByKey []mr.KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +func main() { + if len(os.Args) < 3 { + fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n") + os.Exit(1) + } + + mapf, reducef := loadPlugin(os.Args[1]) + + // + // read each input file, + // pass it to Map, + // accumulate the intermediate Map output. + // + intermediate := []mr.KeyValue{} + for _, filename := range os.Args[2:] { + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + content, err := ioutil.ReadAll(file) + if err != nil { + log.Fatalf("cannot read %v", filename) + } + file.Close() + kva := mapf(filename, string(content)) + intermediate = append(intermediate, kva...) + } + + // + // a big difference from real MapReduce is that all the + // intermediate data is in one place, intermediate[], + // rather than being partitioned into NxM buckets. + // + + sort.Sort(ByKey(intermediate)) + + oname := "mr-out-0" + ofile, _ := os.Create(oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. 
+ // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. + fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + ofile.Close() +} + +// load the application Map and Reduce functions +// from a plugin file, e.g. ../mrapps/wc.so +func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { + p, err := plugin.Open(filename) + if err != nil { + log.Fatalf("cannot load plugin %v", filename) + } + xmapf, err := p.Lookup("Map") + if err != nil { + log.Fatalf("cannot find Map in %v", filename) + } + mapf := xmapf.(func(string, string) []mr.KeyValue) + xreducef, err := p.Lookup("Reduce") + if err != nil { + log.Fatalf("cannot find Reduce in %v", filename) + } + reducef := xreducef.(func(string, []string) string) + + return mapf, reducef +} diff --git a/main/mrworker.go b/main/mrworker.go new file mode 100644 index 0000000..3e43139 --- /dev/null +++ b/main/mrworker.go @@ -0,0 +1,49 @@ +package main + +// +// start a worker process, which is implemented +// in ../mr/worker.go. typically there will be +// multiple worker processes, talking to one coordinator. +// +// go run mrworker.go wc.so +// +// Please do not change this file. +// + +import "6.5840/mr" +import "plugin" +import "os" +import "fmt" +import "log" + +func main() { + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n") + os.Exit(1) + } + + mapf, reducef := loadPlugin(os.Args[1]) + + mr.Worker(mapf, reducef) +} + +// load the application Map and Reduce functions +// from a plugin file, e.g. 
../mrapps/wc.so +func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { + p, err := plugin.Open(filename) + if err != nil { + log.Fatalf("cannot load plugin %v", filename) + } + xmapf, err := p.Lookup("Map") + if err != nil { + log.Fatalf("cannot find Map in %v", filename) + } + mapf := xmapf.(func(string, string) []mr.KeyValue) + xreducef, err := p.Lookup("Reduce") + if err != nil { + log.Fatalf("cannot find Reduce in %v", filename) + } + reducef := xreducef.(func(string, []string) string) + + return mapf, reducef +} diff --git a/main/pbc.go b/main/pbc.go new file mode 100644 index 0000000..791b4fe --- /dev/null +++ b/main/pbc.go @@ -0,0 +1,44 @@ +package main + +// +// pbservice client application +// +// export GOPATH=~/6.5840 +// go build viewd.go +// go build pbd.go +// go build pbc.go +// ./viewd /tmp/rtm-v & +// ./pbd /tmp/rtm-v /tmp/rtm-1 & +// ./pbd /tmp/rtm-v /tmp/rtm-2 & +// ./pbc /tmp/rtm-v key1 value1 +// ./pbc /tmp/rtm-v key1 +// +// change "rtm" to your user name. +// start the pbd programs in separate windows and kill +// and restart them to exercise fault tolerance. 
+// + +import "6.5840/pbservice" +import "os" +import "fmt" + +func usage() { + fmt.Printf("Usage: pbc viewport key\n") + fmt.Printf(" pbc viewport key value\n") + os.Exit(1) +} + +func main() { + if len(os.Args) == 3 { + // get + ck := pbservice.MakeClerk(os.Args[1], "") + v := ck.Get(os.Args[2]) + fmt.Printf("%v\n", v) + } else if len(os.Args) == 4 { + // put + ck := pbservice.MakeClerk(os.Args[1], "") + ck.Put(os.Args[2], os.Args[3]) + } else { + usage() + } +} diff --git a/main/pbd.go b/main/pbd.go new file mode 100644 index 0000000..6d54e00 --- /dev/null +++ b/main/pbd.go @@ -0,0 +1,23 @@ +package main + +// +// see directions in pbc.go +// + +import "time" +import "6.5840/pbservice" +import "os" +import "fmt" + +func main() { + if len(os.Args) != 3 { + fmt.Printf("Usage: pbd viewport myport\n") + os.Exit(1) + } + + pbservice.StartServer(os.Args[1], os.Args[2]) + + for { + time.Sleep(100 * time.Second) + } +} diff --git a/main/test-mr-many.sh b/main/test-mr-many.sh new file mode 100644 index 0000000..c31155d --- /dev/null +++ b/main/test-mr-many.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +if [ $# -ne 1 ]; then + echo "Usage: $0 numTrials" + exit 1 +fi + +trap 'kill -INT -$pid; exit 1' INT + +# Note: because the socketID is based on the current userID, +# ./test-mr.sh cannot be run in parallel +runs=$1 +chmod +x test-mr.sh + +for i in $(seq 1 $runs); do + timeout -k 2s 900s ./test-mr.sh & + pid=$! + if ! wait $pid; then + echo '***' FAILED TESTS IN TRIAL $i + exit 1 + fi +done +echo '***' PASSED ALL $i TESTING TRIALS diff --git a/main/test-mr.sh b/main/test-mr.sh new file mode 100644 index 0000000..210019f --- /dev/null +++ b/main/test-mr.sh @@ -0,0 +1,338 @@ +#!/usr/bin/env bash + +# +# map-reduce tests +# + +# un-comment this to run the tests with the Go race detector. +# RACE=-race + +if [[ "$OSTYPE" = "darwin"* ]] +then + if go version | grep 'go1.17.[012345]' + then + # -race with plug-ins on x86 MacOS 12 with + # go1.17 before 1.17.6 sometimes crash. 
+ RACE= + echo '*** Turning off -race since it may not work on a Mac' + echo ' with ' `go version` + fi +fi + +ISQUIET=$1 +maybe_quiet() { + if [ "$ISQUIET" == "quiet" ]; then + "$@" > /dev/null 2>&1 + else + "$@" + fi +} + + +TIMEOUT=timeout +TIMEOUT2="" +if timeout 2s sleep 1 > /dev/null 2>&1 +then + : +else + if gtimeout 2s sleep 1 > /dev/null 2>&1 + then + TIMEOUT=gtimeout + else + # no timeout command + TIMEOUT= + echo '*** Cannot find timeout command; proceeding without timeouts.' + fi +fi +if [ "$TIMEOUT" != "" ] +then + TIMEOUT2=$TIMEOUT + TIMEOUT2+=" -k 2s 120s " + TIMEOUT+=" -k 2s 45s " +fi + +# run the test in a fresh sub-directory. +rm -rf mr-tmp +mkdir mr-tmp || exit 1 +cd mr-tmp || exit 1 +rm -f mr-* + +# make sure software is freshly built. +(cd ../../mrapps && go clean) +(cd .. && go clean) +(cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin jobcount.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin early_exit.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1 +(cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1 +(cd .. && go build $RACE mrcoordinator.go) || exit 1 +(cd .. && go build $RACE mrworker.go) || exit 1 +(cd .. && go build $RACE mrsequential.go) || exit 1 + +failed_any=0 + +######################################################### +# first word-count + +# generate the correct output +../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1 +sort mr-out-0 > mr-correct-wc.txt +rm -f mr-out* + +echo '***' Starting wc test. + +maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt & +pid=$! + +# give the coordinator time to create the sockets. +sleep 1 + +# start multiple workers. 
+(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/wc.so) & +(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/wc.so) & +(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/wc.so) & + +# wait for the coordinator to exit. +wait $pid + +# since workers are required to exit when a job is completely finished, +# and not before, that means the job has finished. +sort mr-out* | grep . > mr-wc-all +if cmp mr-wc-all mr-correct-wc.txt +then + echo '---' wc test: PASS +else + echo '---' wc output is not the same as mr-correct-wc.txt + echo '---' wc test: FAIL + failed_any=1 +fi + +# wait for remaining workers and coordinator to exit. +wait + +######################################################### +# now indexer +rm -f mr-* + +# generate the correct output +../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1 +sort mr-out-0 > mr-correct-indexer.txt +rm -f mr-out* + +echo '***' Starting indexer test. + +maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt & +sleep 1 + +# start multiple workers +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/indexer.so & +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/indexer.so + +sort mr-out* | grep . > mr-indexer-all +if cmp mr-indexer-all mr-correct-indexer.txt +then + echo '---' indexer test: PASS +else + echo '---' indexer output is not the same as mr-correct-indexer.txt + echo '---' indexer test: FAIL + failed_any=1 +fi + +wait + +######################################################### +echo '***' Starting map parallelism test. 
+ +rm -f mr-* + +maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt & +sleep 1 + +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/mtiming.so & +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/mtiming.so + +NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'` +if [ "$NT" != "2" ] +then + echo '---' saw "$NT" workers rather than 2 + echo '---' map parallelism test: FAIL + failed_any=1 +fi + +if cat mr-out* | grep '^parallel.* 2' > /dev/null +then + echo '---' map parallelism test: PASS +else + echo '---' map workers did not run in parallel + echo '---' map parallelism test: FAIL + failed_any=1 +fi + +wait + + +######################################################### +echo '***' Starting reduce parallelism test. + +rm -f mr-* + +maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt & +sleep 1 + +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/rtiming.so & +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/rtiming.so + +NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'` +if [ "$NT" -lt "2" ] +then + echo '---' too few parallel reduces. + echo '---' reduce parallelism test: FAIL + failed_any=1 +else + echo '---' reduce parallelism test: PASS +fi + +wait + +######################################################### +echo '***' Starting job count test. 
+ +rm -f mr-* + +maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt & +sleep 1 + +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so & +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so & +maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so + +NT=`cat mr-out* | awk '{print $2}'` +if [ "$NT" -eq "8" ] +then + echo '---' job count test: PASS +else + echo '---' map jobs ran incorrect number of times "($NT != 8)" + echo '---' job count test: FAIL + failed_any=1 +fi + +wait + +######################################################### +# test whether any worker or coordinator exits before the +# task has completed (i.e., all output files have been finalized) +rm -f mr-* + +echo '***' Starting early exit test. + +DF=anydone$$ +rm -f $DF + +(maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt; touch $DF) & + +# give the coordinator time to create the sockets. +sleep 1 + +# start multiple workers. +(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/early_exit.so; touch $DF) & +(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/early_exit.so; touch $DF) & +(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/early_exit.so; touch $DF) & + +# wait for any of the coord or workers to exit. +# `jobs` ensures that any completed old processes from other tests +# are not waited upon. +jobs &> /dev/null +if [[ "$OSTYPE" = "darwin"* ]] +then + # bash on the Mac doesn't have wait -n + while [ ! -e $DF ] + do + sleep 0.2 + done +else + # the -n causes wait to wait for just one child process, + # rather than waiting for all to finish. + wait -n +fi + +rm -f $DF + +# a process has exited. this means that the output should be finalized +# otherwise, either a worker or the coordinator exited early +sort mr-out* | grep . > mr-wc-all-initial + +# wait for remaining workers and coordinator to exit. +wait + +# compare initial and final outputs +sort mr-out* | grep . 
> mr-wc-all-final +if cmp mr-wc-all-final mr-wc-all-initial +then + echo '---' early exit test: PASS +else + echo '---' output changed after first worker exited + echo '---' early exit test: FAIL + failed_any=1 +fi +rm -f mr-* + +######################################################### +echo '***' Starting crash test. + +# generate the correct output +../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1 +sort mr-out-0 > mr-correct-crash.txt +rm -f mr-out* + +rm -f mr-done +((maybe_quiet $TIMEOUT2 ../mrcoordinator ../pg*txt); touch mr-done ) & +sleep 1 + +# start multiple workers +maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so & + +# mimic rpc.go's coordinatorSock() +SOCKNAME=/var/tmp/5840-mr-`id -u` + +( while [ -e $SOCKNAME -a ! -f mr-done ] + do + maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so + sleep 1 + done ) & + +( while [ -e $SOCKNAME -a ! -f mr-done ] + do + maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so + sleep 1 + done ) & + +while [ -e $SOCKNAME -a ! -f mr-done ] +do + maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so + sleep 1 +done + +wait + +rm $SOCKNAME +sort mr-out* | grep . 
> mr-crash-all +if cmp mr-crash-all mr-correct-crash.txt +then + echo '---' crash test: PASS +else + echo '---' crash output is not the same as mr-correct-crash.txt + echo '---' crash test: FAIL + failed_any=1 +fi + +######################################################### +if [ $failed_any -eq 0 ]; then + echo '***' PASSED ALL TESTS +else + echo '***' FAILED SOME TESTS + exit 1 +fi diff --git a/main/viewd.go b/main/viewd.go new file mode 100644 index 0000000..2ca2bd4 --- /dev/null +++ b/main/viewd.go @@ -0,0 +1,23 @@ +package main + +// +// see directions in pbc.go +// + +import "time" +import "6.5840/viewservice" +import "os" +import "fmt" + +func main() { + if len(os.Args) != 2 { + fmt.Printf("Usage: viewd port\n") + os.Exit(1) + } + + viewservice.StartServer(os.Args[1]) + + for { + time.Sleep(100 * time.Second) + } +} diff --git a/models/kv.go b/models/kv.go new file mode 100644 index 0000000..97a02f7 --- /dev/null +++ b/models/kv.go @@ -0,0 +1,72 @@ +package models + +import "6.5840/porcupine" +import "fmt" +import "sort" + +type KvInput struct { + Op uint8 // 0 => get, 1 => put, 2 => append + Key string + Value string +} + +type KvOutput struct { + Value string +} + +var KvModel = porcupine.Model{ + Partition: func(history []porcupine.Operation) [][]porcupine.Operation { + m := make(map[string][]porcupine.Operation) + for _, v := range history { + key := v.Input.(KvInput).Key + m[key] = append(m[key], v) + } + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + ret := make([][]porcupine.Operation, 0, len(keys)) + for _, k := range keys { + ret = append(ret, m[k]) + } + return ret + }, + Init: func() interface{} { + // note: we are modeling a single key's value here; + // we're partitioning by key, so this is okay + return "" + }, + Step: func(state, input, output interface{}) (bool, interface{}) { + inp := input.(KvInput) + out := output.(KvOutput) + st := state.(string) + if inp.Op == 0 { + // get + 
return out.Value == st, state + } else if inp.Op == 1 { + // put + return true, inp.Value + } else if inp.Op == 2 { + // append + return true, (st + inp.Value) + } else { + // append with return value + return out.Value == st, (st + inp.Value) + } + }, + DescribeOperation: func(input, output interface{}) string { + inp := input.(KvInput) + out := output.(KvOutput) + switch inp.Op { + case 0: + return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value) + case 1: + return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value) + case 2: + return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value) + default: + return "" + } + }, +} diff --git a/mr/coordinator.go b/mr/coordinator.go new file mode 100644 index 0000000..93fb0cd --- /dev/null +++ b/mr/coordinator.go @@ -0,0 +1,170 @@ +package mr + +import ( + "log" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +type Coordinator struct { + // Your definitions here. + files []string // files to be processed + filesStatus map[string]string // status of files (0: not maped, 1: maped) + allMapsDone bool // all map tasks are done + nReduce int // number of reduce tasks + reduceTasks []int // reduce task + reduceStatus []string // status of reduce tasks + allReducesDone bool // all reduce tasks are done + mutex sync.Mutex // mutex +} + +// Your code here -- RPC handlers for the worker to call. + +// an example RPC handler. +// +// the RPC argument and reply types are defined in rpc.go. 
+func (c *Coordinator) Example(args *ExampleArgs, reply *ExampleReply) error {
+	reply.Y = args.X + 1
+	return nil
+}
+
+// taskTimeout is how long an assigned task may stay unfinished before
+// checkWorkersStatus puts it back on the queue for another worker.
+const taskTimeout = 10 * time.Second
+
+// taskExpired reports whether a status string holding the assignment time
+// (as written by Assign) is older than taskTimeout. A status that does not
+// parse as a timestamp counts as expired so the task cannot get stuck.
+func taskExpired(status string) bool {
+	started, err := time.Parse(time.RFC3339Nano, status)
+	if err != nil {
+		return true
+	}
+	return time.Since(started) > taskTimeout
+}
+
+// Assign is the RPC handler a worker calls to ask for work.
+//
+// reply.TaskType is 0 for a map task (reply.File is the input file and
+// reply.NReduce the number of reduce buckets), 1 for a reduce task
+// (reply.NReduce is the index of the reduce task), and 2 when there is
+// nothing to hand out right now.
+func (c *Coordinator) Assign(args *AssignArgs, reply *AssignReply) error {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	// Map tasks first. A task that timed out is queued again by
+	// checkWorkersStatus, but the original (slow) worker may still report
+	// it finished afterwards; skip such entries instead of re-running a
+	// map whose output reducers may already be reading.
+	for len(c.files) > 0 {
+		file := c.files[0]
+		c.files = c.files[1:]
+		if c.filesStatus[file] == "DONE" {
+			continue
+		}
+		reply.TaskType = 0
+		reply.NReduce = c.nReduce
+		reply.File = file
+		c.filesStatus[file] = time.Now().Format(time.RFC3339Nano)
+		return nil
+	}
+
+	// Reduce tasks only start once every map task has completed.
+	if c.allMapsDone {
+		for len(c.reduceTasks) > 0 {
+			index := c.reduceTasks[0]
+			c.reduceTasks = c.reduceTasks[1:]
+			if c.reduceStatus[index] == "DONE" {
+				continue
+			}
+			reply.TaskType = 1
+			reply.NReduce = index
+			c.reduceStatus[index] = time.Now().Format(time.RFC3339Nano)
+			return nil
+		}
+	}
+
+	reply.TaskType = 2
+	return nil
+}
+
+// FinishMap is the RPC handler a worker calls after completing the map task
+// for args.File. It marks the file as done and recomputes allMapsDone.
+func (c *Coordinator) FinishMap(args *FinishMapArgs, reply *FinishMapReply) error {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	// Only files registered by MakeCoordinator are valid task names; do not
+	// let a bogus report add entries to the status map.
+	if _, known := c.filesStatus[args.File]; !known {
+		log.Printf("FinishMap: ignoring unknown file %q", args.File)
+		return nil
+	}
+	c.filesStatus[args.File] = "DONE"
+
+	for _, status := range c.filesStatus {
+		if status != "DONE" {
+			c.allMapsDone = false
+			return nil
+		}
+	}
+	c.allMapsDone = true
+	return nil
+}
+
+// FinishReduce is the RPC handler a worker calls after completing reduce
+// task args.Index. It marks the task as done and sets allReducesDone once
+// every reduce task has been reported.
+func (c *Coordinator) FinishReduce(args *FinishReduceArgs, reply *FinishReduceReply) error {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	// The index arrives over RPC; an out-of-range value would otherwise
+	// panic inside the handler.
+	if args.Index < 0 || args.Index >= len(c.reduceStatus) {
+		log.Printf("FinishReduce: ignoring unknown reduce task %d", args.Index)
+		return nil
+	}
+	c.reduceStatus[args.Index] = "DONE"
+
+	for _, status := range c.reduceStatus {
+		if status != "DONE" {
+			return nil
+		}
+	}
+	c.allReducesDone = true
+	return nil
+}
+
+// start a thread that listens for RPCs from worker.go
+func (c *Coordinator) server() {
+	rpc.Register(c)
+	rpc.HandleHTTP()
+	//l, e := net.Listen("tcp", ":1234")
+	sockname := coordinatorSock()
+	// remove a stale socket left by a previous run; a failure here shows up
+	// as a listen error below.
+	os.Remove(sockname)
+	l, e := net.Listen("unix", sockname)
+	if e != nil {
+		log.Fatal("listen error:", e)
+	}
+	go http.Serve(l, nil)
+}
+
+// main/mrcoordinator.go calls Done() periodically to find out
+// if the entire job has finished.
+func (c *Coordinator) Done() bool {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+	return c.allReducesDone
+}
+
+// checkWorkersStatus runs forever in its own goroutine. Once a second it
+// looks for tasks that were assigned more than taskTimeout ago and have not
+// been reported done, resets them to "TODO" and queues them again.
+func (c *Coordinator) checkWorkersStatus() {
+	for {
+		time.Sleep(1 * time.Second)
+		c.mutex.Lock()
+		if !c.allMapsDone {
+			for file, status := range c.filesStatus {
+				if status != "DONE" && status != "TODO" && taskExpired(status) {
+					c.filesStatus[file] = "TODO"
+					c.files = append(c.files, file)
+				}
+			}
+		}
+		if c.allMapsDone && !c.allReducesDone {
+			for index, status := range c.reduceStatus {
+				if status != "DONE" && status != "TODO" && taskExpired(status) {
+					c.reduceStatus[index] = "TODO"
+					c.reduceTasks = append(c.reduceTasks, index)
+				}
+			}
+		}
+		c.mutex.Unlock()
+	}
+}
+
+// create a Coordinator.
+// main/mrcoordinator.go calls this function.
+// nReduce is the number of reduce tasks to use.
+func MakeCoordinator(files []string, nReduce int) *Coordinator {
+	c := &Coordinator{
+		// copy the slice: the queue is re-sliced and appended to later and
+		// must not share a backing array with the caller's arguments.
+		files:       append([]string(nil), files...),
+		filesStatus: make(map[string]string, len(files)),
+		// with no input files no FinishMap call will ever arrive, so the
+		// map phase is complete from the start.
+		allMapsDone:  len(files) == 0,
+		nReduce:      nReduce,
+		reduceTasks:  make([]int, nReduce),
+		reduceStatus: make([]string, nReduce),
+	}
+	for _, file := range files {
+		c.filesStatus[file] = "TODO"
+	}
+	for i := 0; i < nReduce; i++ {
+		c.reduceTasks[i] = i
+		c.reduceStatus[i] = "TODO"
+	}
+	go c.checkWorkersStatus()
+	c.server()
+	return c
+}
diff --git a/mr/rpc.go b/mr/rpc.go
new file mode 100644
index 0000000..53003f6
--- /dev/null
+++ b/mr/rpc.go
@@ -0,0 +1,62 @@
+package mr
+
+//
+// RPC definitions.
+//
+// remember to capitalize all names.
+//
+
+import (
+	"os"
+	"strconv"
+)
+
+//
+// example to show how to declare the arguments
+// and reply for an RPC.
+//
+
+type ExampleArgs struct {
+	X int
+}
+
+type ExampleReply struct {
+	Y int
+}
+
+// Add your RPC definitions here.
+
+type AssignArgs struct {
+}
+
+type AssignReply struct {
+	TaskType int // 0: map, 1: reduce, 2: done
+	// NReduce has two different meanings:
+	// for a map task it is the number of reduce tasks,
+	// for a reduce task it is the index of the reduce task.
+	NReduce int
+	File    string
+}
+
+type FinishMapArgs struct {
+	File string
+}
+
+type FinishMapReply struct {
+}
+
+type FinishReduceArgs struct {
+	Index int
+}
+
+type FinishReduceReply struct{}
+
+// Cook up a unique-ish UNIX-domain socket name
+// in /var/tmp, for the coordinator.
+// Can't use the current directory since
+// Athena AFS doesn't support UNIX-domain sockets.
+func coordinatorSock() string {
+	s := "/var/tmp/5840-mr-"
+	s += strconv.Itoa(os.Getuid())
+	return s
+}
diff --git a/mr/worker.go b/mr/worker.go
new file mode 100644
index 0000000..d0fb464
--- /dev/null
+++ b/mr/worker.go
@@ -0,0 +1,221 @@
+package mr
+
+import (
+	"encoding/json"
+	"fmt"
+	"hash/fnv"
+	"io"
+	"log"
+	"net/rpc"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+)
+
+// Map functions return a slice of KeyValue.
+type KeyValue struct {
+	Key   string
+	Value string
+}
+
+type ByKey []KeyValue
+
+// for sorting by key.
+func (a ByKey) Len() int           { return len(a) }
+func (a ByKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
+
+// use ihash(key) % NReduce to choose the reduce
+// task number for each KeyValue emitted by Map.
+func ihash(key string) int {
+	h := fnv.New32a()
+	h.Write([]byte(key))
+	return int(h.Sum32() & 0x7fffffff)
+}
+
+// main/mrworker.go calls this function.
+//
+// Worker asks the coordinator for a task, runs it, and asks again after a
+// short pause. The loop itself never returns; the process ends when call()
+// can no longer reach the coordinator.
+func Worker(mapf func(string, string) []KeyValue,
+	reducef func(string, []string) string) {
+	for {
+		CallAssign(mapf, reducef)
+		time.Sleep(100 * time.Millisecond)
+	}
+}
+
+// example function to show how to make an RPC call to the coordinator.
+//
+// the RPC argument and reply types are defined in rpc.go.
+func CallExample() {
+
+	// declare an argument structure.
+	args := ExampleArgs{}
+
+	// fill in the argument(s).
+	args.X = 99
+
+	// declare a reply structure.
+	reply := ExampleReply{}
+
+	// send the RPC request, wait for the reply.
+	// the "Coordinator.Example" tells the
+	// receiving server that we'd like to call
+	// the Example() method of struct Coordinator.
+	ok := call("Coordinator.Example", &args, &reply)
+	if ok {
+		// reply.Y should be 100.
+		fmt.Printf("reply.Y %v\n", reply.Y)
+	} else {
+		fmt.Printf("call failed!\n")
+	}
+}
+
+// CallAssign asks the coordinator for one task and runs it.
+// Task type 0 is a map task, 1 is a reduce task; any other value means
+// there is nothing to do right now.
+func CallAssign(mapf func(string, string) []KeyValue,
+	reducef func(string, []string) string) {
+	args := AssignArgs{}
+	reply := AssignReply{}
+	if ok := call("Coordinator.Assign", &args, &reply); !ok {
+		fmt.Printf("call failed!\n")
+		// The zero-valued reply would read as "map task for file \"\"";
+		// do not act on it.
+		return
+	}
+	switch reply.TaskType {
+	case 0:
+		mapTask(mapf, reply.File, reply.NReduce)
+	case 1:
+		ReduceTask(reducef, reply.NReduce)
+	}
+}
+
+// CallFinishMap tells the coordinator that the map task for filename is done.
+func CallFinishMap(filename string) {
+	args := FinishMapArgs{filename}
+	reply := FinishMapReply{}
+	ok := call("Coordinator.FinishMap", &args, &reply)
+	if !ok {
+		fmt.Printf("call failed!\n")
+	}
+}
+
+// CallFinishReduce tells the coordinator that reduce task index is done.
+func CallFinishReduce(index int) {
+	args := FinishReduceArgs{index}
+	reply := FinishReduceReply{}
+	ok := call("Coordinator.FinishReduce", &args, &reply)
+	if !ok {
+		fmt.Printf("call failed!\n")
+	}
+}
+
+// mapTask runs mapf over the file at path and partitions the resulting
+// key/value pairs into nReduce intermediate files named
+// mr-<reduce index>-<input base name without extension>.json in the current
+// directory, then reports completion to the coordinator.
+//
+// Each bucket is written to a temporary file and renamed into place only
+// after it is complete, so a reducer never reads a half-written file left
+// by a crashed or still-running worker.
+func mapTask(mapf func(string, string) []KeyValue, path string, nReduce int) {
+	file, err := os.Open(path)
+	if err != nil {
+		log.Fatalf("cannot open %v", path)
+	}
+	content, err := io.ReadAll(file)
+	file.Close()
+	if err != nil {
+		log.Fatalf("cannot read %v", path)
+	}
+	kva := mapf(path, string(content))
+
+	stem := filepath.Base(path)
+	stem = strings.TrimSuffix(stem, filepath.Ext(stem))
+
+	// One temp file and one encoder per reduce bucket. The "tmp-" prefix
+	// keeps these files out of the "mr-<n>-*" glob used by ReduceTask.
+	tmps := make([]*os.File, nReduce)
+	encoders := make([]*json.Encoder, nReduce)
+	for i := range tmps {
+		tmp, err := os.CreateTemp(".", "tmp-map-*")
+		if err != nil {
+			log.Fatalf("cannot create temp file for %v: %v", path, err)
+		}
+		tmps[i] = tmp
+		encoders[i] = json.NewEncoder(tmp)
+	}
+	for _, kv := range kva {
+		i := ihash(kv.Key) % nReduce
+		if err := encoders[i].Encode(&kv); err != nil {
+			log.Fatalf("cannot encode %v: %v", kv, err)
+		}
+	}
+	for i, tmp := range tmps {
+		oname := fmt.Sprintf("mr-%v-%v.json", i, stem)
+		if err := tmp.Close(); err != nil {
+			log.Fatalf("cannot close %v: %v", tmp.Name(), err)
+		}
+		if err := os.Rename(tmp.Name(), oname); err != nil {
+			log.Fatalf("cannot create %v: %v", oname, err)
+		}
+	}
+	CallFinishMap(path)
+}
+
+// ReduceTask runs reduce task number nReduce: it reads every intermediate
+// file mr-<nReduce>-*.json in the current directory, sorts the pairs by key,
+// calls reducef once per distinct key and writes "key value" lines to
+// mr-out-<nReduce>, then reports completion to the coordinator.
+//
+// The output is written to a temporary file and renamed into place, so
+// mr-out-<nReduce> is either absent or complete.
+func ReduceTask(reducef func(string, []string) string, nReduce int) {
+	target := fmt.Sprintf("mr-%v-*.json", nReduce)
+	filenames, err := filepath.Glob(target)
+	if err != nil {
+		log.Fatalf("cannot glob %v", target)
+	}
+	var intermediate []KeyValue
+	for _, filename := range filenames {
+		file, err := os.Open(filename)
+		if err != nil {
+			log.Fatalf("cannot open %v", filename)
+		}
+		decoder := json.NewDecoder(file)
+		for {
+			var kv KeyValue
+			err := decoder.Decode(&kv)
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				// anything other than end-of-file means corrupt input;
+				// stopping silently would drop data from the result.
+				log.Fatalf("cannot decode %v: %v", filename, err)
+			}
+			intermediate = append(intermediate, kv)
+		}
+		file.Close()
+	}
+
+	// shuffle: bring equal keys next to each other
+	sort.Sort(ByKey(intermediate))
+
+	oname := fmt.Sprintf("mr-out-%v", nReduce)
+	// the "tmp-" prefix keeps unfinished output out of the mr-out* patterns
+	// that read the results.
+	ofile, err := os.CreateTemp(".", "tmp-reduce-*")
+	if err != nil {
+		log.Fatalf("cannot create temp file for %v: %v", oname, err)
+	}
+	for i := 0; i < len(intermediate); {
+		j := i + 1
+		for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
+			j++
+		}
+		values := make([]string, 0, j-i)
+		for k := i; k < j; k++ {
+			values = append(values, intermediate[k].Value)
+		}
+		output := reducef(intermediate[i].Key, values)
+
+		// this is the correct format for each line of Reduce output.
+		fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
+
+		i = j
+	}
+	if err := ofile.Close(); err != nil {
+		log.Fatalf("cannot close %v: %v", ofile.Name(), err)
+	}
+	if err := os.Rename(ofile.Name(), oname); err != nil {
+		log.Fatalf("cannot create %v: %v", oname, err)
+	}
+	CallFinishReduce(nReduce)
+}
+
+// send an RPC request to the coordinator, wait for the response.
+// usually returns true.
+// returns false if something goes wrong.
+func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + // log.Fatal("dialing:", err) + os.Exit(0) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} diff --git a/mrapps/crash.go b/mrapps/crash.go new file mode 100644 index 0000000..5b9bc85 --- /dev/null +++ b/mrapps/crash.go @@ -0,0 +1,55 @@ +package main + +// +// a MapReduce pseudo-application that sometimes crashes, +// and sometimes takes a long time, +// to test MapReduce's ability to recover. +// +// go build -buildmode=plugin crash.go +// + +import "6.5840/mr" +import crand "crypto/rand" +import "math/big" +import "strings" +import "os" +import "sort" +import "strconv" +import "time" + +func maybeCrash() { + max := big.NewInt(1000) + rr, _ := crand.Int(crand.Reader, max) + if rr.Int64() < 330 { + // crash! + os.Exit(1) + } else if rr.Int64() < 660 { + // delay for a while. + maxms := big.NewInt(10 * 1000) + ms, _ := crand.Int(crand.Reader, maxms) + time.Sleep(time.Duration(ms.Int64()) * time.Millisecond) + } +} + +func Map(filename string, contents string) []mr.KeyValue { + maybeCrash() + + kva := []mr.KeyValue{} + kva = append(kva, mr.KeyValue{"a", filename}) + kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) + kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) + kva = append(kva, mr.KeyValue{"d", "xyzzy"}) + return kva +} + +func Reduce(key string, values []string) string { + maybeCrash() + + // sort values to ensure deterministic output. 
+ vv := make([]string, len(values)) + copy(vv, values) + sort.Strings(vv) + + val := strings.Join(vv, " ") + return val +} diff --git a/mrapps/early_exit.go b/mrapps/early_exit.go new file mode 100644 index 0000000..f050234 --- /dev/null +++ b/mrapps/early_exit.go @@ -0,0 +1,36 @@ +package main + +// +// a word-count application "plugin" for MapReduce. +// +// go build -buildmode=plugin wc_long.go +// + +import ( + "strconv" + "strings" + "time" + + "6.5840/mr" +) + +// The map function is called once for each file of input. +// This map function just returns 1 for each file +func Map(filename string, contents string) []mr.KeyValue { + kva := []mr.KeyValue{} + kva = append(kva, mr.KeyValue{filename, "1"}) + return kva +} + +// The reduce function is called once for each key generated by the +// map tasks, with a list of all the values created for that key by +// any map task. +func Reduce(key string, values []string) string { + // some reduce tasks sleep for a long time; potentially seeing if + // a worker will accidentally exit early + if strings.Contains(key, "sherlock") || strings.Contains(key, "tom") { + time.Sleep(time.Duration(3 * time.Second)) + } + // return the number of occurrences of this file. + return strconv.Itoa(len(values)) +} diff --git a/mrapps/indexer.go b/mrapps/indexer.go new file mode 100644 index 0000000..1fceddf --- /dev/null +++ b/mrapps/indexer.go @@ -0,0 +1,39 @@ +package main + +// +// an indexing application "plugin" for MapReduce. +// +// go build -buildmode=plugin indexer.go +// + +import "fmt" +import "6.5840/mr" + +import "strings" +import "unicode" +import "sort" + +// The mapping function is called once for each piece of the input. +// In this framework, the key is the name of the file that is being processed, +// and the value is the file's contents. The return value should be a slice of +// key/value pairs, each represented by a mr.KeyValue. 
+func Map(document string, value string) (res []mr.KeyValue) { + m := make(map[string]bool) + words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) }) + for _, w := range words { + m[w] = true + } + for w := range m { + kv := mr.KeyValue{w, document} + res = append(res, kv) + } + return +} + +// The reduce function is called once for each key generated by Map, with a +// list of that key's string value (merged across all inputs). The return value +// should be a single output value for that key. +func Reduce(key string, values []string) string { + sort.Strings(values) + return fmt.Sprintf("%d %s", len(values), strings.Join(values, ",")) +} diff --git a/mrapps/jobcount.go b/mrapps/jobcount.go new file mode 100644 index 0000000..3472026 --- /dev/null +++ b/mrapps/jobcount.go @@ -0,0 +1,46 @@ +package main + +// +// a MapReduce pseudo-application that counts the number of times map/reduce +// tasks are run, to test whether jobs are assigned multiple times even when +// there is no failure. 
+// +// go build -buildmode=plugin crash.go +// + +import "6.5840/mr" +import "math/rand" +import "strings" +import "strconv" +import "time" +import "fmt" +import "os" +import "io/ioutil" + +var count int + +func Map(filename string, contents string) []mr.KeyValue { + me := os.Getpid() + f := fmt.Sprintf("mr-worker-jobcount-%d-%d", me, count) + count++ + err := ioutil.WriteFile(f, []byte("x"), 0666) + if err != nil { + panic(err) + } + time.Sleep(time.Duration(2000+rand.Intn(3000)) * time.Millisecond) + return []mr.KeyValue{mr.KeyValue{"a", "x"}} +} + +func Reduce(key string, values []string) string { + files, err := ioutil.ReadDir(".") + if err != nil { + panic(err) + } + invocations := 0 + for _, f := range files { + if strings.HasPrefix(f.Name(), "mr-worker-jobcount") { + invocations++ + } + } + return strconv.Itoa(invocations) +} diff --git a/mrapps/mtiming.go b/mrapps/mtiming.go new file mode 100644 index 0000000..d838510 --- /dev/null +++ b/mrapps/mtiming.go @@ -0,0 +1,91 @@ +package main + +// +// a MapReduce pseudo-application to test that workers +// execute map tasks in parallel. +// +// go build -buildmode=plugin mtiming.go +// + +import "6.5840/mr" +import "strings" +import "fmt" +import "os" +import "syscall" +import "time" +import "sort" +import "io/ioutil" + +func nparallel(phase string) int { + // create a file so that other workers will see that + // we're running at the same time as them. + pid := os.Getpid() + myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) + err := ioutil.WriteFile(myfilename, []byte("x"), 0666) + if err != nil { + panic(err) + } + + // are any other workers running? + // find their PIDs by scanning directory for mr-worker-XXX files. 
+ dd, err := os.Open(".") + if err != nil { + panic(err) + } + names, err := dd.Readdirnames(1000000) + if err != nil { + panic(err) + } + ret := 0 + for _, name := range names { + var xpid int + pat := fmt.Sprintf("mr-worker-%s-%%d", phase) + n, err := fmt.Sscanf(name, pat, &xpid) + if n == 1 && err == nil { + err := syscall.Kill(xpid, 0) + if err == nil { + // if err == nil, xpid is alive. + ret += 1 + } + } + } + dd.Close() + + time.Sleep(1 * time.Second) + + err = os.Remove(myfilename) + if err != nil { + panic(err) + } + + return ret +} + +func Map(filename string, contents string) []mr.KeyValue { + t0 := time.Now() + ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0) + pid := os.Getpid() + + n := nparallel("map") + + kva := []mr.KeyValue{} + kva = append(kva, mr.KeyValue{ + fmt.Sprintf("times-%v", pid), + fmt.Sprintf("%.1f", ts)}) + kva = append(kva, mr.KeyValue{ + fmt.Sprintf("parallel-%v", pid), + fmt.Sprintf("%d", n)}) + return kva +} + +func Reduce(key string, values []string) string { + //n := nparallel("reduce") + + // sort values to ensure deterministic output. + vv := make([]string, len(values)) + copy(vv, values) + sort.Strings(vv) + + val := strings.Join(vv, " ") + return val +} diff --git a/mrapps/nocrash.go b/mrapps/nocrash.go new file mode 100644 index 0000000..11bbb87 --- /dev/null +++ b/mrapps/nocrash.go @@ -0,0 +1,47 @@ +package main + +// +// same as crash.go but doesn't actually crash. +// +// go build -buildmode=plugin nocrash.go +// + +import "6.5840/mr" +import crand "crypto/rand" +import "math/big" +import "strings" +import "os" +import "sort" +import "strconv" + +func maybeCrash() { + max := big.NewInt(1000) + rr, _ := crand.Int(crand.Reader, max) + if false && rr.Int64() < 500 { + // crash! 
+ os.Exit(1) + } +} + +func Map(filename string, contents string) []mr.KeyValue { + maybeCrash() + + kva := []mr.KeyValue{} + kva = append(kva, mr.KeyValue{"a", filename}) + kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) + kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) + kva = append(kva, mr.KeyValue{"d", "xyzzy"}) + return kva +} + +func Reduce(key string, values []string) string { + maybeCrash() + + // sort values to ensure deterministic output. + vv := make([]string, len(values)) + copy(vv, values) + sort.Strings(vv) + + val := strings.Join(vv, " ") + return val +} diff --git a/mrapps/rtiming.go b/mrapps/rtiming.go new file mode 100644 index 0000000..635f5e6 --- /dev/null +++ b/mrapps/rtiming.go @@ -0,0 +1,84 @@ +package main + +// +// a MapReduce pseudo-application to test that workers +// execute reduce tasks in parallel. +// +// go build -buildmode=plugin rtiming.go +// + +import "6.5840/mr" +import "fmt" +import "os" +import "syscall" +import "time" +import "io/ioutil" + +func nparallel(phase string) int { + // create a file so that other workers will see that + // we're running at the same time as them. + pid := os.Getpid() + myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) + err := ioutil.WriteFile(myfilename, []byte("x"), 0666) + if err != nil { + panic(err) + } + + // are any other workers running? + // find their PIDs by scanning directory for mr-worker-XXX files. + dd, err := os.Open(".") + if err != nil { + panic(err) + } + names, err := dd.Readdirnames(1000000) + if err != nil { + panic(err) + } + ret := 0 + for _, name := range names { + var xpid int + pat := fmt.Sprintf("mr-worker-%s-%%d", phase) + n, err := fmt.Sscanf(name, pat, &xpid) + if n == 1 && err == nil { + err := syscall.Kill(xpid, 0) + if err == nil { + // if err == nil, xpid is alive. 
+ ret += 1 + } + } + } + dd.Close() + + time.Sleep(1 * time.Second) + + err = os.Remove(myfilename) + if err != nil { + panic(err) + } + + return ret +} + +func Map(filename string, contents string) []mr.KeyValue { + + kva := []mr.KeyValue{} + kva = append(kva, mr.KeyValue{"a", "1"}) + kva = append(kva, mr.KeyValue{"b", "1"}) + kva = append(kva, mr.KeyValue{"c", "1"}) + kva = append(kva, mr.KeyValue{"d", "1"}) + kva = append(kva, mr.KeyValue{"e", "1"}) + kva = append(kva, mr.KeyValue{"f", "1"}) + kva = append(kva, mr.KeyValue{"g", "1"}) + kva = append(kva, mr.KeyValue{"h", "1"}) + kva = append(kva, mr.KeyValue{"i", "1"}) + kva = append(kva, mr.KeyValue{"j", "1"}) + return kva +} + +func Reduce(key string, values []string) string { + n := nparallel("reduce") + + val := fmt.Sprintf("%d", n) + + return val +} diff --git a/mrapps/wc.go b/mrapps/wc.go new file mode 100644 index 0000000..32ce12e --- /dev/null +++ b/mrapps/wc.go @@ -0,0 +1,40 @@ +package main + +// +// a word-count application "plugin" for MapReduce. +// +// go build -buildmode=plugin wc.go +// + +import "6.5840/mr" +import "unicode" +import "strings" +import "strconv" + +// The map function is called once for each file of input. The first +// argument is the name of the input file, and the second is the +// file's complete contents. You should ignore the input file name, +// and look only at the contents argument. The return value is a slice +// of key/value pairs. +func Map(filename string, contents string) []mr.KeyValue { + // function to detect word separators. + ff := func(r rune) bool { return !unicode.IsLetter(r) } + + // split contents into an array of words. + words := strings.FieldsFunc(contents, ff) + + kva := []mr.KeyValue{} + for _, w := range words { + kv := mr.KeyValue{w, "1"} + kva = append(kva, kv) + } + return kva +} + +// The reduce function is called once for each key generated by the +// map tasks, with a list of all the values created for that key by +// any map task. 
+func Reduce(key string, values []string) string { + // return the number of occurrences of this word. + return strconv.Itoa(len(values)) +} diff --git a/porcupine/bitset.go b/porcupine/bitset.go new file mode 100644 index 0000000..087744e --- /dev/null +++ b/porcupine/bitset.go @@ -0,0 +1,72 @@ +package porcupine + +import "math/bits" + +type bitset []uint64 + +// data layout: +// bits 0-63 are in data[0], the next are in data[1], etc. + +func newBitset(bits uint) bitset { + extra := uint(0) + if bits%64 != 0 { + extra = 1 + } + chunks := bits/64 + extra + return bitset(make([]uint64, chunks)) +} + +func (b bitset) clone() bitset { + dataCopy := make([]uint64, len(b)) + copy(dataCopy, b) + return bitset(dataCopy) +} + +func bitsetIndex(pos uint) (uint, uint) { + return pos / 64, pos % 64 +} + +func (b bitset) set(pos uint) bitset { + major, minor := bitsetIndex(pos) + b[major] |= (1 << minor) + return b +} + +func (b bitset) clear(pos uint) bitset { + major, minor := bitsetIndex(pos) + b[major] &^= (1 << minor) + return b +} + +func (b bitset) get(pos uint) bool { + major, minor := bitsetIndex(pos) + return b[major]&(1<= 0; i-- { + elem := entries[i] + if elem.kind == returnEntry { + entry := &node{value: elem.value, match: nil, id: elem.id} + match[elem.id] = entry + insertBefore(entry, root) + root = entry + } else { + entry := &node{value: elem.value, match: match[elem.id], id: elem.id} + insertBefore(entry, root) + root = entry + } + } + return root +} + +type cacheEntry struct { + linearized bitset + state interface{} +} + +func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { + for _, elem := range cache[entry.linearized.hash()] { + if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { + return true + } + } + return false +} + +type callsEntry struct { + entry *node + state interface{} +} + +func lift(entry *node) { + entry.prev.next = entry.next + entry.next.prev = entry.prev + match := 
entry.match + match.prev.next = match.next + if match.next != nil { + match.next.prev = match.prev + } +} + +func unlift(entry *node) { + match := entry.match + match.prev.next = match + if match.next != nil { + match.next.prev = match + } + entry.prev.next = entry + entry.next.prev = entry +} + +func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) { + entry := makeLinkedEntries(history) + n := length(entry) / 2 + linearized := newBitset(uint(n)) + cache := make(map[uint64][]cacheEntry) // map from hash to cache entry + var calls []callsEntry + // longest linearizable prefix that includes the given entry + longest := make([]*[]int, n) + + state := model.Init() + headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry) + for headEntry.next != nil { + if atomic.LoadInt32(kill) != 0 { + return false, longest + } + if entry.match != nil { + matching := entry.match // the return entry + ok, newState := model.Step(state, entry.value, matching.value) + if ok { + newLinearized := linearized.clone().set(uint(entry.id)) + newCacheEntry := cacheEntry{newLinearized, newState} + if !cacheContains(model, cache, newCacheEntry) { + hash := newLinearized.hash() + cache[hash] = append(cache[hash], newCacheEntry) + calls = append(calls, callsEntry{entry, state}) + state = newState + linearized.set(uint(entry.id)) + lift(entry) + entry = headEntry.next + } else { + entry = entry.next + } + } else { + entry = entry.next + } + } else { + if len(calls) == 0 { + return false, longest + } + // longest + if computePartial { + callsLen := len(calls) + var seq []int = nil + for _, v := range calls { + if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) { + // create seq lazily + if seq == nil { + seq = make([]int, len(calls)) + for i, v := range calls { + seq[i] = v.entry.id + } + } + longest[v.entry.id] = &seq + } + } + } + callsTop := calls[len(calls)-1] + entry = callsTop.entry + state = callsTop.state + 
linearized.clear(uint(entry.id)) + calls = calls[:len(calls)-1] + unlift(entry) + entry = entry.next + } + } + // longest linearization is the complete linearization, which is calls + seq := make([]int, len(calls)) + for i, v := range calls { + seq[i] = v.entry.id + } + for i := 0; i < n; i++ { + longest[i] = &seq + } + return true, longest +} + +func fillDefault(model Model) Model { + if model.Partition == nil { + model.Partition = NoPartition + } + if model.PartitionEvent == nil { + model.PartitionEvent = NoPartitionEvent + } + if model.Equal == nil { + model.Equal = ShallowEqual + } + if model.DescribeOperation == nil { + model.DescribeOperation = DefaultDescribeOperation + } + if model.DescribeState == nil { + model.DescribeState = DefaultDescribeState + } + return model +} + +func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) { + ok := true + timedOut := false + results := make(chan bool, len(history)) + longest := make([][]*[]int, len(history)) + kill := int32(0) + for i, subhistory := range history { + go func(i int, subhistory []entry) { + ok, l := checkSingle(model, subhistory, computeInfo, &kill) + longest[i] = l + results <- ok + }(i, subhistory) + } + var timeoutChan <-chan time.Time + if timeout > 0 { + timeoutChan = time.After(timeout) + } + count := 0 +loop: + for { + select { + case result := <-results: + count++ + ok = ok && result + if !ok && !computeInfo { + atomic.StoreInt32(&kill, 1) + break loop + } + if count >= len(history) { + break loop + } + case <-timeoutChan: + timedOut = true + atomic.StoreInt32(&kill, 1) + break loop // if we time out, we might get a false positive + } + } + var info linearizationInfo + if computeInfo { + // make sure we've waited for all goroutines to finish, + // otherwise we might race on access to longest[] + for count < len(history) { + <-results + count++ + } + // return longest linearizable prefixes that include each history element + 
partialLinearizations := make([][][]int, len(history)) + for i := 0; i < len(history); i++ { + var partials [][]int + // turn longest into a set of unique linearizations + set := make(map[*[]int]struct{}) + for _, v := range longest[i] { + if v != nil { + set[v] = struct{}{} + } + } + for k := range set { + arr := make([]int, len(*k)) + for i, v := range *k { + arr[i] = v + } + partials = append(partials, arr) + } + partialLinearizations[i] = partials + } + info.history = history + info.partialLinearizations = partialLinearizations + } + var result CheckResult + if !ok { + result = Illegal + } else { + if timedOut { + result = Unknown + } else { + result = Ok + } + } + return result, info +} + +func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { + model = fillDefault(model) + partitions := model.PartitionEvent(history) + l := make([][]entry, len(partitions)) + for i, subhistory := range partitions { + l[i] = convertEntries(renumber(subhistory)) + } + return checkParallel(model, l, verbose, timeout) +} + +func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { + model = fillDefault(model) + partitions := model.Partition(history) + l := make([][]entry, len(partitions)) + for i, subhistory := range partitions { + l[i] = makeEntries(subhistory) + } + return checkParallel(model, l, verbose, timeout) +} diff --git a/porcupine/model.go b/porcupine/model.go new file mode 100644 index 0000000..ba3d21c --- /dev/null +++ b/porcupine/model.go @@ -0,0 +1,77 @@ +package porcupine + +import "fmt" + +type Operation struct { + ClientId int // optional, unless you want a visualization; zero-indexed + Input interface{} + Call int64 // invocation time + Output interface{} + Return int64 // response time +} + +type EventKind bool + +const ( + CallEvent EventKind = false + ReturnEvent EventKind = true +) + +type Event struct { + ClientId int // 
optional, unless you want a visualization; zero-indexed + Kind EventKind + Value interface{} + Id int +} + +type Model struct { + // Partition functions, such that a history is linearizable if and only + // if each partition is linearizable. If you don't want to implement + // this, you can always use the `NoPartition` functions implemented + // below. + Partition func(history []Operation) [][]Operation + PartitionEvent func(history []Event) [][]Event + // Initial state of the system. + Init func() interface{} + // Step function for the system. Returns whether or not the system + // could take this step with the given inputs and outputs and also + // returns the new state. This should not mutate the existing state. + Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) + // Equality on states. If you are using a simple data type for states, + // you can use the `ShallowEqual` function implemented below. + Equal func(state1, state2 interface{}) bool + // For visualization, describe an operation as a string. + // For example, "Get('x') -> 'y'". + DescribeOperation func(input interface{}, output interface{}) string + // For visualization purposes, describe a state as a string. 
+ // For example, "{'x' -> 'y', 'z' -> 'w'}" + DescribeState func(state interface{}) string +} + +func NoPartition(history []Operation) [][]Operation { + return [][]Operation{history} +} + +func NoPartitionEvent(history []Event) [][]Event { + return [][]Event{history} +} + +func ShallowEqual(state1, state2 interface{}) bool { + return state1 == state2 +} + +func DefaultDescribeOperation(input interface{}, output interface{}) string { + return fmt.Sprintf("%v -> %v", input, output) +} + +func DefaultDescribeState(state interface{}) string { + return fmt.Sprintf("%v", state) +} + +type CheckResult string + +const ( + Unknown CheckResult = "Unknown" // timed out + Ok = "Ok" + Illegal = "Illegal" +) diff --git a/porcupine/porcupine.go b/porcupine/porcupine.go new file mode 100644 index 0000000..eb3b0f3 --- /dev/null +++ b/porcupine/porcupine.go @@ -0,0 +1,39 @@ +package porcupine + +import "time" + +func CheckOperations(model Model, history []Operation) bool { + res, _ := checkOperations(model, history, false, 0) + return res == Ok +} + +// timeout = 0 means no timeout +// if this operation times out, then a false positive is possible +func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult { + res, _ := checkOperations(model, history, false, timeout) + return res +} + +// timeout = 0 means no timeout +// if this operation times out, then a false positive is possible +func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) { + return checkOperations(model, history, true, timeout) +} + +func CheckEvents(model Model, history []Event) bool { + res, _ := checkEvents(model, history, false, 0) + return res == Ok +} + +// timeout = 0 means no timeout +// if this operation times out, then a false positive is possible +func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult { + res, _ := checkEvents(model, history, false, timeout) + return res 
+} + +// timeout = 0 means no timeout +// if this operation times out, then a false positive is possible +func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) { + return checkEvents(model, history, true, timeout) +} diff --git a/porcupine/visualization.go b/porcupine/visualization.go new file mode 100644 index 0000000..43e3a17 --- /dev/null +++ b/porcupine/visualization.go @@ -0,0 +1,897 @@ +package porcupine + +import ( + "encoding/json" + "fmt" + "io" + "os" + "sort" +) + +type historyElement struct { + ClientId int + Start int64 + End int64 + Description string +} + +type linearizationStep struct { + Index int + StateDescription string +} + +type partialLinearization = []linearizationStep + +type partitionVisualizationData struct { + History []historyElement + PartialLinearizations []partialLinearization + Largest map[int]int +} + +type visualizationData = []partitionVisualizationData + +func computeVisualizationData(model Model, info linearizationInfo) visualizationData { + model = fillDefault(model) + data := make(visualizationData, len(info.history)) + for partition := 0; partition < len(info.history); partition++ { + // history + n := len(info.history[partition]) / 2 + history := make([]historyElement, n) + callValue := make(map[int]interface{}) + returnValue := make(map[int]interface{}) + for _, elem := range info.history[partition] { + switch elem.kind { + case callEntry: + history[elem.id].ClientId = elem.clientId + history[elem.id].Start = elem.time + callValue[elem.id] = elem.value + case returnEntry: + history[elem.id].End = elem.time + history[elem.id].Description = model.DescribeOperation(callValue[elem.id], elem.value) + returnValue[elem.id] = elem.value + } + } + // partial linearizations + largestIndex := make(map[int]int) + largestSize := make(map[int]int) + linearizations := make([]partialLinearization, len(info.partialLinearizations[partition])) + partials := 
info.partialLinearizations[partition] + sort.Slice(partials, func(i, j int) bool { + return len(partials[i]) > len(partials[j]) + }) + for i, partial := range partials { + linearization := make(partialLinearization, len(partial)) + state := model.Init() + for j, histId := range partial { + var ok bool + ok, state = model.Step(state, callValue[histId], returnValue[histId]) + if ok != true { + panic("valid partial linearization returned non-ok result from model step") + } + stateDesc := model.DescribeState(state) + linearization[j] = linearizationStep{histId, stateDesc} + if largestSize[histId] < len(partial) { + largestSize[histId] = len(partial) + largestIndex[histId] = i + } + } + linearizations[i] = linearization + } + data[partition] = partitionVisualizationData{ + History: history, + PartialLinearizations: linearizations, + Largest: largestIndex, + } + } + return data +} + +func Visualize(model Model, info linearizationInfo, output io.Writer) error { + data := computeVisualizationData(model, info) + jsonData, err := json.Marshal(data) + if err != nil { + return err + } + _, err = fmt.Fprintf(output, html, jsonData) + if err != nil { + return err + } + return nil +} + +func VisualizePath(model Model, info linearizationInfo, path string) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + return Visualize(model, info, f) +} + +const html = ` + + + Porcupine + + + +
+ + Clients + + Time + + + + Valid LP + + Invalid LP + [ jump to first error ] + +
+
+
+
+
+ + + +` diff --git a/raft/config.go b/raft/config.go new file mode 100644 index 0000000..c23280c --- /dev/null +++ b/raft/config.go @@ -0,0 +1,648 @@ +package raft + +// +// support for Raft tester. +// +// we will use the original config.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. +// + +import "6.5840/labgob" +import "6.5840/labrpc" +import "bytes" +import "log" +import "sync" +import "sync/atomic" +import "testing" +import "runtime" +import "math/rand" +import crand "crypto/rand" +import "math/big" +import "encoding/base64" +import "time" +import "fmt" + +func randstring(n int) string { + b := make([]byte, 2*n) + crand.Read(b) + s := base64.URLEncoding.EncodeToString(b) + return s[0:n] +} + +func makeSeed() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := crand.Int(crand.Reader, max) + x := bigx.Int64() + return x +} + +type config struct { + mu sync.Mutex + t *testing.T + finished int32 + net *labrpc.Network + n int + rafts []*Raft + applyErr []string // from apply channel readers + connected []bool // whether each server is on the net + saved []*Persister + endnames [][]string // the port file names each sends to + logs []map[int]interface{} // copy of each server's committed entries + lastApplied []int + start time.Time // time at which make_config() was called + // begin()/end() statistics + t0 time.Time // time at which test_test.go called cfg.begin() + rpcs0 int // rpcTotal() at start of test + cmds0 int // number of agreements + bytes0 int64 + maxIndex int + maxIndex0 int +} + +var ncpu_once sync.Once + +func make_config(t *testing.T, n int, unreliable bool, snapshot bool) *config { + ncpu_once.Do(func() { + if runtime.NumCPU() < 2 { + fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") + } + rand.Seed(makeSeed()) + }) + runtime.GOMAXPROCS(4) + cfg := &config{} + cfg.t = t + cfg.net = labrpc.MakeNetwork() + cfg.n = n + 
cfg.applyErr = make([]string, cfg.n) + cfg.rafts = make([]*Raft, cfg.n) + cfg.connected = make([]bool, cfg.n) + cfg.saved = make([]*Persister, cfg.n) + cfg.endnames = make([][]string, cfg.n) + cfg.logs = make([]map[int]interface{}, cfg.n) + cfg.lastApplied = make([]int, cfg.n) + cfg.start = time.Now() + + cfg.setunreliable(unreliable) + + cfg.net.LongDelays(true) + + applier := cfg.applier + if snapshot { + applier = cfg.applierSnap + } + // create a full set of Rafts. + for i := 0; i < cfg.n; i++ { + cfg.logs[i] = map[int]interface{}{} + cfg.start1(i, applier) + } + + // connect everyone + for i := 0; i < cfg.n; i++ { + cfg.connect(i) + } + + return cfg +} + +// shut down a Raft server but save its persistent state. +func (cfg *config) crash1(i int) { + cfg.disconnect(i) + cfg.net.DeleteServer(i) // disable client connections to the server. + + cfg.mu.Lock() + defer cfg.mu.Unlock() + + // a fresh persister, in case old instance + // continues to update the Persister. + // but copy old persister's content so that we always + // pass Make() the last persisted state. + if cfg.saved[i] != nil { + cfg.saved[i] = cfg.saved[i].Copy() + } + + rf := cfg.rafts[i] + if rf != nil { + cfg.mu.Unlock() + rf.Kill() + cfg.mu.Lock() + cfg.rafts[i] = nil + } + + if cfg.saved[i] != nil { + raftlog := cfg.saved[i].ReadRaftState() + snapshot := cfg.saved[i].ReadSnapshot() + cfg.saved[i] = &Persister{} + cfg.saved[i].Save(raftlog, snapshot) + } +} + +func (cfg *config) checkLogs(i int, m ApplyMsg) (string, bool) { + err_msg := "" + v := m.Command + for j := 0; j < len(cfg.logs); j++ { + if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v { + log.Printf("%v: log %v; server %v\n", i, cfg.logs[i], cfg.logs[j]) + // some server has already committed a different value for this entry! 
+ err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", + m.CommandIndex, i, m.Command, j, old) + } + } + _, prevok := cfg.logs[i][m.CommandIndex-1] + cfg.logs[i][m.CommandIndex] = v + if m.CommandIndex > cfg.maxIndex { + cfg.maxIndex = m.CommandIndex + } + return err_msg, prevok +} + +// applier reads message from apply ch and checks that they match the log +// contents +func (cfg *config) applier(i int, applyCh chan ApplyMsg) { + for m := range applyCh { + if m.CommandValid == false { + // ignore other types of ApplyMsg + } else { + cfg.mu.Lock() + err_msg, prevok := cfg.checkLogs(i, m) + cfg.mu.Unlock() + if m.CommandIndex > 1 && prevok == false { + err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) + } + if err_msg != "" { + log.Fatalf("apply error: %v", err_msg) + cfg.applyErr[i] = err_msg + // keep reading after error so that Raft doesn't block + // holding locks... + } + } + } +} + +// returns "" or error string +func (cfg *config) ingestSnap(i int, snapshot []byte, index int) string { + if snapshot == nil { + log.Fatalf("nil snapshot") + return "nil snapshot" + } + r := bytes.NewBuffer(snapshot) + d := labgob.NewDecoder(r) + var lastIncludedIndex int + var xlog []interface{} + if d.Decode(&lastIncludedIndex) != nil || + d.Decode(&xlog) != nil { + log.Fatalf("snapshot decode error") + return "snapshot Decode() error" + } + if index != -1 && index != lastIncludedIndex { + err := fmt.Sprintf("server %v snapshot doesn't match m.SnapshotIndex", i) + return err + } + cfg.logs[i] = map[int]interface{}{} + for j := 0; j < len(xlog); j++ { + cfg.logs[i][j] = xlog[j] + } + cfg.lastApplied[i] = lastIncludedIndex + return "" +} + +const SnapShotInterval = 10 + +// periodically snapshot raft state +func (cfg *config) applierSnap(i int, applyCh chan ApplyMsg) { + cfg.mu.Lock() + rf := cfg.rafts[i] + cfg.mu.Unlock() + if rf == nil { + return // ??? 
+ } + + for m := range applyCh { + err_msg := "" + if m.SnapshotValid { + cfg.mu.Lock() + err_msg = cfg.ingestSnap(i, m.Snapshot, m.SnapshotIndex) + cfg.mu.Unlock() + } else if m.CommandValid { + if m.CommandIndex != cfg.lastApplied[i]+1 { + err_msg = fmt.Sprintf("server %v apply out of order, expected index %v, got %v", i, cfg.lastApplied[i]+1, m.CommandIndex) + } + + if err_msg == "" { + cfg.mu.Lock() + var prevok bool + err_msg, prevok = cfg.checkLogs(i, m) + cfg.mu.Unlock() + if m.CommandIndex > 1 && prevok == false { + err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) + } + } + + cfg.mu.Lock() + cfg.lastApplied[i] = m.CommandIndex + cfg.mu.Unlock() + + if (m.CommandIndex+1)%SnapShotInterval == 0 { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(m.CommandIndex) + var xlog []interface{} + for j := 0; j <= m.CommandIndex; j++ { + xlog = append(xlog, cfg.logs[i][j]) + } + e.Encode(xlog) + rf.Snapshot(m.CommandIndex, w.Bytes()) + } + } else { + // Ignore other types of ApplyMsg. + } + if err_msg != "" { + log.Fatalf("apply error: %v", err_msg) + cfg.applyErr[i] = err_msg + // keep reading after error so that Raft doesn't block + // holding locks... + } + } +} + +// start or re-start a Raft. +// if one already exists, "kill" it first. +// allocate new outgoing port file names, and a new +// state persister, to isolate previous instance of +// this server. since we cannot really kill it. +func (cfg *config) start1(i int, applier func(int, chan ApplyMsg)) { + cfg.crash1(i) + + // a fresh set of outgoing ClientEnd names. + // so that old crashed instance's ClientEnds can't send. + cfg.endnames[i] = make([]string, cfg.n) + for j := 0; j < cfg.n; j++ { + cfg.endnames[i][j] = randstring(20) + } + + // a fresh set of ClientEnds. 
+ ends := make([]*labrpc.ClientEnd, cfg.n) + for j := 0; j < cfg.n; j++ { + ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) + cfg.net.Connect(cfg.endnames[i][j], j) + } + + cfg.mu.Lock() + + cfg.lastApplied[i] = 0 + + // a fresh persister, so old instance doesn't overwrite + // new instance's persisted state. + // but copy old persister's content so that we always + // pass Make() the last persisted state. + if cfg.saved[i] != nil { + cfg.saved[i] = cfg.saved[i].Copy() + + snapshot := cfg.saved[i].ReadSnapshot() + if snapshot != nil && len(snapshot) > 0 { + // mimic KV server and process snapshot now. + // ideally Raft should send it up on applyCh... + err := cfg.ingestSnap(i, snapshot, -1) + if err != "" { + cfg.t.Fatal(err) + } + } + } else { + cfg.saved[i] = MakePersister() + } + + cfg.mu.Unlock() + + applyCh := make(chan ApplyMsg) + + rf := Make(ends, i, cfg.saved[i], applyCh) + + cfg.mu.Lock() + cfg.rafts[i] = rf + cfg.mu.Unlock() + + go applier(i, applyCh) + + svc := labrpc.MakeService(rf) + srv := labrpc.MakeServer() + srv.AddService(svc) + cfg.net.AddServer(i, srv) +} + +func (cfg *config) checkTimeout() { + // enforce a two minute real-time limit on each test + if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { + cfg.t.Fatal("test took longer than 120 seconds") + } +} + +func (cfg *config) checkFinished() bool { + z := atomic.LoadInt32(&cfg.finished) + return z != 0 +} + +func (cfg *config) cleanup() { + atomic.StoreInt32(&cfg.finished, 1) + for i := 0; i < len(cfg.rafts); i++ { + if cfg.rafts[i] != nil { + cfg.rafts[i].Kill() + } + } + cfg.net.Cleanup() + cfg.checkTimeout() +} + +// attach server i to the net. 
+func (cfg *config) connect(i int) { + // fmt.Printf("connect(%d)\n", i) + + cfg.connected[i] = true + + // outgoing ClientEnds + for j := 0; j < cfg.n; j++ { + if cfg.connected[j] { + endname := cfg.endnames[i][j] + cfg.net.Enable(endname, true) + } + } + + // incoming ClientEnds + for j := 0; j < cfg.n; j++ { + if cfg.connected[j] { + endname := cfg.endnames[j][i] + cfg.net.Enable(endname, true) + } + } +} + +// detach server i from the net. +func (cfg *config) disconnect(i int) { + // fmt.Printf("disconnect(%d)\n", i) + + cfg.connected[i] = false + + // outgoing ClientEnds + for j := 0; j < cfg.n; j++ { + if cfg.endnames[i] != nil { + endname := cfg.endnames[i][j] + cfg.net.Enable(endname, false) + } + } + + // incoming ClientEnds + for j := 0; j < cfg.n; j++ { + if cfg.endnames[j] != nil { + endname := cfg.endnames[j][i] + cfg.net.Enable(endname, false) + } + } +} + +func (cfg *config) rpcCount(server int) int { + return cfg.net.GetCount(server) +} + +func (cfg *config) rpcTotal() int { + return cfg.net.GetTotalCount() +} + +func (cfg *config) setunreliable(unrel bool) { + cfg.net.Reliable(!unrel) +} + +func (cfg *config) bytesTotal() int64 { + return cfg.net.GetTotalBytes() +} + +func (cfg *config) setlongreordering(longrel bool) { + cfg.net.LongReordering(longrel) +} + +// check that one of the connected servers thinks +// it is the leader, and that no other connected +// server thinks otherwise. +// +// try a few times in case re-elections are needed. 
+func (cfg *config) checkOneLeader() int { + for iters := 0; iters < 10; iters++ { + ms := 450 + (rand.Int63() % 100) + time.Sleep(time.Duration(ms) * time.Millisecond) + + leaders := make(map[int][]int) + for i := 0; i < cfg.n; i++ { + if cfg.connected[i] { + if term, leader := cfg.rafts[i].GetState(); leader { + leaders[term] = append(leaders[term], i) + } + } + } + + lastTermWithLeader := -1 + for term, leaders := range leaders { + if len(leaders) > 1 { + cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders)) + } + if term > lastTermWithLeader { + lastTermWithLeader = term + } + } + + if len(leaders) != 0 { + return leaders[lastTermWithLeader][0] + } + } + cfg.t.Fatalf("expected one leader, got none") + return -1 +} + +// check that everyone agrees on the term. +func (cfg *config) checkTerms() int { + term := -1 + for i := 0; i < cfg.n; i++ { + if cfg.connected[i] { + xterm, _ := cfg.rafts[i].GetState() + if term == -1 { + term = xterm + } else if term != xterm { + cfg.t.Fatalf("servers disagree on term") + } + } + } + return term +} + +// check that none of the connected servers +// thinks it is the leader. +func (cfg *config) checkNoLeader() { + for i := 0; i < cfg.n; i++ { + if cfg.connected[i] { + _, is_leader := cfg.rafts[i].GetState() + if is_leader { + cfg.t.Fatalf("expected no leader among connected servers, but %v claims to be leader", i) + } + } + } +} + +// how many servers think a log entry is committed? +func (cfg *config) nCommitted(index int) (int, interface{}) { + count := 0 + var cmd interface{} = nil + for i := 0; i < len(cfg.rafts); i++ { + if cfg.applyErr[i] != "" { + cfg.t.Fatal(cfg.applyErr[i]) + } + + cfg.mu.Lock() + cmd1, ok := cfg.logs[i][index] + cfg.mu.Unlock() + + if ok { + if count > 0 && cmd != cmd1 { + cfg.t.Fatalf("committed values do not match: index %v, %v, %v", + index, cmd, cmd1) + } + count += 1 + cmd = cmd1 + } + } + return count, cmd +} + +// wait for at least n servers to commit. +// but don't wait forever. 
+func (cfg *config) wait(index int, n int, startTerm int) interface{} { + to := 10 * time.Millisecond + for iters := 0; iters < 30; iters++ { + nd, _ := cfg.nCommitted(index) + if nd >= n { + break + } + time.Sleep(to) + if to < time.Second { + to *= 2 + } + if startTerm > -1 { + for _, r := range cfg.rafts { + if t, _ := r.GetState(); t > startTerm { + // someone has moved on + // can no longer guarantee that we'll "win" + return -1 + } + } + } + } + nd, cmd := cfg.nCommitted(index) + if nd < n { + cfg.t.Fatalf("only %d decided for index %d; wanted %d", + nd, index, n) + } + return cmd +} + +// do a complete agreement. +// it might choose the wrong leader initially, +// and have to re-submit after giving up. +// entirely gives up after about 10 seconds. +// indirectly checks that the servers agree on the +// same value, since nCommitted() checks this, +// as do the threads that read from applyCh. +// returns index. +// if retry==true, may submit the command multiple +// times, in case a leader fails just after Start(). +// if retry==false, calls Start() only once, in order +// to simplify the early Lab 3B tests. +func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int { + t0 := time.Now() + starts := 0 + for time.Since(t0).Seconds() < 10 && cfg.checkFinished() == false { + // try all the servers, maybe one is the leader. + index := -1 + for si := 0; si < cfg.n; si++ { + starts = (starts + 1) % cfg.n + var rf *Raft + cfg.mu.Lock() + if cfg.connected[starts] { + rf = cfg.rafts[starts] + } + cfg.mu.Unlock() + if rf != nil { + index1, _, ok := rf.Start(cmd) + if ok { + index = index1 + break + } + } + } + + if index != -1 { + // somebody claimed to be the leader and to have + // submitted our command; wait a while for agreement. + t1 := time.Now() + for time.Since(t1).Seconds() < 2 { + nd, cmd1 := cfg.nCommitted(index) + if nd > 0 && nd >= expectedServers { + // committed + if cmd1 == cmd { + // and it was the command we submitted. 
+ return index + } + } + time.Sleep(20 * time.Millisecond) + } + if retry == false { + cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) + } + } else { + time.Sleep(50 * time.Millisecond) + } + } + if cfg.checkFinished() == false { + cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) + } + return -1 +} + +// start a Test. +// print the Test message. +// e.g. cfg.begin("Test (3B): RPC counts aren't too high") +func (cfg *config) begin(description string) { + fmt.Printf("%s ...\n", description) + cfg.t0 = time.Now() + cfg.rpcs0 = cfg.rpcTotal() + cfg.bytes0 = cfg.bytesTotal() + cfg.cmds0 = 0 + cfg.maxIndex0 = cfg.maxIndex +} + +// end a Test -- the fact that we got here means there +// was no failure. +// print the Passed message, +// and some performance numbers. +func (cfg *config) end() { + cfg.checkTimeout() + if cfg.t.Failed() == false { + cfg.mu.Lock() + t := time.Since(cfg.t0).Seconds() // real time + npeers := cfg.n // number of Raft peers + nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends + nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes + ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported + cfg.mu.Unlock() + + fmt.Printf(" ... Passed --") + fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds) + } +} + +// Maximum log size across all servers +func (cfg *config) LogSize() int { + logsize := 0 + for i := 0; i < cfg.n; i++ { + n := cfg.saved[i].RaftStateSize() + if n > logsize { + logsize = n + } + } + return logsize +} diff --git a/raft/persister.go b/raft/persister.go new file mode 100644 index 0000000..c5f816c --- /dev/null +++ b/raft/persister.go @@ -0,0 +1,70 @@ +package raft + +// +// support for Raft and kvraft to save persistent +// Raft state (log &c) and k/v server snapshots. +// +// we will use the original persister.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. 
+// + +import "sync" + +type Persister struct { + mu sync.Mutex + raftstate []byte + snapshot []byte +} + +func MakePersister() *Persister { + return &Persister{} +} + +func clone(orig []byte) []byte { + x := make([]byte, len(orig)) + copy(x, orig) + return x +} + +func (ps *Persister) Copy() *Persister { + ps.mu.Lock() + defer ps.mu.Unlock() + np := MakePersister() + np.raftstate = ps.raftstate + np.snapshot = ps.snapshot + return np +} + +func (ps *Persister) ReadRaftState() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.raftstate) +} + +func (ps *Persister) RaftStateSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.raftstate) +} + +// Save both Raft state and K/V snapshot as a single atomic action, +// to help avoid them getting out of sync. +func (ps *Persister) Save(raftstate []byte, snapshot []byte) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.raftstate = clone(raftstate) + ps.snapshot = clone(snapshot) +} + +func (ps *Persister) ReadSnapshot() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.snapshot) +} + +func (ps *Persister) SnapshotSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.snapshot) +} diff --git a/raft/raft.go b/raft/raft.go new file mode 100644 index 0000000..264d77e --- /dev/null +++ b/raft/raft.go @@ -0,0 +1,259 @@ +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. +// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. 
+// + +import ( + // "bytes" + "math/rand" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labrpc" +) + + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + + // Your data here (3A, 3B, 3C). + // Look at the paper's Figure 2 for a description of what + // state a Raft server must maintain. + +} + +// return currentTerm and whether this server +// believes it is the leader. +func (rf *Raft) GetState() (int, bool) { + + var term int + var isleader bool + // Your code here (3A). + return term, isleader +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. +// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + // Your code here (3C). 
+ // Example: + // w := new(bytes.Buffer) + // e := labgob.NewEncoder(w) + // e.Encode(rf.xxx) + // e.Encode(rf.yyy) + // raftstate := w.Bytes() + // rf.persister.Save(raftstate, nil) +} + + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state? + return + } + // Your code here (3C). + // Example: + // r := bytes.NewBuffer(data) + // d := labgob.NewDecoder(r) + // var xxx + // var yyy + // if d.Decode(&xxx) != nil || + // d.Decode(&yyy) != nil { + // error... + // } else { + // rf.xxx = xxx + // rf.yyy = yyy + // } +} + + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + +} + + +// example RequestVote RPC arguments structure. +// field names must start with capital letters! +type RequestVoteArgs struct { + // Your data here (3A, 3B). +} + +// example RequestVote RPC reply structure. +// field names must start with capital letters! +type RequestVoteReply struct { + // Your data here (3A). +} + +// example RequestVote RPC handler. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + // Your code here (3A, 3B). +} + +// example code to send a RequestVote RPC to a server. +// server is the index of the target server in rf.peers[]. +// expects RPC arguments in args. +// fills in *reply with RPC reply, so caller should +// pass &reply. +// the types of the args and reply passed to Call() must be +// the same as the types of the arguments declared in the +// handler function (including whether they are pointers). +// +// The labrpc package simulates a lossy network, in which servers +// may be unreachable, and in which requests and replies may be lost. 
+// Call() sends a request and waits for a reply. If a reply arrives +// within a timeout interval, Call() returns true; otherwise +// Call() returns false. Thus Call() may not return for a while. +// A false return can be caused by a dead server, a live server that +// can't be reached, a lost request, or a lost reply. +// +// Call() is guaranteed to return (perhaps after a delay) *except* if the +// handler function on the server side does not return. Thus there +// is no need to implement your own timeouts around Call(). +// +// look at the comments in ../labrpc/labrpc.go for more details. +// +// if you're having trouble getting RPC to work, check that you've +// capitalized all field names in structs passed over RPC, and +// that the caller passes the address of the reply struct with &, not +// the struct itself. +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool { + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + return ok +} + + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + index := -1 + term := -1 + isLeader := true + + // Your code here (3B). + + + return index, term, isLeader +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. 
your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. +} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +func (rf *Raft) ticker() { + for rf.killed() == false { + + // Your code here (3A) + // Check if a leader election should be started. + + + // pause for a random amount of time between 50 and 350 + // milliseconds. + ms := 50 + (rand.Int63() % 300) + time.Sleep(time.Duration(ms) * time.Millisecond) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. +func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + + // Your initialization code here (3A, 3B, 3C). 
+ + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + + return rf +} diff --git a/raft/test_test.go b/raft/test_test.go new file mode 100644 index 0000000..6eccdaa --- /dev/null +++ b/raft/test_test.go @@ -0,0 +1,1270 @@ +package raft + +// +// Raft tests. +// +// we will use the original test_test.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. +// + +import "testing" +import "fmt" +import "time" +import "math/rand" +import "sync/atomic" +import "sync" + +// The tester generously allows solutions to complete elections in one second +// (much more than the paper's range of timeouts). +const RaftElectionTimeout = 1000 * time.Millisecond + +func TestInitialElection3A(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3A): initial election") + + // is a leader elected? + cfg.checkOneLeader() + + // sleep a bit to avoid racing with followers learning of the + // election, then check that all peers agree on the term. + time.Sleep(50 * time.Millisecond) + term1 := cfg.checkTerms() + if term1 < 1 { + t.Fatalf("term is %v, but should be at least 1", term1) + } + + // does the leader+term stay the same if there is no network failure? + time.Sleep(2 * RaftElectionTimeout) + term2 := cfg.checkTerms() + if term1 != term2 { + fmt.Printf("warning: term changed even though there were no failures") + } + + // there should still be a leader. + cfg.checkOneLeader() + + cfg.end() +} + +func TestReElection3A(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3A): election after network failure") + + leader1 := cfg.checkOneLeader() + + // if the leader disconnects, a new one should be elected. 
+ cfg.disconnect(leader1) + cfg.checkOneLeader() + + // if the old leader rejoins, that shouldn't + // disturb the new leader. and the old leader + // should switch to follower. + cfg.connect(leader1) + leader2 := cfg.checkOneLeader() + + // if there's no quorum, no new leader should + // be elected. + cfg.disconnect(leader2) + cfg.disconnect((leader2 + 1) % servers) + time.Sleep(2 * RaftElectionTimeout) + + // check that the one connected server + // does not think it is the leader. + cfg.checkNoLeader() + + // if a quorum arises, it should elect a leader. + cfg.connect((leader2 + 1) % servers) + cfg.checkOneLeader() + + // re-join of last node shouldn't prevent leader from existing. + cfg.connect(leader2) + cfg.checkOneLeader() + + cfg.end() +} + +func TestManyElections3A(t *testing.T) { + servers := 7 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3A): multiple elections") + + cfg.checkOneLeader() + + iters := 10 + for ii := 1; ii < iters; ii++ { + // disconnect three nodes + i1 := rand.Int() % servers + i2 := rand.Int() % servers + i3 := rand.Int() % servers + cfg.disconnect(i1) + cfg.disconnect(i2) + cfg.disconnect(i3) + + // either the current leader should still be alive, + // or the remaining four should elect a new one. 
+ cfg.checkOneLeader() + + cfg.connect(i1) + cfg.connect(i2) + cfg.connect(i3) + } + + cfg.checkOneLeader() + + cfg.end() +} + +func TestBasicAgree3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): basic agreement") + + iters := 3 + for index := 1; index < iters+1; index++ { + nd, _ := cfg.nCommitted(index) + if nd > 0 { + t.Fatalf("some have committed before Start()") + } + + xindex := cfg.one(index*100, servers, false) + if xindex != index { + t.Fatalf("got index %v but expected %v", xindex, index) + } + } + + cfg.end() +} + +// check, based on counting bytes of RPCs, that +// each command is sent to each peer just once. +func TestRPCBytes3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): RPC byte count") + + cfg.one(99, servers, false) + bytes0 := cfg.bytesTotal() + + iters := 10 + var sent int64 = 0 + for index := 2; index < iters+2; index++ { + cmd := randstring(5000) + xindex := cfg.one(cmd, servers, false) + if xindex != index { + t.Fatalf("got index %v but expected %v", xindex, index) + } + sent += int64(len(cmd)) + } + + bytes1 := cfg.bytesTotal() + got := bytes1 - bytes0 + expected := int64(servers) * sent + if got > expected+50000 { + t.Fatalf("too many RPC bytes; got %v, expected %v", got, expected) + } + + cfg.end() +} + +// test just failure of followers. +func TestFollowerFailure3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): test progressive failure of followers") + + cfg.one(101, servers, false) + + // disconnect one follower from the network. + leader1 := cfg.checkOneLeader() + cfg.disconnect((leader1 + 1) % servers) + + // the leader and remaining follower should be + // able to agree despite the disconnected follower. 
+ cfg.one(102, servers-1, false) + time.Sleep(RaftElectionTimeout) + cfg.one(103, servers-1, false) + + // disconnect the remaining follower + leader2 := cfg.checkOneLeader() + cfg.disconnect((leader2 + 1) % servers) + cfg.disconnect((leader2 + 2) % servers) + + // submit a command. + index, _, ok := cfg.rafts[leader2].Start(104) + if ok != true { + t.Fatalf("leader rejected Start()") + } + if index != 4 { + t.Fatalf("expected index 4, got %v", index) + } + + time.Sleep(2 * RaftElectionTimeout) + + // check that command 104 did not commit. + n, _ := cfg.nCommitted(index) + if n > 0 { + t.Fatalf("%v committed but no majority", n) + } + + cfg.end() +} + +// test just failure of leaders. +func TestLeaderFailure3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): test failure of leaders") + + cfg.one(101, servers, false) + + // disconnect the first leader. + leader1 := cfg.checkOneLeader() + cfg.disconnect(leader1) + + // the remaining followers should elect + // a new leader. + cfg.one(102, servers-1, false) + time.Sleep(RaftElectionTimeout) + cfg.one(103, servers-1, false) + + // disconnect the new leader. + leader2 := cfg.checkOneLeader() + cfg.disconnect(leader2) + + // submit a command to each server. + for i := 0; i < servers; i++ { + cfg.rafts[i].Start(104) + } + + time.Sleep(2 * RaftElectionTimeout) + + // check that command 104 did not commit. + n, _ := cfg.nCommitted(4) + if n > 0 { + t.Fatalf("%v committed but no majority", n) + } + + cfg.end() +} + +// test that a follower participates after +// disconnect and re-connect. +func TestFailAgree3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): agreement after follower reconnects") + + cfg.one(101, servers, false) + + // disconnect one follower from the network. 
+ leader := cfg.checkOneLeader() + cfg.disconnect((leader + 1) % servers) + + // the leader and remaining follower should be + // able to agree despite the disconnected follower. + cfg.one(102, servers-1, false) + cfg.one(103, servers-1, false) + time.Sleep(RaftElectionTimeout) + cfg.one(104, servers-1, false) + cfg.one(105, servers-1, false) + + // re-connect + cfg.connect((leader + 1) % servers) + + // the full set of servers should preserve + // previous agreements, and be able to agree + // on new commands. + cfg.one(106, servers, true) + time.Sleep(RaftElectionTimeout) + cfg.one(107, servers, true) + + cfg.end() +} + +func TestFailNoAgree3B(t *testing.T) { + servers := 5 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): no agreement if too many followers disconnect") + + cfg.one(10, servers, false) + + // 3 of 5 followers disconnect + leader := cfg.checkOneLeader() + cfg.disconnect((leader + 1) % servers) + cfg.disconnect((leader + 2) % servers) + cfg.disconnect((leader + 3) % servers) + + index, _, ok := cfg.rafts[leader].Start(20) + if ok != true { + t.Fatalf("leader rejected Start()") + } + if index != 2 { + t.Fatalf("expected index 2, got %v", index) + } + + time.Sleep(2 * RaftElectionTimeout) + + n, _ := cfg.nCommitted(index) + if n > 0 { + t.Fatalf("%v committed but no majority", n) + } + + // repair + cfg.connect((leader + 1) % servers) + cfg.connect((leader + 2) % servers) + cfg.connect((leader + 3) % servers) + + // the disconnected majority may have chosen a leader from + // among their own ranks, forgetting index 2. 
+ leader2 := cfg.checkOneLeader() + index2, _, ok2 := cfg.rafts[leader2].Start(30) + if ok2 == false { + t.Fatalf("leader2 rejected Start()") + } + if index2 < 2 || index2 > 3 { + t.Fatalf("unexpected index %v", index2) + } + + cfg.one(1000, servers, true) + + cfg.end() +} + +func TestConcurrentStarts3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): concurrent Start()s") + + var success bool +loop: + for try := 0; try < 5; try++ { + if try > 0 { + // give solution some time to settle + time.Sleep(3 * time.Second) + } + + leader := cfg.checkOneLeader() + _, term, ok := cfg.rafts[leader].Start(1) + if !ok { + // leader moved on really quickly + continue + } + + iters := 5 + var wg sync.WaitGroup + is := make(chan int, iters) + for ii := 0; ii < iters; ii++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + i, term1, ok := cfg.rafts[leader].Start(100 + i) + if term1 != term { + return + } + if ok != true { + return + } + is <- i + }(ii) + } + + wg.Wait() + close(is) + + for j := 0; j < servers; j++ { + if t, _ := cfg.rafts[j].GetState(); t != term { + // term changed -- can't expect low RPC counts + continue loop + } + } + + failed := false + cmds := []int{} + for index := range is { + cmd := cfg.wait(index, servers, term) + if ix, ok := cmd.(int); ok { + if ix == -1 { + // peers have moved on to later terms + // so we can't expect all Start()s to + // have succeeded + failed = true + break + } + cmds = append(cmds, ix) + } else { + t.Fatalf("value %v is not an int", cmd) + } + } + + if failed { + // avoid leaking goroutines + go func() { + for range is { + } + }() + continue + } + + for ii := 0; ii < iters; ii++ { + x := 100 + ii + ok := false + for j := 0; j < len(cmds); j++ { + if cmds[j] == x { + ok = true + } + } + if ok == false { + t.Fatalf("cmd %v missing in %v", x, cmds) + } + } + + success = true + break + } + + if !success { + t.Fatalf("term changed too often") + } + + 
cfg.end() +} + +func TestRejoin3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): rejoin of partitioned leader") + + cfg.one(101, servers, true) + + // leader network failure + leader1 := cfg.checkOneLeader() + cfg.disconnect(leader1) + + // make old leader try to agree on some entries + cfg.rafts[leader1].Start(102) + cfg.rafts[leader1].Start(103) + cfg.rafts[leader1].Start(104) + + // new leader commits, also for index=2 + cfg.one(103, 2, true) + + // new leader network failure + leader2 := cfg.checkOneLeader() + cfg.disconnect(leader2) + + // old leader connected again + cfg.connect(leader1) + + cfg.one(104, 2, true) + + // all together now + cfg.connect(leader2) + + cfg.one(105, servers, true) + + cfg.end() +} + +func TestBackup3B(t *testing.T) { + servers := 5 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): leader backs up quickly over incorrect follower logs") + + cfg.one(rand.Int(), servers, true) + + // put leader and one follower in a partition + leader1 := cfg.checkOneLeader() + cfg.disconnect((leader1 + 2) % servers) + cfg.disconnect((leader1 + 3) % servers) + cfg.disconnect((leader1 + 4) % servers) + + // submit lots of commands that won't commit + for i := 0; i < 50; i++ { + cfg.rafts[leader1].Start(rand.Int()) + } + + time.Sleep(RaftElectionTimeout / 2) + + cfg.disconnect((leader1 + 0) % servers) + cfg.disconnect((leader1 + 1) % servers) + + // allow other partition to recover + cfg.connect((leader1 + 2) % servers) + cfg.connect((leader1 + 3) % servers) + cfg.connect((leader1 + 4) % servers) + + // lots of successful commands to new group. 
+ for i := 0; i < 50; i++ { + cfg.one(rand.Int(), 3, true) + } + + // now another partitioned leader and one follower + leader2 := cfg.checkOneLeader() + other := (leader1 + 2) % servers + if leader2 == other { + other = (leader2 + 1) % servers + } + cfg.disconnect(other) + + // lots more commands that won't commit + for i := 0; i < 50; i++ { + cfg.rafts[leader2].Start(rand.Int()) + } + + time.Sleep(RaftElectionTimeout / 2) + + // bring original leader back to life, + for i := 0; i < servers; i++ { + cfg.disconnect(i) + } + cfg.connect((leader1 + 0) % servers) + cfg.connect((leader1 + 1) % servers) + cfg.connect(other) + + // lots of successful commands to new group. + for i := 0; i < 50; i++ { + cfg.one(rand.Int(), 3, true) + } + + // now everyone + for i := 0; i < servers; i++ { + cfg.connect(i) + } + cfg.one(rand.Int(), servers, true) + + cfg.end() +} + +func TestCount3B(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3B): RPC counts aren't too high") + + rpcs := func() (n int) { + for j := 0; j < servers; j++ { + n += cfg.rpcCount(j) + } + return + } + + leader := cfg.checkOneLeader() + + total1 := rpcs() + + if total1 > 30 || total1 < 1 { + t.Fatalf("too many or few RPCs (%v) to elect initial leader\n", total1) + } + + var total2 int + var success bool +loop: + for try := 0; try < 5; try++ { + if try > 0 { + // give solution some time to settle + time.Sleep(3 * time.Second) + } + + leader = cfg.checkOneLeader() + total1 = rpcs() + + iters := 10 + starti, term, ok := cfg.rafts[leader].Start(1) + if !ok { + // leader moved on really quickly + continue + } + cmds := []int{} + for i := 1; i < iters+2; i++ { + x := int(rand.Int31()) + cmds = append(cmds, x) + index1, term1, ok := cfg.rafts[leader].Start(x) + if term1 != term { + // Term changed while starting + continue loop + } + if !ok { + // No longer the leader, so term has changed + continue loop + } + if starti+i != index1 { + 
t.Fatalf("Start() failed") + } + } + + for i := 1; i < iters+1; i++ { + cmd := cfg.wait(starti+i, servers, term) + if ix, ok := cmd.(int); ok == false || ix != cmds[i-1] { + if ix == -1 { + // term changed -- try again + continue loop + } + t.Fatalf("wrong value %v committed for index %v; expected %v\n", cmd, starti+i, cmds) + } + } + + failed := false + total2 = 0 + for j := 0; j < servers; j++ { + if t, _ := cfg.rafts[j].GetState(); t != term { + // term changed -- can't expect low RPC counts + // need to keep going to update total2 + failed = true + } + total2 += cfg.rpcCount(j) + } + + if failed { + continue loop + } + + if total2-total1 > (iters+1+3)*3 { + t.Fatalf("too many RPCs (%v) for %v entries\n", total2-total1, iters) + } + + success = true + break + } + + if !success { + t.Fatalf("term changed too often") + } + + time.Sleep(RaftElectionTimeout) + + total3 := 0 + for j := 0; j < servers; j++ { + total3 += cfg.rpcCount(j) + } + + if total3-total2 > 3*20 { + t.Fatalf("too many RPCs (%v) for 1 second of idleness\n", total3-total2) + } + + cfg.end() +} + +func TestPersist13C(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3C): basic persistence") + + cfg.one(11, servers, true) + + // crash and re-start all + for i := 0; i < servers; i++ { + cfg.start1(i, cfg.applier) + } + for i := 0; i < servers; i++ { + cfg.disconnect(i) + cfg.connect(i) + } + + cfg.one(12, servers, true) + + leader1 := cfg.checkOneLeader() + cfg.disconnect(leader1) + cfg.start1(leader1, cfg.applier) + cfg.connect(leader1) + + cfg.one(13, servers, true) + + leader2 := cfg.checkOneLeader() + cfg.disconnect(leader2) + cfg.one(14, servers-1, true) + cfg.start1(leader2, cfg.applier) + cfg.connect(leader2) + + cfg.wait(4, servers, -1) // wait for leader2 to join before killing i3 + + i3 := (cfg.checkOneLeader() + 1) % servers + cfg.disconnect(i3) + cfg.one(15, servers-1, true) + cfg.start1(i3, cfg.applier) + 
cfg.connect(i3) + + cfg.one(16, servers, true) + + cfg.end() +} + +func TestPersist23C(t *testing.T) { + servers := 5 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3C): more persistence") + + index := 1 + for iters := 0; iters < 5; iters++ { + cfg.one(10+index, servers, true) + index++ + + leader1 := cfg.checkOneLeader() + + cfg.disconnect((leader1 + 1) % servers) + cfg.disconnect((leader1 + 2) % servers) + + cfg.one(10+index, servers-2, true) + index++ + + cfg.disconnect((leader1 + 0) % servers) + cfg.disconnect((leader1 + 3) % servers) + cfg.disconnect((leader1 + 4) % servers) + + cfg.start1((leader1+1)%servers, cfg.applier) + cfg.start1((leader1+2)%servers, cfg.applier) + cfg.connect((leader1 + 1) % servers) + cfg.connect((leader1 + 2) % servers) + + time.Sleep(RaftElectionTimeout) + + cfg.start1((leader1+3)%servers, cfg.applier) + cfg.connect((leader1 + 3) % servers) + + cfg.one(10+index, servers-2, true) + index++ + + cfg.connect((leader1 + 4) % servers) + cfg.connect((leader1 + 0) % servers) + } + + cfg.one(1000, servers, true) + + cfg.end() +} + +func TestPersist33C(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3C): partitioned leader and one follower crash, leader restarts") + + cfg.one(101, 3, true) + + leader := cfg.checkOneLeader() + cfg.disconnect((leader + 2) % servers) + + cfg.one(102, 2, true) + + cfg.crash1((leader + 0) % servers) + cfg.crash1((leader + 1) % servers) + cfg.connect((leader + 2) % servers) + cfg.start1((leader+0)%servers, cfg.applier) + cfg.connect((leader + 0) % servers) + + cfg.one(103, 2, true) + + cfg.start1((leader+1)%servers, cfg.applier) + cfg.connect((leader + 1) % servers) + + cfg.one(104, servers, true) + + cfg.end() +} + +// Test the scenarios described in Figure 8 of the extended Raft paper. Each +// iteration asks a leader, if there is one, to insert a command in the Raft +// log. 
If there is a leader, that leader will fail quickly with a high +// probability (perhaps without committing the command), or crash after a while +// with low probability (most likey committing the command). If the number of +// alive servers isn't enough to form a majority, perhaps start a new server. +// The leader in a new term may try to finish replicating log entries that +// haven't been committed yet. +func TestFigure83C(t *testing.T) { + servers := 5 + cfg := make_config(t, servers, false, false) + defer cfg.cleanup() + + cfg.begin("Test (3C): Figure 8") + + cfg.one(rand.Int(), 1, true) + + nup := servers + for iters := 0; iters < 1000; iters++ { + leader := -1 + for i := 0; i < servers; i++ { + if cfg.rafts[i] != nil { + _, _, ok := cfg.rafts[i].Start(rand.Int()) + if ok { + leader = i + } + } + } + + if (rand.Int() % 1000) < 100 { + ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) + time.Sleep(time.Duration(ms) * time.Millisecond) + } else { + ms := (rand.Int63() % 13) + time.Sleep(time.Duration(ms) * time.Millisecond) + } + + if leader != -1 { + cfg.crash1(leader) + nup -= 1 + } + + if nup < 3 { + s := rand.Int() % servers + if cfg.rafts[s] == nil { + cfg.start1(s, cfg.applier) + cfg.connect(s) + nup += 1 + } + } + } + + for i := 0; i < servers; i++ { + if cfg.rafts[i] == nil { + cfg.start1(i, cfg.applier) + cfg.connect(i) + } + } + + cfg.one(rand.Int(), servers, true) + + cfg.end() +} + +func TestUnreliableAgree3C(t *testing.T) { + servers := 5 + cfg := make_config(t, servers, true, false) + defer cfg.cleanup() + + cfg.begin("Test (3C): unreliable agreement") + + var wg sync.WaitGroup + + for iters := 1; iters < 50; iters++ { + for j := 0; j < 4; j++ { + wg.Add(1) + go func(iters, j int) { + defer wg.Done() + cfg.one((100*iters)+j, 1, true) + }(iters, j) + } + cfg.one(iters, 1, true) + } + + cfg.setunreliable(false) + + wg.Wait() + + cfg.one(100, servers, true) + + cfg.end() +} + +func TestFigure8Unreliable3C(t *testing.T) { + 
servers := 5 + cfg := make_config(t, servers, true, false) + defer cfg.cleanup() + + cfg.begin("Test (3C): Figure 8 (unreliable)") + + cfg.one(rand.Int()%10000, 1, true) + + nup := servers + for iters := 0; iters < 1000; iters++ { + if iters == 200 { + cfg.setlongreordering(true) + } + leader := -1 + for i := 0; i < servers; i++ { + _, _, ok := cfg.rafts[i].Start(rand.Int() % 10000) + if ok && cfg.connected[i] { + leader = i + } + } + + if (rand.Int() % 1000) < 100 { + ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) + time.Sleep(time.Duration(ms) * time.Millisecond) + } else { + ms := (rand.Int63() % 13) + time.Sleep(time.Duration(ms) * time.Millisecond) + } + + if leader != -1 && (rand.Int()%1000) < int(RaftElectionTimeout/time.Millisecond)/2 { + cfg.disconnect(leader) + nup -= 1 + } + + if nup < 3 { + s := rand.Int() % servers + if cfg.connected[s] == false { + cfg.connect(s) + nup += 1 + } + } + } + + for i := 0; i < servers; i++ { + if cfg.connected[i] == false { + cfg.connect(i) + } + } + + cfg.one(rand.Int()%10000, servers, true) + + cfg.end() +} + +func internalChurn(t *testing.T, unreliable bool) { + + servers := 5 + cfg := make_config(t, servers, unreliable, false) + defer cfg.cleanup() + + if unreliable { + cfg.begin("Test (3C): unreliable churn") + } else { + cfg.begin("Test (3C): churn") + } + + stop := int32(0) + + // create concurrent clients + cfn := func(me int, ch chan []int) { + var ret []int + ret = nil + defer func() { ch <- ret }() + values := []int{} + for atomic.LoadInt32(&stop) == 0 { + x := rand.Int() + index := -1 + ok := false + for i := 0; i < servers; i++ { + // try them all, maybe one of them is a leader + cfg.mu.Lock() + rf := cfg.rafts[i] + cfg.mu.Unlock() + if rf != nil { + index1, _, ok1 := rf.Start(x) + if ok1 { + ok = ok1 + index = index1 + } + } + } + if ok { + // maybe leader will commit our value, maybe not. + // but don't wait forever. 
+ for _, to := range []int{10, 20, 50, 100, 200} { + nd, cmd := cfg.nCommitted(index) + if nd > 0 { + if xx, ok := cmd.(int); ok { + if xx == x { + values = append(values, x) + } + } else { + cfg.t.Fatalf("wrong command type") + } + break + } + time.Sleep(time.Duration(to) * time.Millisecond) + } + } else { + time.Sleep(time.Duration(79+me*17) * time.Millisecond) + } + } + ret = values + } + + ncli := 3 + cha := []chan []int{} + for i := 0; i < ncli; i++ { + cha = append(cha, make(chan []int)) + go cfn(i, cha[i]) + } + + for iters := 0; iters < 20; iters++ { + if (rand.Int() % 1000) < 200 { + i := rand.Int() % servers + cfg.disconnect(i) + } + + if (rand.Int() % 1000) < 500 { + i := rand.Int() % servers + if cfg.rafts[i] == nil { + cfg.start1(i, cfg.applier) + } + cfg.connect(i) + } + + if (rand.Int() % 1000) < 200 { + i := rand.Int() % servers + if cfg.rafts[i] != nil { + cfg.crash1(i) + } + } + + // Make crash/restart infrequent enough that the peers can often + // keep up, but not so infrequent that everything has settled + // down from one change to the next. Pick a value smaller than + // the election timeout, but not hugely smaller. + time.Sleep((RaftElectionTimeout * 7) / 10) + } + + time.Sleep(RaftElectionTimeout) + cfg.setunreliable(false) + for i := 0; i < servers; i++ { + if cfg.rafts[i] == nil { + cfg.start1(i, cfg.applier) + } + cfg.connect(i) + } + + atomic.StoreInt32(&stop, 1) + + values := []int{} + for i := 0; i < ncli; i++ { + vv := <-cha[i] + if vv == nil { + t.Fatal("client failed") + } + values = append(values, vv...) 
+ } + + time.Sleep(RaftElectionTimeout) + + lastIndex := cfg.one(rand.Int(), servers, true) + + really := make([]int, lastIndex+1) + for index := 1; index <= lastIndex; index++ { + v := cfg.wait(index, servers, -1) + if vi, ok := v.(int); ok { + really = append(really, vi) + } else { + t.Fatalf("not an int") + } + } + + for _, v1 := range values { + ok := false + for _, v2 := range really { + if v1 == v2 { + ok = true + } + } + if ok == false { + cfg.t.Fatalf("didn't find a value") + } + } + + cfg.end() +} + +func TestReliableChurn3C(t *testing.T) { + internalChurn(t, false) +} + +func TestUnreliableChurn3C(t *testing.T) { + internalChurn(t, true) +} + +const MAXLOGSIZE = 2000 + +func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash bool) { + iters := 30 + servers := 3 + cfg := make_config(t, servers, !reliable, true) + defer cfg.cleanup() + + cfg.begin(name) + + cfg.one(rand.Int(), servers, true) + leader1 := cfg.checkOneLeader() + + for i := 0; i < iters; i++ { + victim := (leader1 + 1) % servers + sender := leader1 + if i%3 == 1 { + sender = (leader1 + 1) % servers + victim = leader1 + } + + if disconnect { + cfg.disconnect(victim) + cfg.one(rand.Int(), servers-1, true) + } + if crash { + cfg.crash1(victim) + cfg.one(rand.Int(), servers-1, true) + } + + // perhaps send enough to get a snapshot + nn := (SnapShotInterval / 2) + (rand.Int() % SnapShotInterval) + for i := 0; i < nn; i++ { + cfg.rafts[sender].Start(rand.Int()) + } + + // let applier threads catch up with the Start()'s + if disconnect == false && crash == false { + // make sure all followers have caught up, so that + // an InstallSnapshot RPC isn't required for + // TestSnapshotBasic3D(). + cfg.one(rand.Int(), servers, true) + } else { + cfg.one(rand.Int(), servers-1, true) + } + + if cfg.LogSize() >= MAXLOGSIZE { + cfg.t.Fatalf("Log size too large") + } + if disconnect { + // reconnect a follower, who may be behind and + // needs to receive a snapshot to catch up. 
+ cfg.connect(victim) + cfg.one(rand.Int(), servers, true) + leader1 = cfg.checkOneLeader() + } + if crash { + cfg.start1(victim, cfg.applierSnap) + cfg.connect(victim) + cfg.one(rand.Int(), servers, true) + leader1 = cfg.checkOneLeader() + } + } + cfg.end() +} + +func TestSnapshotBasic3D(t *testing.T) { + snapcommon(t, "Test (3D): snapshots basic", false, true, false) +} + +func TestSnapshotInstall3D(t *testing.T) { + snapcommon(t, "Test (3D): install snapshots (disconnect)", true, true, false) +} + +func TestSnapshotInstallUnreliable3D(t *testing.T) { + snapcommon(t, "Test (3D): install snapshots (disconnect+unreliable)", + true, false, false) +} + +func TestSnapshotInstallCrash3D(t *testing.T) { + snapcommon(t, "Test (3D): install snapshots (crash)", false, true, true) +} + +func TestSnapshotInstallUnCrash3D(t *testing.T) { + snapcommon(t, "Test (3D): install snapshots (unreliable+crash)", false, false, true) +} + +// do the servers persist the snapshots, and +// restart using snapshot along with the +// tail of the log? 
+func TestSnapshotAllCrash3D(t *testing.T) { + servers := 3 + iters := 5 + cfg := make_config(t, servers, false, true) + defer cfg.cleanup() + + cfg.begin("Test (3D): crash and restart all servers") + + cfg.one(rand.Int(), servers, true) + + for i := 0; i < iters; i++ { + // perhaps enough to get a snapshot + nn := (SnapShotInterval / 2) + (rand.Int() % SnapShotInterval) + for i := 0; i < nn; i++ { + cfg.one(rand.Int(), servers, true) + } + + index1 := cfg.one(rand.Int(), servers, true) + + // crash all + for i := 0; i < servers; i++ { + cfg.crash1(i) + } + + // revive all + for i := 0; i < servers; i++ { + cfg.start1(i, cfg.applierSnap) + cfg.connect(i) + } + + index2 := cfg.one(rand.Int(), servers, true) + if index2 < index1+1 { + t.Fatalf("index decreased from %v to %v", index1, index2) + } + } + cfg.end() +} + +// do servers correctly initialize their in-memory copy of the snapshot, making +// sure that future writes to persistent state don't lose state? +func TestSnapshotInit3D(t *testing.T) { + servers := 3 + cfg := make_config(t, servers, false, true) + defer cfg.cleanup() + + cfg.begin("Test (3D): snapshot initialization after crash") + cfg.one(rand.Int(), servers, true) + + // enough ops to make a snapshot + nn := SnapShotInterval + 1 + for i := 0; i < nn; i++ { + cfg.one(rand.Int(), servers, true) + } + + // crash all + for i := 0; i < servers; i++ { + cfg.crash1(i) + } + + // revive all + for i := 0; i < servers; i++ { + cfg.start1(i, cfg.applierSnap) + cfg.connect(i) + } + + // a single op, to get something to be written back to persistent storage. 
+ cfg.one(rand.Int(), servers, true) + + // crash all + for i := 0; i < servers; i++ { + cfg.crash1(i) + } + + // revive all + for i := 0; i < servers; i++ { + cfg.start1(i, cfg.applierSnap) + cfg.connect(i) + } + + // do another op to trigger potential bug + cfg.one(rand.Int(), servers, true) + cfg.end() +} diff --git a/raft/util.go b/raft/util.go new file mode 100644 index 0000000..e064403 --- /dev/null +++ b/raft/util.go @@ -0,0 +1,12 @@ +package raft + +import "log" + +// Debugging +const Debug = false + +func DPrintf(format string, a ...interface{}) { + if Debug { + log.Printf(format, a...) + } +} diff --git a/shardctrler/client.go b/shardctrler/client.go new file mode 100644 index 0000000..74ab736 --- /dev/null +++ b/shardctrler/client.go @@ -0,0 +1,101 @@ +package shardctrler + +// +// Shardctrler clerk. +// + +import "6.5840/labrpc" +import "time" +import "crypto/rand" +import "math/big" + +type Clerk struct { + servers []*labrpc.ClientEnd + // Your data here. +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.servers = servers + // Your code here. + return ck +} + +func (ck *Clerk) Query(num int) Config { + args := &QueryArgs{} + // Your code here. + args.Num = num + for { + // try each known server. + for _, srv := range ck.servers { + var reply QueryReply + ok := srv.Call("ShardCtrler.Query", args, &reply) + if ok && reply.WrongLeader == false { + return reply.Config + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Join(servers map[int][]string) { + args := &JoinArgs{} + // Your code here. + args.Servers = servers + + for { + // try each known server. 
+ for _, srv := range ck.servers { + var reply JoinReply + ok := srv.Call("ShardCtrler.Join", args, &reply) + if ok && reply.WrongLeader == false { + return + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Leave(gids []int) { + args := &LeaveArgs{} + // Your code here. + args.GIDs = gids + + for { + // try each known server. + for _, srv := range ck.servers { + var reply LeaveReply + ok := srv.Call("ShardCtrler.Leave", args, &reply) + if ok && reply.WrongLeader == false { + return + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Move(shard int, gid int) { + args := &MoveArgs{} + // Your code here. + args.Shard = shard + args.GID = gid + + for { + // try each known server. + for _, srv := range ck.servers { + var reply MoveReply + ok := srv.Call("ShardCtrler.Move", args, &reply) + if ok && reply.WrongLeader == false { + return + } + } + time.Sleep(100 * time.Millisecond) + } +} diff --git a/shardctrler/common.go b/shardctrler/common.go new file mode 100644 index 0000000..47a95fb --- /dev/null +++ b/shardctrler/common.go @@ -0,0 +1,73 @@ +package shardctrler + +// +// Shard controller: assigns shards to replication groups. +// +// RPC interface: +// Join(servers) -- add a set of groups (gid -> server-list mapping). +// Leave(gids) -- delete a set of groups. +// Move(shard, gid) -- hand off one shard from current owner to gid. +// Query(num) -> fetch Config # num, or latest config if num==-1. +// +// A Config (configuration) describes a set of replica groups, and the +// replica group responsible for each shard. Configs are numbered. Config +// #0 is the initial configuration, with no groups and all shards +// assigned to group 0 (the invalid group). +// +// You will need to add fields to the RPC argument structs. +// + +// The number of shards. +const NShards = 10 + +// A configuration -- an assignment of shards to groups. +// Please don't change this. 
+type Config struct { + Num int // config number + Shards [NShards]int // shard -> gid + Groups map[int][]string // gid -> servers[] +} + +const ( + OK = "OK" +) + +type Err string + +type JoinArgs struct { + Servers map[int][]string // new GID -> servers mappings +} + +type JoinReply struct { + WrongLeader bool + Err Err +} + +type LeaveArgs struct { + GIDs []int +} + +type LeaveReply struct { + WrongLeader bool + Err Err +} + +type MoveArgs struct { + Shard int + GID int +} + +type MoveReply struct { + WrongLeader bool + Err Err +} + +type QueryArgs struct { + Num int // desired config number +} + +type QueryReply struct { + WrongLeader bool + Err Err + Config Config +} diff --git a/shardctrler/config.go b/shardctrler/config.go new file mode 100644 index 0000000..871a1b1 --- /dev/null +++ b/shardctrler/config.go @@ -0,0 +1,357 @@ +package shardctrler + +import "6.5840/labrpc" +import "6.5840/raft" +import "testing" +import "os" + +// import "log" +import crand "crypto/rand" +import "math/rand" +import "encoding/base64" +import "sync" +import "runtime" +import "time" + +func randstring(n int) string { + b := make([]byte, 2*n) + crand.Read(b) + s := base64.URLEncoding.EncodeToString(b) + return s[0:n] +} + +// Randomize server handles +func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { + sa := make([]*labrpc.ClientEnd, len(kvh)) + copy(sa, kvh) + for i := range sa { + j := rand.Intn(i + 1) + sa[i], sa[j] = sa[j], sa[i] + } + return sa +} + +type config struct { + mu sync.Mutex + t *testing.T + net *labrpc.Network + n int + servers []*ShardCtrler + saved []*raft.Persister + endnames [][]string // names of each server's sending ClientEnds + clerks map[*Clerk][]string + nextClientId int + start time.Time // time at which make_config() was called +} + +func (cfg *config) checkTimeout() { + // enforce a two minute real-time limit on each test + if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { + cfg.t.Fatal("test took longer than 120 
seconds") + } +} + +func (cfg *config) cleanup() { + cfg.mu.Lock() + defer cfg.mu.Unlock() + for i := 0; i < len(cfg.servers); i++ { + if cfg.servers[i] != nil { + cfg.servers[i].Kill() + } + } + cfg.net.Cleanup() + cfg.checkTimeout() +} + +// Maximum log size across all servers +func (cfg *config) LogSize() int { + logsize := 0 + for i := 0; i < cfg.n; i++ { + n := cfg.saved[i].RaftStateSize() + if n > logsize { + logsize = n + } + } + return logsize +} + +// attach server i to servers listed in to +// caller must hold cfg.mu +func (cfg *config) connectUnlocked(i int, to []int) { + // log.Printf("connect peer %d to %v\n", i, to) + + // outgoing socket files + for j := 0; j < len(to); j++ { + endname := cfg.endnames[i][to[j]] + cfg.net.Enable(endname, true) + } + + // incoming socket files + for j := 0; j < len(to); j++ { + endname := cfg.endnames[to[j]][i] + cfg.net.Enable(endname, true) + } +} + +func (cfg *config) connect(i int, to []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.connectUnlocked(i, to) +} + +// detach server i from the servers listed in from +// caller must hold cfg.mu +func (cfg *config) disconnectUnlocked(i int, from []int) { + // log.Printf("disconnect peer %d from %v\n", i, from) + + // outgoing socket files + for j := 0; j < len(from); j++ { + if cfg.endnames[i] != nil { + endname := cfg.endnames[i][from[j]] + cfg.net.Enable(endname, false) + } + } + + // incoming socket files + for j := 0; j < len(from); j++ { + if cfg.endnames[j] != nil { + endname := cfg.endnames[from[j]][i] + cfg.net.Enable(endname, false) + } + } +} + +func (cfg *config) disconnect(i int, from []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.disconnectUnlocked(i, from) +} + +func (cfg *config) All() []int { + all := make([]int, cfg.n) + for i := 0; i < cfg.n; i++ { + all[i] = i + } + return all +} + +func (cfg *config) ConnectAll() { + cfg.mu.Lock() + defer cfg.mu.Unlock() + for i := 0; i < cfg.n; i++ { + cfg.connectUnlocked(i, cfg.All()) + } +} + +// Sets 
up 2 partitions with connectivity between servers in each partition. +func (cfg *config) partition(p1 []int, p2 []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + // log.Printf("partition servers into: %v %v\n", p1, p2) + for i := 0; i < len(p1); i++ { + cfg.disconnectUnlocked(p1[i], p2) + cfg.connectUnlocked(p1[i], p1) + } + for i := 0; i < len(p2); i++ { + cfg.disconnectUnlocked(p2[i], p1) + cfg.connectUnlocked(p2[i], p2) + } +} + +// Create a clerk with clerk specific server names. +// Give it connections to all of the servers, but for +// now enable only connections to servers in to[]. +func (cfg *config) makeClient(to []int) *Clerk { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + // a fresh set of ClientEnds. + ends := make([]*labrpc.ClientEnd, cfg.n) + endnames := make([]string, cfg.n) + for j := 0; j < cfg.n; j++ { + endnames[j] = randstring(20) + ends[j] = cfg.net.MakeEnd(endnames[j]) + cfg.net.Connect(endnames[j], j) + } + + ck := MakeClerk(random_handles(ends)) + cfg.clerks[ck] = endnames + cfg.nextClientId++ + cfg.ConnectClientUnlocked(ck, to) + return ck +} + +func (cfg *config) deleteClient(ck *Clerk) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + v := cfg.clerks[ck] + for i := 0; i < len(v); i++ { + os.Remove(v[i]) + } + delete(cfg.clerks, ck) +} + +// caller should hold cfg.mu +func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { + // log.Printf("ConnectClient %v to %v\n", ck, to) + endnames := cfg.clerks[ck] + for j := 0; j < len(to); j++ { + s := endnames[to[j]] + cfg.net.Enable(s, true) + } +} + +func (cfg *config) ConnectClient(ck *Clerk, to []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.ConnectClientUnlocked(ck, to) +} + +// caller should hold cfg.mu +func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { + // log.Printf("DisconnectClient %v from %v\n", ck, from) + endnames := cfg.clerks[ck] + for j := 0; j < len(from); j++ { + s := endnames[from[j]] + cfg.net.Enable(s, false) + } +} + +func (cfg *config) 
DisconnectClient(ck *Clerk, from []int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + cfg.DisconnectClientUnlocked(ck, from) +} + +// Shutdown a server by isolating it +func (cfg *config) ShutdownServer(i int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + cfg.disconnectUnlocked(i, cfg.All()) + + // disable client connections to the server. + // it's important to do this before creating + // the new Persister in saved[i], to avoid + // the possibility of the server returning a + // positive reply to an Append but persisting + // the result in the superseded Persister. + cfg.net.DeleteServer(i) + + // a fresh persister, in case old instance + // continues to update the Persister. + // but copy old persister's content so that we always + // pass Make() the last persisted state. + if cfg.saved[i] != nil { + cfg.saved[i] = cfg.saved[i].Copy() + } + + kv := cfg.servers[i] + if kv != nil { + cfg.mu.Unlock() + kv.Kill() + cfg.mu.Lock() + cfg.servers[i] = nil + } +} + +// If restart servers, first call ShutdownServer +func (cfg *config) StartServer(i int) { + cfg.mu.Lock() + + // a fresh set of outgoing ClientEnd names. + cfg.endnames[i] = make([]string, cfg.n) + for j := 0; j < cfg.n; j++ { + cfg.endnames[i][j] = randstring(20) + } + + // a fresh set of ClientEnds. + ends := make([]*labrpc.ClientEnd, cfg.n) + for j := 0; j < cfg.n; j++ { + ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) + cfg.net.Connect(cfg.endnames[i][j], j) + } + + // a fresh persister, so old instance doesn't overwrite + // new instance's persisted state. + // give the fresh persister a copy of the old persister's + // state, so that the spec is that we pass StartKVServer() + // the last persisted state. 
+ if cfg.saved[i] != nil { + cfg.saved[i] = cfg.saved[i].Copy() + } else { + cfg.saved[i] = raft.MakePersister() + } + + cfg.mu.Unlock() + + cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) + + kvsvc := labrpc.MakeService(cfg.servers[i]) + rfsvc := labrpc.MakeService(cfg.servers[i].rf) + srv := labrpc.MakeServer() + srv.AddService(kvsvc) + srv.AddService(rfsvc) + cfg.net.AddServer(i, srv) +} + +func (cfg *config) Leader() (bool, int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + for i := 0; i < cfg.n; i++ { + if cfg.servers[i] != nil { + _, is_leader := cfg.servers[i].rf.GetState() + if is_leader { + return true, i + } + } + } + return false, 0 +} + +// Partition servers into 2 groups and put current leader in minority +func (cfg *config) make_partition() ([]int, []int) { + _, l := cfg.Leader() + p1 := make([]int, cfg.n/2+1) + p2 := make([]int, cfg.n/2) + j := 0 + for i := 0; i < cfg.n; i++ { + if i != l { + if j < len(p1) { + p1[j] = i + } else { + p2[j-len(p1)] = i + } + j++ + } + } + p2[len(p2)-1] = l + return p1, p2 +} + +func make_config(t *testing.T, n int, unreliable bool) *config { + runtime.GOMAXPROCS(4) + cfg := &config{} + cfg.t = t + cfg.net = labrpc.MakeNetwork() + cfg.n = n + cfg.servers = make([]*ShardCtrler, cfg.n) + cfg.saved = make([]*raft.Persister, cfg.n) + cfg.endnames = make([][]string, cfg.n) + cfg.clerks = make(map[*Clerk][]string) + cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid + cfg.start = time.Now() + + // create a full set of KV servers. 
+ for i := 0; i < cfg.n; i++ { + cfg.StartServer(i) + } + + cfg.ConnectAll() + + cfg.net.Reliable(!unreliable) + + return cfg +} diff --git a/shardctrler/server.go b/shardctrler/server.go new file mode 100644 index 0000000..229e014 --- /dev/null +++ b/shardctrler/server.go @@ -0,0 +1,76 @@ +package shardctrler + + +import "6.5840/raft" +import "6.5840/labrpc" +import "sync" +import "6.5840/labgob" + + +type ShardCtrler struct { + mu sync.Mutex + me int + rf *raft.Raft + applyCh chan raft.ApplyMsg + + // Your data here. + + configs []Config // indexed by config num +} + + +type Op struct { + // Your data here. +} + + +func (sc *ShardCtrler) Join(args *JoinArgs, reply *JoinReply) { + // Your code here. +} + +func (sc *ShardCtrler) Leave(args *LeaveArgs, reply *LeaveReply) { + // Your code here. +} + +func (sc *ShardCtrler) Move(args *MoveArgs, reply *MoveReply) { + // Your code here. +} + +func (sc *ShardCtrler) Query(args *QueryArgs, reply *QueryReply) { + // Your code here. +} + + +// the tester calls Kill() when a ShardCtrler instance won't +// be needed again. you are not required to do anything +// in Kill(), but it might be convenient to (for example) +// turn off debug output from this instance. +func (sc *ShardCtrler) Kill() { + sc.rf.Kill() + // Your code here, if desired. +} + +// needed by shardkv tester +func (sc *ShardCtrler) Raft() *raft.Raft { + return sc.rf +} + +// servers[] contains the ports of the set of +// servers that will cooperate via Raft to +// form the fault-tolerant shardctrler service. +// me is the index of the current server in servers[]. +func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardCtrler { + sc := new(ShardCtrler) + sc.me = me + + sc.configs = make([]Config, 1) + sc.configs[0].Groups = map[int][]string{} + + labgob.Register(Op{}) + sc.applyCh = make(chan raft.ApplyMsg) + sc.rf = raft.Make(servers, me, persister, sc.applyCh) + + // Your code here. 
+ + return sc +} diff --git a/shardctrler/test_test.go b/shardctrler/test_test.go new file mode 100644 index 0000000..3642e46 --- /dev/null +++ b/shardctrler/test_test.go @@ -0,0 +1,494 @@ +package shardctrler + +import ( + "fmt" + "sync" + "testing" + "time" +) + +// import "time" + +func check(t *testing.T, groups []int, ck *Clerk) { + c := ck.Query(-1) + if len(c.Groups) != len(groups) { + t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) + } + + // are the groups as expected? + for _, g := range groups { + _, ok := c.Groups[g] + if ok != true { + t.Fatalf("missing group %v", g) + } + } + + // any un-allocated shards? + if len(groups) > 0 { + for s, g := range c.Shards { + _, ok := c.Groups[g] + if ok == false { + t.Fatalf("shard %v -> invalid group %v", s, g) + } + } + } + + // more or less balanced sharding? + counts := map[int]int{} + for _, g := range c.Shards { + counts[g] += 1 + } + min := 257 + max := 0 + for g, _ := range c.Groups { + if counts[g] > max { + max = counts[g] + } + if counts[g] < min { + min = counts[g] + } + } + if max > min+1 { + t.Fatalf("max %v too much larger than min %v", max, min) + } +} + +func check_same_config(t *testing.T, c1 Config, c2 Config) { + if c1.Num != c2.Num { + t.Fatalf("Num wrong") + } + if c1.Shards != c2.Shards { + t.Fatalf("Shards wrong") + } + if len(c1.Groups) != len(c2.Groups) { + t.Fatalf("number of Groups is wrong") + } + for gid, sa := range c1.Groups { + sa1, ok := c2.Groups[gid] + if ok == false || len(sa1) != len(sa) { + t.Fatalf("len(Groups) wrong") + } + if ok && len(sa1) == len(sa) { + for j := 0; j < len(sa); j++ { + if sa[j] != sa1[j] { + t.Fatalf("Groups wrong") + } + } + } + } +} + +func TestBasic(t *testing.T) { + const nservers = 3 + cfg := make_config(t, nservers, false) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + cfa := make([]Config, 6) + t.Run("LeaveJoin", func(t *testing.T) { + fmt.Printf("Test: Basic leave/join ...\n") + + cfa[0] = ck.Query(-1) + + check(t, 
[]int{}, ck) + + var gid1 int = 1 + ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}}) + check(t, []int{gid1}, ck) + cfa[1] = ck.Query(-1) + + var gid2 int = 2 + ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) + check(t, []int{gid1, gid2}, ck) + cfa[2] = ck.Query(-1) + + cfx := ck.Query(-1) + sa1 := cfx.Groups[gid1] + if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { + t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) + } + sa2 := cfx.Groups[gid2] + if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { + t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) + } + + ck.Leave([]int{gid1}) + check(t, []int{gid2}, ck) + cfa[4] = ck.Query(-1) + + ck.Leave([]int{gid2}) + cfa[5] = ck.Query(-1) + + fmt.Printf(" ... Passed\n") + }) + + t.Run("HistoricalQueries", func(t *testing.T) { + fmt.Printf("Test: Historical queries ...\n") + + for s := 0; s < nservers; s++ { + cfg.ShutdownServer(s) + for i := 0; i < len(cfa); i++ { + c := ck.Query(cfa[i].Num) + check_same_config(t, c, cfa[i]) + } + cfg.StartServer(s) + cfg.ConnectAll() + } + fmt.Printf(" ... 
Passed\n") + }) + + t.Run("Move", func(t *testing.T) { + fmt.Printf("Test: Move ...\n") + var gid3 int = 503 + ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}}) + var gid4 int = 504 + ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}}) + for i := 0; i < NShards; i++ { + cf := ck.Query(-1) + if i < NShards/2 { + ck.Move(i, gid3) + if cf.Shards[i] != gid3 { + cf1 := ck.Query(-1) + if cf1.Num <= cf.Num { + t.Fatalf("Move should increase Config.Num") + } + } + } else { + ck.Move(i, gid4) + if cf.Shards[i] != gid4 { + cf1 := ck.Query(-1) + if cf1.Num <= cf.Num { + t.Fatalf("Move should increase Config.Num") + } + } + } + } + cf2 := ck.Query(-1) + for i := 0; i < NShards; i++ { + if i < NShards/2 { + if cf2.Shards[i] != gid3 { + t.Fatalf("expected shard %v on gid %v actually %v", + i, gid3, cf2.Shards[i]) + } + } else { + if cf2.Shards[i] != gid4 { + t.Fatalf("expected shard %v on gid %v actually %v", + i, gid4, cf2.Shards[i]) + } + } + } + ck.Leave([]int{gid3}) + ck.Leave([]int{gid4}) + fmt.Printf(" ... Passed\n") + }) + + const npara = 10 + t.Run("ConcurrentLeaveJoin", func(t *testing.T) { + fmt.Printf("Test: Concurrent leave/join ...\n") + + var cka [npara]*Clerk + for i := 0; i < len(cka); i++ { + cka[i] = cfg.makeClient(cfg.All()) + } + gids := make([]int, npara) + ch := make(chan bool) + for xi := 0; xi < npara; xi++ { + gids[xi] = int((xi * 10) + 100) + go func(i int) { + defer func() { ch <- true }() + var gid int = gids[i] + var sid1 = fmt.Sprintf("s%da", gid) + var sid2 = fmt.Sprintf("s%db", gid) + cka[i].Join(map[int][]string{gid + 1000: []string{sid1}}) + cka[i].Join(map[int][]string{gid: []string{sid2}}) + cka[i].Leave([]int{gid + 1000}) + }(xi) + } + for i := 0; i < npara; i++ { + <-ch + } + check(t, gids, ck) + + fmt.Printf(" ... 
Passed\n") + }) + + var c1, c2 Config + t.Run("MinimalTransfer", func(t *testing.T) { + fmt.Printf("Test: Minimal transfers after joins ...\n") + + c1 = ck.Query(-1) + for i := 0; i < 5; i++ { + var gid = int(npara + 1 + i) + ck.Join(map[int][]string{gid: []string{ + fmt.Sprintf("%da", gid), + fmt.Sprintf("%db", gid), + fmt.Sprintf("%db", gid)}}) + } + c2 = ck.Query(-1) + for i := int(1); i <= npara; i++ { + for j := 0; j < len(c1.Shards); j++ { + if c2.Shards[j] == i { + if c1.Shards[j] != i { + t.Fatalf("non-minimal transfer after Join()s") + } + } + } + } + + fmt.Printf(" ... Passed\n") + }) + + t.Run("MinimalTransferAfterLeave", func(t *testing.T) { + fmt.Printf("Test: Minimal transfers after leaves ...\n") + + for i := 0; i < 5; i++ { + ck.Leave([]int{int(npara + 1 + i)}) + } + c3 := ck.Query(-1) + for i := int(1); i <= npara; i++ { + for j := 0; j < len(c1.Shards); j++ { + if c2.Shards[j] == i { + if c3.Shards[j] != i { + t.Fatalf("non-minimal transfer after Leave()s") + } + } + } + } + + fmt.Printf(" ... Passed\n") + }) +} + +func TestMinimalAgain(t *testing.T) { + const nservers = 3 + cfg := make_config(t, nservers, false) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + + fmt.Printf("Test: minimal movement again ...\n") + + ck.Join(map[int][]string{1: []string{"x", "y", "z"}}) + + ck.Join(map[int][]string{2: []string{"a", "b", "c"}}) + + c1 := ck.Query(-1) + + ck.Join(map[int][]string{3: []string{"d", "e", "f"}}) + + c2 := ck.Query(-1) + + // fmt.Printf("after join(3):\n%v\n%v\n", c1.Shards, c2.Shards) + + // any shard that wasn't moved to gid 3 should + // stay where it was. 
+ for i := 0; i < NShards; i++ { + if c2.Shards[i] != 3 { + if c1.Shards[i] != c2.Shards[i] { + t.Fatalf("shard %v moved from gid %v to gid %v, but shouldn't have\n", i, c1.Shards[i], c2.Shards[i]) + } + } + } + + // a maximum of NShards/3 + 1 shards should move + changed := 0 + for i := 0; i < NShards; i++ { + if c1.Shards[i] != c2.Shards[i] { + changed += 1 + } + } + if changed > (NShards/3)+1 { + t.Fatalf("too many shards (%v) moved after join\n", changed) + } + + // now gid 1 leaves. + ck.Leave([]int{1}) + c3 := ck.Query(-1) + // fmt.Printf("after leave(1):\n%v\n%v\n", c2.Shards, c3.Shards) + + // any shard that wasn't in gid 1 should + // stay where it was. + for i := 0; i < NShards; i++ { + if c2.Shards[i] != 1 { + if c2.Shards[i] != c3.Shards[i] { + t.Fatalf("shard %v moved from gid %v to gid %v, but shouldn't have\n", i, c2.Shards[i], c3.Shards[i]) + } + } + } + + // a maximum of NShards/3 + 1 shards should move + changed = 0 + for i := 0; i < NShards; i++ { + if c2.Shards[i] != c3.Shards[i] { + changed += 1 + } + } + if changed > (NShards/3)+1 { + t.Fatalf("too many shards (%v) moved after leave\n", changed) + } + + fmt.Printf(" ... 
Passed\n") +} + +func TestMulti(t *testing.T) { + const nservers = 3 + cfg := make_config(t, nservers, false) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.All()) + + t.Run("LeaveJoin", func(t *testing.T) { + fmt.Printf("Test: Multi-group join/leave ...\n") + + cfa := make([]Config, 6) + cfa[0] = ck.Query(-1) + + check(t, []int{}, ck) + + var gid1 int = 1 + var gid2 int = 2 + ck.Join(map[int][]string{ + gid1: []string{"x", "y", "z"}, + gid2: []string{"a", "b", "c"}, + }) + check(t, []int{gid1, gid2}, ck) + cfa[1] = ck.Query(-1) + + var gid3 int = 3 + ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}}) + check(t, []int{gid1, gid2, gid3}, ck) + cfa[2] = ck.Query(-1) + + cfx := ck.Query(-1) + sa1 := cfx.Groups[gid1] + if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { + t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) + } + sa2 := cfx.Groups[gid2] + if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { + t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) + } + sa3 := cfx.Groups[gid3] + if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" { + t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3) + } + + ck.Leave([]int{gid1, gid3}) + check(t, []int{gid2}, ck) + cfa[3] = ck.Query(-1) + + cfx = ck.Query(-1) + sa2 = cfx.Groups[gid2] + if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { + t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) + } + + ck.Leave([]int{gid2}) + + fmt.Printf(" ... 
Passed\n") + }) + + const npara = 10 + t.Run("ConcurrentLeaveJoin", func(t *testing.T) { + fmt.Printf("Test: Concurrent multi leave/join ...\n") + + var cka [npara]*Clerk + for i := 0; i < len(cka); i++ { + cka[i] = cfg.makeClient(cfg.All()) + } + gids := make([]int, npara) + var wg sync.WaitGroup + for xi := 0; xi < npara; xi++ { + wg.Add(1) + gids[xi] = int(xi + 1000) + go func(i int) { + defer wg.Done() + var gid int = gids[i] + cka[i].Join(map[int][]string{ + gid: []string{ + fmt.Sprintf("%da", gid), + fmt.Sprintf("%db", gid), + fmt.Sprintf("%dc", gid)}, + gid + 1000: []string{fmt.Sprintf("%da", gid+1000)}, + gid + 2000: []string{fmt.Sprintf("%da", gid+2000)}, + }) + cka[i].Leave([]int{gid + 1000, gid + 2000}) + }(xi) + } + wg.Wait() + check(t, gids, ck) + + fmt.Printf(" ... Passed\n") + }) + + var c1, c2 Config + t.Run("MinimalTransfer", func(t *testing.T) { + fmt.Printf("Test: Minimal transfers after multijoins ...\n") + + c1 = ck.Query(-1) + m := make(map[int][]string) + for i := 0; i < 5; i++ { + var gid = npara + 1 + i + m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)} + } + ck.Join(m) + c2 = ck.Query(-1) + for i := int(1); i <= npara; i++ { + for j := 0; j < len(c1.Shards); j++ { + if c2.Shards[j] == i { + if c1.Shards[j] != i { + t.Fatalf("non-minimal transfer after Join()s") + } + } + } + } + + fmt.Printf(" ... Passed\n") + }) + + t.Run("MinimalTransferAfterLeave", func(t *testing.T) { + fmt.Printf("Test: Minimal transfers after multileaves ...\n") + + var l []int + for i := 0; i < 5; i++ { + l = append(l, npara+1+i) + } + ck.Leave(l) + c3 := ck.Query(-1) + for i := int(1); i <= npara; i++ { + for j := 0; j < len(c1.Shards); j++ { + if c2.Shards[j] == i { + if c3.Shards[j] != i { + t.Fatalf("non-minimal transfer after Leave()s") + } + } + } + } + + fmt.Printf(" ... 
Passed\n") + }) + + t.Run("AgreeConfig", func(t *testing.T) { + fmt.Printf("Test: Check Same config on servers ...\n") + + isLeader, leader := cfg.Leader() + if !isLeader { + t.Fatalf("Leader not found") + } + c := ck.Query(-1) // Config leader claims + + cfg.ShutdownServer(leader) + + attempts := 0 + for isLeader, leader = cfg.Leader(); isLeader; time.Sleep(1 * time.Second) { + if attempts++; attempts >= 3 { + t.Fatalf("Leader not found") + } + } + + c1 = ck.Query(-1) + check_same_config(t, c, c1) + + fmt.Printf(" ... Passed\n") + }) +} diff --git a/shardkv/client.go b/shardkv/client.go new file mode 100644 index 0000000..27e360c --- /dev/null +++ b/shardkv/client.go @@ -0,0 +1,129 @@ +package shardkv + +// +// client code to talk to a sharded key/value service. +// +// the client first talks to the shardctrler to find out +// the assignment of shards (keys) to groups, and then +// talks to the group that holds the key's shard. +// + +import "6.5840/labrpc" +import "crypto/rand" +import "math/big" +import "6.5840/shardctrler" +import "time" + +// which shard is a key in? +// please use this function, +// and please do not change it. +func key2shard(key string) int { + shard := 0 + if len(key) > 0 { + shard = int(key[0]) + } + shard %= shardctrler.NShards + return shard +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +type Clerk struct { + sm *shardctrler.Clerk + config shardctrler.Config + make_end func(string) *labrpc.ClientEnd + // You will have to modify this struct. +} + +// the tester calls MakeClerk. +// +// ctrlers[] is needed to call shardctrler.MakeClerk(). +// +// make_end(servername) turns a server name from a +// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can +// send RPCs. 
+func MakeClerk(ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.sm = shardctrler.MakeClerk(ctrlers) + ck.make_end = make_end + // You'll have to add code here. + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// You will have to modify this function. +func (ck *Clerk) Get(key string) string { + args := GetArgs{} + args.Key = key + + for { + shard := key2shard(key) + gid := ck.config.Shards[shard] + if servers, ok := ck.config.Groups[gid]; ok { + // try each server for the shard. + for si := 0; si < len(servers); si++ { + srv := ck.make_end(servers[si]) + var reply GetReply + ok := srv.Call("ShardKV.Get", &args, &reply) + if ok && (reply.Err == OK || reply.Err == ErrNoKey) { + return reply.Value + } + if ok && (reply.Err == ErrWrongGroup) { + break + } + // ... not ok, or ErrWrongLeader + } + } + time.Sleep(100 * time.Millisecond) + // ask controller for the latest configuration. + ck.config = ck.sm.Query(-1) + } + + return "" +} + +// shared by Put and Append. +// You will have to modify this function. +func (ck *Clerk) PutAppend(key string, value string, op string) { + args := PutAppendArgs{} + args.Key = key + args.Value = value + args.Op = op + + + for { + shard := key2shard(key) + gid := ck.config.Shards[shard] + if servers, ok := ck.config.Groups[gid]; ok { + for si := 0; si < len(servers); si++ { + srv := ck.make_end(servers[si]) + var reply PutAppendReply + ok := srv.Call("ShardKV.PutAppend", &args, &reply) + if ok && reply.Err == OK { + return + } + if ok && reply.Err == ErrWrongGroup { + break + } + // ... not ok, or ErrWrongLeader + } + } + time.Sleep(100 * time.Millisecond) + // ask controller for the latest configuration. 
+ ck.config = ck.sm.Query(-1) + } +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} +func (ck *Clerk) Append(key string, value string) { + ck.PutAppend(key, value, "Append") +} diff --git a/shardkv/common.go b/shardkv/common.go new file mode 100644 index 0000000..e183a39 --- /dev/null +++ b/shardkv/common.go @@ -0,0 +1,44 @@ +package shardkv + +// +// Sharded key/value server. +// Lots of replica groups, each running Raft. +// Shardctrler decides which group serves each shard. +// Shardctrler may change shard assignment from time to time. +// +// You will have to modify these definitions. +// + +const ( + OK = "OK" + ErrNoKey = "ErrNoKey" + ErrWrongGroup = "ErrWrongGroup" + ErrWrongLeader = "ErrWrongLeader" +) + +type Err string + +// Put or Append +type PutAppendArgs struct { + // You'll have to add definitions here. + Key string + Value string + Op string // "Put" or "Append" + // You'll have to add definitions here. + // Field names must start with capital letters, + // otherwise RPC will break. +} + +type PutAppendReply struct { + Err Err +} + +type GetArgs struct { + Key string + // You'll have to add definitions here. 
+} + +type GetReply struct { + Err Err + Value string +} diff --git a/shardkv/config.go b/shardkv/config.go new file mode 100644 index 0000000..63052e4 --- /dev/null +++ b/shardkv/config.go @@ -0,0 +1,402 @@ +package shardkv + +import "6.5840/shardctrler" +import "6.5840/labrpc" +import "testing" +import "os" + +// import "log" +import crand "crypto/rand" +import "math/big" +import "math/rand" +import "encoding/base64" +import "sync" +import "runtime" +import "6.5840/raft" +import "strconv" +import "fmt" +import "time" + +func randstring(n int) string { + b := make([]byte, 2*n) + crand.Read(b) + s := base64.URLEncoding.EncodeToString(b) + return s[0:n] +} + +func makeSeed() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := crand.Int(crand.Reader, max) + x := bigx.Int64() + return x +} + +// Randomize server handles +func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { + sa := make([]*labrpc.ClientEnd, len(kvh)) + copy(sa, kvh) + for i := range sa { + j := rand.Intn(i + 1) + sa[i], sa[j] = sa[j], sa[i] + } + return sa +} + +type group struct { + gid int + servers []*ShardKV + saved []*raft.Persister + endnames [][]string + mendnames [][]string +} + +// a replicated shardctrler service. 
+type ctrler struct { + n int + servers []*shardctrler.ShardCtrler + names []string + ck *shardctrler.Clerk +} + +type config struct { + mu sync.Mutex + t *testing.T + net *labrpc.Network + start time.Time // time at which make_config() was called + + ctl *ctrler // shardctrler service + + ngroups int + n int // servers per k/v group + groups []*group + + clerks map[*Clerk][]string + nextClientId int + maxraftstate int +} + +func (cfg *config) checkTimeout() { + // enforce a two minute real-time limit on each test + if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { + cfg.t.Fatal("test took longer than 120 seconds") + } +} + +func (cfg *config) cleanup() { + for gi := 0; gi < cfg.ngroups; gi++ { + cfg.ShutdownGroup(gi) + } + for i := 0; i < cfg.ctl.n; i++ { + cfg.ctl.servers[i].Kill() + } + cfg.net.Cleanup() + cfg.checkTimeout() +} + +// check that no server's log is too big. +func (cfg *config) checklogs() { + for gi := 0; gi < cfg.ngroups; gi++ { + for i := 0; i < cfg.n; i++ { + raft := cfg.groups[gi].saved[i].RaftStateSize() + snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) + if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate { + cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v", + raft, cfg.maxraftstate) + } + if cfg.maxraftstate < 0 && snap > 0 { + cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!") + } + } + } +} + +// controller server name for labrpc. +func (ctl *ctrler) ctrlername(i int) string { + return ctl.names[i] +} + +// shard server name for labrpc. +// i'th server of group gid. +func (cfg *config) servername(gid int, i int) string { + return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i) +} + +func (cfg *config) makeClient(ctl *ctrler) *Clerk { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + // ClientEnds to talk to controller service. 
+ ends := make([]*labrpc.ClientEnd, ctl.n) + endnames := make([]string, cfg.n) + for j := 0; j < ctl.n; j++ { + endnames[j] = randstring(20) + ends[j] = cfg.net.MakeEnd(endnames[j]) + cfg.net.Connect(endnames[j], ctl.ctrlername(j)) + cfg.net.Enable(endnames[j], true) + } + + ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd { + name := randstring(20) + end := cfg.net.MakeEnd(name) + cfg.net.Connect(name, servername) + cfg.net.Enable(name, true) + return end + }) + cfg.clerks[ck] = endnames + cfg.nextClientId++ + return ck +} + +func (cfg *config) deleteClient(ck *Clerk) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + v := cfg.clerks[ck] + for i := 0; i < len(v); i++ { + os.Remove(v[i]) + } + delete(cfg.clerks, ck) +} + +// Shutdown i'th server of gi'th group, by isolating it +func (cfg *config) ShutdownServer(gi int, i int) { + cfg.mu.Lock() + defer cfg.mu.Unlock() + + gg := cfg.groups[gi] + + // prevent this server from sending + for j := 0; j < len(gg.servers); j++ { + name := gg.endnames[i][j] + cfg.net.Enable(name, false) + } + for j := 0; j < len(gg.mendnames[i]); j++ { + name := gg.mendnames[i][j] + cfg.net.Enable(name, false) + } + + // disable client connections to the server. + // it's important to do this before creating + // the new Persister in saved[i], to avoid + // the possibility of the server returning a + // positive reply to an Append but persisting + // the result in the superseded Persister. + cfg.net.DeleteServer(cfg.servername(gg.gid, i)) + + // a fresh persister, in case old instance + // continues to update the Persister. + // but copy old persister's content so that we always + // pass Make() the last persisted state. 
+ if gg.saved[i] != nil { + gg.saved[i] = gg.saved[i].Copy() + } + + kv := gg.servers[i] + if kv != nil { + cfg.mu.Unlock() + kv.Kill() + cfg.mu.Lock() + gg.servers[i] = nil + } +} + +func (cfg *config) ShutdownGroup(gi int) { + for i := 0; i < cfg.n; i++ { + cfg.ShutdownServer(gi, i) + } +} + +// start i'th server in gi'th group +func (cfg *config) StartServer(gi int, i int) { + cfg.mu.Lock() + + gg := cfg.groups[gi] + + // a fresh set of outgoing ClientEnd names + // to talk to other servers in this group. + gg.endnames[i] = make([]string, cfg.n) + for j := 0; j < cfg.n; j++ { + gg.endnames[i][j] = randstring(20) + } + + // and the connections to other servers in this group. + ends := make([]*labrpc.ClientEnd, cfg.n) + for j := 0; j < cfg.n; j++ { + ends[j] = cfg.net.MakeEnd(gg.endnames[i][j]) + cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j)) + cfg.net.Enable(gg.endnames[i][j], true) + } + + // ends to talk to shardctrler service + mends := make([]*labrpc.ClientEnd, cfg.ctl.n) + gg.mendnames[i] = make([]string, cfg.ctl.n) + for j := 0; j < cfg.ctl.n; j++ { + gg.mendnames[i][j] = randstring(20) + mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j]) + cfg.net.Connect(gg.mendnames[i][j], cfg.ctl.ctrlername(j)) + cfg.net.Enable(gg.mendnames[i][j], true) + } + + // a fresh persister, so old instance doesn't overwrite + // new instance's persisted state. + // give the fresh persister a copy of the old persister's + // state, so that the spec is that we pass StartKVServer() + // the last persisted state. 
+ if gg.saved[i] != nil { + gg.saved[i] = gg.saved[i].Copy() + } else { + gg.saved[i] = raft.MakePersister() + } + cfg.mu.Unlock() + + gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate, + gg.gid, mends, + func(servername string) *labrpc.ClientEnd { + name := randstring(20) + end := cfg.net.MakeEnd(name) + cfg.net.Connect(name, servername) + cfg.net.Enable(name, true) + return end + }) + + kvsvc := labrpc.MakeService(gg.servers[i]) + rfsvc := labrpc.MakeService(gg.servers[i].rf) + srv := labrpc.MakeServer() + srv.AddService(kvsvc) + srv.AddService(rfsvc) + cfg.net.AddServer(cfg.servername(gg.gid, i), srv) +} + +func (cfg *config) StartGroup(gi int) { + for i := 0; i < cfg.n; i++ { + cfg.StartServer(gi, i) + } +} + +func (cfg *config) StartCtrlerServer(ctl *ctrler, i int) { + // ClientEnds to talk to other controller replicas. + ends := make([]*labrpc.ClientEnd, ctl.n) + for j := 0; j < ctl.n; j++ { + endname := randstring(20) + ends[j] = cfg.net.MakeEnd(endname) + cfg.net.Connect(endname, ctl.ctrlername(j)) + cfg.net.Enable(endname, true) + } + + p := raft.MakePersister() + + ctl.servers[i] = shardctrler.StartServer(ends, i, p) + + msvc := labrpc.MakeService(ctl.servers[i]) + rfsvc := labrpc.MakeService(ctl.servers[i].Raft()) + srv := labrpc.MakeServer() + srv.AddService(msvc) + srv.AddService(rfsvc) + cfg.net.AddServer(ctl.ctrlername(i), srv) +} + +func (cfg *config) ctrlerclerk(ctl *ctrler) *shardctrler.Clerk { + // ClientEnds to talk to ctrler service. + ends := make([]*labrpc.ClientEnd, ctl.n) + for j := 0; j < ctl.n; j++ { + name := randstring(20) + ends[j] = cfg.net.MakeEnd(name) + cfg.net.Connect(name, ctl.ctrlername(j)) + cfg.net.Enable(name, true) + } + + return shardctrler.MakeClerk(ends) +} + +// tell the shardctrler that a group is joining. 
+func (cfg *config) join(gi int) { + cfg.joinm([]int{gi}, cfg.ctl) +} + +func (cfg *config) ctljoin(gi int, ctl *ctrler) { + cfg.joinm([]int{gi}, ctl) +} + +func (cfg *config) joinm(gis []int, ctl *ctrler) { + m := make(map[int][]string, len(gis)) + for _, g := range gis { + gid := cfg.groups[g].gid + servernames := make([]string, cfg.n) + for i := 0; i < cfg.n; i++ { + servernames[i] = cfg.servername(gid, i) + } + m[gid] = servernames + } + ctl.ck.Join(m) +} + +// tell the shardctrler that a group is leaving. +func (cfg *config) leave(gi int) { + cfg.leavem([]int{gi}) +} + +func (cfg *config) leavem(gis []int) { + gids := make([]int, 0, len(gis)) + for _, g := range gis { + gids = append(gids, cfg.groups[g].gid) + } + cfg.ctl.ck.Leave(gids) +} + +func (cfg *config) StartCtrlerService() *ctrler { + ctl := &ctrler{} + ctl.n = 3 + ctl.servers = make([]*shardctrler.ShardCtrler, ctl.n) + ctl.names = make([]string, ctl.n) + for i := 0; i < ctl.n; i++ { + ctl.names[i] = "ctlr-" + randstring(20) + } + for i := 0; i < ctl.n; i++ { + cfg.StartCtrlerServer(ctl, i) + } + ctl.ck = cfg.ctrlerclerk(ctl) + return ctl +} + +var ncpu_once sync.Once + +func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { + ncpu_once.Do(func() { + if runtime.NumCPU() < 2 { + fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") + } + rand.Seed(makeSeed()) + }) + runtime.GOMAXPROCS(4) + cfg := &config{} + cfg.t = t + cfg.maxraftstate = maxraftstate + cfg.net = labrpc.MakeNetwork() + cfg.start = time.Now() + + // controller and its clerk + cfg.ctl = cfg.StartCtrlerService() + + cfg.ngroups = 3 + cfg.groups = make([]*group, cfg.ngroups) + cfg.n = n + for gi := 0; gi < cfg.ngroups; gi++ { + gg := &group{} + cfg.groups[gi] = gg + gg.gid = 100 + gi + gg.servers = make([]*ShardKV, cfg.n) + gg.saved = make([]*raft.Persister, cfg.n) + gg.endnames = make([][]string, cfg.n) + gg.mendnames = make([][]string, cfg.ctl.n) + for i := 0; i < cfg.n; i++ { + 
cfg.StartServer(gi, i) + } + } + + cfg.clerks = make(map[*Clerk][]string) + cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid + + cfg.net.Reliable(!unreliable) + + return cfg +} diff --git a/shardkv/server.go b/shardkv/server.go new file mode 100644 index 0000000..fe05812 --- /dev/null +++ b/shardkv/server.go @@ -0,0 +1,97 @@ +package shardkv + + +import "6.5840/labrpc" +import "6.5840/raft" +import "sync" +import "6.5840/labgob" + + + +type Op struct { + // Your definitions here. + // Field names must start with capital letters, + // otherwise RPC will break. +} + +type ShardKV struct { + mu sync.Mutex + me int + rf *raft.Raft + applyCh chan raft.ApplyMsg + make_end func(string) *labrpc.ClientEnd + gid int + ctrlers []*labrpc.ClientEnd + maxraftstate int // snapshot if log grows this big + + // Your definitions here. +} + + +func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { + // Your code here. +} + +func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { + // Your code here. +} + +// the tester calls Kill() when a ShardKV instance won't +// be needed again. you are not required to do anything +// in Kill(), but it might be convenient to (for example) +// turn off debug output from this instance. +func (kv *ShardKV) Kill() { + kv.rf.Kill() + // Your code here, if desired. +} + + +// servers[] contains the ports of the servers in this group. +// +// me is the index of the current server in servers[]. +// +// the k/v server should store snapshots through the underlying Raft +// implementation, which should call persister.SaveStateAndSnapshot() to +// atomically save the Raft state along with the snapshot. +// +// the k/v server should snapshot when Raft's saved state exceeds +// maxraftstate bytes, in order to allow Raft to garbage-collect its +// log. if maxraftstate is -1, you don't need to snapshot. +// +// gid is this group's GID, for interacting with the shardctrler. 
+// +// pass ctrlers[] to shardctrler.MakeClerk() so you can send +// RPCs to the shardctrler. +// +// make_end(servername) turns a server name from a +// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can +// send RPCs. You'll need this to send RPCs to other groups. +// +// look at client.go for examples of how to use ctrlers[] +// and make_end() to send RPCs to the group owning a specific shard. +// +// StartServer() must return quickly, so it should start goroutines +// for any long-running work. +func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV { + // call labgob.Register on structures you want + // Go's RPC library to marshall/unmarshall. + labgob.Register(Op{}) + + kv := new(ShardKV) + kv.me = me + kv.maxraftstate = maxraftstate + kv.make_end = make_end + kv.gid = gid + kv.ctrlers = ctrlers + + // Your initialization code here. + + // Use something like this to talk to the shardctrler: + // kv.mck = shardctrler.MakeClerk(kv.ctrlers) + + kv.applyCh = make(chan raft.ApplyMsg) + kv.rf = raft.Make(servers, me, persister, kv.applyCh) + + + return kv +} diff --git a/shardkv/test_test.go b/shardkv/test_test.go new file mode 100644 index 0000000..83f2e0f --- /dev/null +++ b/shardkv/test_test.go @@ -0,0 +1,1020 @@ +package shardkv + +import "6.5840/porcupine" +import "6.5840/models" +import "testing" +import "strconv" +import "time" +import "fmt" +import "sync/atomic" +import "sync" +import "math/rand" +import "io/ioutil" + +const linearizabilityCheckTimeout = 1 * time.Second + +func check(t *testing.T, ck *Clerk, key string, value string) { + v := ck.Get(key) + if v != value { + t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v) + } +} + +// test static 2-way sharding, without shard movement. 
+func TestStaticShards5A(t *testing.T) { + fmt.Printf("Test (5A): static shards ...\n") + + cfg := make_config(t, 3, false, -1) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + cfg.join(1) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(20) + ck.Put(ka[i], va[i]) + } + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + // make sure that the data really is sharded by + // shutting down one shard and checking that some + // Get()s don't succeed. + cfg.ShutdownGroup(1) + cfg.checklogs() // forbid snapshots + + ch := make(chan string) + for xi := 0; xi < n; xi++ { + ck1 := cfg.makeClient(cfg.ctl) // only one call allowed per client + go func(i int) { + v := ck1.Get(ka[i]) + if v != va[i] { + ch <- fmt.Sprintf("Get(%v): expected:\n%v\nreceived:\n%v", ka[i], va[i], v) + } else { + ch <- "" + } + }(xi) + } + + // wait a bit, only about half the Gets should succeed. + ndone := 0 + done := false + for done == false { + select { + case err := <-ch: + if err != "" { + t.Fatal(err) + } + ndone += 1 + case <-time.After(time.Second * 2): + done = true + break + } + } + + if ndone != n/2 { + t.Fatalf("expected %v completions with one shard dead; got %v\n", + n/2, ndone) + } + + // bring the crashed shard/group back to life. + cfg.StartGroup(1) + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... Passed\n") +} + +// do servers reject operations on shards for +// which they are not responsible? 
+func TestRejection5A(t *testing.T) { + fmt.Printf("Test (5A): rejection ...\n") + + cfg := make_config(t, 3, false, -1) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + cfg.join(1) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(20) + ck.Put(ka[i], va[i]) + } + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + // now create a separate controller that thinks + // there is only one group, handling all shards. + // the k/v servers talk to the original controller, + // so the k/v servers still think the shards are + // divided between the two k/v groups. + ctl1 := cfg.StartCtrlerService() + cfg.ctljoin(0, ctl1) + + // ask clients that talk to ctl1 to fetch keys. + // they'll send all requests to a single k/v group. + // half the requests should be rejected due to + // being sent to the a k/v group that doesn't think it + // is handling the shard. + ch := make(chan string) + for xi := 0; xi < n; xi++ { + ck1 := cfg.makeClient(ctl1) + go func(i int) { + v := ck1.Get(ka[i]) + if v != va[i] { + if v == "" { + // if v is "", it probably means that a k/v group + // returned a value for a key even though that + // key's shard wasn't assigned to to the group. + ch <- fmt.Sprintf("Get(%v): returned a value, but server should have rejected the request due to wrong shard", ka[i]) + } else { + ch <- fmt.Sprintf("Get(%v): expected:\n%v\nreceived:\n%v", ka[i], va[i], v) + } + } else { + ch <- "" + } + }(xi) + } + + // wait a bit, only about half the Gets should succeed. + ndone := 0 + done := false + for done == false { + select { + case err := <-ch: + if err != "" { + t.Fatal(err) + } + ndone += 1 + case <-time.After(time.Second * 2): + done = true + break + } + } + + if ndone != n/2 { + t.Fatalf("expected %v completions; got %v\n", n/2, ndone) + } + + fmt.Printf(" ... 
Passed\n") +} + +func TestJoinLeave5B(t *testing.T) { + fmt.Printf("Test (5B): join then leave ...\n") + + cfg := make_config(t, 3, false, -1) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(5) + ck.Put(ka[i], va[i]) + } + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + cfg.join(1) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(5) + ck.Append(ka[i], x) + va[i] += x + } + + cfg.leave(0) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(5) + ck.Append(ka[i], x) + va[i] += x + } + + // allow time for shards to transfer. + time.Sleep(1 * time.Second) + + cfg.checklogs() + cfg.ShutdownGroup(0) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... Passed\n") +} + +func TestSnapshot5B(t *testing.T) { + fmt.Printf("Test (5B): snapshots, join, and leave ...\n") + + cfg := make_config(t, 3, false, 1000) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 30 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(20) + ck.Put(ka[i], va[i]) + } + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + cfg.join(1) + cfg.join(2) + cfg.leave(0) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(20) + ck.Append(ka[i], x) + va[i] += x + } + + cfg.leave(1) + cfg.join(0) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(20) + ck.Append(ka[i], x) + va[i] += x + } + + time.Sleep(1 * time.Second) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + time.Sleep(1 * time.Second) + + cfg.checklogs() + + cfg.ShutdownGroup(0) + cfg.ShutdownGroup(1) + cfg.ShutdownGroup(2) + + cfg.StartGroup(0) + cfg.StartGroup(1) + 
cfg.StartGroup(2) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... Passed\n") +} + +func TestMissChange5B(t *testing.T) { + fmt.Printf("Test (5B): servers miss configuration changes...\n") + + cfg := make_config(t, 3, false, 1000) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(20) + ck.Put(ka[i], va[i]) + } + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + cfg.join(1) + + cfg.ShutdownServer(0, 0) + cfg.ShutdownServer(1, 0) + cfg.ShutdownServer(2, 0) + + cfg.join(2) + cfg.leave(1) + cfg.leave(0) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(20) + ck.Append(ka[i], x) + va[i] += x + } + + cfg.join(1) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(20) + ck.Append(ka[i], x) + va[i] += x + } + + cfg.StartServer(0, 0) + cfg.StartServer(1, 0) + cfg.StartServer(2, 0) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(20) + ck.Append(ka[i], x) + va[i] += x + } + + time.Sleep(2 * time.Second) + + cfg.ShutdownServer(0, 1) + cfg.ShutdownServer(1, 1) + cfg.ShutdownServer(2, 1) + + cfg.join(0) + cfg.leave(2) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + x := randstring(20) + ck.Append(ka[i], x) + va[i] += x + } + + cfg.StartServer(0, 1) + cfg.StartServer(1, 1) + cfg.StartServer(2, 1) + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... 
Passed\n") +} + +func TestConcurrent1_5B(t *testing.T) { + fmt.Printf("Test (5B): concurrent puts and configuration changes...\n") + + cfg := make_config(t, 3, false, 100) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(5) + ck.Put(ka[i], va[i]) + } + + var done int32 + ch := make(chan bool) + + ff := func(i int) { + defer func() { ch <- true }() + ck1 := cfg.makeClient(cfg.ctl) + for atomic.LoadInt32(&done) == 0 { + x := randstring(5) + ck1.Append(ka[i], x) + va[i] += x + time.Sleep(10 * time.Millisecond) + } + } + + for i := 0; i < n; i++ { + go ff(i) + } + + time.Sleep(150 * time.Millisecond) + cfg.join(1) + time.Sleep(500 * time.Millisecond) + cfg.join(2) + time.Sleep(500 * time.Millisecond) + cfg.leave(0) + + cfg.ShutdownGroup(0) + time.Sleep(100 * time.Millisecond) + cfg.ShutdownGroup(1) + time.Sleep(100 * time.Millisecond) + cfg.ShutdownGroup(2) + + cfg.leave(2) + + time.Sleep(100 * time.Millisecond) + cfg.StartGroup(0) + cfg.StartGroup(1) + cfg.StartGroup(2) + + time.Sleep(100 * time.Millisecond) + cfg.join(0) + cfg.leave(1) + time.Sleep(500 * time.Millisecond) + cfg.join(1) + + time.Sleep(1 * time.Second) + + atomic.StoreInt32(&done, 1) + for i := 0; i < n; i++ { + <-ch + } + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... Passed\n") +} + +// this tests the various sources from which a re-starting +// group might need to fetch shard contents. 
+func TestConcurrent2_5B(t *testing.T) { + fmt.Printf("Test (5B): more concurrent puts and configuration changes...\n") + + cfg := make_config(t, 3, false, -1) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(1) + cfg.join(0) + cfg.join(2) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(1) + ck.Put(ka[i], va[i]) + } + + var done int32 + ch := make(chan bool) + + ff := func(i int, ck1 *Clerk) { + defer func() { ch <- true }() + for atomic.LoadInt32(&done) == 0 { + x := randstring(1) + ck1.Append(ka[i], x) + va[i] += x + time.Sleep(50 * time.Millisecond) + } + } + + for i := 0; i < n; i++ { + ck1 := cfg.makeClient(cfg.ctl) + go ff(i, ck1) + } + + cfg.leave(0) + cfg.leave(2) + time.Sleep(3000 * time.Millisecond) + cfg.join(0) + cfg.join(2) + cfg.leave(1) + time.Sleep(3000 * time.Millisecond) + cfg.join(1) + cfg.leave(0) + cfg.leave(2) + time.Sleep(3000 * time.Millisecond) + + cfg.ShutdownGroup(1) + cfg.ShutdownGroup(2) + time.Sleep(1000 * time.Millisecond) + cfg.StartGroup(1) + cfg.StartGroup(2) + + time.Sleep(2 * time.Second) + + atomic.StoreInt32(&done, 1) + for i := 0; i < n; i++ { + <-ch + } + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... 
Passed\n") +} + +func TestConcurrent3_5B(t *testing.T) { + fmt.Printf("Test (5B): concurrent configuration change and restart...\n") + + cfg := make_config(t, 3, false, 300) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) + va[i] = randstring(1) + ck.Put(ka[i], va[i]) + } + + var done int32 + ch := make(chan bool) + + ff := func(i int, ck1 *Clerk) { + defer func() { ch <- true }() + for atomic.LoadInt32(&done) == 0 { + x := randstring(1) + ck1.Append(ka[i], x) + va[i] += x + } + } + + for i := 0; i < n; i++ { + ck1 := cfg.makeClient(cfg.ctl) + go ff(i, ck1) + } + + t0 := time.Now() + for time.Since(t0) < 12*time.Second { + cfg.join(2) + cfg.join(1) + time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond) + cfg.ShutdownGroup(0) + cfg.ShutdownGroup(1) + cfg.ShutdownGroup(2) + cfg.StartGroup(0) + cfg.StartGroup(1) + cfg.StartGroup(2) + + time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond) + cfg.leave(1) + cfg.leave(2) + time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond) + } + + time.Sleep(2 * time.Second) + + atomic.StoreInt32(&done, 1) + for i := 0; i < n; i++ { + <-ch + } + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... 
Passed\n") +} + +func TestUnreliable1_5B(t *testing.T) { + fmt.Printf("Test (5B): unreliable 1...\n") + + cfg := make_config(t, 3, true, 100) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(5) + ck.Put(ka[i], va[i]) + } + + cfg.join(1) + cfg.join(2) + cfg.leave(0) + + for ii := 0; ii < n*2; ii++ { + i := ii % n + check(t, ck, ka[i], va[i]) + x := randstring(5) + ck.Append(ka[i], x) + va[i] += x + } + + cfg.join(0) + cfg.leave(1) + + for ii := 0; ii < n*2; ii++ { + i := ii % n + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... Passed\n") +} + +func TestUnreliable2_5B(t *testing.T) { + fmt.Printf("Test (5B): unreliable 2...\n") + + cfg := make_config(t, 3, true, 100) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(5) + ck.Put(ka[i], va[i]) + } + + var done int32 + ch := make(chan bool) + + ff := func(i int) { + defer func() { ch <- true }() + ck1 := cfg.makeClient(cfg.ctl) + for atomic.LoadInt32(&done) == 0 { + x := randstring(5) + ck1.Append(ka[i], x) + va[i] += x + } + } + + for i := 0; i < n; i++ { + go ff(i) + } + + time.Sleep(150 * time.Millisecond) + cfg.join(1) + time.Sleep(500 * time.Millisecond) + cfg.join(2) + time.Sleep(500 * time.Millisecond) + cfg.leave(0) + time.Sleep(500 * time.Millisecond) + cfg.leave(1) + time.Sleep(500 * time.Millisecond) + cfg.join(1) + cfg.join(0) + + time.Sleep(2 * time.Second) + + atomic.StoreInt32(&done, 1) + cfg.net.Reliable(true) + for i := 0; i < n; i++ { + <-ch + } + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... 
Passed\n") +} + +func TestUnreliable3_5B(t *testing.T) { + fmt.Printf("Test (5B): unreliable 3...\n") + + cfg := make_config(t, 3, true, 100) + defer cfg.cleanup() + + begin := time.Now() + var operations []porcupine.Operation + var opMu sync.Mutex + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = randstring(5) + start := int64(time.Since(begin)) + ck.Put(ka[i], va[i]) + end := int64(time.Since(begin)) + inp := models.KvInput{Op: 1, Key: ka[i], Value: va[i]} + var out models.KvOutput + op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: 0} + operations = append(operations, op) + } + + var done int32 + ch := make(chan bool) + + ff := func(i int) { + defer func() { ch <- true }() + ck1 := cfg.makeClient(cfg.ctl) + for atomic.LoadInt32(&done) == 0 { + ki := rand.Int() % n + nv := randstring(5) + var inp models.KvInput + var out models.KvOutput + start := int64(time.Since(begin)) + if (rand.Int() % 1000) < 500 { + ck1.Append(ka[ki], nv) + inp = models.KvInput{Op: 2, Key: ka[ki], Value: nv} + } else if (rand.Int() % 1000) < 100 { + ck1.Put(ka[ki], nv) + inp = models.KvInput{Op: 1, Key: ka[ki], Value: nv} + } else { + v := ck1.Get(ka[ki]) + inp = models.KvInput{Op: 0, Key: ka[ki]} + out = models.KvOutput{Value: v} + } + end := int64(time.Since(begin)) + op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: i} + opMu.Lock() + operations = append(operations, op) + opMu.Unlock() + } + } + + for i := 0; i < n; i++ { + go ff(i) + } + + time.Sleep(150 * time.Millisecond) + cfg.join(1) + time.Sleep(500 * time.Millisecond) + cfg.join(2) + time.Sleep(500 * time.Millisecond) + cfg.leave(0) + time.Sleep(500 * time.Millisecond) + cfg.leave(1) + time.Sleep(500 * time.Millisecond) + cfg.join(1) + cfg.join(0) + + time.Sleep(2 * time.Second) + + atomic.StoreInt32(&done, 
1) + cfg.net.Reliable(true) + for i := 0; i < n; i++ { + <-ch + } + + res, info := porcupine.CheckOperationsVerbose(models.KvModel, operations, linearizabilityCheckTimeout) + if res == porcupine.Illegal { + file, err := ioutil.TempFile("", "*.html") + if err != nil { + fmt.Printf("info: failed to create temp file for visualization") + } else { + err = porcupine.Visualize(models.KvModel, info, file) + if err != nil { + fmt.Printf("info: failed to write history visualization to %s\n", file.Name()) + } else { + fmt.Printf("info: wrote history visualization to %s\n", file.Name()) + } + } + t.Fatal("history is not linearizable") + } else if res == porcupine.Unknown { + fmt.Println("info: linearizability check timed out, assuming history is ok") + } + + fmt.Printf(" ... Passed\n") +} + +// optional test to see whether servers are deleting +// shards for which they are no longer responsible. +func TestChallenge1Delete(t *testing.T) { + fmt.Printf("Test: shard deletion (challenge 1) ...\n") + + // "1" means force snapshot after every log entry. + cfg := make_config(t, 3, false, 1) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + cfg.join(0) + + // 30,000 bytes of total values. 
+ n := 30 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) + va[i] = randstring(1000) + ck.Put(ka[i], va[i]) + } + for i := 0; i < 3; i++ { + check(t, ck, ka[i], va[i]) + } + + for iters := 0; iters < 2; iters++ { + cfg.join(1) + cfg.leave(0) + cfg.join(2) + time.Sleep(3 * time.Second) + for i := 0; i < 3; i++ { + check(t, ck, ka[i], va[i]) + } + cfg.leave(1) + cfg.join(0) + cfg.leave(2) + time.Sleep(3 * time.Second) + for i := 0; i < 3; i++ { + check(t, ck, ka[i], va[i]) + } + } + + cfg.join(1) + cfg.join(2) + time.Sleep(1 * time.Second) + for i := 0; i < 3; i++ { + check(t, ck, ka[i], va[i]) + } + time.Sleep(1 * time.Second) + for i := 0; i < 3; i++ { + check(t, ck, ka[i], va[i]) + } + time.Sleep(1 * time.Second) + for i := 0; i < 3; i++ { + check(t, ck, ka[i], va[i]) + } + + total := 0 + for gi := 0; gi < cfg.ngroups; gi++ { + for i := 0; i < cfg.n; i++ { + raft := cfg.groups[gi].saved[i].RaftStateSize() + snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) + total += raft + snap + } + } + + // 27 keys should be stored once. + // 3 keys should also be stored in client dup tables. + // everything on 3 replicas. + // plus slop. + expected := 3 * (((n - 3) * 1000) + 2*3*1000 + 6000) + if total > expected { + t.Fatalf("snapshot + persisted Raft state are too big: %v > %v\n", total, expected) + } + + for i := 0; i < n; i++ { + check(t, ck, ka[i], va[i]) + } + + fmt.Printf(" ... 
Passed\n") +} + +// optional test to see whether servers can handle +// shards that are not affected by a config change +// while the config change is underway +func TestChallenge2Unaffected(t *testing.T) { + fmt.Printf("Test: unaffected shard access (challenge 2) ...\n") + + cfg := make_config(t, 3, true, 100) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + // JOIN 100 + cfg.join(0) + + // Do a bunch of puts to keys in all shards + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = "100" + ck.Put(ka[i], va[i]) + } + + // JOIN 101 + cfg.join(1) + + // QUERY to find shards now owned by 101 + c := cfg.ctl.ck.Query(-1) + owned := make(map[int]bool, n) + for s, gid := range c.Shards { + owned[s] = gid == cfg.groups[1].gid + } + + // Wait for migration to new config to complete, and for clients to + // start using this updated config. Gets to any key k such that + // owned[shard(k)] == true should now be served by group 101. + <-time.After(1 * time.Second) + for i := 0; i < n; i++ { + if owned[i] { + va[i] = "101" + ck.Put(ka[i], va[i]) + } + } + + // KILL 100 + cfg.ShutdownGroup(0) + + // LEAVE 100 + // 101 doesn't get a chance to migrate things previously owned by 100 + cfg.leave(0) + + // Wait to make sure clients see new config + <-time.After(1 * time.Second) + + // And finally: check that gets/puts for 101-owned keys still complete + for i := 0; i < n; i++ { + shard := int(ka[i][0]) % 10 + if owned[shard] { + check(t, ck, ka[i], va[i]) + ck.Put(ka[i], va[i]+"-1") + check(t, ck, ka[i], va[i]+"-1") + } + } + + fmt.Printf(" ... Passed\n") +} + +// optional test to see whether servers can handle operations on shards that +// have been received as a part of a config migration when the entire migration +// has not yet completed. 
+func TestChallenge2Partial(t *testing.T) { + fmt.Printf("Test: partial migration shard access (challenge 2) ...\n") + + cfg := make_config(t, 3, true, 100) + defer cfg.cleanup() + + ck := cfg.makeClient(cfg.ctl) + + // JOIN 100 + 101 + 102 + cfg.joinm([]int{0, 1, 2}, cfg.ctl) + + // Give the implementation some time to reconfigure + <-time.After(1 * time.Second) + + // Do a bunch of puts to keys in all shards + n := 10 + ka := make([]string, n) + va := make([]string, n) + for i := 0; i < n; i++ { + ka[i] = strconv.Itoa(i) // ensure multiple shards + va[i] = "100" + ck.Put(ka[i], va[i]) + } + + // QUERY to find shards owned by 102 + c := cfg.ctl.ck.Query(-1) + owned := make(map[int]bool, n) + for s, gid := range c.Shards { + owned[s] = gid == cfg.groups[2].gid + } + + // KILL 100 + cfg.ShutdownGroup(0) + + // LEAVE 100 + 102 + // 101 can get old shards from 102, but not from 100. 101 should start + // serving shards that used to belong to 102 as soon as possible + cfg.leavem([]int{0, 2}) + + // Give the implementation some time to start reconfiguration + // And to migrate 102 -> 101 + <-time.After(1 * time.Second) + + // And finally: check that gets/puts for 101-owned keys now complete + for i := 0; i < n; i++ { + shard := key2shard(ka[i]) + if owned[shard] { + check(t, ck, ka[i], va[i]) + ck.Put(ka[i], va[i]+"-2") + check(t, ck, ka[i], va[i]+"-2") + } + } + + fmt.Printf(" ... Passed\n") +}