403 lines
9.6 KiB
Go
403 lines
9.6 KiB
Go
package shardkv
|
|
|
|
import "6.5840/shardctrler"
|
|
import "6.5840/labrpc"
|
|
import "testing"
|
|
import "os"
|
|
|
|
// import "log"
|
|
import crand "crypto/rand"
|
|
import "math/big"
|
|
import "math/rand"
|
|
import "encoding/base64"
|
|
import "sync"
|
|
import "runtime"
|
|
import "6.5840/raft"
|
|
import "strconv"
|
|
import "fmt"
|
|
import "time"
|
|
|
|
// randstring returns a random string of exactly n characters taken from the
// URL-safe base64 alphabet. It is used to generate unique labrpc end names.
//
// It panics if the system random source cannot be read: silently continuing
// would produce an all-zero buffer and therefore identical "random" names.
func randstring(n int) string {
	// 2*n random bytes always encode to at least n base64 characters.
	b := make([]byte, 2*n)
	if _, err := crand.Read(b); err != nil {
		panic("randstring: crypto/rand read failed: " + err.Error())
	}
	s := base64.URLEncoding.EncodeToString(b)
	return s[0:n]
}
|
|
|
|
// makeSeed returns a cryptographically random seed in the range [0, 2^62),
// suitable for seeding math/rand.
//
// It panics with a descriptive message if the system random source fails;
// previously the error was discarded and the nil result was dereferenced.
func makeSeed() int64 {
	limit := big.NewInt(int64(1) << 62)
	bigx, err := crand.Int(crand.Reader, limit)
	if err != nil {
		panic("makeSeed: crypto/rand failed: " + err.Error())
	}
	return bigx.Int64()
}
|
|
|
|
// Randomize server handles
|
|
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
|
|
sa := make([]*labrpc.ClientEnd, len(kvh))
|
|
copy(sa, kvh)
|
|
for i := range sa {
|
|
j := rand.Intn(i + 1)
|
|
sa[i], sa[j] = sa[j], sa[i]
|
|
}
|
|
return sa
|
|
}
|
|
|
|
type group struct {
|
|
gid int
|
|
servers []*ShardKV
|
|
saved []*raft.Persister
|
|
endnames [][]string
|
|
mendnames [][]string
|
|
}
|
|
|
|
// a replicated shardctrler service.
|
|
type ctrler struct {
|
|
n int
|
|
servers []*shardctrler.ShardCtrler
|
|
names []string
|
|
ck *shardctrler.Clerk
|
|
}
|
|
|
|
type config struct {
|
|
mu sync.Mutex
|
|
t *testing.T
|
|
net *labrpc.Network
|
|
start time.Time // time at which make_config() was called
|
|
|
|
ctl *ctrler // shardctrler service
|
|
|
|
ngroups int
|
|
n int // servers per k/v group
|
|
groups []*group
|
|
|
|
clerks map[*Clerk][]string
|
|
nextClientId int
|
|
maxraftstate int
|
|
}
|
|
|
|
func (cfg *config) checkTimeout() {
|
|
// enforce a two minute real-time limit on each test
|
|
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
|
|
cfg.t.Fatal("test took longer than 120 seconds")
|
|
}
|
|
}
|
|
|
|
func (cfg *config) cleanup() {
|
|
for gi := 0; gi < cfg.ngroups; gi++ {
|
|
cfg.ShutdownGroup(gi)
|
|
}
|
|
for i := 0; i < cfg.ctl.n; i++ {
|
|
cfg.ctl.servers[i].Kill()
|
|
}
|
|
cfg.net.Cleanup()
|
|
cfg.checkTimeout()
|
|
}
|
|
|
|
// check that no server's log is too big.
|
|
func (cfg *config) checklogs() {
|
|
for gi := 0; gi < cfg.ngroups; gi++ {
|
|
for i := 0; i < cfg.n; i++ {
|
|
raft := cfg.groups[gi].saved[i].RaftStateSize()
|
|
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
|
|
if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate {
|
|
cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
|
|
raft, cfg.maxraftstate)
|
|
}
|
|
if cfg.maxraftstate < 0 && snap > 0 {
|
|
cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// controller server name for labrpc.
|
|
func (ctl *ctrler) ctrlername(i int) string {
|
|
return ctl.names[i]
|
|
}
|
|
|
|
// shard server name for labrpc.
|
|
// i'th server of group gid.
|
|
func (cfg *config) servername(gid int, i int) string {
|
|
return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i)
|
|
}
|
|
|
|
func (cfg *config) makeClient(ctl *ctrler) *Clerk {
|
|
cfg.mu.Lock()
|
|
defer cfg.mu.Unlock()
|
|
|
|
// ClientEnds to talk to controller service.
|
|
ends := make([]*labrpc.ClientEnd, ctl.n)
|
|
endnames := make([]string, cfg.n)
|
|
for j := 0; j < ctl.n; j++ {
|
|
endnames[j] = randstring(20)
|
|
ends[j] = cfg.net.MakeEnd(endnames[j])
|
|
cfg.net.Connect(endnames[j], ctl.ctrlername(j))
|
|
cfg.net.Enable(endnames[j], true)
|
|
}
|
|
|
|
ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd {
|
|
name := randstring(20)
|
|
end := cfg.net.MakeEnd(name)
|
|
cfg.net.Connect(name, servername)
|
|
cfg.net.Enable(name, true)
|
|
return end
|
|
})
|
|
cfg.clerks[ck] = endnames
|
|
cfg.nextClientId++
|
|
return ck
|
|
}
|
|
|
|
func (cfg *config) deleteClient(ck *Clerk) {
|
|
cfg.mu.Lock()
|
|
defer cfg.mu.Unlock()
|
|
|
|
v := cfg.clerks[ck]
|
|
for i := 0; i < len(v); i++ {
|
|
os.Remove(v[i])
|
|
}
|
|
delete(cfg.clerks, ck)
|
|
}
|
|
|
|
// Shutdown i'th server of gi'th group, by isolating it
|
|
func (cfg *config) ShutdownServer(gi int, i int) {
|
|
cfg.mu.Lock()
|
|
defer cfg.mu.Unlock()
|
|
|
|
gg := cfg.groups[gi]
|
|
|
|
// prevent this server from sending
|
|
for j := 0; j < len(gg.servers); j++ {
|
|
name := gg.endnames[i][j]
|
|
cfg.net.Enable(name, false)
|
|
}
|
|
for j := 0; j < len(gg.mendnames[i]); j++ {
|
|
name := gg.mendnames[i][j]
|
|
cfg.net.Enable(name, false)
|
|
}
|
|
|
|
// disable client connections to the server.
|
|
// it's important to do this before creating
|
|
// the new Persister in saved[i], to avoid
|
|
// the possibility of the server returning a
|
|
// positive reply to an Append but persisting
|
|
// the result in the superseded Persister.
|
|
cfg.net.DeleteServer(cfg.servername(gg.gid, i))
|
|
|
|
// a fresh persister, in case old instance
|
|
// continues to update the Persister.
|
|
// but copy old persister's content so that we always
|
|
// pass Make() the last persisted state.
|
|
if gg.saved[i] != nil {
|
|
gg.saved[i] = gg.saved[i].Copy()
|
|
}
|
|
|
|
kv := gg.servers[i]
|
|
if kv != nil {
|
|
cfg.mu.Unlock()
|
|
kv.Kill()
|
|
cfg.mu.Lock()
|
|
gg.servers[i] = nil
|
|
}
|
|
}
|
|
|
|
func (cfg *config) ShutdownGroup(gi int) {
|
|
for i := 0; i < cfg.n; i++ {
|
|
cfg.ShutdownServer(gi, i)
|
|
}
|
|
}
|
|
|
|
// start i'th server in gi'th group
|
|
func (cfg *config) StartServer(gi int, i int) {
|
|
cfg.mu.Lock()
|
|
|
|
gg := cfg.groups[gi]
|
|
|
|
// a fresh set of outgoing ClientEnd names
|
|
// to talk to other servers in this group.
|
|
gg.endnames[i] = make([]string, cfg.n)
|
|
for j := 0; j < cfg.n; j++ {
|
|
gg.endnames[i][j] = randstring(20)
|
|
}
|
|
|
|
// and the connections to other servers in this group.
|
|
ends := make([]*labrpc.ClientEnd, cfg.n)
|
|
for j := 0; j < cfg.n; j++ {
|
|
ends[j] = cfg.net.MakeEnd(gg.endnames[i][j])
|
|
cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j))
|
|
cfg.net.Enable(gg.endnames[i][j], true)
|
|
}
|
|
|
|
// ends to talk to shardctrler service
|
|
mends := make([]*labrpc.ClientEnd, cfg.ctl.n)
|
|
gg.mendnames[i] = make([]string, cfg.ctl.n)
|
|
for j := 0; j < cfg.ctl.n; j++ {
|
|
gg.mendnames[i][j] = randstring(20)
|
|
mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j])
|
|
cfg.net.Connect(gg.mendnames[i][j], cfg.ctl.ctrlername(j))
|
|
cfg.net.Enable(gg.mendnames[i][j], true)
|
|
}
|
|
|
|
// a fresh persister, so old instance doesn't overwrite
|
|
// new instance's persisted state.
|
|
// give the fresh persister a copy of the old persister's
|
|
// state, so that the spec is that we pass StartKVServer()
|
|
// the last persisted state.
|
|
if gg.saved[i] != nil {
|
|
gg.saved[i] = gg.saved[i].Copy()
|
|
} else {
|
|
gg.saved[i] = raft.MakePersister()
|
|
}
|
|
cfg.mu.Unlock()
|
|
|
|
gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate,
|
|
gg.gid, mends,
|
|
func(servername string) *labrpc.ClientEnd {
|
|
name := randstring(20)
|
|
end := cfg.net.MakeEnd(name)
|
|
cfg.net.Connect(name, servername)
|
|
cfg.net.Enable(name, true)
|
|
return end
|
|
})
|
|
|
|
kvsvc := labrpc.MakeService(gg.servers[i])
|
|
rfsvc := labrpc.MakeService(gg.servers[i].rf)
|
|
srv := labrpc.MakeServer()
|
|
srv.AddService(kvsvc)
|
|
srv.AddService(rfsvc)
|
|
cfg.net.AddServer(cfg.servername(gg.gid, i), srv)
|
|
}
|
|
|
|
func (cfg *config) StartGroup(gi int) {
|
|
for i := 0; i < cfg.n; i++ {
|
|
cfg.StartServer(gi, i)
|
|
}
|
|
}
|
|
|
|
func (cfg *config) StartCtrlerServer(ctl *ctrler, i int) {
|
|
// ClientEnds to talk to other controller replicas.
|
|
ends := make([]*labrpc.ClientEnd, ctl.n)
|
|
for j := 0; j < ctl.n; j++ {
|
|
endname := randstring(20)
|
|
ends[j] = cfg.net.MakeEnd(endname)
|
|
cfg.net.Connect(endname, ctl.ctrlername(j))
|
|
cfg.net.Enable(endname, true)
|
|
}
|
|
|
|
p := raft.MakePersister()
|
|
|
|
ctl.servers[i] = shardctrler.StartServer(ends, i, p)
|
|
|
|
msvc := labrpc.MakeService(ctl.servers[i])
|
|
rfsvc := labrpc.MakeService(ctl.servers[i].Raft())
|
|
srv := labrpc.MakeServer()
|
|
srv.AddService(msvc)
|
|
srv.AddService(rfsvc)
|
|
cfg.net.AddServer(ctl.ctrlername(i), srv)
|
|
}
|
|
|
|
func (cfg *config) ctrlerclerk(ctl *ctrler) *shardctrler.Clerk {
|
|
// ClientEnds to talk to ctrler service.
|
|
ends := make([]*labrpc.ClientEnd, ctl.n)
|
|
for j := 0; j < ctl.n; j++ {
|
|
name := randstring(20)
|
|
ends[j] = cfg.net.MakeEnd(name)
|
|
cfg.net.Connect(name, ctl.ctrlername(j))
|
|
cfg.net.Enable(name, true)
|
|
}
|
|
|
|
return shardctrler.MakeClerk(ends)
|
|
}
|
|
|
|
// tell the shardctrler that a group is joining.
|
|
func (cfg *config) join(gi int) {
|
|
cfg.joinm([]int{gi}, cfg.ctl)
|
|
}
|
|
|
|
func (cfg *config) ctljoin(gi int, ctl *ctrler) {
|
|
cfg.joinm([]int{gi}, ctl)
|
|
}
|
|
|
|
func (cfg *config) joinm(gis []int, ctl *ctrler) {
|
|
m := make(map[int][]string, len(gis))
|
|
for _, g := range gis {
|
|
gid := cfg.groups[g].gid
|
|
servernames := make([]string, cfg.n)
|
|
for i := 0; i < cfg.n; i++ {
|
|
servernames[i] = cfg.servername(gid, i)
|
|
}
|
|
m[gid] = servernames
|
|
}
|
|
ctl.ck.Join(m)
|
|
}
|
|
|
|
// tell the shardctrler that a group is leaving.
|
|
func (cfg *config) leave(gi int) {
|
|
cfg.leavem([]int{gi})
|
|
}
|
|
|
|
func (cfg *config) leavem(gis []int) {
|
|
gids := make([]int, 0, len(gis))
|
|
for _, g := range gis {
|
|
gids = append(gids, cfg.groups[g].gid)
|
|
}
|
|
cfg.ctl.ck.Leave(gids)
|
|
}
|
|
|
|
func (cfg *config) StartCtrlerService() *ctrler {
|
|
ctl := &ctrler{}
|
|
ctl.n = 3
|
|
ctl.servers = make([]*shardctrler.ShardCtrler, ctl.n)
|
|
ctl.names = make([]string, ctl.n)
|
|
for i := 0; i < ctl.n; i++ {
|
|
ctl.names[i] = "ctlr-" + randstring(20)
|
|
}
|
|
for i := 0; i < ctl.n; i++ {
|
|
cfg.StartCtrlerServer(ctl, i)
|
|
}
|
|
ctl.ck = cfg.ctrlerclerk(ctl)
|
|
return ctl
|
|
}
|
|
|
|
var ncpu_once sync.Once
|
|
|
|
func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
|
|
ncpu_once.Do(func() {
|
|
if runtime.NumCPU() < 2 {
|
|
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
|
|
}
|
|
rand.Seed(makeSeed())
|
|
})
|
|
runtime.GOMAXPROCS(4)
|
|
cfg := &config{}
|
|
cfg.t = t
|
|
cfg.maxraftstate = maxraftstate
|
|
cfg.net = labrpc.MakeNetwork()
|
|
cfg.start = time.Now()
|
|
|
|
// controller and its clerk
|
|
cfg.ctl = cfg.StartCtrlerService()
|
|
|
|
cfg.ngroups = 3
|
|
cfg.groups = make([]*group, cfg.ngroups)
|
|
cfg.n = n
|
|
for gi := 0; gi < cfg.ngroups; gi++ {
|
|
gg := &group{}
|
|
cfg.groups[gi] = gg
|
|
gg.gid = 100 + gi
|
|
gg.servers = make([]*ShardKV, cfg.n)
|
|
gg.saved = make([]*raft.Persister, cfg.n)
|
|
gg.endnames = make([][]string, cfg.n)
|
|
gg.mendnames = make([][]string, cfg.ctl.n)
|
|
for i := 0; i < cfg.n; i++ {
|
|
cfg.StartServer(gi, i)
|
|
}
|
|
}
|
|
|
|
cfg.clerks = make(map[*Clerk][]string)
|
|
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
|
|
|
|
cfg.net.Reliable(!unreliable)
|
|
|
|
return cfg
|
|
}
|