|
@ -1,10 +1,9 @@ |
|
|
package nodes |
|
|
package nodes |
|
|
|
|
|
|
|
|
import ( |
|
|
import ( |
|
|
"math/rand" |
|
|
|
|
|
"sort" |
|
|
"sort" |
|
|
"strconv" |
|
|
"strconv" |
|
|
"time" |
|
|
|
|
|
|
|
|
"sync" |
|
|
|
|
|
|
|
|
"go.uber.org/zap" |
|
|
"go.uber.org/zap" |
|
|
) |
|
|
) |
|
@ -24,31 +23,31 @@ type AppendEntriesReply struct { |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// leader收到新内容要广播,以及心跳广播(同步自己的log)
|
|
|
// leader收到新内容要广播,以及心跳广播(同步自己的log)
|
|
|
func (node *Node) BroadCastKV(callMode CallMode) { |
|
|
|
|
|
|
|
|
func (node *Node) BroadCastKV() { |
|
|
|
|
|
log.Sugar().Infof("leader[%s]广播消息", node.SelfId) |
|
|
|
|
|
failCount := 0 |
|
|
|
|
|
// 这里增加一个锁,防止并发修改成功计数
|
|
|
|
|
|
var failMutex sync.Mutex |
|
|
// 遍历所有节点
|
|
|
// 遍历所有节点
|
|
|
for _, id := range node.nodes { |
|
|
|
|
|
go func(id string, kv CallMode) { |
|
|
|
|
|
node.sendKV(id, callMode) |
|
|
|
|
|
}(id, callMode) |
|
|
|
|
|
|
|
|
for _, id := range node.Nodes { |
|
|
|
|
|
go func(id string) { |
|
|
|
|
|
node.sendKV(id, &failCount, &failMutex) |
|
|
|
|
|
}(id) |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
func (node *Node) sendKV(peerId string, callMode CallMode) { |
|
|
|
|
|
|
|
|
|
|
|
switch callMode { |
|
|
|
|
|
case Fail: |
|
|
|
|
|
log.Info("模拟发送失败") |
|
|
|
|
|
// 这么写向所有的node发送都失败,也可以随机数确定是否失败
|
|
|
|
|
|
case Delay: |
|
|
|
|
|
log.Info("模拟发送延迟") |
|
|
|
|
|
// 随机延迟0-5ms
|
|
|
|
|
|
time.Sleep(time.Millisecond * time.Duration(rand.Intn(5))) |
|
|
|
|
|
default: |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
client, err := node.transport.DialHTTPWithTimeout("tcp", peerId) |
|
|
|
|
|
|
|
|
func (node *Node) sendKV(peerId string, failCount *int, failMutex *sync.Mutex) { |
|
|
|
|
|
client, err := node.Transport.DialHTTPWithTimeout("tcp", node.SelfId, peerId) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error(node.selfId + "dialling [" + peerId + "] fail: ", zap.Error(err)) |
|
|
|
|
|
|
|
|
log.Error("[" + node.SelfId + "]dialling [" + peerId + "] fail: ", zap.Error(err)) |
|
|
|
|
|
failMutex.Lock() |
|
|
|
|
|
*failCount++ |
|
|
|
|
|
if *failCount == len(node.Nodes) / 2 + 1 { // 无法联系超过半数:自己有问题,降级
|
|
|
|
|
|
node.LeaderId = "" |
|
|
|
|
|
node.State = Follower |
|
|
|
|
|
node.resetElectionTimer() |
|
|
|
|
|
} |
|
|
|
|
|
failMutex.Unlock() |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -59,69 +58,78 @@ func (node *Node) sendKV(peerId string, callMode CallMode) { |
|
|
} |
|
|
} |
|
|
}(client) |
|
|
}(client) |
|
|
|
|
|
|
|
|
node.mu.Lock() |
|
|
|
|
|
defer node.mu.Unlock() |
|
|
|
|
|
|
|
|
node.Mu.Lock() |
|
|
|
|
|
defer node.Mu.Unlock() |
|
|
|
|
|
|
|
|
var appendReply AppendEntriesReply |
|
|
var appendReply AppendEntriesReply |
|
|
appendReply.Success = false |
|
|
appendReply.Success = false |
|
|
nextIndex := node.nextIndex[peerId] |
|
|
|
|
|
// log.Info("nextindex " + strconv.Itoa(nextIndex))
|
|
|
|
|
|
|
|
|
NextIndex := node.NextIndex[peerId] |
|
|
|
|
|
// log.Info("NextIndex " + strconv.Itoa(NextIndex))
|
|
|
for (!appendReply.Success) { |
|
|
for (!appendReply.Success) { |
|
|
if nextIndex < 0 { |
|
|
|
|
|
|
|
|
if NextIndex < 0 { |
|
|
log.Fatal("assert >= 0 here") |
|
|
log.Fatal("assert >= 0 here") |
|
|
} |
|
|
} |
|
|
sendEntries := node.log[nextIndex:] |
|
|
|
|
|
|
|
|
sendEntries := node.Log[NextIndex:] |
|
|
arg := AppendEntriesArg{ |
|
|
arg := AppendEntriesArg{ |
|
|
Term: node.currTerm, |
|
|
|
|
|
PrevLogIndex: nextIndex - 1, |
|
|
|
|
|
|
|
|
Term: node.CurrTerm, |
|
|
|
|
|
PrevLogIndex: NextIndex - 1, |
|
|
Entries: sendEntries, |
|
|
Entries: sendEntries, |
|
|
LeaderCommit: node.commitIndex, |
|
|
|
|
|
LeaderId: node.selfId, |
|
|
|
|
|
|
|
|
LeaderCommit: node.CommitIndex, |
|
|
|
|
|
LeaderId: node.SelfId, |
|
|
} |
|
|
} |
|
|
if arg.PrevLogIndex >= 0 { |
|
|
if arg.PrevLogIndex >= 0 { |
|
|
arg.PrevLogTerm = node.log[arg.PrevLogIndex].Term |
|
|
|
|
|
|
|
|
arg.PrevLogTerm = node.Log[arg.PrevLogIndex].Term |
|
|
} |
|
|
} |
|
|
callErr := node.transport.CallWithTimeout(client, "Node.AppendEntries", &arg, &appendReply) // RPC
|
|
|
|
|
|
|
|
|
callErr := node.Transport.CallWithTimeout(client, "Node.AppendEntries", &arg, &appendReply) // RPC
|
|
|
if callErr != nil { |
|
|
if callErr != nil { |
|
|
log.Error(node.selfId + "calling [" + peerId + "] fail: ", zap.Error(callErr)) |
|
|
|
|
|
|
|
|
log.Error("[" + node.SelfId + "]calling [" + peerId + "] fail: ", zap.Error(callErr)) |
|
|
|
|
|
failMutex.Lock() |
|
|
|
|
|
*failCount++ |
|
|
|
|
|
if *failCount == len(node.Nodes) / 2 + 1 { // 无法联系超过半数:自己有问题,降级
|
|
|
|
|
|
node.LeaderId = "" |
|
|
|
|
|
node.State = Follower |
|
|
|
|
|
node.resetElectionTimer() |
|
|
|
|
|
} |
|
|
|
|
|
failMutex.Unlock() |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if appendReply.Term != node.currTerm { |
|
|
|
|
|
log.Info("term=" + strconv.Itoa(node.currTerm) + "的Leader[" + node.selfId + "]收到更高的 term=" + strconv.Itoa(appendReply.Term) + ",转换为 Follower") |
|
|
|
|
|
node.currTerm = appendReply.Term |
|
|
|
|
|
node.state = Follower |
|
|
|
|
|
node.votedFor = "" |
|
|
|
|
|
node.storage.SetTermAndVote(node.currTerm, node.votedFor) |
|
|
|
|
|
|
|
|
if appendReply.Term != node.CurrTerm { |
|
|
|
|
|
log.Info("term=" + strconv.Itoa(node.CurrTerm) + "的Leader[" + node.SelfId + "]收到更高的 term=" + strconv.Itoa(appendReply.Term) + ",转换为 Follower") |
|
|
|
|
|
node.LeaderId = "" |
|
|
|
|
|
node.CurrTerm = appendReply.Term |
|
|
|
|
|
node.State = Follower |
|
|
|
|
|
node.VotedFor = "" |
|
|
|
|
|
node.Storage.SetTermAndVote(node.CurrTerm, node.VotedFor) |
|
|
node.resetElectionTimer() |
|
|
node.resetElectionTimer() |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
nextIndex-- // 失败往前传一格
|
|
|
|
|
|
|
|
|
NextIndex-- // 失败往前传一格
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// 不变成follower情况下
|
|
|
// 不变成follower情况下
|
|
|
node.nextIndex[peerId] = node.maxLogId + 1 |
|
|
|
|
|
node.matchIndex[peerId] = node.maxLogId |
|
|
|
|
|
|
|
|
node.NextIndex[peerId] = node.MaxLogId + 1 |
|
|
|
|
|
node.MatchIndex[peerId] = node.MaxLogId |
|
|
node.updateCommitIndex() |
|
|
node.updateCommitIndex() |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
func (node *Node) updateCommitIndex() { |
|
|
func (node *Node) updateCommitIndex() { |
|
|
totalNodes := len(node.nodes) |
|
|
|
|
|
|
|
|
totalNodes := len(node.Nodes) |
|
|
|
|
|
|
|
|
// 收集所有 matchIndex 并排序
|
|
|
|
|
|
matchIndexes := make([]int, 0, totalNodes) |
|
|
|
|
|
for _, index := range node.matchIndex { |
|
|
|
|
|
matchIndexes = append(matchIndexes, index) |
|
|
|
|
|
|
|
|
// 收集所有 MatchIndex 并排序
|
|
|
|
|
|
MatchIndexes := make([]int, 0, totalNodes) |
|
|
|
|
|
for _, index := range node.MatchIndex { |
|
|
|
|
|
MatchIndexes = append(MatchIndexes, index) |
|
|
} |
|
|
} |
|
|
sort.Ints(matchIndexes) // 排序
|
|
|
|
|
|
|
|
|
sort.Ints(MatchIndexes) // 排序
|
|
|
|
|
|
|
|
|
// 计算多数派 commitIndex
|
|
|
|
|
|
majorityIndex := matchIndexes[totalNodes/2] // 取 N/2 位置上的索引(多数派)
|
|
|
|
|
|
|
|
|
// 计算多数派 CommitIndex
|
|
|
|
|
|
majorityIndex := MatchIndexes[totalNodes/2] // 取 N/2 位置上的索引(多数派)
|
|
|
|
|
|
|
|
|
// 确保这个索引的日志条目属于当前 term,防止提交旧 term 的日志
|
|
|
// 确保这个索引的日志条目属于当前 term,防止提交旧 term 的日志
|
|
|
if majorityIndex > node.commitIndex && majorityIndex < len(node.log) && node.log[majorityIndex].Term == node.currTerm { |
|
|
|
|
|
node.commitIndex = majorityIndex |
|
|
|
|
|
log.Info("Leader[" + node.selfId + "]更新 commitIndex: " + strconv.Itoa(majorityIndex)) |
|
|
|
|
|
|
|
|
if majorityIndex > node.CommitIndex && majorityIndex < len(node.Log) && node.Log[majorityIndex].Term == node.CurrTerm { |
|
|
|
|
|
node.CommitIndex = majorityIndex |
|
|
|
|
|
log.Info("Leader[" + node.SelfId + "]更新 CommitIndex: " + strconv.Itoa(majorityIndex)) |
|
|
|
|
|
|
|
|
// 应用日志到状态机
|
|
|
// 应用日志到状态机
|
|
|
node.applyCommittedLogs() |
|
|
node.applyCommittedLogs() |
|
@ -130,13 +138,13 @@ func (node *Node) updateCommitIndex() { |
|
|
|
|
|
|
|
|
// 应用日志到状态机
|
|
|
// 应用日志到状态机
|
|
|
func (node *Node) applyCommittedLogs() { |
|
|
func (node *Node) applyCommittedLogs() { |
|
|
for node.lastApplied < node.commitIndex { |
|
|
|
|
|
node.lastApplied++ |
|
|
|
|
|
logEntry := node.log[node.lastApplied] |
|
|
|
|
|
log.Sugar().Infof("[%s]应用日志到状态机: " + logEntry.print(), node.selfId) |
|
|
|
|
|
err := node.db.Put([]byte(logEntry.LogE.Key), []byte(logEntry.LogE.Value), nil) |
|
|
|
|
|
|
|
|
for node.LastApplied < node.CommitIndex { |
|
|
|
|
|
node.LastApplied++ |
|
|
|
|
|
logEntry := node.Log[node.LastApplied] |
|
|
|
|
|
log.Sugar().Infof("[%s]应用日志到状态机: " + logEntry.print(), node.SelfId) |
|
|
|
|
|
err := node.Db.Put([]byte(logEntry.LogE.Key), []byte(logEntry.LogE.Value), nil) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error(node.selfId + "应用状态机失败: ", zap.Error(err)) |
|
|
|
|
|
|
|
|
log.Error(node.SelfId + "应用状态机失败: ", zap.Error(err)) |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
@ -147,65 +155,66 @@ func (node *Node) AppendEntries(arg *AppendEntriesArg, reply *AppendEntriesReply |
|
|
// defer func() {
|
|
|
// defer func() {
|
|
|
// log.Sugar().Infof("AppendEntries 处理时间: %v", time.Since(start))
|
|
|
// log.Sugar().Infof("AppendEntries 处理时间: %v", time.Since(start))
|
|
|
// }()
|
|
|
// }()
|
|
|
node.mu.Lock() |
|
|
|
|
|
defer node.mu.Unlock() |
|
|
|
|
|
|
|
|
log.Sugar().Infof("[%s]收到[%s]的AppendEntries", node.SelfId, arg.LeaderId) |
|
|
|
|
|
node.Mu.Lock() |
|
|
|
|
|
defer node.Mu.Unlock() |
|
|
|
|
|
|
|
|
// 如果 term 过期,拒绝接受日志
|
|
|
// 如果 term 过期,拒绝接受日志
|
|
|
if node.currTerm > arg.Term { |
|
|
|
|
|
*reply = AppendEntriesReply{node.currTerm, false} |
|
|
|
|
|
|
|
|
if node.CurrTerm > arg.Term { |
|
|
|
|
|
*reply = AppendEntriesReply{node.CurrTerm, false} |
|
|
return nil |
|
|
return nil |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
node.leaderId = arg.LeaderId // 记录Leader
|
|
|
|
|
|
|
|
|
node.LeaderId = arg.LeaderId // 记录Leader
|
|
|
|
|
|
|
|
|
// 如果term比自己高,或自己不是follower但收到相同term的心跳
|
|
|
// 如果term比自己高,或自己不是follower但收到相同term的心跳
|
|
|
if node.currTerm < arg.Term || node.state != Follower { |
|
|
|
|
|
log.Sugar().Infof("[%s]发现更高 term(%s)", node.selfId, strconv.Itoa(arg.Term)) |
|
|
|
|
|
node.currTerm = arg.Term |
|
|
|
|
|
node.state = Follower |
|
|
|
|
|
node.votedFor = "" |
|
|
|
|
|
// node.storage.SetTermAndVote(node.currTerm, node.votedFor)
|
|
|
|
|
|
|
|
|
if node.CurrTerm < arg.Term || node.State != Follower { |
|
|
|
|
|
log.Sugar().Infof("[%s]发现更高 term(%s)", node.SelfId, strconv.Itoa(arg.Term)) |
|
|
|
|
|
node.CurrTerm = arg.Term |
|
|
|
|
|
node.State = Follower |
|
|
|
|
|
node.VotedFor = "" |
|
|
|
|
|
// node.storage.SetTermAndVote(node.CurrTerm, node.VotedFor)
|
|
|
} |
|
|
} |
|
|
node.storage.SetTermAndVote(node.currTerm, node.votedFor) |
|
|
|
|
|
|
|
|
node.Storage.SetTermAndVote(node.CurrTerm, node.VotedFor) |
|
|
|
|
|
|
|
|
// 检查 prevLogIndex 是否有效
|
|
|
// 检查 prevLogIndex 是否有效
|
|
|
if arg.PrevLogIndex >= len(node.log) || (arg.PrevLogIndex >= 0 && node.log[arg.PrevLogIndex].Term != arg.PrevLogTerm) { |
|
|
|
|
|
*reply = AppendEntriesReply{node.currTerm, false} |
|
|
|
|
|
|
|
|
if arg.PrevLogIndex >= len(node.Log) || (arg.PrevLogIndex >= 0 && node.Log[arg.PrevLogIndex].Term != arg.PrevLogTerm) { |
|
|
|
|
|
*reply = AppendEntriesReply{node.CurrTerm, false} |
|
|
return nil |
|
|
return nil |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// 处理日志冲突(如果存在不同 term,则截断日志)
|
|
|
// 处理日志冲突(如果存在不同 term,则截断日志)
|
|
|
idx := arg.PrevLogIndex + 1 |
|
|
idx := arg.PrevLogIndex + 1 |
|
|
for i := idx; i < len(node.log) && i-idx < len(arg.Entries); i++ { |
|
|
|
|
|
if node.log[i].Term != arg.Entries[i-idx].Term { |
|
|
|
|
|
node.log = node.log[:idx] |
|
|
|
|
|
|
|
|
for i := idx; i < len(node.Log) && i-idx < len(arg.Entries); i++ { |
|
|
|
|
|
if node.Log[i].Term != arg.Entries[i-idx].Term { |
|
|
|
|
|
node.Log = node.Log[:idx] |
|
|
break |
|
|
break |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
// log.Info(strconv.Itoa(idx) + strconv.Itoa(len(node.log)))
|
|
|
|
|
|
|
|
|
// log.Info(strconv.Itoa(idx) + strconv.Itoa(len(node.Log)))
|
|
|
|
|
|
|
|
|
// 追加新的日志条目
|
|
|
// 追加新的日志条目
|
|
|
for _, raftLogEntry := range arg.Entries { |
|
|
for _, raftLogEntry := range arg.Entries { |
|
|
log.Sugar().Infof("[%s]写入:" + raftLogEntry.print(), node.selfId) |
|
|
|
|
|
if idx < len(node.log) { |
|
|
|
|
|
node.log[idx] = raftLogEntry |
|
|
|
|
|
|
|
|
log.Sugar().Infof("[%s]写入:" + raftLogEntry.print(), node.SelfId) |
|
|
|
|
|
if idx < len(node.Log) { |
|
|
|
|
|
node.Log[idx] = raftLogEntry |
|
|
} else { |
|
|
} else { |
|
|
node.log = append(node.log, raftLogEntry) |
|
|
|
|
|
|
|
|
node.Log = append(node.Log, raftLogEntry) |
|
|
} |
|
|
} |
|
|
idx++ |
|
|
idx++ |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// 暴力持久化
|
|
|
// 暴力持久化
|
|
|
node.storage.WriteLog(node.log) |
|
|
|
|
|
|
|
|
node.Storage.WriteLog(node.Log) |
|
|
|
|
|
|
|
|
// 更新 maxLogId
|
|
|
|
|
|
node.maxLogId = len(node.log) - 1 |
|
|
|
|
|
|
|
|
// 更新 MaxLogId
|
|
|
|
|
|
node.MaxLogId = len(node.Log) - 1 |
|
|
|
|
|
|
|
|
// 更新 commitIndex
|
|
|
|
|
|
if arg.LeaderCommit < node.maxLogId { |
|
|
|
|
|
node.commitIndex = arg.LeaderCommit |
|
|
|
|
|
|
|
|
// 更新 CommitIndex
|
|
|
|
|
|
if arg.LeaderCommit < node.MaxLogId { |
|
|
|
|
|
node.CommitIndex = arg.LeaderCommit |
|
|
} else { |
|
|
} else { |
|
|
node.commitIndex = node.maxLogId |
|
|
|
|
|
|
|
|
node.CommitIndex = node.MaxLogId |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// 提交已提交的日志
|
|
|
// 提交已提交的日志
|
|
@ -213,6 +222,6 @@ func (node *Node) AppendEntries(arg *AppendEntriesArg, reply *AppendEntriesReply |
|
|
|
|
|
|
|
|
// 在成功接受日志或心跳后,重置选举超时
|
|
|
// 在成功接受日志或心跳后,重置选举超时
|
|
|
node.resetElectionTimer() |
|
|
node.resetElectionTimer() |
|
|
*reply = AppendEntriesReply{node.currTerm, true} |
|
|
|
|
|
|
|
|
*reply = AppendEntriesReply{node.CurrTerm, true} |
|
|
return nil |
|
|
return nil |
|
|
} |
|
|
} |