|
|
@ -1,7 +1,7 @@ |
|
|
|
package nodes |
|
|
|
|
|
|
|
import ( |
|
|
|
// "context"
|
|
|
|
"errors" |
|
|
|
"fmt" |
|
|
|
"net" |
|
|
|
"net/http" |
|
|
@ -15,24 +15,18 @@ import ( |
|
|
|
|
|
|
|
var log, _ = logprovider.CreateDefaultZapLogger(zap.InfoLevel) |
|
|
|
|
|
|
|
func newNode(address string) *Public_node_info { |
|
|
|
return &Public_node_info{ |
|
|
|
connect: false, |
|
|
|
address: address, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func Init(selfId string, nodeAddr map[string]string, db *leveldb.DB, rstorage *RaftStorage, isRestart bool) *Node { |
|
|
|
ns := make(map[string]*Public_node_info) |
|
|
|
for id, addr := range nodeAddr { |
|
|
|
ns[id] = newNode(addr) |
|
|
|
// 运行在进程上的初始化 + rpc注册
|
|
|
|
func InitRPCNode(selfId string, port string, nodeAddr map[string]string, db *leveldb.DB, rstorage *RaftStorage, isRestart bool) *Node { |
|
|
|
var nodeIds []string |
|
|
|
for id := range nodeAddr { |
|
|
|
nodeIds = append(nodeIds, id) |
|
|
|
} |
|
|
|
|
|
|
|
// 创建节点
|
|
|
|
node := &Node{ |
|
|
|
selfId: selfId, |
|
|
|
leaderId: "", |
|
|
|
nodes: ns, |
|
|
|
nodes: nodeIds, |
|
|
|
maxLogId: -1, // 后来发现论文中是从1开始的(初始0),但不想改了
|
|
|
|
currTerm: 1, |
|
|
|
log: make([]RaftLogEntry, 0), |
|
|
@ -42,6 +36,7 @@ func Init(selfId string, nodeAddr map[string]string, db *leveldb.DB, rstorage *R |
|
|
|
matchIndex: make(map[string]int), |
|
|
|
db: db, |
|
|
|
storage: rstorage, |
|
|
|
transport: &HTTPTransport{NodeMap: nodeAddr}, |
|
|
|
} |
|
|
|
node.initLeaderState() |
|
|
|
if isRestart { |
|
|
@ -51,40 +46,13 @@ func Init(selfId string, nodeAddr map[string]string, db *leveldb.DB, rstorage *R |
|
|
|
log.Sugar().Infof("[%s]从重启中恢复log数量: %d", selfId, len(node.log)) |
|
|
|
} |
|
|
|
|
|
|
|
return node |
|
|
|
} |
|
|
|
|
|
|
|
func (n *Node) initLeaderState() { |
|
|
|
for peerId := range n.nodes { |
|
|
|
n.nextIndex[peerId] = len(n.log) // 发送日志的下一个索引
|
|
|
|
n.matchIndex[peerId] = 0 // 复制日志的最新匹配索引
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func Start(node *Node) { |
|
|
|
node.state = Follower // 所有节点以 Follower 状态启动
|
|
|
|
node.resetElectionTimer() // 启动选举超时定时器
|
|
|
|
log.Sugar().Infof("[%s]开始监听" + port + "端口", selfId) |
|
|
|
node.ListenPort(port) |
|
|
|
|
|
|
|
go func() { |
|
|
|
for { |
|
|
|
switch node.state { |
|
|
|
case Follower: |
|
|
|
// 监听心跳超时
|
|
|
|
fmt.Printf("[%s] is a follower, 监听中...\n", node.selfId) |
|
|
|
|
|
|
|
case Leader: |
|
|
|
// 发送心跳
|
|
|
|
fmt.Printf("[%s] is the leader, 发送心跳...\n", node.selfId) |
|
|
|
node.resetElectionTimer() // leader不主动触发选举
|
|
|
|
node.BroadCastKV(Normal) |
|
|
|
} |
|
|
|
time.Sleep(50 * time.Millisecond) |
|
|
|
} |
|
|
|
}() |
|
|
|
return node |
|
|
|
} |
|
|
|
|
|
|
|
// 初始时注册rpc方法
|
|
|
|
func (node *Node) Rpc(port string) { |
|
|
|
func (node *Node) ListenPort(port string) { |
|
|
|
|
|
|
|
err := rpc.Register(node) |
|
|
|
if err != nil { |
|
|
@ -104,37 +72,140 @@ func (node *Node) Rpc(port string) { |
|
|
|
}() |
|
|
|
} |
|
|
|
|
|
|
|
// 封装有超时的dial
|
|
|
|
func DialHTTPWithTimeout(network, address string) (*rpc.Client, error) { |
|
|
|
done := make(chan struct{}) |
|
|
|
var client *rpc.Client |
|
|
|
var err error |
|
|
|
// 线程模拟的初始化
|
|
|
|
func InitThreadNode(selfId string, peerIds []string, db *leveldb.DB, rstorage *RaftStorage, isRestart bool, threadTransport *ThreadTransport) (*Node, chan struct{}) { |
|
|
|
rpcChan := make(chan RPCRequest, 100) // 要监听的chan
|
|
|
|
// 创建节点
|
|
|
|
node := &Node{ |
|
|
|
selfId: selfId, |
|
|
|
leaderId: "", |
|
|
|
nodes: peerIds, |
|
|
|
maxLogId: -1, // 后来发现论文中是从1开始的(初始0),但不想改了
|
|
|
|
currTerm: 1, |
|
|
|
log: make([]RaftLogEntry, 0), |
|
|
|
commitIndex: -1, |
|
|
|
lastApplied: -1, |
|
|
|
nextIndex: make(map[string]int), |
|
|
|
matchIndex: make(map[string]int), |
|
|
|
db: db, |
|
|
|
storage: rstorage, |
|
|
|
transport: threadTransport, |
|
|
|
} |
|
|
|
node.initLeaderState() |
|
|
|
if isRestart { |
|
|
|
node.currTerm = rstorage.GetCurrentTerm() |
|
|
|
node.votedFor = rstorage.GetVotedFor() |
|
|
|
node.log = rstorage.GetLogEntries() |
|
|
|
log.Sugar().Infof("[%s]从重启中恢复log数量: %d", selfId, len(node.log)) |
|
|
|
} |
|
|
|
|
|
|
|
go func() { |
|
|
|
client, err = rpc.DialHTTP(network, address) |
|
|
|
close(done) |
|
|
|
}() |
|
|
|
threadTransport.RegisterNodeChan(selfId, rpcChan) |
|
|
|
quitChan := make(chan struct{}, 1) |
|
|
|
go node.listenForChan(rpcChan, quitChan) |
|
|
|
|
|
|
|
return node, quitChan |
|
|
|
} |
|
|
|
|
|
|
|
select { |
|
|
|
case <-done: |
|
|
|
return client, err |
|
|
|
case <-time.After(50 * time.Millisecond): |
|
|
|
return nil, fmt.Errorf("dial timeout: %s", address) |
|
|
|
func (node *Node) listenForChan(rpcChan chan RPCRequest, quitChan chan struct{}) { |
|
|
|
defer node.db.Close() |
|
|
|
|
|
|
|
for { |
|
|
|
select { |
|
|
|
case req := <-rpcChan: |
|
|
|
switch req.ServiceMethod { |
|
|
|
case "Node.AppendEntries": |
|
|
|
arg, ok := req.Args.(*AppendEntriesArg) |
|
|
|
resp, ok2 := req.Reply.(*AppendEntriesReply) |
|
|
|
if !ok || !ok2 { |
|
|
|
req.Done <- errors.New("type assertion failed for AppendEntries") |
|
|
|
} else { |
|
|
|
req.Done <- node.AppendEntries(arg, resp) |
|
|
|
} |
|
|
|
|
|
|
|
case "Node.RequestVote": |
|
|
|
arg, ok := req.Args.(*RequestVoteArgs) |
|
|
|
resp, ok2 := req.Reply.(*RequestVoteReply) |
|
|
|
if !ok || !ok2 { |
|
|
|
req.Done <- errors.New("type assertion failed for RequestVote") |
|
|
|
} else { |
|
|
|
req.Done <- node.RequestVote(arg, resp) |
|
|
|
} |
|
|
|
|
|
|
|
case "Node.WriteKV": |
|
|
|
arg, ok := req.Args.(*LogEntryCall) |
|
|
|
resp, ok2 := req.Reply.(*ServerReply) |
|
|
|
if !ok || !ok2 { |
|
|
|
req.Done <- errors.New("type assertion failed for WriteKV") |
|
|
|
} else { |
|
|
|
req.Done <- node.WriteKV(arg, resp) |
|
|
|
} |
|
|
|
|
|
|
|
case "Node.ReadKey": |
|
|
|
arg, ok := req.Args.(*string) |
|
|
|
resp, ok2 := req.Reply.(*ServerReply) |
|
|
|
if !ok || !ok2 { |
|
|
|
req.Done <- errors.New("type assertion failed for ReadKey") |
|
|
|
} else { |
|
|
|
req.Done <- node.ReadKey(arg, resp) |
|
|
|
} |
|
|
|
|
|
|
|
case "Node.FindLeader": |
|
|
|
arg, ok := req.Args.(struct{}) |
|
|
|
resp, ok2 := req.Reply.(*FindLeaderReply) |
|
|
|
if !ok || !ok2 { |
|
|
|
req.Done <- errors.New("type assertion failed for FindLeader") |
|
|
|
} else { |
|
|
|
req.Done <- node.FindLeader(arg, resp) |
|
|
|
} |
|
|
|
|
|
|
|
default: |
|
|
|
req.Done <- fmt.Errorf("未知方法: %s", req.ServiceMethod) |
|
|
|
} |
|
|
|
case <-quitChan: |
|
|
|
log.Sugar().Infof("[%s] 监听线程收到退出信号", node.selfId) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// 共同部分和启动
|
|
|
|
func (n *Node) initLeaderState() { |
|
|
|
for _, peerId := range n.nodes { |
|
|
|
n.nextIndex[peerId] = len(n.log) // 发送日志的下一个索引
|
|
|
|
n.matchIndex[peerId] = 0 // 复制日志的最新匹配索引
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// 封装有超时的call
|
|
|
|
func CallWithTimeout[T1 any, T2 any](client *rpc.Client, serviceMethod string, args *T1, reply *T2) error { |
|
|
|
done := make(chan error, 1) |
|
|
|
func Start(node *Node, quitChan chan struct{}) { |
|
|
|
node.state = Follower // 所有节点以 Follower 状态启动
|
|
|
|
node.resetElectionTimer() // 启动选举超时定时器
|
|
|
|
|
|
|
|
go func() { |
|
|
|
done <- client.Call(serviceMethod, args, reply) |
|
|
|
}() |
|
|
|
ticker := time.NewTicker(50 * time.Millisecond) |
|
|
|
defer ticker.Stop() |
|
|
|
|
|
|
|
select { |
|
|
|
case err := <-done: |
|
|
|
return err |
|
|
|
case <-time.After(50 * time.Millisecond): |
|
|
|
return fmt.Errorf("call timeout: %s", serviceMethod) |
|
|
|
} |
|
|
|
for { |
|
|
|
select { |
|
|
|
case <-quitChan: |
|
|
|
fmt.Printf("[%s] Raft start 退出...\n", node.selfId) |
|
|
|
return // 退出 goroutine
|
|
|
|
|
|
|
|
case <-ticker.C: |
|
|
|
switch node.state { |
|
|
|
case Follower: |
|
|
|
// 监听心跳超时
|
|
|
|
fmt.Printf("[%s] is a follower, 监听中...\n", node.selfId) |
|
|
|
|
|
|
|
case Leader: |
|
|
|
// 发送心跳
|
|
|
|
fmt.Printf("[%s] is the leader, 发送心跳...\n", node.selfId) |
|
|
|
node.resetElectionTimer() // leader 不主动触发选举
|
|
|
|
node.BroadCastKV(Normal) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
}() |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|