Skip to content

Commit fdf3c53

Browse files
authored
fix(dot/network): fix receiving notifications messages from substrate peers (#1517)
1 parent 610366b commit fdf3c53

File tree

11 files changed

+74
-98
lines changed

11 files changed

+74
-98
lines changed

dot/network/block_announce.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func (bm *BlockAnnounceMessage) Type() byte {
5656

5757
// string formats a BlockAnnounceMessage as a string
5858
func (bm *BlockAnnounceMessage) String() string {
59-
return fmt.Sprintf("BlockAnnounceMessage ParentHash=%s Number=%d StateRoot=%sx ExtrinsicsRoot=%s Digest=%v",
59+
return fmt.Sprintf("BlockAnnounceMessage ParentHash=%s Number=%d StateRoot=%s ExtrinsicsRoot=%s Digest=%v",
6060
bm.ParentHash,
6161
bm.Number,
6262
bm.StateRoot,

dot/network/connmgr.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"math/rand"
2222
"sync"
23+
"time"
2324

2425
"github.com/libp2p/go-libp2p-core/connmgr"
2526
"github.com/libp2p/go-libp2p-core/network"
@@ -29,6 +30,10 @@ import (
2930
ma "github.com/multiformats/go-multiaddr"
3031
)
3132

33+
var (
34+
maxRetries = 12
35+
)
36+
3237
// ConnManager implements connmgr.ConnManager
3338
type ConnManager struct {
3439
sync.Mutex
@@ -191,10 +196,18 @@ func (cm *ConnManager) Disconnected(n network.Network, c network.Conn) {
191196
Addrs: addrs,
192197
}
193198

194-
err := cm.host.connect(info)
195-
if err != nil {
196-
logger.Warn("failed to reconnect to persistent peer", "peer", c.RemotePeer(), "error", err)
197-
}
199+
go func() {
200+
for i := 0; i < maxRetries; i++ {
201+
err := cm.host.connect(info)
202+
if err != nil {
203+
logger.Warn("failed to reconnect to persistent peer", "peer", c.RemotePeer(), "error", err)
204+
time.Sleep(time.Minute)
205+
continue
206+
}
207+
208+
return
209+
}
210+
}()
198211

199212
// TODO: if number of peers falls below the min desired peer count, we should try to connect to previously discovered peers
200213
}
@@ -207,7 +220,6 @@ func (cm *ConnManager) registerDisconnectHandler(cb func(peer.ID)) {
207220
func (cm *ConnManager) OpenedStream(n network.Network, s network.Stream) {
208221
logger.Trace(
209222
"Opened stream",
210-
"host", s.Conn().LocalPeer(),
211223
"peer", s.Conn().RemotePeer(),
212224
"protocol", s.Protocol(),
213225
)
@@ -221,7 +233,6 @@ func (cm *ConnManager) registerCloseHandler(protocolID protocol.ID, cb func(id p
221233
func (cm *ConnManager) ClosedStream(n network.Network, s network.Stream) {
222234
logger.Trace(
223235
"Closed stream",
224-
"host", s.Conn().LocalPeer(),
225236
"peer", s.Conn().RemotePeer(),
226237
"protocol", s.Protocol(),
227238
)

dot/network/connmgr_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func TestPersistentPeers(t *testing.T) {
112112
require.NotEqual(t, 0, len(conns))
113113

114114
// if A disconnects from B, B should reconnect
115-
nodeA.host.h.Network().ClosePeer(nodeA.host.id())
115+
nodeA.host.h.Network().ClosePeer(nodeB.host.id())
116116
time.Sleep(time.Millisecond * 500)
117117
conns = nodeB.host.h.Network().ConnsToPeer(nodeA.host.id())
118118
require.NotEqual(t, 0, len(conns))

dot/network/message.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ func (bm *BlockRequestMessage) Decode(in []byte) error {
148148
case *pb.BlockRequest_Hash:
149149
startingBlock, err = variadic.NewUint64OrHash(common.BytesToHash(from.Hash))
150150
case *pb.BlockRequest_Number:
151+
// TODO: we are receiving block requests w/ 4-byte From field; did the format change?
152+
if len(from.Number) != 8 {
153+
return errors.New("invalid BlockResponseMessage.From; uint64 is not 8 bytes")
154+
}
151155
startingBlock, err = variadic.NewUint64OrHash(binary.LittleEndian.Uint64(from.Number))
152156
default:
153157
err = errors.New("invalid StartingBlock")

dot/network/notifications.go

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ func (s *Service) createNotificationsMessageHandler(info *notificationsProtocol,
131131
err := handshakeValidator(peer, hs)
132132
if err != nil {
133133
logger.Trace("failed to validate handshake", "protocol", info.protocolID, "peer", peer, "error", err)
134-
_ = stream.Conn().Close()
135134
return errCannotValidateHandshake
136135
}
137136

@@ -141,17 +140,17 @@ func (s *Service) createNotificationsMessageHandler(info *notificationsProtocol,
141140
// once validated, send back a handshake
142141
resp, err := info.getHandshake()
143142
if err != nil {
144-
logger.Debug("failed to get handshake", "protocol", info.protocolID, "error", err)
143+
logger.Warn("failed to get handshake", "protocol", info.protocolID, "error", err)
145144
return err
146145
}
147146

148-
err = s.host.send(peer, info.protocolID, resp)
147+
err = s.host.writeToStream(stream, resp)
149148
if err != nil {
150149
logger.Trace("failed to send handshake", "protocol", info.protocolID, "peer", peer, "error", err)
151-
_ = stream.Conn().Close()
152150
return err
153151
}
154152
logger.Trace("receiver: sent handshake", "protocol", info.protocolID, "peer", peer)
153+
return nil
155154
}
156155

157156
// if we are the initiator and haven't received the handshake already, validate it
@@ -161,7 +160,6 @@ func (s *Service) createNotificationsMessageHandler(info *notificationsProtocol,
161160
if err != nil {
162161
logger.Trace("failed to validate handshake", "protocol", info.protocolID, "peer", peer, "error", err)
163162
hsData.validated = false
164-
_ = stream.Conn().Close()
165163
return errCannotValidateHandshake
166164
}
167165

@@ -175,7 +173,7 @@ func (s *Service) createNotificationsMessageHandler(info *notificationsProtocol,
175173
// if we are the initiator, send the message
176174
if hsData, has := info.getHandshakeData(peer); has && hsData.validated && hsData.received && hsData.outboundMsg != nil {
177175
logger.Trace("sender: sending message", "protocol", info.protocolID)
178-
err := s.host.send(peer, info.protocolID, hsData.outboundMsg)
176+
err := s.host.writeToStream(stream, hsData.outboundMsg)
179177
if err != nil {
180178
logger.Debug("failed to send message", "protocol", info.protocolID, "peer", peer, "error", err)
181179
return err
@@ -197,11 +195,14 @@ func (s *Service) createNotificationsMessageHandler(info *notificationsProtocol,
197195
}
198196

199197
// TODO: improve this by keeping track of who you've received/sent messages from
200-
if !s.noGossip {
201-
seen := s.gossip.hasSeen(msg)
202-
if !seen {
203-
s.broadcastExcluding(info, peer, msg)
204-
}
198+
if s.noGossip {
199+
return nil
200+
}
201+
202+
seen := s.gossip.hasSeen(msg)
203+
if !seen {
204+
// TODO: update this to write to stream w/ handshake established
205+
s.broadcastExcluding(info, peer, msg)
205206
}
206207

207208
return nil
@@ -261,7 +262,7 @@ func (s *Service) broadcastExcluding(info *notificationsProtocol, excluding peer
261262
}
262263

263264
if err != nil {
264-
logger.Error("failed to send message to peer", "peer", peer, "error", err)
265+
logger.Debug("failed to send message to peer", "peer", peer, "error", err)
265266
}
266267
}
267268
}

dot/network/service.go

Lines changed: 10 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ func NewService(cfg *Config) (*Service, error) {
145145
}
146146

147147
network.syncQueue = newSyncQueue(network)
148-
148+
network.noGossip = true // TODO: remove once duplicate message sending is merged
149149
return network, err
150150
}
151151

@@ -281,40 +281,8 @@ func (s *Service) logPeerCount() {
281281

282282
func (s *Service) handleConn(conn libp2pnetwork.Conn) {
283283
// give new peers a slight weight
284+
// TODO: do this once handshake is received
284285
s.syncQueue.updatePeerScore(conn.RemotePeer(), 1)
285-
286-
s.notificationsMu.Lock()
287-
defer s.notificationsMu.Unlock()
288-
289-
info, has := s.notificationsProtocols[BlockAnnounceMsgType]
290-
if !has {
291-
// this shouldn't happen
292-
logger.Warn("block announce protocol is not yet registered!")
293-
return
294-
}
295-
296-
// open block announce substream
297-
hs, err := info.getHandshake()
298-
if err != nil {
299-
logger.Warn("failed to get handshake", "protocol", blockAnnounceID, "error", err)
300-
return
301-
}
302-
303-
info.mapMu.RLock()
304-
defer info.mapMu.RUnlock()
305-
306-
peer := conn.RemotePeer()
307-
if hsData, has := info.getHandshakeData(peer); !has || !hsData.received { //nolint
308-
info.handshakeData.Store(peer, &handshakeData{
309-
validated: false,
310-
})
311-
312-
logger.Trace("sending handshake", "protocol", info.protocolID, "peer", peer, "message", hs)
313-
err = s.host.send(peer, info.protocolID, hs)
314-
if err != nil {
315-
logger.Trace("failed to send block announce handshake to peer", "peer", peer, "error", err)
316-
}
317-
}
318286
}
319287

320288
func (s *Service) beginDiscovery() error {
@@ -528,7 +496,7 @@ func (s *Service) readStream(stream libp2pnetwork.Stream, peer peer.ID, decoder
528496
if err == io.EOF {
529497
continue
530498
} else if err != nil {
531-
logger.Trace("failed to read from stream", "protocol", stream.Protocol(), "error", err)
499+
logger.Trace("failed to read from stream", "peer", stream.Conn().RemotePeer(), "protocol", stream.Protocol(), "error", err)
532500
_ = stream.Close()
533501
return
534502
}
@@ -541,21 +509,18 @@ func (s *Service) readStream(stream libp2pnetwork.Stream, peer peer.ID, decoder
541509
}
542510

543511
logger.Trace(
544-
"Received message from peer",
512+
"received message from peer",
545513
"host", s.host.id(),
546514
"peer", peer,
547515
"msg", msg.String(),
548516
)
549517

550-
go func() {
551-
// handle message based on peer status and message type
552-
err = handler(stream, msg)
553-
if err != nil {
554-
logger.Trace("Failed to handle message from stream", "message", msg, "error", err)
555-
_ = stream.Close()
556-
return
557-
}
558-
}()
518+
err = handler(stream, msg)
519+
if err != nil {
520+
logger.Debug("failed to handle message from stream", "message", msg, "error", err)
521+
_ = stream.Close()
522+
return
523+
}
559524
}
560525
}
561526

dot/network/service_test.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -438,16 +438,4 @@ func TestHandleConn(t *testing.T) {
438438
aScore, ok := nodeB.syncQueue.peerScore.Load(nodeA.host.id())
439439
require.True(t, ok)
440440
require.Equal(t, 1, aScore)
441-
442-
infoA := nodeA.notificationsProtocols[BlockAnnounceMsgType]
443-
hsDataB, has := infoA.getHandshakeData(nodeB.host.id())
444-
require.True(t, has)
445-
require.True(t, hsDataB.received)
446-
require.True(t, hsDataB.validated)
447-
448-
infoB := nodeB.notificationsProtocols[BlockAnnounceMsgType]
449-
hsDataA, has := infoB.getHandshakeData(nodeA.host.id())
450-
require.True(t, has)
451-
require.True(t, hsDataA.received)
452-
require.True(t, hsDataA.validated)
453441
}

dot/network/sync.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package network
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
2223
"reflect"
2324
"sort"
@@ -29,6 +30,7 @@ import (
2930
"github.com/ChainSafe/gossamer/lib/common/optional"
3031
"github.com/ChainSafe/gossamer/lib/common/variadic"
3132

33+
"github.com/ChainSafe/chaindb"
3234
libp2pnetwork "github.com/libp2p/go-libp2p-core/network"
3335
"github.com/libp2p/go-libp2p-core/peer"
3436
)
@@ -182,7 +184,7 @@ func (q *syncQueue) syncAtHead() {
182184
for {
183185
select {
184186
// sleep for average block time TODO: make this configurable from slot duration
185-
case <-time.After(q.slotDuration):
187+
case <-time.After(q.slotDuration * 2):
186188
case <-q.ctx.Done():
187189
return
188190
}
@@ -689,14 +691,14 @@ func (q *syncQueue) handleBlockJustification(data []*types.BlockData) {
689691
}
690692

691693
func (q *syncQueue) handleBlockData(data []*types.BlockData) {
692-
bestNum, err := q.s.blockState.BestBlockNumber()
694+
finalized, err := q.s.blockState.GetFinalizedHeader(0, 0)
693695
if err != nil {
694696
panic(err) // TODO: don't panic but try again. seems blockState needs better concurrency handling
695697
}
696698

697699
end := data[len(data)-1].Number().Int64()
698-
if end <= bestNum.Int64() {
699-
logger.Debug("ignoring block data that is below our head", "got", end, "head", bestNum.Int64())
700+
if end <= finalized.Number.Int64() {
701+
logger.Debug("ignoring block data that is below our head", "got", end, "head", finalized.Number.Int64())
700702
q.pushRequest(uint64(end+1), blockRequestBufferSize, "")
701703
return
702704
}
@@ -736,7 +738,7 @@ func (q *syncQueue) handleBlockData(data []*types.BlockData) {
736738
func (q *syncQueue) handleBlockDataFailure(idx int, err error, data []*types.BlockData) {
737739
logger.Warn("failed to handle block data", "failed on block", q.currStart+int64(idx), "error", err)
738740

739-
if err.Error() == "failed to get parent hash: Key not found" { // TODO: unwrap err
741+
if errors.Is(err, chaindb.ErrKeyNotFound) {
740742
header, err := types.NewHeaderFromOptional(data[idx].Header)
741743
if err != nil {
742744
logger.Debug("failed to get header from BlockData", "idx", idx, "error", err)
@@ -787,7 +789,6 @@ func (q *syncQueue) handleBlockAnnounce(msg *BlockAnnounceMessage, from peer.ID)
787789
return
788790
}
789791

790-
logger.Debug("received BlockAnnounce!", "number", msg.Number, "hash", header.Hash(), "from", from)
791792
has, _ := q.s.blockState.HasBlockBody(header.Hash())
792793
if has {
793794
return
@@ -797,13 +798,16 @@ func (q *syncQueue) handleBlockAnnounce(msg *BlockAnnounceMessage, from peer.ID)
797798
return
798799
}
799800

801+
q.goal = header.Number.Int64()
802+
800803
bestNum, err := q.s.blockState.BestBlockNumber()
801804
if err != nil {
802805
logger.Error("failed to get best block number", "error", err)
803806
return
804807
}
805808

806-
q.goal = header.Number.Int64()
809+
// TODO: if we're at the head, this should request by hash instead of number, since there will
810+
// certainly be blocks with the same number.
807811
q.pushRequest(uint64(bestNum.Int64()+1), blockRequestBufferSize, from)
808812
}
809813

dot/network/sync_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"github.com/ChainSafe/gossamer/lib/common/optional"
2929
"github.com/ChainSafe/gossamer/lib/utils"
3030

31+
"github.com/ChainSafe/chaindb"
3132
"github.com/libp2p/go-libp2p-core/peer"
3233
"github.com/stretchr/testify/require"
3334
)
@@ -425,9 +426,10 @@ func TestSyncQueue_SyncAtHead(t *testing.T) {
425426
q.stop()
426427
time.Sleep(time.Second)
427428
q.ctx = context.Background()
429+
q.slotDuration = time.Millisecond * 100
428430

429431
go q.syncAtHead()
430-
time.Sleep(time.Millisecond * 6100)
432+
time.Sleep(q.slotDuration * 3)
431433
select {
432434
case req := <-q.requestCh:
433435
require.Equal(t, uint64(2), req.req.StartingBlock.Uint64())
@@ -500,7 +502,7 @@ func TestSyncQueue_handleBlockDataFailure_MissingParent(t *testing.T) {
500502
q.ctx = context.Background()
501503

502504
data := testBlockResponseMessage().BlockData
503-
q.handleBlockDataFailure(0, fmt.Errorf("failed to get parent hash: Key not found"), data)
505+
q.handleBlockDataFailure(0, fmt.Errorf("some error: %w", chaindb.ErrKeyNotFound), data)
504506
select {
505507
case req := <-q.requestCh:
506508
require.True(t, req.req.StartingBlock.IsHash())

dot/network/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ func readStream(stream libp2pnetwork.Stream, buf []byte) (int, error) {
189189
}
190190

191191
if length == 0 {
192-
return 0, err // TODO: return bytes read from readLEB128ToUint64
192+
return 0, nil // msg length of 0 is allowed, for example transactions handshake
193193
}
194194

195195
// TODO: check if length > len(buf), if so probably log.Crit

0 commit comments

Comments
 (0)