Compare commits

..

113 Commits

Author SHA1 Message Date
Peter Zhang
718855bca3 increase db_max_num_sectors to be about 1000GB 2024-11-15 12:15:20 +08:00
Bo QIU
e912522386
Supports to sync historical files without NewFile gossip message (#269)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Supports to randomly sync historical files

* Add name for random file sync batcher

* Remove sync store metrics since multiple random batcher created

* opt log

* ignore pruned or finalized historical file

* Add python tests for historical file sync
2024-11-15 10:00:58 +08:00
MiniFrenchBread
4566eadb3e
chore: expose chunk data (#270)
* chore: expose chunk data
2024-11-15 08:26:34 +08:00
0g-peterzhb
4b48d25fb4
track tx seq number time consumed (#268)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-11-14 16:54:42 +08:00
peilun-conflux
1046fed088
Wait for tx to be processed in pruner. (#267)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Wait for tx to be processed in pruner.

* Put FIRST_REWARDABLE_CHUNK_KEY in data db.
2024-11-14 02:58:59 +08:00
0g-peterzhb
f4d5228234
@peter/detailed metrics (#256)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* add detailed metrics for storage layer
2024-11-13 17:07:34 +08:00
Bo QIU
d93f453d50
RPC return file pruned info (#266)
* RPC return file pruned info

* return tx status in atomic manner

* fix clippy
2024-11-13 16:55:57 +08:00
leopardracer
16e70bde68
fix: typos in documentation files (#265)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Update README.md
2024-11-13 09:07:06 +08:00
Bo QIU
1de7afec14
Add more metrics for network unbounded channel (#264)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Add metrics for file finalization in chunk pool

* Add metrics for network unbounded channel
2024-11-12 17:25:49 +08:00
Joel Liu
0c493880ee
add timeout for rpc connections (#263)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-11-09 13:37:09 +08:00
0g-peterzhb
fae2d5efb6
@peter/db split (#262)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* do not pad tx during sync phase

* add pad data store

* support async padding after sync phase

* split misc

* add sleep for the next loop
2024-11-08 22:06:45 +08:00
Bo QIU
3fd800275a
Improve rate limit for UDP discovery (#261)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-11-06 18:25:08 +08:00
Bo QIU
baf0521c99
copy key file to bootnode folder in python tests (#260)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-11-06 14:01:47 +08:00
Bo QIU
bcbd8b3baa
Ban peer if failed to decode pubsub message (#259)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-11-05 15:10:44 +08:00
Bo QIU
cae5b62440
Hotfix for python tests caused by unexpected file deletion (#258)
* Hotfix for python tests caused by unexpected file deletion

* add more info when launch blockchain node failed

* add stdout if blockchain launch failed

* seek stdout and err to 0 if failed to launch blockchain

* Improve zg chain port to avoid port conflict in parallel execution

* fix float issue

* Fix py failures
2024-11-05 13:49:58 +08:00
0g-peterzhb
9eea71e97d
separate data db from flow db (#252)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* separate data db from flow db
2024-10-31 15:44:26 +08:00
Bo QIU
bb6e1457b7
Refuse network identity incompatible nodes in UDP discovery layer (#253)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Add python test for UDP discovery

* Refuse nodes with incompatible ENR
2024-10-30 17:26:02 +08:00
peilun-conflux
da2cdec8a1
Remove unused. (#251)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-10-29 23:23:53 +08:00
Bo QIU
9b68a8b7d7
Implement file sync protocol V2 (#249)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* Add new P2P protocol NewFile

* Publish NewFile message when any file finalized

* handle NewFile message in router

* handle NewFile in sync service to write in db

* use propagation source to handle NewFile message

* Disable sequential sync and store new file in v2 sync store

* Add shard config in FindFile

* Add AnnounceFile RPC message in network layer

* do not propagate FindFile to whole network

* Mark peer connected if FileAnnouncement RPC message received

* fix unit test failures

* Change P2P protocol version

* Ignore py tests of sequential auto sync

* Add py test for auto sync v2

* fmt code

* remove dummy code in py test

* fix random test failure

* Add comments

* Enable file sync protocol v2 in config file by default
2024-10-28 14:56:08 +08:00
peilun-conflux
789eae5cc1
Start nodes sequentially to fix some random failure. (#243)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
Currently we use the config `network_libp2p_nodes` to connect nodes
in the tests. This will not be retried, so if an early node starts
too slowly, other nodes may fail to connect to it.
2024-10-28 10:53:28 +08:00
peilun-conflux
2f9960e8e7
Hardcode pad data segment root. (#250)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Hardcode pad data segment root.

* fix deref

---------

Co-authored-by: Peter Zhang <peter@0g.ai>
2024-10-27 20:58:03 +08:00
peilun-conflux
506d234562
Use LRU to cache MPT nodes. (#227)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Add trait.

* Update merkle tree trait.

* Use NodeManager.

* fix.

* Use LRU for cache.

* fix clippy.

* Save layer size.

* Initialize LogManager with NodeManager.

* Fix.

* Fix test.

* fix.
2024-10-27 12:52:06 +08:00
Bo QIU
8f17a7ad72
Fix metrics config deser (#245)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-10-22 14:14:33 +08:00
Joel Liu
2947cb7ac6
Optimizing recover perf by reducing sync progress events (#244)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Optimizing recover perf by reducing sync progress events

* add log

* add log
2024-10-21 21:24:50 +08:00
peilun-conflux
39efb721c5
Remove sender from contract call. (#242)
This allows the RPC services to cache the results.
2024-10-21 16:58:50 +08:00
Joel Liu
9fe5a2c18b
Stop recovery when sending via the channel fails (#240)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-10-18 16:08:35 +08:00
MiniFrenchBread
80b4d63cba
fix: error handling (#235)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-10-15 19:27:22 +08:00
bruno-valante
b2a70501c2
Test flow root consistency (#230) 2024-10-15 14:24:56 +08:00
Bo QIU
e701c8fdbd
Supports custom public ip to announce file (#233)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Supports custom public ip to announce file

* Fix comment
2024-10-14 14:57:42 +08:00
0g-peterzhb
a4b02a21b7
add retry (#232) 2024-10-14 14:19:05 +08:00
MiniFrenchBread
3fc1543fb4
chore: update abi (#234)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-10-14 12:38:13 +08:00
bruno-valante
82fd29968b
Support shard in case the mining is not enabled (#231)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-10-12 17:03:47 +08:00
peilun-conflux
45fa344564
Check the local flow root against the contract state. (#229)
* Check the local flow root against the contract state.

* Check zero contract root.

* Fix wrong root before the first segment.

* Update contracts.

* Fix proof insertion.
2024-10-12 16:50:31 +08:00
0g-peterzhb
48868b60db
update ip (#223)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-10-11 17:07:43 +08:00
Joel Liu
649f6e5e9f
Fix the dead loop problem (#228)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Fix the dead loop problem

* keep parent block of log_latest_block_number block

* avoid overflow
2024-10-10 22:02:20 +08:00
MiniFrenchBread
79d960d4ea
feat: tx_seq rpc (#226)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-10-10 14:57:40 +08:00
MiniFrenchBread
b131dc532f
test: update contracts, shard submission test (#225)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* fix: pad tx based on the on-chain contract result.

* chore: update contracts

* test: shard submission

---------

Co-authored-by: Peilun Li <peilun.li@confluxnetwork.org>
2024-10-09 16:33:20 +08:00
peilun-conflux
4e2c5fa8a4
fix: pad tx based on the on-chain contract result. (#224) 2024-10-09 15:20:42 +08:00
peilun-conflux
b76fa7be9b
Add TopicScoreParams for gossipsubs. (#189)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-10-08 14:32:23 +08:00
bruno-valante
27db2d6496
Avoid appending to_seal_set if sealer is not enabled (#212) 2024-10-08 14:26:35 +08:00
0g-peterzhb
20266e0a6c
@peter/async padding (#219)
* support async padding
2024-10-08 13:13:32 +08:00
peilun-conflux
395aeabde7
fix: do not finalize same-root tx with missing data. (#222)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-10-05 22:16:33 +08:00
MiniFrenchBread
ee4123418f
fix: end_segment_index (#221)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* fix: end_segment_index
2024-10-05 14:31:12 +08:00
MiniFrenchBread
949462084a
fix: finalize check on tx sync (#220)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* fix: finalize

* fix: check

* fix: end_segment_index

* fix: fmt
2024-09-30 18:43:11 +08:00
MiniFrenchBread
07ac814e57
test: use fixed 0g binary (#218)
* test: use latest 0g binary

* feat: use bug fixed 0g binary
2024-09-30 14:56:38 +08:00
peilun-conflux
69b71fc0b2
Fix issue in reverting the last incomplete tx. (#215) 2024-09-27 10:02:47 +08:00
Bo QIU
ae6ecfec96
reduce default sync threads (#214) 2024-09-26 14:55:00 +08:00
Bo QIU
ad80b22a1b
Optimize rpc config (#213) 2024-09-25 16:48:40 +08:00
Bo QIU
84c415e959
opt zgs version (#211) 2024-09-24 16:42:38 +08:00
Bo QIU
9cde84ae15
Refactor network peer db configs (#209) 2024-09-24 11:59:34 +08:00
Bo QIU
1dd7bf7734
Remove the 0gchain genesis init script for mac os (#208)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* Remove the 0gchain genesis init script for mac os

* fix on mac
2024-09-20 19:16:53 +08:00
MiniFrenchBread
5849e9c2ba
fix: finalize file does not need to save (#206)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* fix: finalize file does not need to save

* fix: rust fmt
2024-09-19 19:14:45 +08:00
0g-peterzhb
b7badcda5e
do not fail on pruner first reward chunk revert error (#205) 2024-09-19 17:33:56 +08:00
bruno-valante
1434b94495
Resolve dependency issue from atty (#204)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-09-14 19:06:52 +08:00
bruno-valante
da0e2c7031
Fix issues found in auditions (#203) 2024-09-14 18:19:16 +08:00
peilun-conflux
10bd71046b
Add some input validation for append_merkle. (#202)
* Add some input validation for `append_merkle`.

* Fix clippy.
2024-09-14 17:39:07 +08:00
Bo QIU
a153955246
Add log entry sync info in zgs_getStatus rpc (#200)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-09-14 10:11:12 +08:00
Joel Liu
f878a4849c
Handle cases where sequence is not continuous during catch-up (#199)
* Handle cases where the sequence is not continuous during the catch-up process

* get block hash from rpc if not found in local cache
2024-09-14 09:05:11 +08:00
Bo QIU
a9f5169c15
Allow user to configure network bandwidth for file sync (#198)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-09-13 16:58:27 +08:00
Jason
59d24b073d
change the Pora result from debug to info (#192)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
Co-authored-by: jason1 <jason1@cambricon.com>
2024-09-13 07:41:23 +08:00
Bo QIU
7f14451141
Add zgs rpc to check file finality (#196)
* Add tx seq or root enum

* Add zgs rpc to check file finality

* trailing whitespace
2024-09-13 07:40:43 +08:00
peilun-conflux
5c81abb79f
Use iterator to return padding data. (#197)
* Use iterator to return padding data.

* Fix order.
2024-09-13 07:38:47 +08:00
peilun-conflux
a4dd88f2b8
Change submit receipt to info. (#195)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-09-11 17:59:43 +08:00
peilun-conflux
3dc3d0574f
Fix typos. (#193) 2024-09-11 17:57:41 +08:00
Bo QIU
1d48cb1ea7
Use 0gchain to test mine tests (#191)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Use 0gchain to test mine tests

* adjust zg block time
2024-09-11 10:13:31 +08:00
Joel Liu
702680f3a4
reset parent block hash when reset progress (#188)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* reset parent block hash when reset progress
2024-09-09 14:51:08 +08:00
peilun-conflux
678d233f69
Output debug log with serde_json and fix debug root db key. (#185)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Output debug log with serde_json and fix debug root db key.
2024-09-08 20:21:34 +08:00
Bo QIU
052d2d781b
Change zg chain block time in python tests (#181)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Change zg chain block time in python tests
2024-09-08 08:38:53 +08:00
bruno-valante
52878b6709
Fix recall range computation for sharding (#186)
* Fix recall range computation for sharding

* cargo fmt
2024-09-08 08:37:03 +08:00
Joel Liu
43c2d5f788
Add configuration which can force sync from start_block_number (#183)
* query logs via LogQuery in watch loop
2024-09-08 08:34:59 +08:00
peilun-conflux
29fcc415a6
Add log for proof generation errors. (#182)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-09-06 17:56:04 +08:00
bruno-valante
bf3694d138
Update mine test process to avoid random bugs on low-performance devices (#184) 2024-09-06 17:53:00 +08:00
Joel Liu
041f5f12b6
Allow for retry attempts in the watch loop (#179)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Allow for retry attempts in the watch loop
2024-09-06 15:08:13 +08:00
Bo QIU
b6972b97af
Adjust default value for testnet configs (#180)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Adjust default value for testnet configs

* Supports to disable sequential auto sync

* Add py tests for auto sync

* fix py file name

* rm dummy py code

* change default block confirm count
2024-09-05 10:09:29 +08:00
Joel Liu
e20be63026
Query logs via LogQuery in watch loop (#177)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* query logs via LogQuery in watch loop

* fix lints error
2024-09-02 18:49:18 +08:00
boqiu
e7a562fa61 hotfix for json serde camel case 2024-09-02 16:12:01 +08:00
Bo QIU
4edd61b9d2
return network identity for status rpc (#178) 2024-09-02 16:05:44 +08:00
0g-peterzhb
e3c199d361
check error messages (#176)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-08-30 17:28:34 +08:00
0g-peterzhb
508e787bd0
update boot nodes (#175)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-08-30 10:38:19 +08:00
Bo QIU
6685757a03
notify file announcement to sync layer only if shard config matches (#174)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* notify file announcement to sync layer only if shard config matches

* use FromStr trait
2024-08-29 18:35:35 +08:00
boqiu
75531a5878 Add metrics in file location cache
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-08-29 11:55:00 +08:00
Bo QIU
2fd9712d59
Enhance P2P network protocol to support batch messages for performance concern (#173)
* Add p2p protocol version in network identity

* Cache announce file pubsub messages to publish in batch

* fix file location cache

* opt sync metrics

* opt file location cache default configs

* publish files announcements in batch

* enhance announce file pubsub msg metrics

* opt metrics

* fix ci

* fix clippy

* fix batcher

* minor fix

* opt batcher: publish all if expired
2024-08-29 09:55:24 +08:00
bruno-valante
a79f7bbf12
Fix contract interface build cache (#172)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-08-27 18:08:10 +08:00
bruno-valante
aaba4b5da0
Fix issue found in auditions (#170) 2024-08-27 17:36:55 +08:00
bruno-valante
5757c98d2a
Config query context interval & Add config comments (#156) 2024-08-27 17:36:36 +08:00
Bo QIU
f1a1c39332
adjust sync timeout in py test (#171)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-08-27 09:52:00 +08:00
Bo QIU
37344b9439
opt sync config and update config file (#169)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-08-26 15:29:52 +08:00
0g-peterzhb
780865b1b8
do not fail on prune error (#168)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* do not fail on prune error
2024-08-25 16:48:03 +08:00
peilun-conflux
1c72607fbc
Set sync start index based on data in db. (#166)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* Set sync start index based on data in db.

* Fix test.

* nit.
2024-08-23 12:41:25 +08:00
Bo QIU
f14a1b5975
add timestamp in p2p rpc request id for latency stat (#167)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* add timestamp in p2p rpc request id for latency stat

* fix test compilation error
2024-08-22 14:34:10 +08:00
Bo QIU
82fef674ed
Add more metrics for file sync (#164)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* add metrics in log sync package

* update auto sync metrics

* Add metrics for completed file sync

* add more metrics for serial file sync

* adjust default timeout value for auto sync

* fix metrics rpc for Timer type

* add metrics for channel

* refactor channel metrics

* add timeout metrics for segment sync

* refactor channel receiver
2024-08-22 10:42:15 +08:00
Bo QIU
c1f465e009
Do not verify announced ip address by default (#163)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-08-20 14:39:58 +08:00
boqiu
c337ff90fb hotfix for admin rpc metrics
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-08-19 17:07:57 +08:00
Bo QIU
908ee156ca
Add admin rpc to exports metrics (#162) 2024-08-19 15:18:12 +08:00
Bo QIU
f9120b1e4a
Add metrics for router and auto sync (#161)
* Add metrics configurations

* Add metrics in router package

* Add catch up info in sync service state

* Add metrics for auto sync

* update cargo lock
2024-08-19 09:54:52 +08:00
Bo QIU
22ed8f5f91
opt router debug logs (#160)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-08-16 10:34:28 +08:00
peilun-conflux
12d0c6b675
Check network id in status and ban incompatible peers. (#159)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* Check network id in status and ban incompatible peers.

* Revert debug changes.

* Request chain id from the blockchain server.
2024-08-14 11:35:48 +08:00
Bo QIU
03286ebd78
Supports HDD config file (#158)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* Add debug log config

* Add config file for ssd and hdd

* Set most default value for ssd and hdd config file
2024-08-12 17:07:06 +08:00
0g-peterzhb
f03f97c609
more robust provider (#157)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* more robust provider
2024-08-12 11:35:21 +08:00
peilun-conflux
53449e1faa
Half log_page_size when it queries too many logs. (#152)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
* Half log_page_size when it queries too many logs.

* fmt.

* Increase log broadcast channel size.
2024-08-09 17:13:20 +08:00
Bo QIU
9189cabbb2
Supports batch randomly auto sync files (#154)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
2024-08-08 18:36:08 +08:00
Bo QIU
e1df2f0868
Change hardcoded const variables to configs (#153) 2024-08-08 16:21:50 +08:00
rickiey
77d1b84974
fix:log config : invalid filter directive (#146)
Some checks failed
abi-consistent-check / build-and-compare (push) Has been cancelled
code-coverage / unittest-cov (push) Has been cancelled
rust / check (push) Has been cancelled
rust / test (push) Has been cancelled
rust / lints (push) Has been cancelled
functional-test / test (push) Has been cancelled
2024-08-06 18:46:53 +08:00
peilun-conflux
d80e7e22ca
Prune no reward chunks. (#145)
* Prune no reward chunks.

* Add tests.

* Fix tests.

* Fix clippy.

* Revert test.

* Enable market in shard_sync_test.

* Add tx prune status.

* Fix tests.
2024-08-06 15:06:15 +08:00
Bo QIU
6ade66c086
Add admin rpc to return sync service state (#151) 2024-08-06 14:01:57 +08:00
Bo QIU
891e00fa80
Refactor auto sync to return sync status (#150)
Some checks are pending
abi-consistent-check / build-and-compare (push) Waiting to run
code-coverage / unittest-cov (push) Waiting to run
rust / check (push) Waiting to run
rust / test (push) Waiting to run
rust / lints (push) Waiting to run
functional-test / test (push) Waiting to run
* Refactor auto sync to return sync state

* Add auto sync manager to wrap multiple objects

* use auto sync manager

* fix python tests
2024-08-05 17:30:26 +08:00
peilun-conflux
dbd865fded
Change db_max_num_chunks to db_max_num_sectors. (#137)
* Change db_max_num_chunks to db_max_num_sectors.

* Update tests and config files.

* Revert contract change.
2024-07-29 22:31:19 +08:00
Bo QIU
ae9c52c0e6
Terminate file sync if connecting peers too long (#136) 2024-07-27 21:05:44 +08:00
bruno-valante
7d73ccd1e1
Update with the most recent storage contract (#142)
* Update with the most recent storage contract

* fix nits
2024-07-24 13:01:30 +08:00
MiniFrenchBread
533bacb234
fix: admin_getFileLocation; test: sync test (#141)
* feat: add all_shards in admin_getFileLocation

* fix: admin_getFileLocation

* test: improve sync test

* fix: lint
2024-07-23 15:47:44 +08:00
bruno-valante
f0c3f2cfd0
Remove contract abi submodule (#139) 2024-07-22 17:32:51 +08:00
dependabot[bot]
116046366a
Bump thread_local from 1.1.3 to 1.1.8 in /version-meld/discv5 (#140)
Bumps [thread_local](https://github.com/Amanieu/thread_local-rs) from 1.1.3 to 1.1.8.
- [Commits](https://github.com/Amanieu/thread_local-rs/compare/v1.1.3...1.1.8)

---
updated-dependencies:
- dependency-name: thread_local
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 15:51:45 +08:00
MiniFrenchBread
a782cf2d54
feat: add all_shards in admin_getFileLocation (#138) 2024-07-22 11:28:05 +08:00
189 changed files with 13134 additions and 2315 deletions

48
.github/workflows/abi.yml vendored Normal file
View File

@ -0,0 +1,48 @@
# Workflow summary: reads a revision from ./storage-contracts-abis/0g-storage-contracts-rev,
# checks out 0glabs/0g-storage-contracts at that revision, builds it with yarn, and runs
# ./scripts/check_abis.sh against the resulting artifacts directory.
name: abi-consistent-check
on:
push:
branches: [ "main"]
pull_request:
branches: [ "main" ]
jobs:
build-and-compare:
runs-on: ubuntu-latest
steps:
- name: Clone current repository
uses: actions/checkout@v3
- name: Get the Git revision from the current repository
id: get-rev
# Publishes the recorded revision as the `rev` output consumed by the checkout step below.
run: echo "rev=$(cat ./storage-contracts-abis/0g-storage-contracts-rev)" >> $GITHUB_OUTPUT
- name: Clone another repository
uses: actions/checkout@v3
with:
repository: '0glabs/0g-storage-contracts'
path: '0g-storage-contracts'
# Move the cloned contracts repository to the exact revision read above.
- name: Checkout specific revision
working-directory: ./0g-storage-contracts
run: |
git fetch --depth=1 origin ${{ steps.get-rev.outputs.rev }}
git checkout ${{ steps.get-rev.outputs.rev }}
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: '18.17'
cache: 'yarn'
cache-dependency-path: ./0g-storage-contracts
- name: Run yarn in the cloned repository
working-directory: ./0g-storage-contracts
run: |
yarn
yarn build
# The comparison itself is delegated to the repository's own script.
- name: Compare files
run: |
./scripts/check_abis.sh ./0g-storage-contracts/artifacts/

2
.gitignore vendored
View File

@ -5,4 +5,4 @@
tests/**/__pycache__
tests/tmp/**
.vscode/*.json
/0g-storage-contracts-dev
/0g-storage-contracts-dev

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "0g-storage-contracts"]
path = 0g-storage-contracts
url = https://github.com/0glabs/0g-storage-contracts.git

@ -1 +0,0 @@
Subproject commit 25bc14a27441e8fb26e4d42d7c8c885f92d6c74a

749
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -27,10 +27,17 @@ members = [
]
resolver = "2"
[workspace.dependencies]
metrics = { git = "https://github.com/Conflux-Chain/conflux-rust.git", rev = "c4734e337c66d38e6396742cd5117b596e8d2603" }
[patch.crates-io]
discv5 = { path = "version-meld/discv5" }
eth2_ssz = { path = "version-meld/eth2_ssz" }
enr = { path = "version-meld/enr" }
[profile.bench.package.'storage']
debug = true
debug = true
[profile.dev]
# enabling debug_assertions will make node fail to start because of checks in `clap`.
debug-assertions = false

View File

@ -4,7 +4,7 @@
0G Storage is the storage layer for the ZeroGravity data availability (DA) system. The 0G Storage layer holds three important features:
* Buit-in - It is natively built into the ZeroGravity DA system for data storage and retrieval.
* Built-in - It is natively built into the ZeroGravity DA system for data storage and retrieval.
* General purpose - It is designed to support atomic transactions, mutable kv stores as well as archive log systems to enable wide range of applications with various data types.
* Incentive - Instead of being just a decentralized database, 0G Storage introduces PoRA mining algorithm to incentivize storage network participants.

View File

@ -12,4 +12,9 @@ eth2_ssz_derive = "0.3.0"
serde = { version = "1.0.137", features = ["derive"] }
lazy_static = "1.4.0"
tracing = "0.1.36"
once_cell = "1.19.0"
once_cell = "1.19.0"
metrics = { workspace = true }
itertools = "0.13.0"
lru = "0.12.5"

View File

@ -1,23 +1,30 @@
mod merkle_tree;
mod metrics;
mod node_manager;
mod proof;
mod sha3;
use anyhow::{anyhow, bail, Result};
use itertools::Itertools;
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::Arc;
use std::time::Instant;
use tracing::{trace, warn};
use crate::merkle_tree::MerkleTreeWrite;
pub use crate::merkle_tree::{
Algorithm, HashElement, MerkleTreeInitialData, MerkleTreeRead, ZERO_HASHES,
};
pub use crate::node_manager::{EmptyNodeDatabase, NodeDatabase, NodeManager, NodeTransaction};
pub use proof::{Proof, RangeProof};
pub use sha3::Sha3Algorithm;
pub struct AppendMerkleTree<E: HashElement, A: Algorithm<E>> {
/// Keep all the nodes in the latest version. `layers[0]` is the layer of leaves.
layers: Vec<Vec<E>>,
node_manager: NodeManager<E>,
/// Keep the delta nodes that can be used to construct a history tree.
/// The key is the root node of that version.
delta_nodes_map: BTreeMap<u64, DeltaNodes<E>>,
@ -35,13 +42,16 @@ pub struct AppendMerkleTree<E: HashElement, A: Algorithm<E>> {
impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
pub fn new(leaves: Vec<E>, leaf_height: usize, start_tx_seq: Option<u64>) -> Self {
let mut merkle = Self {
layers: vec![leaves],
node_manager: NodeManager::new_dummy(),
delta_nodes_map: BTreeMap::new(),
root_to_tx_seq_map: HashMap::new(),
min_depth: None,
leaf_height,
_a: Default::default(),
};
merkle.node_manager.start_transaction();
merkle.node_manager.add_layer();
merkle.node_manager.append_nodes(0, &leaves);
if merkle.leaves() == 0 {
if let Some(seq) = start_tx_seq {
merkle.delta_nodes_map.insert(
@ -51,10 +61,12 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
},
);
}
merkle.node_manager.commit();
return merkle;
}
// Reconstruct the whole tree.
merkle.recompute(0, 0, None);
merkle.node_manager.commit();
// Commit the first version in memory.
// TODO(zz): Check when the roots become available.
merkle.commit(start_tx_seq);
@ -62,53 +74,44 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
}
pub fn new_with_subtrees(
initial_data: MerkleTreeInitialData<E>,
node_db: Arc<dyn NodeDatabase<E>>,
node_cache_capacity: usize,
leaf_height: usize,
start_tx_seq: Option<u64>,
) -> Result<Self> {
let mut merkle = Self {
layers: vec![vec![]],
node_manager: NodeManager::new(node_db, node_cache_capacity)?,
delta_nodes_map: BTreeMap::new(),
root_to_tx_seq_map: HashMap::new(),
min_depth: None,
leaf_height,
_a: Default::default(),
};
if initial_data.subtree_list.is_empty() {
if let Some(seq) = start_tx_seq {
merkle.delta_nodes_map.insert(
seq,
DeltaNodes {
right_most_nodes: vec![],
},
);
}
return Ok(merkle);
}
merkle.append_subtree_list(initial_data.subtree_list)?;
merkle.commit(start_tx_seq);
for (index, h) in initial_data.known_leaves {
merkle.fill_leaf(index, h);
}
for (layer_index, position, h) in initial_data.extra_mpt_nodes {
// TODO: Delete duplicate nodes from DB.
merkle.layers[layer_index][position] = h;
if merkle.height() == 0 {
merkle.node_manager.start_transaction();
merkle.node_manager.add_layer();
merkle.node_manager.commit();
}
Ok(merkle)
}
/// This is only used for the last chunk, so `leaf_height` is always 0 so far.
pub fn new_with_depth(leaves: Vec<E>, depth: usize, start_tx_seq: Option<u64>) -> Self {
let mut node_manager = NodeManager::new_dummy();
node_manager.start_transaction();
if leaves.is_empty() {
// Create an empty merkle tree with `depth`.
let mut merkle = Self {
layers: vec![vec![]; depth],
// dummy node manager for the last chunk.
node_manager,
delta_nodes_map: BTreeMap::new(),
root_to_tx_seq_map: HashMap::new(),
min_depth: Some(depth),
leaf_height: 0,
_a: Default::default(),
};
for _ in 0..depth {
merkle.node_manager.add_layer();
}
if let Some(seq) = start_tx_seq {
merkle.delta_nodes_map.insert(
seq,
@ -117,36 +120,58 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
},
);
}
merkle.node_manager.commit();
merkle
} else {
let mut layers = vec![vec![]; depth];
layers[0] = leaves;
let mut merkle = Self {
layers,
// dummy node manager for the last chunk.
node_manager,
delta_nodes_map: BTreeMap::new(),
root_to_tx_seq_map: HashMap::new(),
min_depth: Some(depth),
leaf_height: 0,
_a: Default::default(),
};
merkle.node_manager.add_layer();
merkle.append_nodes(0, &leaves);
for _ in 1..depth {
merkle.node_manager.add_layer();
}
// Reconstruct the whole tree.
merkle.recompute(0, 0, None);
merkle.node_manager.commit();
// Commit the first version in memory.
merkle.commit(start_tx_seq);
merkle
}
}
/// Append a new leaf and recompute the tree. A null leaf is ignored.
pub fn append(&mut self, new_leaf: E) {
self.layers[0].push(new_leaf);
let start_time = Instant::now();
if new_leaf == E::null() {
// appending null is not allowed.
return;
}
self.node_manager.start_transaction();
self.node_manager.push_node(0, new_leaf);
self.recompute_after_append_leaves(self.leaves() - 1);
self.node_manager.commit();
metrics::APPEND.update_since(start_time);
}
pub fn append_list(&mut self, mut leaf_list: Vec<E>) {
pub fn append_list(&mut self, leaf_list: Vec<E>) {
let start_time = Instant::now();
if leaf_list.contains(&E::null()) {
// appending null is not allowed.
return;
}
self.node_manager.start_transaction();
let start_index = self.leaves();
self.layers[0].append(&mut leaf_list);
self.node_manager.append_nodes(0, &leaf_list);
self.recompute_after_append_leaves(start_index);
self.node_manager.commit();
metrics::APPEND_LIST.update_since(start_time);
}
/// Append a leaf list by providing their intermediate node hash.
@ -155,44 +180,76 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
/// Other nodes in the subtree will be set to `null` nodes.
/// TODO: Optimize to avoid storing the `null` nodes?
pub fn append_subtree(&mut self, subtree_depth: usize, subtree_root: E) -> Result<()> {
// Timer for the `APPEND_SUBTREE` metric; only recorded when the append succeeds.
let start_time = Instant::now();
if subtree_root == E::null() {
// appending null is not allowed.
bail!("subtree_root is null");
}
self.node_manager.start_transaction();
// Leaf count before the append; the recompute below starts from this index.
let start_index = self.leaves();
// NOTE(review): if this call fails, `?` returns before `commit()` is reached, so the
// transaction started above is left open here — confirm how `NodeManager` handles that.
self.append_subtree_inner(subtree_depth, subtree_root)?;
// NOTE(review): `subtree_depth - 1` would underflow for a depth of 0; presumably
// `append_subtree_inner` rejects that input first — confirm.
self.recompute_after_append_subtree(start_index, subtree_depth - 1);
self.node_manager.commit();
metrics::APPEND_SUBTREE.update_since(start_time);
Ok(())
}
/// Append several subtrees, each given as `(subtree_depth, subtree_root)`, within a single
/// node-manager transaction.
///
/// Returns an error without touching the tree if any root in the list is the null element,
/// and propagates any error from `append_subtree_inner`.
/// On success the elapsed time is recorded in `metrics::APPEND_SUBTREE_LIST`.
pub fn append_subtree_list(&mut self, subtree_list: Vec<(usize, E)>) -> Result<()> {
let start_time = Instant::now();
// Validate the whole list up front, before the transaction is started.
if subtree_list.iter().any(|(_, root)| root == &E::null()) {
// appending null is not allowed.
bail!("subtree_list contains null");
}
self.node_manager.start_transaction();
for (subtree_depth, subtree_root) in subtree_list {
// The leaf count is re-read for every subtree because each append grows the tree.
let start_index = self.leaves();
// NOTE(review): an error here returns before `commit()`, leaving the transaction
// started above open — confirm how `NodeManager` handles that.
self.append_subtree_inner(subtree_depth, subtree_root)?;
self.recompute_after_append_subtree(start_index, subtree_depth - 1);
}
self.node_manager.commit();
metrics::APPEND_SUBTREE_LIST.update_since(start_time);
Ok(())
}
/// Change the value of the last leaf and recompute the tree.
/// This is needed if our merkle-tree in memory only keeps intermediate nodes instead of real leaves.
pub fn update_last(&mut self, updated_leaf: E) {
if self.layers[0].is_empty() {
let start_time = Instant::now();
if updated_leaf == E::null() {
// updating to null is not allowed.
return;
}
self.node_manager.start_transaction();
if self.layer_len(0) == 0 {
// Special case for the first data.
self.layers[0].push(updated_leaf);
self.push_node(0, updated_leaf);
} else {
*self.layers[0].last_mut().unwrap() = updated_leaf;
self.update_node(0, self.layer_len(0) - 1, updated_leaf);
}
self.recompute_after_append_leaves(self.leaves() - 1);
self.node_manager.commit();
metrics::UPDATE_LAST.update_since(start_time);
}
/// Fill an unknown `null` leaf with its real value.
/// Panics if the leaf changes the merkle root or the index is out of range.
/// Panics if the leaf is already set and different or the index is out of range.
/// TODO: Batch computing intermediate nodes.
pub fn fill_leaf(&mut self, index: usize, leaf: E) {
if self.layers[0][index] == E::null() {
self.layers[0][index] = leaf;
if leaf == E::null() {
// fill leaf with null is not allowed.
} else if self.node(0, index) == E::null() {
self.node_manager.start_transaction();
self.update_node(0, index, leaf);
self.recompute_after_fill_leaves(index, index + 1);
} else if self.layers[0][index] != leaf {
self.node_manager.commit();
} else if self.node(0, index) != leaf {
panic!(
"Fill with invalid leaf, index={} was={:?} get={:?}",
index, self.layers[0][index], leaf
index,
self.node(0, index),
leaf
);
}
}
@ -205,18 +262,20 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
&mut self,
proof: RangeProof<E>,
) -> Result<Vec<(usize, usize, E)>> {
self.fill_with_proof(
proof
.left_proof
.proof_nodes_in_tree()
.split_off(self.leaf_height),
)?;
self.fill_with_proof(
proof
.right_proof
.proof_nodes_in_tree()
.split_off(self.leaf_height),
)
self.node_manager.start_transaction();
let mut updated_nodes = Vec::new();
let mut left_nodes = proof.left_proof.proof_nodes_in_tree();
if left_nodes.len() >= self.leaf_height {
updated_nodes
.append(&mut self.fill_with_proof(left_nodes.split_off(self.leaf_height))?);
}
let mut right_nodes = proof.right_proof.proof_nodes_in_tree();
if right_nodes.len() >= self.leaf_height {
updated_nodes
.append(&mut self.fill_with_proof(right_nodes.split_off(self.leaf_height))?);
}
self.node_manager.commit();
Ok(updated_nodes)
}
pub fn fill_with_file_proof(
@ -241,13 +300,16 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
if tx_merkle_nodes.is_empty() {
return Ok(Vec::new());
}
self.node_manager.start_transaction();
let mut position_and_data =
proof.file_proof_nodes_in_tree(tx_merkle_nodes, tx_merkle_nodes_size);
let start_index = (start_index >> self.leaf_height) as usize;
for (i, (position, _)) in position_and_data.iter_mut().enumerate() {
*position += start_index >> i;
}
self.fill_with_proof(position_and_data)
let updated_nodes = self.fill_with_proof(position_and_data)?;
self.node_manager.commit();
Ok(updated_nodes)
}
/// This assumes that the proof leaf is no lower than the tree leaf. It holds for both SegmentProof and ChunkProof.
@ -259,28 +321,27 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
let mut updated_nodes = Vec::new();
// A valid proof should not fail the following checks.
for (i, (position, data)) in position_and_data.into_iter().enumerate() {
let layer = &mut self.layers[i];
if position > layer.len() {
if position > self.layer_len(i) {
bail!(
"proof position out of range, position={} layer.len()={}",
position,
layer.len()
self.layer_len(i)
);
}
if position == layer.len() {
if position == self.layer_len(i) {
// skip padding node.
continue;
}
if layer[position] == E::null() {
layer[position] = data.clone();
if self.node(i, position) == E::null() {
self.update_node(i, position, data.clone());
updated_nodes.push((i, position, data))
} else if layer[position] != data {
} else if self.node(i, position) != data {
// The last node in each layer may have changed in the tree.
trace!(
"conflict data layer={} position={} tree_data={:?} proof_data={:?}",
i,
position,
layer[position],
self.node(i, position),
data
);
}
@ -296,8 +357,8 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
if position >= self.leaves() {
bail!("Out of bound: position={} end={}", position, self.leaves());
}
if self.layers[0][position] != E::null() {
Ok(Some(self.layers[0][position].clone()))
if self.node(0, position) != E::null() {
Ok(Some(self.node(0, position)))
} else {
// The leaf hash is unknown.
Ok(None)
@ -345,11 +406,11 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
return;
}
let mut right_most_nodes = Vec::new();
for layer in &self.layers {
right_most_nodes.push((layer.len() - 1, layer.last().unwrap().clone()));
for height in 0..self.height() {
let pos = self.layer_len(height) - 1;
right_most_nodes.push((pos, self.node(height, pos)));
}
let root = self.root().clone();
assert_eq!(root, right_most_nodes.last().unwrap().1);
let root = self.root();
self.delta_nodes_map
.insert(tx_seq, DeltaNodes::new(right_most_nodes));
self.root_to_tx_seq_map.insert(root, tx_seq);
@ -357,8 +418,8 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
}
fn before_extend_layer(&mut self, height: usize) {
if height == self.layers.len() {
self.layers.push(Vec::new());
if height == self.height() {
self.node_manager.add_layer()
}
}
@ -375,7 +436,6 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
}
/// Given a range of changed leaf nodes and recompute the tree.
/// Since this tree is append-only, we always compute to the end.
fn recompute(
&mut self,
mut start_index: usize,
@ -385,42 +445,51 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
start_index >>= height;
maybe_end_index = maybe_end_index.map(|end| end >> height);
// Loop until we compute the new root and reach `tree_depth`.
while self.layers[height].len() > 1 || height < self.layers.len() - 1 {
while self.layer_len(height) > 1 || height < self.height() - 1 {
let next_layer_start_index = start_index >> 1;
if start_index % 2 == 1 {
start_index -= 1;
}
let mut end_index = maybe_end_index.unwrap_or(self.layers[height].len());
if end_index % 2 == 1 && end_index != self.layers[height].len() {
let mut end_index = maybe_end_index.unwrap_or(self.layer_len(height));
if end_index % 2 == 1 && end_index != self.layer_len(height) {
end_index += 1;
}
let mut i = 0;
let mut iter = self.layers[height][start_index..end_index].chunks_exact(2);
let iter = self
.node_manager
.get_nodes(height, start_index, end_index)
.chunks(2);
// We cannot modify the parent layer while iterating the child layer,
// so just keep the changes and update them later.
let mut parent_update = Vec::new();
while let Some([left, right]) = iter.next() {
// If either left or right is null (unknown), we cannot compute the parent hash.
// Note that if we are recompute a range of an existing tree,
// we do not need to keep these possibly null parent. This is only saved
// for the case of constructing a new tree from the leaves.
let parent = if *left == E::null() || *right == E::null() {
E::null()
for chunk_iter in &iter {
let chunk: Vec<_> = chunk_iter.collect();
if chunk.len() == 2 {
let left = &chunk[0];
let right = &chunk[1];
// If either left or right is null (unknown), we cannot compute the parent hash.
// Note that if we are recomputing a range of an existing tree,
// we do not need to keep these possibly null parents. This is only saved
// for the case of constructing a new tree from the leaves.
let parent = if *left == E::null() || *right == E::null() {
E::null()
} else {
A::parent(left, right)
};
parent_update.push((next_layer_start_index + i, parent));
i += 1;
} else {
A::parent(left, right)
};
parent_update.push((next_layer_start_index + i, parent));
i += 1;
}
if let [r] = iter.remainder() {
// Same as above.
let parent = if *r == E::null() {
E::null()
} else {
A::parent_single(r, height + self.leaf_height)
};
parent_update.push((next_layer_start_index + i, parent));
assert_eq!(chunk.len(), 1);
let r = &chunk[0];
// Same as above.
let parent = if *r == E::null() {
E::null()
} else {
A::parent_single(r, height + self.leaf_height)
};
parent_update.push((next_layer_start_index + i, parent));
}
}
if !parent_update.is_empty() {
self.before_extend_layer(height + 1);
@ -429,27 +498,27 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
// we can just overwrite `last_changed_parent_index` with new values.
let mut last_changed_parent_index = None;
for (parent_index, parent) in parent_update {
match parent_index.cmp(&self.layers[height + 1].len()) {
match parent_index.cmp(&self.layer_len(height + 1)) {
Ordering::Less => {
// We do not overwrite with null.
if parent != E::null() {
if self.layers[height + 1][parent_index] == E::null()
if self.node(height + 1, parent_index) == E::null()
// The last node in a layer can be updated.
|| (self.layers[height + 1][parent_index] != parent
&& parent_index == self.layers[height + 1].len() - 1)
|| (self.node(height + 1, parent_index) != parent
&& parent_index == self.layer_len(height + 1) - 1)
{
self.layers[height + 1][parent_index] = parent;
self.update_node(height + 1, parent_index, parent);
last_changed_parent_index = Some(parent_index);
} else if self.layers[height + 1][parent_index] != parent {
} else if self.node(height + 1, parent_index) != parent {
// Recompute changes a node in the middle. This should be impossible
// if the inputs are valid.
panic!("Invalid append merkle tree! height={} index={} expected={:?} get={:?}",
height + 1, parent_index, self.layers[height + 1][parent_index], parent);
height + 1, parent_index, self.node(height + 1, parent_index), parent);
}
}
}
Ordering::Equal => {
self.layers[height + 1].push(parent);
self.push_node(height + 1, parent);
last_changed_parent_index = Some(parent_index);
}
Ordering::Greater => {
@ -480,10 +549,10 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
for height in 0..(subtree_depth - 1) {
self.before_extend_layer(height);
let subtree_layer_size = 1 << (subtree_depth - 1 - height);
self.layers[height].append(&mut vec![E::null(); subtree_layer_size]);
self.append_nodes(height, &vec![E::null(); subtree_layer_size]);
}
self.before_extend_layer(subtree_depth - 1);
self.layers[subtree_depth - 1].push(subtree_root);
self.push_node(subtree_depth - 1, subtree_root);
Ok(())
}
@ -494,23 +563,45 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
}
pub fn revert_to(&mut self, tx_seq: u64) -> Result<()> {
if self.layers[0].is_empty() {
if self.layer_len(0) == 0 {
// Any previous state of an empty tree is always empty.
return Ok(());
}
self.node_manager.start_transaction();
let delta_nodes = self
.delta_nodes_map
.get(&tx_seq)
.ok_or_else(|| anyhow!("tx_seq unavailable, root={:?}", tx_seq))?;
.ok_or_else(|| anyhow!("tx_seq unavailable, root={:?}", tx_seq))?
.clone();
// Dropping the upper layers that are not in the old merkle tree.
self.layers.truncate(delta_nodes.right_most_nodes.len());
for height in (delta_nodes.right_most_nodes.len()..self.height()).rev() {
self.node_manager.truncate_layer(height);
}
for (height, (last_index, right_most_node)) in
delta_nodes.right_most_nodes.iter().enumerate()
{
self.layers[height].truncate(*last_index + 1);
self.layers[height][*last_index] = right_most_node.clone();
self.node_manager.truncate_nodes(height, *last_index + 1);
self.update_node(height, *last_index, right_most_node.clone())
}
self.clear_after(tx_seq);
self.node_manager.commit();
Ok(())
}
// Revert to a tx_seq not in `delta_nodes_map`.
// This is needed to revert the last unfinished tx after restart.
//
// Shrinks every layer based on the target leaf count `leaves`, then recomputes the tree
// starting from leaf index `leaves`, all inside one node-manager transaction.
// As written, this function always returns `Ok(())`.
pub fn revert_to_leaves(&mut self, leaves: usize) -> Result<()> {
self.node_manager.start_transaction();
// Walk from the top layer down to the leaf layer.
for height in (0..self.height()).rev() {
// floor(leaves / 2^height): nodes at this height whose whole subtree lies within `leaves`.
let kept_nodes = leaves >> height;
if kept_nodes == 0 {
self.node_manager.truncate_layer(height);
} else {
// NOTE(review): keeps one node beyond `kept_nodes`, presumably the partially covered
// right-most node that the recompute below rewrites — confirm against `truncate_nodes`.
self.node_manager.truncate_nodes(height, kept_nodes + 1);
}
}
self.recompute_after_append_leaves(leaves);
self.node_manager.commit();
Ok(())
}
@ -530,17 +621,25 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
bail!("empty tree");
}
Ok(HistoryTree {
layers: &self.layers,
node_manager: &self.node_manager,
delta_nodes,
leaf_height: self.leaf_height,
})
}
pub fn reset(&mut self) {
self.layers = match self.min_depth {
None => vec![vec![]],
Some(depth) => vec![vec![]; depth],
};
self.node_manager.start_transaction();
for height in (0..self.height()).rev() {
self.node_manager.truncate_layer(height);
}
if let Some(depth) = self.min_depth {
for _ in 0..depth {
self.node_manager.add_layer();
}
} else {
self.node_manager.add_layer();
}
self.node_manager.commit();
}
fn clear_after(&mut self, tx_seq: u64) {
@ -560,10 +659,10 @@ impl<E: HashElement, A: Algorithm<E>> AppendMerkleTree<E, A> {
fn first_known_root_at(&self, index: usize) -> (usize, E) {
let mut height = 0;
let mut index_in_layer = index;
while height < self.layers.len() {
while height < self.height() {
let node = self.node(height, index_in_layer);
if !node.is_null() {
return (height + 1, node.clone());
return (height + 1, node);
}
height += 1;
index_in_layer /= 2;
@ -608,7 +707,7 @@ impl<E: HashElement> DeltaNodes<E> {
pub struct HistoryTree<'m, E: HashElement> {
/// A reference to the global tree nodes.
layers: &'m Vec<Vec<E>>,
node_manager: &'m NodeManager<E>,
/// The delta nodes that differ from the nodes in `node_manager`.
/// This could be a reference, we just take ownership for convenience.
delta_nodes: &'m DeltaNodes<E>,
@ -619,16 +718,18 @@ pub struct HistoryTree<'m, E: HashElement> {
impl<E: HashElement, A: Algorithm<E>> MerkleTreeRead for AppendMerkleTree<E, A> {
type E = E;
fn node(&self, layer: usize, index: usize) -> &Self::E {
&self.layers[layer][index]
fn node(&self, layer: usize, index: usize) -> Self::E {
self.node_manager
.get_node(layer, index)
.expect("index checked")
}
fn height(&self) -> usize {
self.layers.len()
self.node_manager.num_layers()
}
fn layer_len(&self, layer_height: usize) -> usize {
self.layers[layer_height].len()
self.node_manager.layer_size(layer_height)
}
fn padding_node(&self, height: usize) -> Self::E {
@ -638,10 +739,13 @@ impl<E: HashElement, A: Algorithm<E>> MerkleTreeRead for AppendMerkleTree<E, A>
impl<'a, E: HashElement> MerkleTreeRead for HistoryTree<'a, E> {
type E = E;
fn node(&self, layer: usize, index: usize) -> &Self::E {
fn node(&self, layer: usize, index: usize) -> Self::E {
match self.delta_nodes.get(layer, index).expect("range checked") {
Some(node) if *node != E::null() => node,
_ => &self.layers[layer][index],
Some(node) if *node != E::null() => node.clone(),
_ => self
.node_manager
.get_node(layer, index)
.expect("index checked"),
}
}
@ -658,6 +762,22 @@ impl<'a, E: HashElement> MerkleTreeRead for HistoryTree<'a, E> {
}
}
// Write access to the tree nodes: every method forwards directly to `node_manager`.
impl<E: HashElement, A: Algorithm<E>> MerkleTreeWrite for AppendMerkleTree<E, A> {
type E = E;
// Forwards to `NodeManager::push_node`.
fn push_node(&mut self, layer: usize, node: Self::E) {
self.node_manager.push_node(layer, node);
}
// Forwards to `NodeManager::append_nodes`.
fn append_nodes(&mut self, layer: usize, nodes: &[Self::E]) {
self.node_manager.append_nodes(layer, nodes);
}
// Note the name difference: `update_node` is backed by `NodeManager::add_node`.
fn update_node(&mut self, layer: usize, pos: usize, node: Self::E) {
self.node_manager.add_node(layer, pos, node);
}
}
#[macro_export]
macro_rules! ensure_eq {
($given:expr, $expected:expr) => {
@ -679,6 +799,7 @@ macro_rules! ensure_eq {
#[cfg(test)]
mod tests {
use crate::merkle_tree::MerkleTreeRead;
use crate::sha3::Sha3Algorithm;
use crate::AppendMerkleTree;
use ethereum_types::H256;

View File

@ -49,7 +49,7 @@ pub trait Algorithm<E: HashElement> {
pub trait MerkleTreeRead {
type E: HashElement;
fn node(&self, layer: usize, index: usize) -> &Self::E;
fn node(&self, layer: usize, index: usize) -> Self::E;
fn height(&self) -> usize;
fn layer_len(&self, layer_height: usize) -> usize;
fn padding_node(&self, height: usize) -> Self::E;
@ -58,7 +58,7 @@ pub trait MerkleTreeRead {
self.layer_len(0)
}
fn root(&self) -> &Self::E {
fn root(&self) -> Self::E {
self.node(self.height() - 1, 0)
}
@ -70,19 +70,16 @@ pub trait MerkleTreeRead {
self.leaves()
);
}
if self.node(0, leaf_index) == &Self::E::null() {
if self.node(0, leaf_index) == Self::E::null() {
bail!("Not ready to generate proof for leaf_index={}", leaf_index);
}
if self.height() == 1 {
return Ok(Proof::new(
vec![self.root().clone(), self.root().clone()],
vec![],
));
return Proof::new(vec![self.root(), self.root().clone()], vec![]);
}
let mut lemma: Vec<Self::E> = Vec::with_capacity(self.height()); // path + root
let mut path: Vec<bool> = Vec::with_capacity(self.height() - 2); // path - 1
let mut index_in_layer = leaf_index;
lemma.push(self.node(0, leaf_index).clone());
lemma.push(self.node(0, leaf_index));
for height in 0..(self.height() - 1) {
trace!(
"gen_proof: height={} index={} hash={:?}",
@ -96,15 +93,15 @@ pub trait MerkleTreeRead {
// TODO: This can be skipped if the tree size is available in validation.
lemma.push(self.padding_node(height));
} else {
lemma.push(self.node(height, index_in_layer + 1).clone());
lemma.push(self.node(height, index_in_layer + 1));
}
} else {
path.push(false);
lemma.push(self.node(height, index_in_layer - 1).clone());
lemma.push(self.node(height, index_in_layer - 1));
}
index_in_layer >>= 1;
}
lemma.push(self.root().clone());
lemma.push(self.root());
if lemma.contains(&Self::E::null()) {
bail!(
"Not enough data to generate proof, lemma={:?} path={:?}",
@ -112,7 +109,7 @@ pub trait MerkleTreeRead {
path
);
}
Ok(Proof::new(lemma, path))
Proof::new(lemma, path)
}
fn gen_range_proof(&self, start_index: usize, end_index: usize) -> Result<RangeProof<Self::E>> {
@ -133,6 +130,13 @@ pub trait MerkleTreeRead {
}
}
/// Mutation interface for a merkle tree's nodes, addressed by layer and position.
pub trait MerkleTreeWrite {
/// The node (hash element) type stored in the tree.
type E: HashElement;
/// Adds a single `node` to `layer`.
fn push_node(&mut self, layer: usize, node: Self::E);
/// Adds every element of `nodes` to `layer`.
fn append_nodes(&mut self, layer: usize, nodes: &[Self::E]);
/// Writes `node` at position `pos` of `layer`.
fn update_node(&mut self, layer: usize, pos: usize, node: Self::E);
}
/// This includes the data to reconstruct an `AppendMerkleTree` root where some nodes
/// are `null`. Other intermediate nodes will be computed based on these known nodes.
pub struct MerkleTreeInitialData<E: HashElement> {

View File

@ -0,0 +1,11 @@
use std::sync::Arc;
use metrics::{register_timer, Timer};
// Lazily registered timers, one per `append_merkle_*` metric name.
// NOTE(review): presumably each one times the AppendMerkleTree operation it is named after;
// the call sites are outside this file — confirm.
lazy_static::lazy_static! {
// Registered as "append_merkle_append".
pub static ref APPEND: Arc<dyn Timer> = register_timer("append_merkle_append");
// Registered as "append_merkle_append_list".
pub static ref APPEND_LIST: Arc<dyn Timer> = register_timer("append_merkle_append_list");
// Registered as "append_merkle_append_subtree".
pub static ref APPEND_SUBTREE: Arc<dyn Timer> = register_timer("append_merkle_append_subtree");
// Registered as "append_merkle_append_subtree_list".
pub static ref APPEND_SUBTREE_LIST: Arc<dyn Timer> = register_timer("append_merkle_append_subtree_list");
// Registered as "append_merkle_update_last".
pub static ref UPDATE_LAST: Arc<dyn Timer> = register_timer("append_merkle_update_last");
}

View File

@ -0,0 +1,219 @@
use crate::HashElement;
use anyhow::Result;
use lru::LruCache;
use std::any::Any;
use std::num::NonZeroUsize;
use std::sync::Arc;
use tracing::error;
/// Stores merkle tree nodes keyed by `(layer, position)`, with an LRU cache in front of a
/// `NodeDatabase`. Writes go both to the cache and to the currently open database transaction.
pub struct NodeManager<E: HashElement> {
/// Nodes written through this manager, keyed by `(layer, position)`.
cache: LruCache<(usize, usize), E>,
/// Number of nodes recorded for each layer; the vector index is the layer.
layer_size: Vec<usize>,
/// Backing store, read on cache misses and used to open and commit transactions.
db: Arc<dyn NodeDatabase<E>>,
/// The open write transaction, if any: set by `start_transaction`, taken by `commit`.
db_tx: Option<Box<dyn NodeTransaction<E>>>,
}
impl<E: HashElement> NodeManager<E> {
pub fn new(db: Arc<dyn NodeDatabase<E>>, capacity: usize) -> Result<Self> {
let mut layer = 0;
let mut layer_size = Vec::new();
while let Some(size) = db.get_layer_size(layer)? {
layer_size.push(size);
layer += 1;
}
Ok(Self {
cache: LruCache::new(NonZeroUsize::new(capacity).expect("capacity should be non-zero")),
layer_size,
db,
db_tx: None,
})
}
pub fn new_dummy() -> Self {
Self {
cache: LruCache::unbounded(),
layer_size: vec![],
db: Arc::new(EmptyNodeDatabase {}),
db_tx: None,
}
}
pub fn push_node(&mut self, layer: usize, node: E) {
self.add_node(layer, self.layer_size[layer], node);
self.set_layer_size(layer, self.layer_size[layer] + 1);
}
pub fn append_nodes(&mut self, layer: usize, nodes: &[E]) {
let mut pos = self.layer_size[layer];
let mut saved_nodes = Vec::with_capacity(nodes.len());
for node in nodes {
self.cache.put((layer, pos), node.clone());
saved_nodes.push((layer, pos, node));
pos += 1;
}
self.set_layer_size(layer, pos);
self.db_tx().save_node_list(&saved_nodes);
}
pub fn get_node(&self, layer: usize, pos: usize) -> Option<E> {
match self.cache.peek(&(layer, pos)) {
Some(node) => Some(node.clone()),
None => self.db.get_node(layer, pos).unwrap_or_else(|e| {
error!("Failed to get node: {}", e);
None
}),
}
}
pub fn get_nodes(&self, layer: usize, start_pos: usize, end_pos: usize) -> NodeIterator<E> {
NodeIterator {
node_manager: self,
layer,
start_pos,
end_pos,
}
}
pub fn add_node(&mut self, layer: usize, pos: usize, node: E) {
// No need to insert if the value is unchanged.
if self.cache.get(&(layer, pos)) != Some(&node) {
self.db_tx().save_node(layer, pos, &node);
self.cache.put((layer, pos), node);
}
}
pub fn add_layer(&mut self) {
self.layer_size.push(0);
let layer = self.layer_size.len() - 1;
self.db_tx().save_layer_size(layer, 0);
}
pub fn layer_size(&self, layer: usize) -> usize {
self.layer_size[layer]
}
pub fn num_layers(&self) -> usize {
self.layer_size.len()
}
pub fn truncate_nodes(&mut self, layer: usize, pos_end: usize) {
let mut removed_nodes = Vec::new();
for pos in pos_end..self.layer_size[layer] {
self.cache.pop(&(layer, pos));
removed_nodes.push((layer, pos));
}
self.db_tx().remove_node_list(&removed_nodes);
self.set_layer_size(layer, pos_end);
}
pub fn truncate_layer(&mut self, layer: usize) {
self.truncate_nodes(layer, 0);
if layer == self.num_layers() - 1 {
self.layer_size.pop();
self.db_tx().remove_layer_size(layer);
}
}
pub fn start_transaction(&mut self) {
if self.db_tx.is_some() {
error!("start new tx before commit");
panic!("start new tx before commit");
}
self.db_tx = Some(self.db.start_transaction());
}
pub fn commit(&mut self) {
let tx = match self.db_tx.take() {
Some(tx) => tx,
None => {
error!("db_tx is None");
return;
}
};
if let Err(e) = self.db.commit(tx) {
error!("Failed to commit db transaction: {}", e);
}
}
fn db_tx(&mut self) -> &mut dyn NodeTransaction<E> {
(*self.db_tx.as_mut().expect("tx checked")).as_mut()
}
fn set_layer_size(&mut self, layer: usize, size: usize) {
self.layer_size[layer] = size;
self.db_tx().save_layer_size(layer, size);
}
}
/// Iterator over the nodes of one layer of a `NodeManager`, created by `NodeManager::get_nodes`.
pub struct NodeIterator<'a, E: HashElement> {
/// The manager the nodes are read from.
node_manager: &'a NodeManager<E>,
/// The layer being iterated.
layer: usize,
/// Position of the next node to return; advanced on every step.
start_pos: usize,
/// Exclusive upper bound of the positions to visit.
end_pos: usize,
}
impl<'a, E: HashElement> Iterator for NodeIterator<'a, E> {
    type Item = E;

    /// Yields the node at the current position and advances by one.
    ///
    /// Returns `None` once the end position is reached, and also when the node at
    /// the current position cannot be found.
    fn next(&mut self) -> Option<Self::Item> {
        if self.start_pos >= self.end_pos {
            return None;
        }
        let pos = self.start_pos;
        let node = self.node_manager.get_node(self.layer, pos);
        self.start_pos = pos + 1;
        node
    }
}
/// Persistent storage for merkle tree nodes and layer sizes, as used by `NodeManager`.
pub trait NodeDatabase<E: HashElement>: Send + Sync {
/// Reads the node at `(layer, pos)`; `Ok(None)` means no node is stored there.
fn get_node(&self, layer: usize, pos: usize) -> Result<Option<E>>;
/// Reads the stored size of `layer`; `Ok(None)` means no size is stored for it.
fn get_layer_size(&self, layer: usize) -> Result<Option<usize>>;
/// Creates a new transaction that collects writes until it is passed to `commit`.
fn start_transaction(&self) -> Box<dyn NodeTransaction<E>>;
/// Commits the writes collected in `tx`.
fn commit(&self, tx: Box<dyn NodeTransaction<E>>) -> Result<()>;
}
/// A batch of pending node and layer-size writes, committed through `NodeDatabase::commit`.
pub trait NodeTransaction<E: HashElement>: Send + Sync {
/// Records that `node` should be stored at `(layer, pos)`.
fn save_node(&mut self, layer: usize, pos: usize, node: &E);
/// `nodes` are a list of tuples `(layer, pos, node)`.
fn save_node_list(&mut self, nodes: &[(usize, usize, &E)]);
/// `nodes` are a list of tuples `(layer, pos)` identifying the nodes to remove.
fn remove_node_list(&mut self, nodes: &[(usize, usize)]);
/// Records `size` as the size of `layer`.
fn save_layer_size(&mut self, layer: usize, size: usize);
/// Records that the stored size of `layer` should be removed.
fn remove_layer_size(&mut self, layer: usize);
/// Converts the boxed transaction into `Box<dyn Any>`.
/// NOTE(review): presumably so a database can downcast to its own transaction type — confirm.
fn into_any(self: Box<Self>) -> Box<dyn Any>;
}
/// A dummy database for an in-memory merkle tree: reads always find nothing and
/// commits do nothing, so no database is ever read or written.
pub struct EmptyNodeDatabase {}
/// The transaction type handed out by `EmptyNodeDatabase`; every write is a no-op.
pub struct EmptyNodeTransaction {}
// No-op `NodeDatabase`: nothing is stored, so every read reports "not found".
impl<E: HashElement> NodeDatabase<E> for EmptyNodeDatabase {
/// Always returns `Ok(None)`.
fn get_node(&self, _layer: usize, _pos: usize) -> Result<Option<E>> {
Ok(None)
}
/// Always returns `Ok(None)`, so a `NodeManager` built on this database starts with no layers.
fn get_layer_size(&self, _layer: usize) -> Result<Option<usize>> {
Ok(None)
}
/// Returns a fresh `EmptyNodeTransaction`.
fn start_transaction(&self) -> Box<dyn NodeTransaction<E>> {
Box::new(EmptyNodeTransaction {})
}
/// Discards `_tx` and always succeeds.
fn commit(&self, _tx: Box<dyn NodeTransaction<E>>) -> Result<()> {
Ok(())
}
}
// No-op `NodeTransaction`: every write request is ignored.
impl<E: HashElement> NodeTransaction<E> for EmptyNodeTransaction {
fn save_node(&mut self, _layer: usize, _pos: usize, _node: &E) {}
fn save_node_list(&mut self, _nodes: &[(usize, usize, &E)]) {}
fn remove_node_list(&mut self, _nodes: &[(usize, usize)]) {}
fn save_layer_size(&mut self, _layer: usize, _size: usize) {}
fn remove_layer_size(&mut self, _layer: usize) {}
/// Returns the boxed transaction itself as `Box<dyn Any>`.
fn into_any(self: Box<Self>) -> Box<dyn Any> {
self
}
}

View File

@ -11,9 +11,11 @@ pub struct Proof<T: HashElement> {
impl<T: HashElement> Proof<T> {
/// Creates new MT inclusion proof
pub fn new(hash: Vec<T>, path: Vec<bool>) -> Proof<T> {
assert_eq!(hash.len() - 2, path.len());
Proof { lemma: hash, path }
pub fn new(hash: Vec<T>, path: Vec<bool>) -> Result<Proof<T>> {
if hash.len() != path.len() + 2 {
bail!("hash and path length mismatch");
}
Ok(Proof { lemma: hash, path })
}
pub fn new_empty() -> Proof<T> {
@ -58,10 +60,10 @@ impl<T: HashElement> Proof<T> {
bail!("Invalid proof");
}
if *item != self.item() {
bail!("Proof item unmatch");
bail!("Proof item mismatch");
}
if position != self.position() {
bail!("Proof position unmatch");
bail!("Proof position mismatch");
}
Ok(())
}
@ -88,7 +90,7 @@ impl<T: HashElement> Proof<T> {
/// Return `Vec<(index_in_layer, data)>`.
pub fn proof_nodes_in_tree(&self) -> Vec<(usize, T)> {
let mut r = Vec::with_capacity(self.lemma.len());
let mut r = Vec::with_capacity(self.lemma.len() - 1);
let mut pos = 0;
r.push((0, self.root()));
for (i, is_left) in self.path.iter().rev().enumerate() {
@ -108,7 +110,7 @@ impl<T: HashElement> Proof<T> {
tx_merkle_nodes: Vec<(usize, T)>,
tx_merkle_nodes_size: usize,
) -> Vec<(usize, T)> {
let mut r = Vec::with_capacity(self.lemma.len());
let mut r = Vec::with_capacity(self.path.len());
let mut subtree_pos = 0;
let mut root_pos = 0;
let mut in_subtree = tx_merkle_nodes_size == 1;
@ -222,7 +224,7 @@ impl<E: HashElement> RangeProof<E> {
}
children_layer = parent_layer;
}
assert_eq!(children_layer.len(), 1);
ensure_eq!(children_layer.len(), 1);
let computed_root = children_layer.pop().unwrap();
ensure_eq!(computed_root, self.root());

View File

@ -5,3 +5,4 @@ edition = "2021"
[dependencies]
tokio = { version = "1.19.2", features = ["sync", "time"] }
metrics = { workspace = true }

View File

@ -1,7 +1,9 @@
use crate::error::Error;
use crate::metrics::unbounded_channel;
use metrics::{Counter, CounterUsize};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc::error::TryRecvError;
use tokio::sync::{mpsc, oneshot};
use tokio::sync::oneshot;
use tokio::time::timeout;
const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3);
@ -19,20 +21,30 @@ pub struct Channel<N, Req, Res> {
}
impl<N, Req, Res> Channel<N, Req, Res> {
pub fn unbounded() -> (Sender<N, Req, Res>, Receiver<N, Req, Res>) {
let (sender, receiver) = mpsc::unbounded_channel();
(Sender { chan: sender }, Receiver { chan: receiver })
pub fn unbounded(name: &str) -> (Sender<N, Req, Res>, Receiver<N, Req, Res>) {
let metrics_group = format!("common_channel_{}", name);
let (sender, receiver) = unbounded_channel(metrics_group.as_str());
let metrics_timeout = CounterUsize::register_with_group(metrics_group.as_str(), "timeout");
(
Sender {
chan: sender,
metrics_timeout,
},
receiver,
)
}
}
pub struct Sender<N, Req, Res> {
chan: mpsc::UnboundedSender<Message<N, Req, Res>>,
chan: crate::metrics::Sender<Message<N, Req, Res>>,
metrics_timeout: Arc<dyn Counter<usize>>,
}
impl<N, Req, Res> Clone for Sender<N, Req, Res> {
fn clone(&self) -> Self {
Sender {
chan: self.chan.clone(),
metrics_timeout: self.metrics_timeout.clone(),
}
}
}
@ -53,24 +65,15 @@ impl<N, Req, Res> Sender<N, Req, Res> {
timeout(DEFAULT_REQUEST_TIMEOUT, receiver)
.await
.map_err(|_| Error::TimeoutError)?
.map_err(|_| {
self.metrics_timeout.inc(1);
Error::TimeoutError
})?
.map_err(|e| Error::RecvError(e))
}
}
pub struct Receiver<N, Req, Res> {
chan: mpsc::UnboundedReceiver<Message<N, Req, Res>>,
}
impl<N, Req, Res> Receiver<N, Req, Res> {
pub async fn recv(&mut self) -> Option<Message<N, Req, Res>> {
self.chan.recv().await
}
pub fn try_recv(&mut self) -> Result<Message<N, Req, Res>, TryRecvError> {
self.chan.try_recv()
}
}
pub type Receiver<N, Req, Res> = crate::metrics::Receiver<Message<N, Req, Res>>;
#[cfg(test)]
mod tests {
@ -91,7 +94,7 @@ mod tests {
#[tokio::test]
async fn request_response() {
let (tx, mut rx) = Channel::<Notification, Request, Response>::unbounded();
let (tx, mut rx) = Channel::<Notification, Request, Response>::unbounded("test");
let task1 = async move {
match rx.recv().await.expect("not dropped") {

View File

@ -1,5 +1,6 @@
mod channel;
pub mod error;
pub mod metrics;
pub mod test_util;
pub use crate::channel::{Channel, Message, Receiver, ResponseSender, Sender};

View File

@ -0,0 +1,112 @@
use std::{fmt::Debug, sync::Arc, time::Instant};
use metrics::{register_meter_with_group, Counter, CounterUsize, Histogram, Meter, Sample};
use tokio::sync::mpsc::{
error::{SendError, TryRecvError},
unbounded_channel as new_unbounded_channel, UnboundedReceiver, UnboundedSender,
};
/// Creates an unbounded channel whose two ends report metrics under the group
/// `metric_name`.
///
/// A single "size" counter is registered and shared by both ends: the sender
/// increments it and the receiver decrements it.
pub fn unbounded_channel<T>(metric_name: &str) -> (Sender<T>, Receiver<T>) {
    let (raw_tx, raw_rx) = new_unbounded_channel();
    let queued = CounterUsize::register_with_group(metric_name, "size");
    let tx = Sender::new(raw_tx, metric_name, queued.clone());
    let rx = Receiver::new(raw_rx, metric_name, queued);
    (tx, rx)
}
/// Sending half of a metered unbounded channel, created by `unbounded_channel`.
pub struct Sender<T> {
/// Underlying tokio sender; each message is paired with the `Instant` at which it was sent.
sender: UnboundedSender<(Instant, T)>,
/// Meter marked once per successfully sent message.
metrics_send_qps: Arc<dyn Meter>,
/// Counter incremented once per successfully sent message.
metrics_queued: Arc<dyn Counter<usize>>,
}
// Implemented by hand, cloning each field; the impl places no `Clone` bound on `T`.
impl<T> Clone for Sender<T> {
/// Returns a sender built from clones of the inner sender and of both metric handles.
fn clone(&self) -> Self {
Self {
sender: self.sender.clone(),
metrics_send_qps: self.metrics_send_qps.clone(),
metrics_queued: self.metrics_queued.clone(),
}
}
}
// Debug output is that of the inner tokio sender; the metric handles are not printed.
impl<T> Debug for Sender<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self.sender)
}
}
impl<T> Sender<T> {
pub(crate) fn new(
sender: UnboundedSender<(Instant, T)>,
metrics_group: &str,
metrics_queued: Arc<dyn Counter<usize>>,
) -> Self {
Self {
sender,
metrics_send_qps: register_meter_with_group(metrics_group, "send"),
metrics_queued,
}
}
pub fn send(&self, value: T) -> Result<(), SendError<T>> {
match self.sender.send((Instant::now(), value)) {
Ok(()) => {
self.metrics_send_qps.mark(1);
self.metrics_queued.inc(1);
Ok(())
}
Err(e) => Err(SendError(e.0 .1)),
}
}
}
/// Receiving half of a metered unbounded channel, created by `unbounded_channel`.
pub struct Receiver<T> {
/// Underlying tokio receiver; each message arrives paired with the `Instant` at which it was sent.
receiver: UnboundedReceiver<(Instant, T)>,
/// Meter marked once per received message.
metrics_recv_qps: Arc<dyn Meter>,
/// Counter decremented once per received message.
metrics_queued: Arc<dyn Counter<usize>>,
/// Histogram updated, per received message, from the `Instant` the message was sent at.
metrics_queue_latency: Arc<dyn Histogram>,
}
// Debug output is that of the inner tokio receiver; the metric handles are not printed.
impl<T> Debug for Receiver<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self.receiver)
}
}
impl<T> Receiver<T> {
    /// Wraps `receiver`, registering a "recv" meter and a "latency" histogram in
    /// `metrics_group` and using `metrics_queued` as the queue-size counter.
    pub(crate) fn new(
        receiver: UnboundedReceiver<(Instant, T)>,
        metrics_group: &str,
        metrics_queued: Arc<dyn Counter<usize>>,
    ) -> Self {
        let metrics_recv_qps = register_meter_with_group(metrics_group, "recv");
        let metrics_queue_latency =
            Sample::ExpDecay(0.015).register_with_group(metrics_group, "latency", 1024);
        Self {
            receiver,
            metrics_recv_qps,
            metrics_queued,
            metrics_queue_latency,
        }
    }

    /// Updates the receive metrics for one message and returns its payload.
    fn on_recv(&self, value: (Instant, T)) -> T {
        let (queued_at, payload) = value;
        self.metrics_recv_qps.mark(1);
        self.metrics_queued.dec(1);
        self.metrics_queue_latency.update_since(queued_at);
        payload
    }

    /// Waits for the next message; returns `None` when the inner receiver yields `None`.
    pub async fn recv(&mut self) -> Option<T> {
        match self.receiver.recv().await {
            Some(value) => Some(self.on_recv(value)),
            None => None,
        }
    }

    /// Returns the next message without waiting, or the inner receiver's `TryRecvError`.
    pub fn try_recv(&mut self) -> Result<T, TryRecvError> {
        match self.receiver.try_recv() {
            Ok(value) => Ok(self.on_recv(value)),
            Err(e) => Err(e),
        }
    }
}

View File

@ -1,6 +1,6 @@
fn main() {
if cfg!(not(feature = "dev")) {
println!("cargo:rerun-if-changed=../../0g-storage-contracts/artifacts/");
println!("cargo:rerun-if-changed=../../storage-contracts-abis/");
} else {
println!("cargo:rerun-if-changed=../../0g-storage-contracts-dev/artifacts/");
}

View File

@ -3,25 +3,25 @@ use ethers::prelude::abigen;
// run `cargo doc -p contract-interface --open` to read struct definition
#[cfg(not(feature = "dev"))]
abigen!(
ZgsFlow,
"../../0g-storage-contracts/artifacts/contracts/dataFlow/Flow.sol/Flow.json"
);
abigen!(ZgsFlow, "../../storage-contracts-abis/Flow.json");
#[cfg(not(feature = "dev"))]
abigen!(PoraMine, "../../storage-contracts-abis/PoraMine.json");
#[cfg(not(feature = "dev"))]
abigen!(
PoraMine,
"../../0g-storage-contracts/artifacts/contracts/miner/Mine.sol/PoraMine.json"
ChunkLinearReward,
"../../storage-contracts-abis/ChunkLinearReward.json"
);
#[cfg(feature = "dev")]
abigen!(
ZgsFlow,
"../../0g-storage-contracts-dev/artifacts/contracts/dataFlow/Flow.sol/Flow.json"
);
abigen!(ZgsFlow, "../../storage-contracts-abis/Flow.json");
#[cfg(feature = "dev")]
abigen!(PoraMine, "../../storage-contracts-abis/PoraMine.json");
#[cfg(feature = "dev")]
abigen!(
PoraMine,
"../../0g-storage-contracts-dev/artifacts/contracts/miner/Mine.sol/PoraMine.json"
ChunkLinearReward,
"../../storage-contracts-abis/ChunkLinearReward.json"
);

View File

@ -9,5 +9,5 @@ exit-future = "0.2.0"
futures = "0.3.21"
lazy_static = "1.4.0"
lighthouse_metrics = { path = "../lighthouse_metrics" }
tokio = { version = "1.19.2", features = ["rt"] }
tokio = { version = "1.38.0", features = ["full"] }
tracing = "0.1.35"

View File

@ -7,41 +7,42 @@ use target_info::Target;
///
/// ## Example
///
/// `Lighthouse/v1.5.1-67da032+`
/// `v0.5.2` or `v0.5.2-1-67da032+`
pub const VERSION: &str = git_version!(
args = [
"--always",
"--dirty=+",
"--abbrev=7",
// NOTE: using --match instead of --exclude for compatibility with old Git
"--match=thiswillnevermatchlol"
// "--match=thiswillnevermatchlol"
"--tags",
],
prefix = "zgs/v0.0.1-",
// prefix = "zgs/v0.0.1-",
fallback = "unknown"
);
/// Returns `VERSION`, but with platform information appended to the end.
/// Returns `VERSION`, but with `zgs` prefix and platform information appended to the end.
///
/// ## Example
///
/// `zgs/v0.0.1-67da032+/x86_64-linux`
/// `zgs/v0.5.2/x86_64-linux`
pub fn version_with_platform() -> String {
format!("{}/{}-{}", VERSION, Target::arch(), Target::os())
format!("zgs/{}/{}-{}", VERSION, Target::arch(), Target::os())
}
#[cfg(test)]
mod test {
use super::*;
use regex::Regex;
// #[cfg(test)]
// mod test {
// use super::*;
// use regex::Regex;
#[test]
fn version_formatting() {
let re =
Regex::new(r"^zgs/v[0-9]+\.[0-9]+\.[0-9]+(-rc.[0-9])?-[[:xdigit:]]{7}\+?$").unwrap();
assert!(
re.is_match(VERSION),
"version doesn't match regex: {}",
VERSION
);
}
}
// #[test]
// fn version_formatting() {
// let re =
// Regex::new(r"^v[0-9]+\.[0-9]+\.[0-9]+(-rc.[0-9])?-[[:xdigit:]]{7}\+?$").unwrap();
// assert!(
// re.is_match(VERSION),
// "version doesn't match regex: {}",
// VERSION
// );
// }
// }

View File

@ -1,6 +1,6 @@
# Proof of Random Access
The ZeroGravity network adopts a Proof of Random Access (PoRA) mechanism to incentivize miners to store data. By requiring miners to answer randomly produced queries to archived data chunks, the PoRA mechanism establishes the relation between mining proof generation power and data storage. Miners answer the queries repeatedly and computes an output digest for each loaded chunk util find a digest that satisfies the mining difficulty (i.e., has enough leading zeros). PoRA will stress the miners' disk I/O and reduce their capability to respond user queries. So 0G Storage adopts intermittent mining, in which a mining epoch starts with a block generation at a specific block height on the host chain and stops when a valid PoRA is submitted to the 0G Storage contract.
The ZeroGravity network adopts a Proof of Random Access (PoRA) mechanism to incentivize miners to store data. By requiring miners to answer randomly produced queries to archived data chunks, the PoRA mechanism establishes the relation between mining proof generation power and data storage. Miners answer the queries repeatedly and compute an output digest for each loaded chunk until they find a digest that satisfies the mining difficulty (i.e., has enough leading zeros). PoRA will stress the miners' disk I/O and reduce their capability to respond to user queries. So 0G Storage adopts intermittent mining, in which a mining epoch starts with a block generation at a specific block height on the host chain and stops when a valid PoRA is submitted to the 0G Storage contract.
In a strawman design, a PoRA iteration consists of a computing stage and a loading stage. In the computing stage, a miner computes a random recall position (the universal offset in the flow) based on an arbitrarily picked random nonce and a mining status read from the host chain. In the loading stage, a miner loads the archived data chunks at the given recall position, and computes an output digest by hashing the tuple of mining status and the data chunks. If the output digest satisfies the target difficulty, the miner can construct a legitimate PoRA consisting of the chosen random nonce, the loaded data chunk and the proof for the correctness of the data chunk, and submit it to the mining contract.

View File

@ -27,4 +27,4 @@ The mining process of 0G Storage requires to prove data accessibility to random
## Data Flow
In 0G Storage, committed data are organized sequentially. Such a sequence of data is called a data flow, which can be interpreted as a list of data entries or equivalently a sequence of fixed-size data sectors. Thus, every piece of data in ZeroGravity can be indexed conveniently with a universal offset. This offset will be used to sample challenges in the mining process of PoRA. The default data flow is called the "main flow" of ZeroGravity. It incorporates all new log entries (unless otherwise specified) in an append-only manner. There are also specialized flows that only accept some category of log entries, e.g. data related to a specifc application. The most significant advantage of specialized flows is a consecutive addressing space, which may be crucial in some use cases. Furthermore, a specialized flow can apply customized storage price, which is typically significantly higher than the floor price of the default flow, and hence achieves better data availability and reliability.
In 0G Storage, committed data are organized sequentially. Such a sequence of data is called a data flow, which can be interpreted as a list of data entries or equivalently a sequence of fixed-size data sectors. Thus, every piece of data in ZeroGravity can be indexed conveniently with a universal offset. This offset will be used to sample challenges in the mining process of PoRA. The default data flow is called the "main flow" of ZeroGravity. It incorporates all new log entries (unless otherwise specified) in an append-only manner. There are also specialized flows that only accept some category of log entries, e.g. data related to a specific application. The most significant advantage of specialized flows is a consecutive addressing space, which may be crucial in some use cases. Furthermore, a specialized flow can apply customized storage price, which is typically significantly higher than the floor price of the default flow, and hence achieves better data availability and reliability.

View File

@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
anyhow = { version = "1.0.58", features = ["backtrace"] }
clap = { version = "3.2.5", features = ["cargo"] }
clap = { version = "4.5.17", features = ["cargo", "string"] }
ctrlc = "3.2.2"
error-chain = "0.12.4"
ethereum-types = "0.14"
@ -35,8 +35,10 @@ chunk_pool = { path = "./chunk_pool" }
itertools = "0.10.5"
serde = { version = "1.0.137", features = ["derive"] }
duration-str = "0.5.1"
config = "0.13.1"
config = "0.14"
public-ip = "0.2"
ethers = "2.0.14"
metrics = { workspace = true }
[dependencies.libp2p]
version = "0.45.1"

View File

@ -13,3 +13,5 @@ tokio = { version = "1.19.2", features = ["sync"] }
async-lock = "2.5.0"
hashlink = "0.8.0"
tracing = "0.1.35"
lazy_static = "1.4.0"
metrics = { workspace = true }

View File

@ -1,11 +1,16 @@
use super::mem_pool::MemoryChunkPool;
use crate::mem_pool::FileID;
use anyhow::Result;
use network::NetworkMessage;
use metrics::{Histogram, Sample};
use network::{NetworkMessage, NetworkSender};
use shared_types::{ChunkArray, FileProof};
use std::{sync::Arc, time::SystemTime};
use std::{sync::Arc, time::Instant};
use storage_async::{ShardConfig, Store};
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tokio::sync::mpsc::UnboundedReceiver;
lazy_static::lazy_static! {
pub static ref FINALIZE_FILE_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register("chunk_pool_finalize_file_latency", 1024);
}
/// Handle the cached file when uploaded completely and verified from blockchain.
/// Generally, the file will be persisted into log store.
@ -13,7 +18,7 @@ pub struct ChunkPoolHandler {
receiver: UnboundedReceiver<ChunkPoolMessage>,
mem_pool: Arc<MemoryChunkPool>,
log_store: Arc<Store>,
sender: UnboundedSender<NetworkMessage>,
sender: NetworkSender,
}
impl ChunkPoolHandler {
@ -21,7 +26,7 @@ impl ChunkPoolHandler {
receiver: UnboundedReceiver<ChunkPoolMessage>,
mem_pool: Arc<MemoryChunkPool>,
log_store: Arc<Store>,
sender: UnboundedSender<NetworkMessage>,
sender: NetworkSender,
) -> Self {
ChunkPoolHandler {
receiver,
@ -68,7 +73,7 @@ impl ChunkPoolHandler {
}
}
let start = SystemTime::now();
let start = Instant::now();
if !self
.log_store
.finalize_tx_with_hash(id.tx_id.seq, id.tx_id.hash)
@ -77,8 +82,9 @@ impl ChunkPoolHandler {
return Ok(false);
}
let elapsed = start.elapsed()?;
let elapsed = start.elapsed();
debug!(?id, ?elapsed, "Transaction finalized");
FINALIZE_FILE_LATENCY.update_since(start);
// always remove file from pool after transaction finalized
self.mem_pool.remove_file(&id.root).await;

View File

@ -29,7 +29,7 @@ impl Config {
pub fn unbounded(
config: Config,
log_store: Arc<storage_async::Store>,
network_send: tokio::sync::mpsc::UnboundedSender<network::NetworkMessage>,
network_send: network::NetworkSender,
) -> (Arc<MemoryChunkPool>, ChunkPoolHandler) {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();

View File

@ -13,3 +13,5 @@ tracing = "0.1.35"
priority-queue = "1.2.3"
shared_types = { path = "../shared_types" }
serde = { version = "1.0.137", features = ["derive"] }
lazy_static = "1.4.0"
metrics = { workspace = true }

View File

@ -1,4 +1,5 @@
use crate::Config;
use metrics::{register_meter_with_group, Histogram, Meter, Sample};
use network::types::SignedAnnounceFile;
use network::PeerId;
use parking_lot::Mutex;
@ -7,8 +8,15 @@ use rand::seq::IteratorRandom;
use shared_types::{timestamp_now, TxID};
use std::cmp::Reverse;
use std::collections::HashMap;
use std::sync::Arc;
use storage::config::ShardConfig;
lazy_static::lazy_static! {
pub static ref INSERT_QPS: Arc<dyn Meter> = register_meter_with_group("file_location_cache_insert", "qps");
pub static ref INSERT_BATCH: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("file_location_cache_insert", "batch", 1024);
pub static ref TOTAL_CACHED: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register("file_location_cache_size", 1024);
}
/// Caches limited announcements of specified file from different peers.
struct AnnouncementCache {
/// Maximum number of announcements in cache.
@ -161,9 +169,7 @@ impl FileCache {
}
/// Insert the specified `announcement` into cache.
fn insert(&mut self, announcement: SignedAnnounceFile) {
let tx_id = announcement.tx_id;
fn insert(&mut self, tx_id: TxID, announcement: SignedAnnounceFile) {
let item = self.files.entry(tx_id).or_insert_with(|| {
AnnouncementCache::new(
self.config.max_entries_per_file,
@ -203,6 +209,7 @@ impl FileCache {
let item = self.files.get_mut(&tx_id)?;
let (result, collected) = item.random();
self.update_on_announcement_cache_changed(&tx_id, collected);
TOTAL_CACHED.update(self.total_announcements as u64);
result
}
@ -234,6 +241,7 @@ impl FileCache {
let item = self.files.get_mut(&tx_id)?;
let (result, collected) = item.all();
self.update_on_announcement_cache_changed(&tx_id, collected);
TOTAL_CACHED.update(self.total_announcements as u64);
Some(result)
}
@ -242,6 +250,7 @@ impl FileCache {
let item = self.files.get_mut(tx_id)?;
let result = item.remove(peer_id)?;
self.update_on_announcement_cache_changed(tx_id, 1);
TOTAL_CACHED.update(self.total_announcements as u64);
Some(result)
}
}
@ -284,14 +293,23 @@ impl FileLocationCache {
}
pub fn insert(&self, announcement: SignedAnnounceFile) {
INSERT_QPS.mark(1);
INSERT_BATCH.update(announcement.tx_ids.len() as u64);
let peer_id = *announcement.peer_id;
// FIXME: Check validity.
let shard_config = ShardConfig {
shard_id: announcement.shard_id,
num_shard: announcement.num_shard,
};
self.cache.lock().insert(announcement);
self.insert_peer_config(peer_id, shard_config);
let mut cache = self.cache.lock();
for tx_id in announcement.tx_ids.iter() {
cache.insert(*tx_id, announcement.clone());
}
TOTAL_CACHED.update(cache.total_announcements as u64);
}
pub fn get_one(&self, tx_id: TxID) -> Option<SignedAnnounceFile> {
@ -534,7 +552,7 @@ mod tests {
}
fn assert_file(file: &SignedAnnounceFile, tx_id: TxID, peer_id: PeerId, timestamp: u32) {
assert_eq!(file.tx_id, tx_id);
assert_eq!(file.tx_ids[0], tx_id);
assert_eq!(PeerId::from(file.peer_id.clone()), peer_id);
assert_eq!(file.timestamp, timestamp);
}
@ -551,11 +569,11 @@ mod tests {
let tx1 = TxID::random_hash(1);
let tx2 = TxID::random_hash(2);
cache.insert(create_file_2(tx1, peer1, now - 1));
cache.insert(tx1, create_file_2(tx1, peer1, now - 1));
assert_eq!(cache.total_announcements, 1);
cache.insert(create_file_2(tx2, peer1, now - 2));
cache.insert(tx2, create_file_2(tx2, peer1, now - 2));
assert_eq!(cache.total_announcements, 2);
cache.insert(create_file_2(tx1, peer2, now - 3));
cache.insert(tx1, create_file_2(tx1, peer2, now - 3));
assert_eq!(cache.total_announcements, 3);
assert_file(&cache.pop().unwrap(), tx1, peer2, now - 3);
@ -573,18 +591,18 @@ mod tests {
let now = timestamp_now();
let tx1 = TxID::random_hash(1);
cache.insert(create_file_2(tx1, PeerId::random(), now - 7));
cache.insert(create_file_2(tx1, PeerId::random(), now - 8));
cache.insert(create_file_2(tx1, PeerId::random(), now - 9));
cache.insert(tx1, create_file_2(tx1, PeerId::random(), now - 7));
cache.insert(tx1, create_file_2(tx1, PeerId::random(), now - 8));
cache.insert(tx1, create_file_2(tx1, PeerId::random(), now - 9));
assert_eq!(cache.total_announcements, 3);
// insert more files and cause to max entries limited
let tx2 = TxID::random_hash(2);
cache.insert(create_file_2(tx2, PeerId::random(), now - 1));
cache.insert(tx2, create_file_2(tx2, PeerId::random(), now - 1));
assert_all_files(cache.all(tx1).unwrap_or_default(), vec![now - 8, now - 7]);
cache.insert(create_file_2(tx2, PeerId::random(), now - 2));
cache.insert(tx2, create_file_2(tx2, PeerId::random(), now - 2));
assert_all_files(cache.all(tx1).unwrap_or_default(), vec![now - 7]);
cache.insert(create_file_2(tx2, PeerId::random(), now - 3));
cache.insert(tx2, create_file_2(tx2, PeerId::random(), now - 3));
assert_all_files(cache.all(tx1).unwrap_or_default(), vec![]);
assert_all_files(

View File

@ -16,9 +16,9 @@ pub struct Config {
impl Default for Config {
fn default() -> Self {
Config {
max_entries_total: 4096,
max_entries_total: 1000000,
max_entries_per_file: 4,
entry_expiration_time_secs: 3600,
entry_expiration_time_secs: 86400,
}
}
}

View File

@ -35,7 +35,7 @@ impl AnnounceFileBuilder {
let timestamp = self.timestamp.unwrap_or_else(timestamp_now);
let msg = AnnounceFile {
tx_id,
tx_ids: vec![tx_id],
num_shard: 1,
shard_id: 0,
peer_id: peer_id.into(),

View File

@ -11,7 +11,7 @@ append_merkle = { path = "../../common/append_merkle" }
async-trait = "0.1.56"
ethereum-types = "0.14"
futures = "0.3.21"
jsonrpsee = { version = "0.14.0", features = ["full"] }
jsonrpsee = { version = "0.14", features = ["full"] }
shared_types = { path = "../shared_types" }
task_executor = { path = "../../common/task_executor" }
tokio = "1.19.2"
@ -21,4 +21,8 @@ storage = { path = "../storage" }
contract-interface = { path = "../../common/contract-interface" }
futures-core = "0.3.28"
futures-util = "0.3.28"
thiserror = "1.0.44"
thiserror = "1.0.44"
lazy_static = "1.4.0"
metrics = { workspace = true }
reqwest = {version = "0.11", features = ["json"]}
url = { version = "2.4", default-features = false }

View File

@ -1,3 +1,5 @@
use std::time::Duration;
use crate::ContractAddress;
pub struct LogSyncConfig {
@ -32,6 +34,11 @@ pub struct LogSyncConfig {
pub remove_finalized_block_interval_minutes: u64,
// watch_loop (eth_getLogs) trigger interval
pub watch_loop_wait_time_ms: u64,
// force to sync log from start block number
pub force_log_sync_from_start_block_number: bool,
// the timeout for blockchain rpc connection
pub blockchain_rpc_timeout: Duration,
}
#[derive(Clone)]
@ -58,6 +65,8 @@ impl LogSyncConfig {
default_finalized_block_count: u64,
remove_finalized_block_interval_minutes: u64,
watch_loop_wait_time_ms: u64,
force_log_sync_from_start_block_number: bool,
blockchain_rpc_timeout: Duration,
) -> Self {
Self {
rpc_endpoint_url,
@ -73,6 +82,8 @@ impl LogSyncConfig {
default_finalized_block_count,
remove_finalized_block_interval_minutes,
watch_loop_wait_time_ms,
force_log_sync_from_start_block_number,
blockchain_rpc_timeout,
}
}
}

View File

@ -1,6 +1,6 @@
use crate::sync_manager::log_query::LogQuery;
use crate::sync_manager::RETRY_WAIT_MS;
use crate::ContractAddress;
use crate::sync_manager::{metrics, RETRY_WAIT_MS};
use crate::{ContractAddress, LogSyncConfig};
use anyhow::{anyhow, bail, Result};
use append_merkle::{Algorithm, Sha3Algorithm};
use contract_interface::{SubmissionNode, SubmitFilter, ZgsFlow};
@ -12,9 +12,8 @@ use futures::StreamExt;
use jsonrpsee::tracing::{debug, error, info, warn};
use shared_types::{DataRoot, Transaction};
use std::collections::{BTreeMap, HashMap};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use storage::log_store::{tx_store::BlockHashAndSubmissionIndex, Store};
use task_executor::TaskExecutor;
use tokio::sync::{
@ -31,28 +30,29 @@ pub struct LogEntryFetcher {
}
impl LogEntryFetcher {
pub async fn new(
url: &str,
contract_address: ContractAddress,
log_page_size: u64,
confirmation_delay: u64,
rate_limit_retries: u32,
timeout_retries: u32,
initial_backoff: u64,
) -> Result<Self> {
pub async fn new(config: &LogSyncConfig) -> Result<Self> {
let provider = Arc::new(Provider::new(
RetryClientBuilder::default()
.rate_limit_retries(rate_limit_retries)
.timeout_retries(timeout_retries)
.initial_backoff(Duration::from_millis(initial_backoff))
.build(Http::from_str(url)?, Box::new(HttpRateLimitRetryPolicy)),
.rate_limit_retries(config.rate_limit_retries)
.timeout_retries(config.timeout_retries)
.initial_backoff(Duration::from_millis(config.initial_backoff))
.build(
Http::new_with_client(
url::Url::parse(&config.rpc_endpoint_url)?,
reqwest::Client::builder()
.timeout(config.blockchain_rpc_timeout)
.connect_timeout(config.blockchain_rpc_timeout)
.build()?,
),
Box::new(HttpRateLimitRetryPolicy),
),
));
// TODO: `error` types are removed from the ABI json file.
Ok(Self {
contract_address,
contract_address: config.contract_address,
provider,
log_page_size,
confirmation_delay,
log_page_size: config.log_page_size,
confirmation_delay: config.confirmation_block_count,
})
}
@ -142,6 +142,15 @@ impl LogEntryFetcher {
}
};
let log_latest_block_number = match store.get_log_latest_block_number() {
Ok(Some(b)) => b,
Ok(None) => 0,
Err(e) => {
error!("get log latest block number error: e={:?}", e);
0
}
};
if let Some(processed_block_number) = processed_block_number {
let finalized_block_number =
match provider.get_block(BlockNumber::Finalized).await {
@ -165,25 +174,27 @@ impl LogEntryFetcher {
};
if let Some(finalized_block_number) = finalized_block_number {
if processed_block_number >= finalized_block_number {
let mut pending_keys = vec![];
for (key, _) in block_hash_cache.read().await.iter() {
if *key < finalized_block_number {
pending_keys.push(*key);
} else {
break;
}
let safe_block_number = std::cmp::min(
std::cmp::min(
log_latest_block_number.saturating_sub(1),
finalized_block_number,
),
processed_block_number,
);
let mut pending_keys = vec![];
for (key, _) in block_hash_cache.read().await.iter() {
if *key < safe_block_number {
pending_keys.push(*key);
} else {
break;
}
}
for key in pending_keys.into_iter() {
if let Err(e) = store.delete_block_hash_by_number(key) {
error!(
"remove block tx for number {} error: e={:?}",
key, e
);
} else {
block_hash_cache.write().await.remove(&key);
}
for key in pending_keys.into_iter() {
if let Err(e) = store.delete_block_hash_by_number(key) {
error!("remove block tx for number {} error: e={:?}", key, e);
} else {
block_hash_cache.write().await.remove(&key);
}
}
}
@ -208,7 +219,7 @@ impl LogEntryFetcher {
) -> UnboundedReceiver<LogFetchProgress> {
let provider = self.provider.clone();
let (recover_tx, recover_rx) = tokio::sync::mpsc::unbounded_channel();
let contract = ZgsFlow::new(self.contract_address, provider.clone());
let contract = self.flow_contract();
let log_page_size = self.log_page_size;
executor.spawn(
@ -222,22 +233,30 @@ impl LogEntryFetcher {
.filter;
let mut stream = LogQuery::new(&provider, &filter, log_query_delay)
.with_page_size(log_page_size);
debug!(
info!(
"start_recover starts, start={} end={}",
start_block_number, end_block_number
);
let (mut block_hash_sent, mut block_number_sent) = (None, None);
while let Some(maybe_log) = stream.next().await {
let start_time = Instant::now();
match maybe_log {
Ok(log) => {
let sync_progress =
if log.block_hash.is_some() && log.block_number.is_some() {
let synced_block = LogFetchProgress::SyncedBlock((
log.block_number.unwrap().as_u64(),
log.block_hash.unwrap(),
None,
));
progress = log.block_number.unwrap().as_u64();
Some(synced_block)
if block_hash_sent != log.block_hash
|| block_number_sent != log.block_number
{
let synced_block = LogFetchProgress::SyncedBlock((
log.block_number.unwrap().as_u64(),
log.block_hash.unwrap(),
None,
));
progress = log.block_number.unwrap().as_u64();
Some(synced_block)
} else {
None
}
} else {
None
};
@ -249,13 +268,22 @@ impl LogEntryFetcher {
}) {
Ok(event) => {
if let Err(e) = recover_tx
.send(submission_event_to_transaction(event))
.send(submission_event_to_transaction(
event,
log.block_number.expect("block number exist").as_u64(),
))
.and_then(|_| match sync_progress {
Some(b) => recover_tx.send(b),
Some(b) => {
recover_tx.send(b)?;
block_hash_sent = log.block_hash;
block_number_sent = log.block_number;
Ok(())
}
None => Ok(()),
})
{
error!("send error: e={:?}", e);
break;
}
}
Err(e) => {
@ -271,7 +299,10 @@ impl LogEntryFetcher {
tokio::time::sleep(Duration::from_millis(RETRY_WAIT_MS)).await;
}
}
metrics::RECOVER_LOG.update_since(start_time);
}
info!("log recover end");
},
"log recover",
);
@ -285,11 +316,14 @@ impl LogEntryFetcher {
executor: &TaskExecutor,
block_hash_cache: Arc<RwLock<BTreeMap<u64, Option<BlockHashAndSubmissionIndex>>>>,
watch_loop_wait_time_ms: u64,
mut watch_progress_rx: UnboundedReceiver<u64>,
) -> UnboundedReceiver<LogFetchProgress> {
let (watch_tx, watch_rx) = tokio::sync::mpsc::unbounded_channel();
let contract = ZgsFlow::new(self.contract_address, self.provider.clone());
let contract = self.flow_contract();
let provider = self.provider.clone();
let confirmation_delay = self.confirmation_delay;
let log_page_size = self.log_page_size;
let mut progress_reset_history = BTreeMap::new();
executor.spawn(
async move {
debug!("start_watch starts, start={}", start_block_number);
@ -297,6 +331,17 @@ impl LogEntryFetcher {
let mut parent_block_hash = parent_block_hash;
loop {
check_watch_process(
&mut watch_progress_rx,
&mut progress,
&mut parent_block_hash,
&mut progress_reset_history,
watch_loop_wait_time_ms,
&block_hash_cache,
provider.as_ref(),
)
.await;
match Self::watch_loop(
provider.as_ref(),
progress,
@ -305,6 +350,7 @@ impl LogEntryFetcher {
confirmation_delay,
&contract,
&block_hash_cache,
log_page_size,
)
.await
{
@ -340,6 +386,7 @@ impl LogEntryFetcher {
confirmation_delay: u64,
contract: &ZgsFlow<Provider<RetryClient<Http>>>,
block_hash_cache: &Arc<RwLock<BTreeMap<u64, Option<BlockHashAndSubmissionIndex>>>>,
log_page_size: u64,
) -> Result<Option<(u64, H256, Option<Option<u64>>)>> {
let latest_block_number = provider.get_block_number().await?.as_u64();
debug!(
@ -363,6 +410,10 @@ impl LogEntryFetcher {
);
}
if block.logs_bloom.is_none() {
bail!("block {:?} logs bloom is none", block.number);
}
if from_block_number > 0 && block.parent_hash != parent_block_hash {
// reorg happened
let (parent_block_number, block_hash) = revert_one_block(
@ -391,13 +442,22 @@ impl LogEntryFetcher {
block.number
);
}
if Some(block.parent_hash) != parent_block_hash {
if parent_block_hash.is_none() || Some(block.parent_hash) != parent_block_hash {
bail!(
"parent block hash mismatch, expected {:?}, actual {}",
parent_block_hash,
block.parent_hash
);
}
if block_number == to_block_number && block.hash.is_none() {
bail!("block {:?} hash is none", block.number);
}
if block.logs_bloom.is_none() {
bail!("block {:?} logs bloom is none", block.number);
}
parent_block_hash = block.hash;
blocks.insert(block_number, block);
}
@ -408,8 +468,11 @@ impl LogEntryFetcher {
.to_block(to_block_number)
.address(contract.address().into())
.filter;
let mut stream = LogQuery::new(provider, &filter, Duration::from_millis(10))
.with_page_size(log_page_size);
let mut block_logs: BTreeMap<u64, Vec<Log>> = BTreeMap::new();
for log in provider.get_logs(&filter).await? {
while let Some(maybe_log) = stream.next().await {
let log = maybe_log?;
let block_number = log
.block_number
.ok_or_else(|| anyhow!("block number missing"))?
@ -446,7 +509,7 @@ impl LogEntryFetcher {
}
let tx = txs_hm[&log.transaction_index];
if log.transaction_hash != Some(tx.hash) {
if log.transaction_hash.is_none() || log.transaction_hash != Some(tx.hash) {
warn!(
"log tx hash mismatch, log transaction {:?}, block transaction {:?}",
log.transaction_hash,
@ -454,7 +517,9 @@ impl LogEntryFetcher {
);
return Ok(progress);
}
if log.transaction_index != tx.transaction_index {
if log.transaction_index.is_none()
|| log.transaction_index != tx.transaction_index
{
warn!(
"log tx index mismatch, log tx index {:?}, block transaction index {:?}",
log.transaction_index,
@ -483,8 +548,11 @@ impl LogEntryFetcher {
first_submission_index = Some(submit_filter.submission_index.as_u64());
}
log_events.push(submission_event_to_transaction(submit_filter));
log_events
.push(submission_event_to_transaction(submit_filter, block_number));
}
info!("synced {} events", log_events.len());
}
let new_progress = if block.hash.is_some() && block.number.is_some() {
@ -496,20 +564,29 @@ impl LogEntryFetcher {
} else {
None
};
if let Some(p) = &new_progress {
if let Err(e) = watch_tx.send(LogFetchProgress::SyncedBlock(*p)) {
warn!("send LogFetchProgress failed: {:?}", e);
return Ok(progress);
} else {
block_hash_cache.write().await.insert(p.0, None);
}
}
for log in log_events.into_iter() {
if let Err(e) = watch_tx.send(log) {
warn!("send log failed: {:?}", e);
warn!("send LogFetchProgress::Transaction failed: {:?}", e);
return Ok(progress);
}
}
if let Some(p) = &new_progress {
if let Err(e) = watch_tx.send(LogFetchProgress::SyncedBlock(*p)) {
warn!("send LogFetchProgress::SyncedBlock failed: {:?}", e);
return Ok(progress);
} else {
let mut cache = block_hash_cache.write().await;
match cache.get(&p.0) {
Some(Some(v))
if v.block_hash == p.1
&& v.first_submission_index == p.2.unwrap() => {}
_ => {
cache.insert(p.0, None);
}
}
}
}
progress = new_progress;
}
}
@ -520,6 +597,108 @@ impl LogEntryFetcher {
pub fn provider(&self) -> &Provider<RetryClient<Http>> {
self.provider.as_ref()
}
/// Builds a `ZgsFlow` contract binding for this fetcher's `contract_address`.
///
/// The binding shares the fetcher's provider: only the `Arc` handle is cloned,
/// so every call returns a fresh binding backed by the same RPC client.
pub fn flow_contract(&self) -> ZgsFlow<Provider<RetryClient<Http>>> {
ZgsFlow::new(self.contract_address, self.provider.clone())
}
}
/// Applies pending "rewind" requests to the watch loop position before its next iteration.
///
/// All block numbers currently queued on `watch_progress_rx` are drained and only the
/// smallest one is considered. If it is lower than `*progress`, the watch position may be
/// moved back to it, subject to the per-target limits tracked in `progress_reset_history`:
///
/// * the first request for a given block number is honoured immediately;
/// * a repeated request for the same block number is honoured only if at least
///   `30 * watch_loop_wait_time_ms` milliseconds passed since the previous reset to it;
/// * once a block number has been reset to 3 times, the receiver is closed and no further
///   reset is performed.
///
/// After a reset, `*parent_block_hash` is refreshed with the hash of block `*progress - 1`,
/// taken from `block_hash_cache` when present, otherwise fetched from `provider`.
///
/// # Parameters
/// * `watch_progress_rx` - channel carrying block numbers the watcher is asked to restart from.
/// * `progress` - current watch position; overwritten on reset.
/// * `parent_block_hash` - hash of the block preceding `progress`; overwritten on reset.
/// * `progress_reset_history` - reset target -> (time of last reset, number of resets).
/// * `watch_loop_wait_time_ms` - base interval used to throttle repeated resets.
/// * `block_hash_cache` - shared cache of block hashes keyed by block number.
/// * `provider` - RPC provider used when the parent block is not cached.
///
/// # Panics
/// Panics if the parent block cannot be obtained from the RPC endpoint for any reason other
/// than a "server is too busy" error, or if the returned block carries no hash.
async fn check_watch_process(
    watch_progress_rx: &mut UnboundedReceiver<u64>,
    progress: &mut u64,
    parent_block_hash: &mut H256,
    progress_reset_history: &mut BTreeMap<u64, (Instant, usize)>,
    watch_loop_wait_time_ms: u64,
    block_hash_cache: &Arc<RwLock<BTreeMap<u64, Option<BlockHashAndSubmissionIndex>>>>,
    provider: &Provider<RetryClient<Http>>,
) {
    // Drain the channel without blocking and keep only the lowest requested block number.
    let mut min_received_progress: Option<u64> = None;
    while let Ok(v) = watch_progress_rx.try_recv() {
        min_received_progress = Some(min_received_progress.map_or(v, |min| min.min(v)));
    }

    let mut reset = false;
    if let Some(v) = min_received_progress {
        // A rewind target must be strictly behind the current position.
        if *progress <= v {
            error!(
                "received unexpected progress, current {}, received {}",
                *progress, v
            );
            return;
        }

        let now = Instant::now();
        match progress_reset_history.get_mut(&v) {
            Some((last_update, counter)) => {
                if *counter >= 3 {
                    error!("maximum reset attempts have been reached.");
                    watch_progress_rx.close();
                    return;
                }

                // Throttle repeated resets to the same block number.
                if now.duration_since(*last_update)
                    >= Duration::from_millis(watch_loop_wait_time_ms * 30)
                {
                    info!("reset to progress from {} to {}", *progress, v);
                    *progress = v;
                    *last_update = now;
                    *counter += 1;
                    reset = true;
                }
            }
            None => {
                info!("reset to progress from {} to {}", *progress, v);
                *progress = v;
                progress_reset_history.insert(v, (now, 1usize));
                reset = true;
            }
        }
    }

    // Block 0 has no parent, so there is nothing to look up (and `*progress - 1` would
    // underflow); the previous `parent_block_hash` is left untouched in that case.
    if reset && *progress > 0 {
        let parent_block_number = *progress - 1;
        *parent_block_hash = loop {
            // Copy the hash out so the read guard is released at the end of this statement.
            // Holding it across the awaits below would block the writer that is expected to
            // fill in the missing entry.
            let cached = block_hash_cache
                .read()
                .await
                .get(&parent_block_number)
                .map(|entry| entry.as_ref().map(|v| v.block_hash));

            match cached {
                Some(Some(hash)) => break hash,
                Some(None) => {
                    // The block is known but its `SyncedBlock` has not been processed yet.
                    debug!(
                        "block_hash_cache wait for SyncedBlock processed for {}",
                        parent_block_number
                    );
                    // RETRY_WAIT_MS is expressed in milliseconds.
                    tokio::time::sleep(Duration::from_millis(RETRY_WAIT_MS)).await;
                }
                None => {
                    // "org" in the message below refers to a chain reorg.
                    warn!(
                        "get block hash for block {} from RPC, assume there is no org",
                        parent_block_number
                    );
                    break loop {
                        match provider.get_block(parent_block_number).await {
                            Ok(Some(v)) => {
                                break v.hash.expect("parent block hash expect exist");
                            }
                            Ok(None) => {
                                panic!("parent block {} expect exist", parent_block_number);
                            }
                            Err(e) => {
                                if e.to_string().contains("server is too busy") {
                                    warn!(
                                        "server busy, wait for parent block {}",
                                        parent_block_number
                                    );
                                    // Back off instead of spinning against a busy endpoint.
                                    tokio::time::sleep(Duration::from_millis(RETRY_WAIT_MS))
                                        .await;
                                } else {
                                    panic!(
                                        "parent block {} expect exist, error {}",
                                        parent_block_number, e
                                    );
                                }
                            }
                        }
                    };
                }
            }
        };
    }

    // Forget reset records that are more than 1000 blocks behind the current position.
    progress_reset_history.retain(|k, _| k + 1000 >= *progress);
}
async fn revert_one_block(
@ -575,26 +754,29 @@ async fn revert_one_block(
#[derive(Debug)]
pub enum LogFetchProgress {
SyncedBlock((u64, H256, Option<Option<u64>>)),
Transaction(Transaction),
Transaction((Transaction, u64)),
Reverted(u64),
}
fn submission_event_to_transaction(e: SubmitFilter) -> LogFetchProgress {
LogFetchProgress::Transaction(Transaction {
stream_ids: vec![],
data: vec![],
data_merkle_root: nodes_to_root(&e.submission.nodes),
merkle_nodes: e
.submission
.nodes
.iter()
// the submission height is the height of the root node starting from height 0.
.map(|SubmissionNode { root, height }| (height.as_usize() + 1, root.into()))
.collect(),
start_entry_index: e.start_pos.as_u64(),
size: e.submission.length.as_u64(),
seq: e.submission_index.as_u64(),
})
/// Wraps an on-chain `SubmitFilter` event into a `LogFetchProgress::Transaction`,
/// pairing the decoded `Transaction` with the number of the block the log was found in.
fn submission_event_to_transaction(e: SubmitFilter, block_number: u64) -> LogFetchProgress {
    let submission = &e.submission;

    // The submission height is the height of the root node starting from height 0,
    // hence the `+ 1` when converting to the transaction's node representation.
    let merkle_nodes = submission
        .nodes
        .iter()
        .map(|SubmissionNode { root, height }| (height.as_usize() + 1, root.into()))
        .collect();

    let transaction = Transaction {
        stream_ids: vec![],
        data: vec![],
        data_merkle_root: nodes_to_root(&submission.nodes),
        merkle_nodes,
        start_entry_index: e.start_pos.as_u64(),
        size: submission.length.as_u64(),
        seq: e.submission_index.as_u64(),
    };

    LogFetchProgress::Transaction((transaction, block_number))
}
fn nodes_to_root(node_list: &[SubmissionNode]) -> DataRoot {

View File

@ -14,6 +14,8 @@ use thiserror::Error;
pub(crate) type PinBoxFut<'a, T> =
Pin<Box<dyn Future<Output = Result<T, ProviderError>> + Send + 'a>>;
const TOO_MANY_LOGS_ERROR_MSG: [&str; 2] = ["query returned more than", "too large with more than"];
/// A log query provides streaming access to historical logs via a paginated
/// request. For streaming access to future logs, use [`Middleware::watch`] or
/// [`Middleware::subscribe_logs`]
@ -21,6 +23,9 @@ pub struct LogQuery<'a, P> {
provider: &'a Provider<P>,
filter: Filter,
from_block: Option<U64>,
expected_page_size: u64,
/// It may be smaller than `expected_page_size` if the server cannot return all the logs.
page_size: u64,
current_logs: VecDeque<Log>,
last_block: Option<U64>,
@ -31,7 +36,8 @@ pub struct LogQuery<'a, P> {
enum LogQueryState<'a> {
Initial,
LoadLastBlock(PinBoxFut<'a, U64>),
LoadLogs(PinBoxFut<'a, Vec<Log>>),
/// `(from_block, get_logs_fut)`. `from_block` is used to resume if the request fails.
LoadLogs((Option<U64>, PinBoxFut<'a, Vec<Log>>)),
Consume,
}
@ -45,6 +51,7 @@ where
provider,
filter: filter.clone(),
from_block: filter.get_from_block(),
expected_page_size: 10000,
page_size: 10000,
current_logs: VecDeque::new(),
last_block: None,
@ -56,6 +63,7 @@ where
/// set page size for pagination
pub fn with_page_size(mut self, page_size: u64) -> Self {
self.page_size = page_size;
self.expected_page_size = page_size;
self
}
}
@ -98,7 +106,7 @@ where
tokio::time::sleep(delay).await;
provider.get_logs(&filter).await
});
rewake_with_new_state!(ctx, self, LogQueryState::LoadLogs(fut));
rewake_with_new_state!(ctx, self, LogQueryState::LoadLogs((None, fut)));
} else {
// if paginatable, load last block
let fut = match self.filter.get_to_block() {
@ -119,7 +127,7 @@ where
// this is okay because we will only enter this state when the filter is
// paginatable i.e. from block is set
let from_block = self.filter.get_from_block().unwrap();
let to_block = min(from_block + self.page_size, last_block);
let to_block = min(from_block + self.page_size - 1, last_block);
self.from_block = Some(to_block + 1);
let filter = self
@ -134,18 +142,34 @@ where
tokio::time::sleep(delay).await;
provider.get_logs(&filter).await
});
rewake_with_new_state!(ctx, self, LogQueryState::LoadLogs(fut));
rewake_with_new_state!(
ctx,
self,
LogQueryState::LoadLogs((Some(from_block), fut))
);
}
Err(err) => Poll::Ready(Some(Err(LogQueryError::LoadLastBlockError(err)))),
}
}
LogQueryState::LoadLogs(fut) => match futures_util::ready!(fut.as_mut().poll(ctx)) {
Ok(logs) => {
self.current_logs = VecDeque::from(logs);
rewake_with_new_state!(ctx, self, LogQueryState::Consume);
LogQueryState::LoadLogs((from_block, fut)) => {
match futures_util::ready!(fut.as_mut().poll(ctx)) {
Ok(logs) => {
self.current_logs = VecDeque::from(logs);
self.page_size = self.expected_page_size;
rewake_with_new_state!(ctx, self, LogQueryState::Consume);
}
Err(err) => {
for msg in TOO_MANY_LOGS_ERROR_MSG.iter() {
if err.to_string().contains(msg) {
self.from_block = *from_block;
self.page_size /= 2;
rewake_with_new_state!(ctx, self, LogQueryState::Consume);
}
}
Poll::Ready(Some(Err(LogQueryError::LoadLogsError(err))))
}
}
Err(err) => Poll::Ready(Some(Err(LogQueryError::LoadLogsError(err)))),
},
}
LogQueryState::Consume => {
let log = self.current_logs.pop_front();
if log.is_none() {
@ -158,9 +182,9 @@ where
let from_block = self.from_block.unwrap();
let to_block = if let Some(l) = self.last_block {
// if last_block is not none, only getLogs from to_block to last_block
min(from_block + self.page_size, l)
min(from_block + self.page_size - 1, l)
} else {
from_block + self.page_size
from_block + self.page_size - 1
};
// no more pages to load, and everything is consumed
@ -183,7 +207,11 @@ where
provider.get_logs(&filter).await
});
rewake_with_new_state!(ctx, self, LogQueryState::LoadLogs(fut));
rewake_with_new_state!(
ctx,
self,
LogQueryState::LoadLogs((Some(from_block), fut))
);
}
} else {
Poll::Ready(log.map(Ok))

View File

@ -0,0 +1,13 @@
use std::sync::Arc;
use metrics::{register_timer, Gauge, GaugeUsize, Timer};
// Lazily registered metrics for the log entry sync manager. Each static is registered
// under the string name passed to `register_timer` / `GaugeUsize::register`.
lazy_static::lazy_static! {
// Timer updated at the end of handling a `LogFetchProgress::Transaction` item in `handle_data`.
pub static ref LOG_MANAGER_HANDLE_DATA_TRANSACTION: Arc<dyn Timer> = register_timer("log_manager_handle_data_transaction");
// Timer updated by `put_tx_inner` when it completes successfully.
pub static ref STORE_PUT_TX: Arc<dyn Timer> = register_timer("log_entry_sync_manager_put_tx_inner");
// Gauge set by `put_tx_inner` to `tx.size * 1000 / elapsed_micros`
// (presumably bytes per millisecond, assuming `tx.size` is in bytes - TODO confirm).
pub static ref STORE_PUT_TX_SPEED_IN_BYTES: Arc<dyn Gauge<usize>> = GaugeUsize::register("log_entry_sync_manager_put_tx_speed_in_bytes");
// Timer updated once per item pulled from the log stream in the recover task.
pub static ref RECOVER_LOG: Arc<dyn Timer> = register_timer("log_entry_sync_manager_recover_log");
}

View File

@ -5,24 +5,39 @@ use anyhow::{anyhow, bail, Result};
use ethereum_types::H256;
use ethers::{prelude::Middleware, types::BlockNumber};
use futures::FutureExt;
use jsonrpsee::tracing::{debug, error, trace, warn};
use shared_types::{ChunkArray, Transaction};
use jsonrpsee::tracing::{debug, error, warn};
use shared_types::{bytes_to_chunks, ChunkArray, Transaction};
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::future::Future;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use storage::log_store::log_manager::PORA_CHUNK_SIZE;
use storage::log_store::{tx_store::BlockHashAndSubmissionIndex, Store};
use task_executor::{ShutdownReason, TaskExecutor};
use thiserror::Error;
use tokio::sync::broadcast;
use tokio::sync::mpsc::UnboundedReceiver;
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tokio::sync::{oneshot, RwLock};
const RETRY_WAIT_MS: u64 = 500;
// A RPC query can return at most 10000 entries.
const BROADCAST_CHANNEL_CAPACITY: usize = 10000;
// Each tx has less than 10KB, so the cache size should be acceptable.
const BROADCAST_CHANNEL_CAPACITY: usize = 25000;
const CATCH_UP_END_GAP: u64 = 10;
const CHECK_ROOT_INTERVAL: u64 = 500;
/// Errors returned while handling fetched log data (`handle_data` / `catch_up_data`).
#[derive(Error, Debug)]
pub enum HandleDataError {
/// A transaction arrived with a sequence number greater than the expected next one.
/// The payload is the block number of the last successfully stored log; the catch-up
/// loop uses it as the block number to retry from.
#[error("transaction seq is great than expected, expect block number {0}")]
SeqError(u64),
/// Any other failure, wrapped from an `anyhow::Error` (conversion derived via `#[from]`).
#[error("{0}")]
CommonError(#[from] anyhow::Error),
}
#[derive(Clone, Debug)]
pub enum LogSyncEvent {
@ -72,16 +87,7 @@ impl LogSyncManager {
.expect("shutdown send error")
},
async move {
let log_fetcher = LogEntryFetcher::new(
&config.rpc_endpoint_url,
config.contract_address,
config.log_page_size,
config.confirmation_block_count,
config.rate_limit_retries,
config.timeout_retries,
config.initial_backoff,
)
.await?;
let log_fetcher = LogEntryFetcher::new(&config).await?;
let data_cache = DataCache::new(config.cache_config.clone());
let block_hash_cache = Arc::new(RwLock::new(
@ -102,16 +108,7 @@ impl LogSyncManager {
};
let (mut start_block_number, mut start_block_hash) =
match log_sync_manager.store.get_sync_progress()? {
// No previous progress, so just use config.
None => {
let block_number = log_sync_manager.config.start_block_number;
let block_hash =
log_sync_manager.get_block(block_number.into()).await?.1;
(block_number, block_hash)
}
Some((block_number, block_hash)) => (block_number, block_hash),
};
get_start_block_number_with_hash(&log_sync_manager).await?;
let (mut finalized_block_number, mut finalized_block_hash) =
match log_sync_manager.get_block(BlockNumber::Finalized).await {
@ -145,7 +142,7 @@ impl LogSyncManager {
&executor_clone,
log_sync_manager.block_hash_cache.clone(),
);
log_sync_manager.handle_data(reorg_rx).await?;
log_sync_manager.handle_data(reorg_rx, &None).await?;
if let Some((block_number, block_hash)) =
log_sync_manager.store.get_sync_progress()?
{
@ -197,13 +194,51 @@ impl LogSyncManager {
} else {
// Keep catching-up data until we are close to the latest height.
loop {
log_sync_manager
// wait tx receipt is ready
if let Ok(Some(block)) = log_sync_manager
.log_fetcher
.provider()
.get_block_with_txs(finalized_block_number)
.await
{
if let Some(tx) = block.transactions.first() {
loop {
match log_sync_manager
.log_fetcher
.provider()
.get_transaction_receipt(tx.hash)
.await
{
Ok(Some(_)) => break,
_ => {
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
}
}
}
}
}
while let Err(e) = log_sync_manager
.catch_up_data(
executor_clone.clone(),
start_block_number,
finalized_block_number,
)
.await?;
.await
{
match e {
HandleDataError::SeqError(block_number) => {
warn!("seq error occurred, retry from {}", block_number);
start_block_number = block_number;
tokio::time::sleep(Duration::from_secs(1)).await;
}
_ => {
return Err(e.into());
}
}
}
start_block_number = finalized_block_number.saturating_add(1);
let new_finalized_block =
@ -222,15 +257,35 @@ impl LogSyncManager {
warn!("catch_up_end send fails, possibly auto_sync is not enabled");
}
log_sync_manager
.log_fetcher
.start_remove_finalized_block_task(
&executor_clone,
log_sync_manager.store.clone(),
log_sync_manager.block_hash_cache.clone(),
log_sync_manager.config.default_finalized_block_count,
log_sync_manager
.config
.remove_finalized_block_interval_minutes,
);
// start the pad data store
log_sync_manager.store.start_padding(&executor_clone);
let (watch_progress_tx, watch_progress_rx) =
tokio::sync::mpsc::unbounded_channel();
let watch_rx = log_sync_manager.log_fetcher.start_watch(
start_block_number,
parent_block_hash,
&executor_clone,
log_sync_manager.block_hash_cache.clone(),
log_sync_manager.config.watch_loop_wait_time_ms,
watch_progress_rx,
);
// Syncing `watch_rx` is supposed to block forever.
log_sync_manager.handle_data(watch_rx).await?;
log_sync_manager
.handle_data(watch_rx, &Some(watch_progress_tx))
.await?;
Ok::<(), anyhow::Error>(())
},
)
@ -240,20 +295,20 @@ impl LogSyncManager {
Ok((event_send_cloned, catch_up_end_receiver))
}
async fn put_tx(&mut self, tx: Transaction) -> bool {
async fn put_tx(&mut self, tx: Transaction) -> Option<bool> {
// We call this after process chain reorg, so the sequence number should match.
match tx.seq.cmp(&self.next_tx_seq) {
std::cmp::Ordering::Less => true,
std::cmp::Ordering::Less => Some(true),
std::cmp::Ordering::Equal => {
debug!("log entry sync get entry: {:?}", tx);
self.put_tx_inner(tx).await
Some(self.put_tx_inner(tx).await)
}
std::cmp::Ordering::Greater => {
error!(
"Unexpected transaction seq: next={} get={}",
self.next_tx_seq, tx.seq
);
false
None
}
}
}
@ -295,9 +350,20 @@ impl LogSyncManager {
let _ = self.event_send.send(LogSyncEvent::Reverted { tx_seq });
}
async fn handle_data(&mut self, mut rx: UnboundedReceiver<LogFetchProgress>) -> Result<()> {
async fn handle_data(
&mut self,
mut rx: UnboundedReceiver<LogFetchProgress>,
watch_progress_tx: &Option<UnboundedSender<u64>>,
) -> Result<(), HandleDataError> {
let mut log_latest_block_number =
if let Some(block_number) = self.store.get_log_latest_block_number()? {
block_number
} else {
0
};
while let Some(data) = rx.recv().await {
trace!("handle_data: data={:?}", data);
debug!("handle_data: data={:?}", data);
match data {
LogFetchProgress::SyncedBlock((
block_number,
@ -335,11 +401,35 @@ impl LogSyncManager {
}
}
}
LogFetchProgress::Transaction(tx) => {
if !self.put_tx(tx.clone()).await {
LogFetchProgress::Transaction((tx, block_number)) => {
let mut stop = false;
let start_time = Instant::now();
match self.put_tx(tx.clone()).await {
Some(false) => stop = true,
Some(true) => {
if let Err(e) = self.store.put_log_latest_block_number(block_number) {
warn!("failed to put log latest block number, error={:?}", e);
}
log_latest_block_number = block_number;
}
_ => {
stop = true;
if let Some(progress_tx) = watch_progress_tx {
if let Err(e) = progress_tx.send(log_latest_block_number) {
error!("failed to send watch progress, error={:?}", e);
} else {
continue;
}
} else {
return Err(HandleDataError::SeqError(log_latest_block_number));
}
}
}
if stop {
// Unexpected error.
error!("log sync write error");
break;
return Err(anyhow!("log sync write error").into());
}
if let Err(e) = self.event_send.send(LogSyncEvent::TxSynced { tx }) {
// TODO: Do we need to wait until all receivers are initialized?
@ -347,6 +437,8 @@ impl LogSyncManager {
// no receivers will be created.
warn!("log sync broadcast error, error={:?}", e);
}
metrics::LOG_MANAGER_HANDLE_DATA_TRANSACTION.update_since(start_time);
}
LogFetchProgress::Reverted(reverted) => {
self.process_reverted(reverted).await;
@ -357,7 +449,10 @@ impl LogSyncManager {
}
async fn put_tx_inner(&mut self, tx: Transaction) -> bool {
if let Err(e) = self.store.put_tx(tx.clone()) {
let start_time = Instant::now();
let result = self.store.put_tx(tx.clone());
if let Err(e) = result {
error!("put_tx error: e={:?}", e);
false
} else {
@ -380,9 +475,81 @@ impl LogSyncManager {
error!("put_tx data error: e={:?}", e);
return false;
}
} else {
// check if current node need to save at least one segment
let store = self.store.clone();
let shard_config = store.get_shard_config();
let start_segment_index = tx.start_entry_index as usize / PORA_CHUNK_SIZE;
let end_segment_index =
(tx.start_entry_index as usize + bytes_to_chunks(tx.size as usize) - 1)
/ PORA_CHUNK_SIZE;
let mut can_finalize = false;
if end_segment_index < shard_config.shard_id {
can_finalize = true;
} else {
// check if there is a number N between [start_segment_index, end_segment_index] that satisfy:
// N % num_shard = shard_id
let min_n_gte_start =
(start_segment_index + shard_config.num_shard - 1 - shard_config.shard_id)
/ shard_config.num_shard;
let max_n_lte_end =
(end_segment_index - shard_config.shard_id) / shard_config.num_shard;
if min_n_gte_start > max_n_lte_end {
can_finalize = true;
}
}
if can_finalize {
if let Err(e) = store.finalize_tx_with_hash(tx.seq, tx.hash()) {
error!("finalize file that does not need to store: e={:?}", e);
return false;
}
}
}
self.data_cache.garbage_collect(self.next_tx_seq);
self.next_tx_seq += 1;
// Check if the computed data root matches on-chain state.
// If the call fails, we won't check the root here and return `true` directly.
if self.next_tx_seq % CHECK_ROOT_INTERVAL == 0 {
let flow_contract = self.log_fetcher.flow_contract();
match flow_contract
.get_flow_root_by_tx_seq(tx.seq.into())
.call()
.await
{
Ok(contract_root_bytes) => {
let contract_root = H256::from_slice(&contract_root_bytes);
// contract_root is zero for tx submitted before upgrading.
if !contract_root.is_zero() {
match self.store.get_context() {
Ok((local_root, _)) => {
if contract_root != local_root {
error!(
?contract_root,
?local_root,
"local flow root and on-chain flow root mismatch"
);
return false;
}
}
Err(e) => {
warn!(?e, "fail to read the local flow root");
}
}
}
}
Err(e) => {
warn!(?e, "fail to read the on-chain flow root");
}
}
}
metrics::STORE_PUT_TX_SPEED_IN_BYTES
.update((tx.size * 1000 / start_time.elapsed().as_micros() as u64) as usize);
metrics::STORE_PUT_TX.update_since(start_time);
true
}
}
@ -414,7 +581,7 @@ impl LogSyncManager {
executor_clone: TaskExecutor,
start_block_number: u64,
finalized_block_number: u64,
) -> Result<()> {
) -> Result<(), HandleDataError> {
if start_block_number < finalized_block_number {
let recover_rx = self.log_fetcher.start_recover(
start_block_number,
@ -422,20 +589,52 @@ impl LogSyncManager {
&executor_clone,
Duration::from_millis(self.config.recover_query_delay),
);
self.handle_data(recover_rx).await?;
self.handle_data(recover_rx, &None).await?;
}
self.log_fetcher.start_remove_finalized_block_task(
&executor_clone,
self.store.clone(),
self.block_hash_cache.clone(),
self.config.default_finalized_block_count,
self.config.remove_finalized_block_interval_minutes,
);
Ok(())
}
}
/// Determines the block number, and that block's hash, from which log sync should start.
///
/// The sources are tried in this order:
/// 1. If `config.force_log_sync_from_start_block_number` is set, use
///    `config.start_block_number` and fetch its hash via `get_block`.
/// 2. If the store holds a latest-log block number, use it; the hash comes from
///    `block_hash_cache` when an entry with a value exists, otherwise from `get_block`.
/// 3. If the store holds sync progress, use that `(number, hash)` pair.
/// 4. Otherwise fall back to `config.start_block_number`, fetching its hash via `get_block`.
///
/// # Errors
/// Propagates any error from the store lookups or from `get_block`.
async fn get_start_block_number_with_hash(
    log_sync_manager: &LogSyncManager,
) -> Result<(u64, H256), anyhow::Error> {
    if log_sync_manager
        .config
        .force_log_sync_from_start_block_number
    {
        let block_number = log_sync_manager.config.start_block_number;
        let block_hash = log_sync_manager.get_block(block_number.into()).await?.1;
        return Ok((block_number, block_hash));
    }

    if let Some(block_number) = log_sync_manager.store.get_log_latest_block_number()? {
        // Copy the hash out so the cache read guard is dropped at the end of this statement
        // rather than being held across the RPC call below.
        let cached_hash = log_sync_manager
            .block_hash_cache
            .read()
            .await
            .get(&block_number)
            .and_then(|entry| entry.as_ref().map(|val| val.block_hash));

        let block_hash = match cached_hash {
            Some(hash) => hash,
            None => {
                warn!("get block hash for block {} from RPC", block_number);
                log_sync_manager.get_block(block_number.into()).await?.1
            }
        };
        return Ok((block_number, block_hash));
    }

    match log_sync_manager.store.get_sync_progress()? {
        // No previous progress, so just use config.
        None => {
            let block_number = log_sync_manager.config.start_block_number;
            let block_hash = log_sync_manager.get_block(block_number.into()).await?.1;
            Ok((block_number, block_hash))
        }
        Some((block_number, block_hash)) => Ok((block_number, block_hash)),
    }
}
async fn run_and_log<R, E>(
mut on_error: impl FnMut(),
f: impl Future<Output = std::result::Result<R, E>> + Send,
@ -457,3 +656,4 @@ pub(crate) mod config;
mod data_cache;
mod log_entry_fetcher;
mod log_query;
mod metrics;

View File

@ -1,9 +1,16 @@
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use ethereum_types::{Address, H256, U256};
use ethers::core::k256::SecretKey;
use ethers::middleware::SignerMiddleware;
use ethers::providers::Http;
use ethers::providers::HttpRateLimitRetryPolicy;
use ethers::providers::Middleware;
use ethers::providers::Provider;
use ethers::providers::RetryClient;
use ethers::providers::RetryClientBuilder;
use ethers::signers::LocalWallet;
use ethers::signers::Signer;
use storage::config::ShardConfig;
@ -18,9 +25,13 @@ pub struct MinerConfig {
pub(crate) cpu_percentage: u64,
pub(crate) iter_batch: usize,
pub(crate) shard_config: ShardConfig,
pub(crate) context_query_interval: Duration,
pub(crate) rate_limit_retries: u32,
pub(crate) timeout_retries: u32,
pub(crate) initial_backoff: u64,
}
pub type MineServiceMiddleware = SignerMiddleware<Provider<Http>, LocalWallet>;
pub type MineServiceMiddleware = SignerMiddleware<Arc<Provider<RetryClient<Http>>>, LocalWallet>;
impl MinerConfig {
#[allow(clippy::too_many_arguments)]
@ -33,7 +44,11 @@ impl MinerConfig {
submission_gas: Option<U256>,
cpu_percentage: u64,
iter_batch: usize,
context_query_seconds: u64,
shard_config: ShardConfig,
rate_limit_retries: u32,
timeout_retries: u32,
initial_backoff: u64,
) -> Option<MinerConfig> {
miner_key.map(|miner_key| MinerConfig {
miner_id,
@ -45,12 +60,29 @@ impl MinerConfig {
cpu_percentage,
iter_batch,
shard_config,
context_query_interval: Duration::from_secs(context_query_seconds),
rate_limit_retries,
timeout_retries,
initial_backoff,
})
}
pub(crate) async fn make_provider(&self) -> Result<MineServiceMiddleware, String> {
let provider = Provider::<Http>::try_from(&self.rpc_endpoint_url)
.map_err(|e| format!("Can not parse blockchain endpoint: {:?}", e))?;
/// Builds a shared JSON-RPC provider for `rpc_endpoint_url` whose HTTP
/// transport is wrapped in a retrying client.
///
/// The retry client is configured from `rate_limit_retries`,
/// `timeout_retries` and `initial_backoff` (interpreted as milliseconds) and
/// uses `HttpRateLimitRetryPolicy`.
///
/// # Errors
/// Returns a message string when the endpoint URL cannot be parsed.
pub(crate) fn make_provider(&self) -> Result<Arc<Provider<RetryClient<Http>>>, String> {
    let transport = Http::from_str(&self.rpc_endpoint_url)
        .map_err(|e| format!("Cannot parse blockchain endpoint: {:?}", e))?;

    let retry_client = RetryClientBuilder::default()
        .rate_limit_retries(self.rate_limit_retries)
        .timeout_retries(self.timeout_retries)
        .initial_backoff(Duration::from_millis(self.initial_backoff))
        .build(transport, Box::new(HttpRateLimitRetryPolicy));

    Ok(Arc::new(Provider::new(retry_client)))
}
pub(crate) async fn make_signing_provider(&self) -> Result<MineServiceMiddleware, String> {
let provider = self.make_provider()?;
let chain_id = provider
.get_chainid()
.await
@ -59,6 +91,7 @@ impl MinerConfig {
.map_err(|e| format!("Cannot parse private key: {:?}", e))?;
let signer = LocalWallet::from(secret_key).with_chain_id(chain_id.as_u64());
let middleware = SignerMiddleware::new(provider, signer);
Ok(middleware)
}
}

View File

@ -9,6 +9,7 @@ mod loader;
mod metrics;
mod mine;
mod miner_id;
mod monitor;
pub mod pora;
mod recall_range;
mod sealer;

View File

@ -9,7 +9,6 @@ use tokio::time::{sleep, Duration, Instant};
use storage::config::ShardConfig;
use zgs_spec::{SECTORS_PER_LOAD, SECTORS_PER_MAX_MINING_RANGE, SECTORS_PER_PRICING};
use super::metrics;
use crate::recall_range::RecallRange;
use crate::{
pora::{AnswerWithoutProof, Miner},
@ -20,7 +19,7 @@ use crate::{
use std::sync::Arc;
pub struct PoraService {
mine_context_receiver: mpsc::UnboundedReceiver<MineContextMessage>,
mine_context_receiver: broadcast::Receiver<MineContextMessage>,
mine_answer_sender: mpsc::UnboundedSender<AnswerWithoutProof>,
msg_recv: broadcast::Receiver<MinerMessage>,
loader: Arc<dyn PoraLoader>,
@ -33,9 +32,29 @@ pub struct PoraService {
iter_batch: usize,
}
struct PoraPuzzle {
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct PoraPuzzle {
context: MineContext,
target_quality: U256,
max_shards: u64,
}
impl PoraPuzzle {
pub fn new(context: MineContext, target_quality: U256, max_shards: u64) -> Self {
Self {
context,
target_quality,
max_shards,
}
}
pub fn max_shards(&self) -> u64 {
self.max_shards
}
pub fn context_digest(&self) -> H256 {
H256(self.context.digest)
}
}
#[derive(Clone, Debug, Default)]
pub struct MineRangeConfig {
@ -56,7 +75,11 @@ impl MineRangeConfig {
let minable_length =
(context.flow_length.as_u64() / SECTORS_PER_LOAD as u64) * SECTORS_PER_LOAD as u64;
let mining_length = std::cmp::min(minable_length, SECTORS_PER_MAX_MINING_RANGE as u64);
let num_shards = 1u64 << self.shard_config.miner_shard_mask().count_zeros();
let mining_length = std::cmp::min(
minable_length,
(SECTORS_PER_MAX_MINING_RANGE as u64).saturating_mul(num_shards),
);
let start_position = std::cmp::min(self_start_position, minable_length - mining_length);
let start_position =
@ -75,12 +98,12 @@ impl MineRangeConfig {
let self_start_position = self.start_position?;
let self_end_position = self.end_position?;
if self.start_position >= self.end_position {
if self_start_position >= self_end_position {
return Some(false);
}
Some(
self_start_position <= recall_position + SECTORS_PER_LOAD as u64
|| self_end_position > recall_position,
&& self_end_position > recall_position,
)
}
}
@ -89,7 +112,7 @@ impl PoraService {
pub fn spawn(
executor: TaskExecutor,
msg_recv: broadcast::Receiver<MinerMessage>,
mine_context_receiver: mpsc::UnboundedReceiver<MineContextMessage>,
mine_context_receiver: broadcast::Receiver<MineContextMessage>,
loader: Arc<dyn PoraLoader>,
config: &MinerConfig,
miner_id: H256,
@ -138,15 +161,19 @@ impl PoraService {
Ok(MinerMessage::SetStartPosition(pos)) => {
info!("Change start position to: {:?}", pos);
self.mine_range.start_position = pos;
self.report_reason_if_mine_stop("update mine range");
}
Ok(MinerMessage::SetEndPosition(pos)) => {
info!("Change end position to: {:?}", pos);
self.mine_range.end_position = pos;
self.report_reason_if_mine_stop("update mine range");
}
Ok(MinerMessage::SetShardConfig(shard_config)) => {
self.mine_range.shard_config = shard_config;
self.report_reason_if_mine_stop("update shard");
}
Err(broadcast::error::RecvError::Closed)=>{
Err(broadcast::error::RecvError::Closed) => {
warn!("Unexpected: Mine service config channel closed.");
channel_opened = false;
}
@ -157,28 +184,33 @@ impl PoraService {
}
maybe_msg = self.mine_context_receiver.recv() => {
if let Some(msg) = maybe_msg {
info!("Update mine service: {:?}", msg);
info!("Mine iterations statistics: {}", metrics::report());
self.puzzle = msg.map(|(context, target_quality)| PoraPuzzle {
context, target_quality
});
} else {
warn!("Mine context channel closed.");
match maybe_msg {
Ok(msg) => {
info!("Update mine service: {:?}", msg);
self.puzzle = msg;
self.report_reason_if_mine_stop("update mine context");
},
Err(broadcast::error::RecvError::Closed) => {
warn!("Mine context channel closed.");
},
Err(_) => {}
}
}
() = &mut diastole, if !diastole.is_elapsed() => {
}
_ = async {}, if mining_enabled && cpu_percent > 0 && self.as_miner().map_or(false, |miner| miner.range.mining_length > 0) && diastole.is_elapsed() => {
_ = async {}, if mining_enabled
&& cpu_percent > 0
&& self.as_miner().is_ok()
&& diastole.is_elapsed() => {
let nonce = H256(rand::thread_rng().gen());
let miner = self.as_miner().unwrap();
let timer = time::Instant::now();
if let Some(answer) = miner.batch_iteration(nonce, self.iter_batch).await {
debug!("Hit Pora answer {:?}", answer);
info!("Hit Pora answer {:?}", answer);
if self.mine_answer_sender.send(answer).is_err() {
warn!("Mine submitter channel closed");
}
@ -194,13 +226,31 @@ impl PoraService {
}
#[inline]
fn as_miner(&self) -> Option<Miner> {
let puzzle = self.puzzle.as_ref()?;
fn as_miner(&self) -> Result<Miner, &'static str> {
let puzzle = self.puzzle.as_ref().ok_or("no mine context")?;
let range = self.mine_range.to_valid_range(&puzzle.context)?;
(range.mining_length > 0).then_some(())?;
let range = self
.mine_range
.to_valid_range(&puzzle.context)
.ok_or("no mine range")?;
Some(Miner {
if range.mining_length == 0 {
return Err("mine range is zero");
}
if puzzle.max_shards() < self.mine_range.shard_config.num_shard as u64 {
return Err("too many mine shards");
}
if puzzle.context.flow_length <= U256::one() {
return Err("no data submitted");
}
if self.mine_range.shard_config.num_shard as u64 > puzzle.context.flow_length.as_u64() {
return Err("Not enough flow length to shard");
}
Ok(Miner {
range,
miner_id: &self.miner_id,
mine_range_config: &self.mine_range,
@ -209,4 +259,10 @@ impl PoraService {
loader: &*self.loader,
})
}
/// Logs, at info level, why a miner cannot currently be built.
///
/// `event` names the change that triggered the check. Nothing is logged when
/// `as_miner` succeeds.
fn report_reason_if_mine_stop(&self, event: &'static str) {
    if let Some(reason) = self.as_miner().err() {
        info!(reason, "Mine stopped on {}", event);
    }
}
}

View File

@ -5,17 +5,20 @@ use ethereum_types::Address;
use ethers::contract::ContractCall;
use ethers::contract::EthEvent;
use std::sync::Arc;
use storage::log_store::log_manager::DATA_DB_KEY;
use storage::H256;
use storage_async::Store;
const MINER_ID: &str = "mine.miner_id";
pub async fn load_miner_id(store: &Store) -> storage::error::Result<Option<H256>> {
store.get_config_decoded(&MINER_ID).await
store.get_config_decoded(&MINER_ID, DATA_DB_KEY).await
}
async fn set_miner_id(store: &Store, miner_id: &H256) -> storage::error::Result<()> {
store.set_config_encoded(&MINER_ID, miner_id).await
store
.set_config_encoded(&MINER_ID, miner_id, DATA_DB_KEY)
.await
}
pub(crate) async fn check_and_request_miner_id(
@ -37,6 +40,7 @@ pub(crate) async fn check_and_request_miner_id(
d_id, c_id
))
} else {
check_miner_id(&mine_contract, d_id).await?;
Ok(d_id)
}
}
@ -99,7 +103,7 @@ async fn request_miner_id(
.retries(3)
.await
.map_err(|e| format!("Fail to execute mine answer transaction: {:?}", e))?
.ok_or("Request miner id transaction dropped after 3 retires")?;
.ok_or("Request miner id transaction dropped after 3 retries")?;
let first_log = receipt
.logs

27
node/miner/src/monitor.rs Normal file
View File

@ -0,0 +1,27 @@
use std::time::Duration;
use task_executor::TaskExecutor;
use tokio::time::sleep;
use super::metrics;
/// Background task that periodically logs mine iteration statistics.
pub struct Monitor {
    /// Pause between two consecutive reports.
    period: Duration,
}

impl Monitor {
    /// Creates a monitor reporting every `period` and spawns its loop on
    /// `executor`.
    pub fn spawn(executor: TaskExecutor, period: Duration) {
        let monitor = Self { period };
        let task = async move { Box::pin(monitor.start()).await };
        // NOTE(review): the task is registered under the name "pora_master";
        // confirm this name is intended for the monitor task.
        executor.spawn(task, "pora_master");
    }

    /// Logs the current statistics report, sleeps for `period`, and repeats
    /// without end.
    async fn start(&self) {
        loop {
            info!("Mine iterations statistics: {}", metrics::report());
            sleep(self.period).await;
        }
    }
}

View File

@ -79,7 +79,7 @@ impl<'a> Miner<'a> {
inc_counter(&LOADING_COUNT);
let MineLoadChunk {
loaded_chunk,
avalibilities,
availabilities,
} = self
.loader
.load_sealed_data(recall_position / SECTORS_PER_LOAD as u64)
@ -92,8 +92,8 @@ impl<'a> Miner<'a> {
.into_iter()
.enumerate()
.zip(scratch_pad.iter().cycle())
.zip(avalibilities.into_iter())
.filter_map(|(data, avaliable)| avaliable.then_some(data))
.zip(availabilities.into_iter())
.filter_map(|(data, availiable)| availiable.then_some(data))
{
inc_counter(&PAD_MIX_COUNT);
// Rust can optimize this loop well.
@ -102,18 +102,19 @@ impl<'a> Miner<'a> {
}
let quality = self.pora(idx, &sealed_data, pad_seed);
let quality_scale = self.range.shard_mask.count_zeros();
if quality <= U256::MAX >> quality_scale
&& quality << quality_scale <= *self.target_quality
{
let difficulty_scale_x64 = self
.range
.difficulty_scale_x64(self.context.flow_length.as_u64());
if quality <= (self.target_quality / difficulty_scale_x64) << 64 {
debug!(
"Find a PoRA valid answer, quality: {}, target_quality {}, scale {}",
"Find a PoRA valid answer, quality: {}, target_quality {}, scale {:.3}",
U256::MAX / quality,
U256::MAX / self.target_quality,
quality_scale
difficulty_scale_x64.as_u128() as f64 / u64::MAX as f64
);
inc_counter(&HIT_COUNT);
// Undo mix data when find a valid solition
// Undo mix data when find a valid solution
for (x, y) in sealed_data.iter_mut().zip(scratch_pad.iter()) {
*x ^= y;
}
@ -170,7 +171,7 @@ impl<'a> Miner<'a> {
) -> U256 {
let mut hasher = Blake2b512::new();
hasher.update([0u8; 24]);
hasher.update(seal_index.to_be_bytes());
hasher.update((seal_index as u64).to_be_bytes());
hasher.update(pad_seed);
hasher.update([0u8; 32]);

View File

@ -1,6 +1,6 @@
use ethereum_types::U256;
use tiny_keccak::{Hasher, Keccak};
use zgs_spec::SECTORS_PER_LOAD;
use zgs_spec::{SECTORS_PER_LOAD, SECTORS_PER_MAX_MINING_RANGE};
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub struct RecallRange {
@ -31,12 +31,23 @@ impl RecallRange {
}
pub fn load_position(&self, seed: [u8; 32]) -> Option<u64> {
let (_, origin_recall_offset) = U256::from_big_endian(&seed)
.div_mod(U256::from((self.mining_length as usize) / SECTORS_PER_LOAD));
let origin_recall_offset = U256::from_big_endian(&seed)
.checked_rem(U256::from((self.mining_length as usize) / SECTORS_PER_LOAD))?;
let origin_recall_offset = origin_recall_offset.as_u64();
let recall_offset = (origin_recall_offset & self.shard_mask) | self.shard_id;
Some(self.start_position + recall_offset * SECTORS_PER_LOAD as u64)
self.start_position
.checked_add(recall_offset * SECTORS_PER_LOAD as u64)
}
/// Returns the ratio between the unsharded and the sharded mining length
/// as a fixed-point number with 64 fractional bits.
///
/// Both lengths are capped at `SECTORS_PER_MAX_MINING_RANGE`. The sharded
/// length is `flow_length` shifted right by the number of zero bits in
/// `shard_mask`.
///
/// NOTE(review): the result is obtained by dividing by the sharded length,
/// so this presumably relies on callers ensuring that length is non-zero —
/// confirm against call sites.
pub fn difficulty_scale_x64(&self, flow_length: u64) -> U256 {
    let range_cap = SECTORS_PER_MAX_MINING_RANGE as u64;
    let shard_bits = self.shard_mask.count_zeros();

    let no_shard_mine_length = flow_length.min(range_cap);
    let sharded_mine_length = (flow_length >> shard_bits).min(range_cap);

    let scaled_numerator = U256::from(no_shard_mine_length) << 64;
    scaled_numerator / sharded_mine_length
}
}

View File

@ -1,6 +1,7 @@
use std::{collections::BTreeMap, sync::Arc};
use ethereum_types::H256;
use ethers::prelude::{Http, Provider, RetryClient};
use tokio::time::{sleep, Duration, Instant};
use contract_interface::{EpochRangeWithContextDigest, ZgsFlow};
@ -12,14 +13,14 @@ use storage_async::Store;
use task_executor::TaskExecutor;
use zgs_spec::SECTORS_PER_SEAL;
use crate::config::{MineServiceMiddleware, MinerConfig};
use crate::config::MinerConfig;
const DB_QUERY_PERIOD_ON_NO_TASK: u64 = 1;
const DB_QUERY_PERIOD_ON_ERROR: u64 = 5;
const CHAIN_STATUS_QUERY_PERIOD: u64 = 5;
pub struct Sealer {
flow_contract: ZgsFlow<MineServiceMiddleware>,
flow_contract: ZgsFlow<Provider<RetryClient<Http>>>,
store: Arc<Store>,
context_cache: BTreeMap<u128, EpochRangeWithContextDigest>,
last_context_flow_length: u64,
@ -29,7 +30,7 @@ pub struct Sealer {
impl Sealer {
pub fn spawn(
executor: TaskExecutor,
provider: Arc<MineServiceMiddleware>,
provider: Arc<Provider<RetryClient<Http>>>,
store: Arc<Store>,
config: &MinerConfig,
miner_id: H256,

View File

@ -1,13 +1,14 @@
use crate::miner_id::check_and_request_miner_id;
use crate::monitor::Monitor;
use crate::sealer::Sealer;
use crate::submitter::Submitter;
use crate::{config::MinerConfig, mine::PoraService, watcher::MineContextWatcher};
use network::NetworkMessage;
use network::NetworkSender;
use std::sync::Arc;
use std::time::Duration;
use storage::config::ShardConfig;
use storage_async::Store;
use tokio::sync::broadcast;
use tokio::sync::mpsc;
#[derive(Clone, Debug)]
pub enum MinerMessage {
@ -27,15 +28,17 @@ pub struct MineService;
impl MineService {
pub async fn spawn(
executor: task_executor::TaskExecutor,
_network_send: mpsc::UnboundedSender<NetworkMessage>,
_network_send: NetworkSender,
config: MinerConfig,
store: Arc<Store>,
) -> Result<broadcast::Sender<MinerMessage>, String> {
let provider = Arc::new(config.make_provider().await?);
let provider = config.make_provider()?;
let signing_provider = Arc::new(config.make_signing_provider().await?);
let (msg_send, msg_recv) = broadcast::channel(1024);
let miner_id = check_and_request_miner_id(&config, store.as_ref(), &provider).await?;
let miner_id =
check_and_request_miner_id(&config, store.as_ref(), &signing_provider).await?;
debug!("miner id setting complete.");
let mine_context_receiver = MineContextWatcher::spawn(
@ -48,7 +51,7 @@ impl MineService {
let mine_answer_receiver = PoraService::spawn(
executor.clone(),
msg_recv.resubscribe(),
mine_context_receiver,
mine_context_receiver.resubscribe(),
store.clone(),
&config,
miner_id,
@ -57,12 +60,16 @@ impl MineService {
Submitter::spawn(
executor.clone(),
mine_answer_receiver,
mine_context_receiver,
provider.clone(),
signing_provider,
store.clone(),
&config,
);
Sealer::spawn(executor, provider, store, &config, miner_id);
Sealer::spawn(executor.clone(), provider, store, &config, miner_id);
Monitor::spawn(executor, Duration::from_secs(5));
debug!("Starting miner service");

View File

@ -2,25 +2,30 @@ use contract_interface::PoraAnswer;
use contract_interface::{PoraMine, ZgsFlow};
use ethereum_types::U256;
use ethers::contract::ContractCall;
use ethers::prelude::{Http, Provider, RetryClient};
use ethers::providers::PendingTransaction;
use hex::ToHex;
use shared_types::FlowRangeProof;
use std::sync::Arc;
use std::time::Duration;
use storage::H256;
use storage_async::Store;
use task_executor::TaskExecutor;
use tokio::sync::mpsc;
use tokio::sync::{broadcast, mpsc};
use crate::config::{MineServiceMiddleware, MinerConfig};
use crate::pora::AnswerWithoutProof;
use crate::watcher::MineContextMessage;
use zgs_spec::{BYTES_PER_SEAL, SECTORS_PER_SEAL};
const SUBMISSION_RETIES: usize = 3;
const SUBMISSION_RETRIES: usize = 15;
pub struct Submitter {
mine_answer_receiver: mpsc::UnboundedReceiver<AnswerWithoutProof>,
mine_context_receiver: broadcast::Receiver<MineContextMessage>,
mine_contract: PoraMine<MineServiceMiddleware>,
flow_contract: ZgsFlow<MineServiceMiddleware>,
flow_contract: ZgsFlow<Provider<RetryClient<Http>>>,
default_gas_limit: Option<U256>,
store: Arc<Store>,
}
@ -29,16 +34,19 @@ impl Submitter {
pub fn spawn(
executor: TaskExecutor,
mine_answer_receiver: mpsc::UnboundedReceiver<AnswerWithoutProof>,
provider: Arc<MineServiceMiddleware>,
mine_context_receiver: broadcast::Receiver<MineContextMessage>,
provider: Arc<Provider<RetryClient<Http>>>,
signing_provider: Arc<MineServiceMiddleware>,
store: Arc<Store>,
config: &MinerConfig,
) {
let mine_contract = PoraMine::new(config.mine_address, provider.clone());
let mine_contract = PoraMine::new(config.mine_address, signing_provider);
let flow_contract = ZgsFlow::new(config.flow_address, provider);
let default_gas_limit = config.submission_gas;
let submitter = Submitter {
mine_answer_receiver,
mine_context_receiver,
mine_contract,
flow_contract,
store,
@ -51,18 +59,39 @@ impl Submitter {
}
async fn start(mut self) {
let mut current_context_digest: Option<H256> = None;
loop {
match self.mine_answer_receiver.recv().await {
Some(answer) => {
if let Err(e) = self.submit_answer(answer).await {
warn!(e)
tokio::select! {
answer_msg = self.mine_answer_receiver.recv() => {
match answer_msg {
Some(answer) => {
if Some(answer.context_digest) != current_context_digest {
info!("Skip submission because of inconsistent context digest");
continue;
}
if let Err(e) = self.submit_answer(answer).await {
warn!(e);
}
}
None => {
warn!("Mine submitter stopped because mine answer channel is closed.");
return;
}
}
}
None => {
warn!("Mine submitter stopped because mine answer channel is closed.");
break;
context_msg = self.mine_context_receiver.recv() => {
match context_msg {
Ok(puzzle) => {
current_context_digest = puzzle.map(|p| p.context_digest());
}
Err(broadcast::error::RecvError::Closed) => {
warn!("Mine context channel closed.");
},
Err(_) => {}
}
}
};
}
}
}
@ -127,7 +156,7 @@ impl Submitter {
let pending_transaction: PendingTransaction<'_, _> = submission_call
.send()
.await
.map_err(|e| format!("Fail to send mine answer transaction: {:?}", e))?;
.map_err(|e| format!("Fail to send PoRA submission transaction: {:?}", e))?;
debug!(
"Signed submission transaction hash: {:?}",
@ -135,16 +164,16 @@ impl Submitter {
);
let receipt = pending_transaction
.retries(SUBMISSION_RETIES)
.retries(SUBMISSION_RETRIES)
.interval(Duration::from_secs(2))
.await
.map_err(|e| format!("Fail to execute mine answer transaction: {:?}", e))?
.map_err(|e| format!("Fail to execute PoRA submission transaction: {:?}", e))?
.ok_or(format!(
"Mine answer transaction dropped after {} retires",
SUBMISSION_RETIES
"PoRA submission transaction dropped after {} retries",
SUBMISSION_RETRIES
))?;
info!("Submit PoRA success");
debug!("Receipt: {:?}", receipt);
info!("Submit PoRA success, receipt: {:?}", receipt);
Ok(())
}

View File

@ -14,14 +14,14 @@ use tokio::{
try_join,
};
use crate::{config::MineServiceMiddleware, mine::PoraPuzzle, MinerConfig, MinerMessage};
use ethers::prelude::{Http, RetryClient};
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use std::{ops::DerefMut, str::FromStr};
use crate::{config::MineServiceMiddleware, MinerConfig, MinerMessage};
pub type MineContextMessage = Option<(MineContext, U256)>;
pub type MineContextMessage = Option<PoraPuzzle>;
lazy_static! {
pub static ref EMPTY_HASH: H256 =
@ -29,12 +29,13 @@ lazy_static! {
}
pub struct MineContextWatcher {
provider: Arc<MineServiceMiddleware>,
flow_contract: ZgsFlow<MineServiceMiddleware>,
mine_contract: PoraMine<MineServiceMiddleware>,
provider: Arc<Provider<RetryClient<Http>>>,
flow_contract: ZgsFlow<Provider<RetryClient<Http>>>,
mine_contract: PoraMine<Provider<RetryClient<Http>>>,
mine_context_sender: mpsc::UnboundedSender<MineContextMessage>,
mine_context_sender: broadcast::Sender<MineContextMessage>,
last_report: MineContextMessage,
query_interval: Duration,
msg_recv: broadcast::Receiver<MinerMessage>,
}
@ -43,16 +44,14 @@ impl MineContextWatcher {
pub fn spawn(
executor: TaskExecutor,
msg_recv: broadcast::Receiver<MinerMessage>,
provider: Arc<MineServiceMiddleware>,
provider: Arc<Provider<RetryClient<Http>>>,
config: &MinerConfig,
) -> mpsc::UnboundedReceiver<MineContextMessage> {
let provider = provider;
) -> broadcast::Receiver<MineContextMessage> {
let mine_contract = PoraMine::new(config.mine_address, provider.clone());
let flow_contract = ZgsFlow::new(config.flow_address, provider.clone());
let (mine_context_sender, mine_context_receiver) =
mpsc::unbounded_channel::<MineContextMessage>();
broadcast::channel::<MineContextMessage>(4096);
let watcher = MineContextWatcher {
provider,
flow_contract,
@ -60,6 +59,7 @@ impl MineContextWatcher {
mine_context_sender,
msg_recv,
last_report: None,
query_interval: config.context_query_interval,
};
executor.spawn(
async move { Box::pin(watcher.start()).await },
@ -95,7 +95,7 @@ impl MineContextWatcher {
}
_ = async {}, if mining_enabled && mining_throttle.is_elapsed() => {
mining_throttle.as_mut().reset(Instant::now() + Duration::from_secs(1));
mining_throttle.as_mut().reset(Instant::now() + self.query_interval);
if let Err(err) = self.query_recent_context().await {
warn!(err);
}
@ -108,12 +108,17 @@ impl MineContextWatcher {
let context_call = self.flow_contract.make_context_with_result();
let valid_call = self.mine_contract.can_submit();
let quality_call = self.mine_contract.pora_target();
let shards_call = self.mine_contract.max_shards();
let (context, can_submit, quality) =
try_join!(context_call.call(), valid_call.call(), quality_call.call())
.map_err(|e| format!("Failed to query mining context: {:?}", e))?;
let (context, can_submit, quality, max_shards) = try_join!(
context_call.call(),
valid_call.call(),
quality_call.call(),
shards_call.call()
)
.map_err(|e| format!("Failed to query mining context: {:?}", e))?;
let report = if can_submit && context.digest != EMPTY_HASH.0 {
Some((context, quality))
Some(PoraPuzzle::new(context, quality, max_shards))
} else {
None
};

View File

@ -21,7 +21,6 @@ lazy_static = "1.4.0"
lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
lru = "0.7.7"
parking_lot = "0.12.1"
prometheus-client = "0.16.0"
rand = "0.8.5"
regex = "1.5.6"
serde = { version = "1.0.137", features = ["derive"] }
@ -41,6 +40,8 @@ unsigned-varint = { version = "=0.7.1", features = ["codec"] }
if-addrs = "0.10.1"
slog = "2.7.0"
igd = "0.12.1"
duration-str = "0.5.1"
channel = { path = "../../common/channel" }
[dependencies.libp2p]
version = "0.45.1"
@ -49,7 +50,7 @@ features = ["websocket", "identify", "mplex", "yamux", "noise", "gossipsub", "dn
[dev-dependencies]
exit-future = "0.2.0"
tempfile = "3.3.0"
tempfile = "3.12.0"
tracing-test = "0.2.2"
unused_port = { path = "../../common/unused_port" }
void = "1.0.2"

View File

@ -20,6 +20,8 @@ pub struct GossipCache {
topic_msgs: HashMap<GossipTopic, HashMap<Vec<u8>, Key>>,
/// Timeout for Example messages.
example: Option<Duration>,
/// Timeout for NewFile messages.
new_file: Option<Duration>,
/// Timeout for FindFile messages.
find_file: Option<Duration>,
/// Timeout for FindChunks messages.
@ -37,6 +39,8 @@ pub struct GossipCacheBuilder {
default_timeout: Option<Duration>,
/// Timeout for Example messages.
example: Option<Duration>,
/// Timeout for NewFile messages.
new_file: Option<Duration>,
/// Timeout for FindFile messages.
find_file: Option<Duration>,
/// Timeout for FindChunks messages.
@ -64,6 +68,12 @@ impl GossipCacheBuilder {
self
}
/// Timeout for NewFile messages.
pub fn new_file_timeout(mut self, timeout: Duration) -> Self {
self.new_file = Some(timeout);
self
}
/// Timeout for FindFile messages.
pub fn find_file_timeout(mut self, timeout: Duration) -> Self {
self.find_file = Some(timeout);
@ -98,6 +108,7 @@ impl GossipCacheBuilder {
let GossipCacheBuilder {
default_timeout,
example,
new_file,
find_file,
find_chunks,
announce_file,
@ -109,6 +120,7 @@ impl GossipCacheBuilder {
expirations: DelayQueue::default(),
topic_msgs: HashMap::default(),
example: example.or(default_timeout),
new_file: new_file.or(default_timeout),
find_file: find_file.or(default_timeout),
find_chunks: find_chunks.or(default_timeout),
announce_file: announce_file.or(default_timeout),
@ -129,6 +141,7 @@ impl GossipCache {
pub fn insert(&mut self, topic: GossipTopic, data: Vec<u8>) {
let expire_timeout = match topic.kind() {
GossipKind::Example => self.example,
GossipKind::NewFile => self.new_file,
GossipKind::FindFile => self.find_file,
GossipKind::FindChunks => self.find_chunks,
GossipKind::AnnounceFile => self.announce_file,

View File

@ -6,6 +6,7 @@ use crate::peer_manager::{
ConnectionDirection, PeerManager, PeerManagerEvent,
};
use crate::rpc::methods::DataByHashRequest;
use crate::rpc::methods::FileAnnouncement;
use crate::rpc::methods::GetChunksRequest;
use crate::rpc::*;
use crate::service::Context as ServiceContext;
@ -13,6 +14,7 @@ use crate::types::{GossipEncoding, GossipKind, GossipTopic, SnappyTransform};
use crate::{error, metrics, Enr, NetworkGlobals, PubsubMessage, TopicHash};
use futures::stream::StreamExt;
use libp2p::gossipsub::error::PublishError;
use libp2p::gossipsub::TopicScoreParams;
use libp2p::{
core::{
connection::ConnectionId, identity::Keypair, multiaddr::Protocol as MProtocol, Multiaddr,
@ -226,7 +228,33 @@ impl<AppReqId: ReqId> Behaviour<AppReqId> {
// trace!(behaviour_log, "Using peer score params"; "params" => ?params);
let params = libp2p::gossipsub::PeerScoreParams::default();
let mut params = libp2p::gossipsub::PeerScoreParams::default();
let get_hash = |kind: GossipKind| -> TopicHash {
let topic: Topic = GossipTopic::new(kind, GossipEncoding::default()).into();
topic.hash()
};
params
.topics
.insert(get_hash(GossipKind::NewFile), TopicScoreParams::default());
params
.topics
.insert(get_hash(GossipKind::FindFile), TopicScoreParams::default());
params.topics.insert(
get_hash(GossipKind::FindChunks),
TopicScoreParams::default(),
);
params.topics.insert(
get_hash(GossipKind::AnnounceFile),
TopicScoreParams::default(),
);
params.topics.insert(
get_hash(GossipKind::AnnounceShardConfig),
TopicScoreParams::default(),
);
params.topics.insert(
get_hash(GossipKind::AnnounceChunks),
TopicScoreParams::default(),
);
// Set up a scoring update interval
let update_gossipsub_scores = tokio::time::interval(params.decay_interval);
@ -239,7 +267,7 @@ impl<AppReqId: ReqId> Behaviour<AppReqId> {
discovery_enabled: !config.disable_discovery,
metrics_enabled: config.metrics_enabled,
target_peer_count: config.target_peers,
..Default::default()
..config.peer_manager
};
let slot_duration = std::time::Duration::from_secs(12);
@ -519,6 +547,9 @@ impl<AppReqId: ReqId> Behaviour<AppReqId> {
Request::DataByHash { .. } => {
metrics::inc_counter_vec(&metrics::TOTAL_RPC_REQUESTS, &["data_by_hash"])
}
Request::AnnounceFile { .. } => {
metrics::inc_counter_vec(&metrics::TOTAL_RPC_REQUESTS, &["announce_file"])
}
Request::GetChunks { .. } => {
metrics::inc_counter_vec(&metrics::TOTAL_RPC_REQUESTS, &["get_chunks"])
}
@ -561,7 +592,7 @@ where
// peer that originally published the message.
match PubsubMessage::decode(&gs_msg.topic, &gs_msg.data) {
Err(e) => {
debug!(topic = ?gs_msg.topic, error = ?e, "Could not decode gossipsub message");
debug!(topic = ?gs_msg.topic, %propagation_source, error = ?e, "Could not decode gossipsub message");
//reject the message
if let Err(e) = self.gossipsub.report_message_validation_result(
&id,
@ -570,6 +601,24 @@ where
) {
warn!(message_id = %id, peer_id = %propagation_source, error = ?e, "Failed to report message validation");
}
self.peer_manager.report_peer(
&propagation_source,
PeerAction::Fatal,
ReportSource::Gossipsub,
None,
"gossipsub message decode error",
);
if let Some(source) = &gs_msg.source {
self.peer_manager.report_peer(
source,
PeerAction::Fatal,
ReportSource::Gossipsub,
None,
"gossipsub message decode error",
);
}
}
Ok(msg) => {
// Notify the network
@ -731,6 +780,9 @@ where
InboundRequest::DataByHash(req) => {
self.propagate_request(peer_request_id, peer_id, Request::DataByHash(req))
}
InboundRequest::AnnounceFile(req) => {
self.propagate_request(peer_request_id, peer_id, Request::AnnounceFile(req))
}
InboundRequest::GetChunks(req) => {
self.propagate_request(peer_request_id, peer_id, Request::GetChunks(req))
}
@ -945,6 +997,8 @@ pub enum Request {
Status(StatusMessage),
/// A data by hash request.
DataByHash(DataByHashRequest),
/// An AnnounceFile message.
AnnounceFile(FileAnnouncement),
/// A GetChunks request.
GetChunks(GetChunksRequest),
}
@ -954,6 +1008,7 @@ impl std::convert::From<Request> for OutboundRequest {
match req {
Request::Status(s) => OutboundRequest::Status(s),
Request::DataByHash(r) => OutboundRequest::DataByHash(r),
Request::AnnounceFile(r) => OutboundRequest::AnnounceFile(r),
Request::GetChunks(r) => OutboundRequest::GetChunks(r),
}
}

View File

@ -1,5 +1,5 @@
use crate::types::GossipKind;
use crate::{Enr, PeerIdSerialized};
use crate::{peer_manager, Enr, PeerIdSerialized};
use directory::{
DEFAULT_BEACON_NODE_DIR, DEFAULT_HARDCODED_NETWORK, DEFAULT_NETWORK_DIR, DEFAULT_ROOT_DIR,
};
@ -11,6 +11,7 @@ use libp2p::gossipsub::{
use libp2p::Multiaddr;
use serde_derive::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use shared_types::NetworkIdentity;
use std::path::PathBuf;
use std::time::Duration;
@ -122,6 +123,16 @@ pub struct Config {
/// Whether metrics are enabled.
pub metrics_enabled: bool,
/// The id of the storage network.
pub network_id: NetworkIdentity,
pub peer_db: peer_manager::peerdb::PeerDBConfig,
pub peer_manager: peer_manager::config::Config,
/// Whether to disable network identity in ENR.
/// This is for test purpose only.
pub disable_enr_network_id: bool,
}
impl Default for Config {
@ -146,8 +157,8 @@ impl Default for Config {
let filter_rate_limiter = Some(
discv5::RateLimiterBuilder::new()
.total_n_every(300, Duration::from_secs(1)) // Allow bursts, average 300 per second
.ip_n_every(300, Duration::from_secs(1)) // Allow bursts, average 300 per second
.node_n_every(300, Duration::from_secs(1)) // Allow bursts, average 300 per second
.ip_n_every(9, Duration::from_secs(1)) // Allow bursts, average 9 per second
.node_n_every(8, Duration::from_secs(1)) // Allow bursts, average 8 per second
.build()
.expect("The total rate limit has been specified"),
);
@ -199,6 +210,10 @@ impl Default for Config {
shutdown_after_sync: false,
topics: Vec::new(),
metrics_enabled: false,
network_id: Default::default(),
peer_db: Default::default(),
peer_manager: Default::default(),
disable_enr_network_id: false,
}
}
}

View File

@ -1,9 +1,10 @@
//! Helper functions and an extension trait for Ethereum 2 ENRs.
pub use discv5::enr::{CombinedKey, EnrBuilder};
use ssz::Encode;
use super::enr_ext::CombinedKeyExt;
use super::ENR_FILENAME;
use super::enr_ext::{CombinedKeyExt, ENR_CONTENT_KEY_NETWORK_ID};
use super::{EnrExt, ENR_FILENAME};
use crate::types::Enr;
use crate::NetworkConfig;
use discv5::enr::EnrKey;
@ -32,7 +33,9 @@ pub fn use_or_load_enr(
Ok(disk_enr) => {
// if the same node id, then we may need to update our sequence number
if local_enr.node_id() == disk_enr.node_id() {
if compare_enr(local_enr, &disk_enr) {
if compare_enr(local_enr, &disk_enr)
&& is_disk_enr_network_id_unchanged(&disk_enr, config)
{
debug!(file = ?enr_f, "ENR loaded from disk");
// the stored ENR has the same configuration, use it
*local_enr = disk_enr;
@ -94,6 +97,13 @@ pub fn create_enr_builder_from_config<T: EnrKey>(
let tcp_port = config.enr_tcp_port.unwrap_or(config.libp2p_port);
builder.tcp(tcp_port);
}
// add network identity info in ENR if not disabled
if !config.disable_enr_network_id {
builder.add_value(
ENR_CONTENT_KEY_NETWORK_ID,
&config.network_id.as_ssz_bytes(),
);
}
builder
}
@ -117,6 +127,14 @@ fn compare_enr(local_enr: &Enr, disk_enr: &Enr) -> bool {
&& (local_enr.udp().is_none() || local_enr.udp() == disk_enr.udp())
}
/// Tells whether the network identity recorded in the on-disk ENR is still
/// consistent with the local configuration, i.e. whether the stored ENR may be reused.
///
/// The disk ENR is considered unchanged when either:
/// - it carries a decodable identity equal to `config.network_id` and the
///   identity is not disabled in `config`, or
/// - it carries no identity at all and the identity is disabled in `config`.
///
/// An identity that is present but fails to decode never matches.
fn is_disk_enr_network_id_unchanged(disk_enr: &Enr, config: &NetworkConfig) -> bool {
    let identity_enabled = !config.disable_enr_network_id;

    match disk_enr.network_identity() {
        // Nothing stored on disk: only consistent when we would not store one either.
        None => !identity_enabled,
        // Stored value is corrupt or of an unknown format.
        Some(Err(_)) => false,
        // Stored value decodes: we must want one, and it must be ours.
        Some(Ok(disk_id)) => identity_enabled && disk_id == config.network_id,
    }
}
/// Loads enr from the given directory
pub fn load_enr_from_disk(dir: &Path) -> Result<Enr, String> {
let enr_f = dir.join(ENR_FILENAME);

View File

@ -2,8 +2,12 @@
use crate::{Enr, Multiaddr, PeerId};
use discv5::enr::{CombinedKey, CombinedPublicKey};
use libp2p::core::{identity::Keypair, identity::PublicKey, multiaddr::Protocol};
use shared_types::NetworkIdentity;
use ssz::Decode;
use tiny_keccak::{Hasher, Keccak};
/// ENR content key under which the SSZ-encoded network identity is stored.
pub(crate) const ENR_CONTENT_KEY_NETWORK_ID: &str = "network_identity";
/// Extend ENR for libp2p types.
pub trait EnrExt {
/// The libp2p `PeerId` for the record.
@ -24,6 +28,9 @@ pub trait EnrExt {
/// Returns any multiaddrs that contain the TCP protocol.
fn multiaddr_tcp(&self) -> Vec<Multiaddr>;
/// Returns network identity in content.
fn network_identity(&self) -> Option<Result<NetworkIdentity, ssz::DecodeError>>;
}
/// Extend ENR CombinedPublicKey for libp2p types.
@ -189,6 +196,12 @@ impl EnrExt for Enr {
}
multiaddrs
}
/// Returns the network identity stored in the ENR content, if any.
///
/// - `None`: the record has no value under `ENR_CONTENT_KEY_NETWORK_ID`.
/// - `Some(Err(_))`: a value is present but could not be SSZ-decoded.
/// - `Some(Ok(id))`: the decoded network identity.
fn network_identity(&self) -> Option<Result<NetworkIdentity, ssz::DecodeError>> {
// `?` short-circuits to `None` when the key is absent from the record.
let value = self.get(ENR_CONTENT_KEY_NETWORK_ID)?;
Some(NetworkIdentity::from_ssz_bytes(value))
}
}
impl CombinedKeyPublicExt for CombinedPublicKey {

View File

@ -139,6 +139,7 @@ impl Discovery {
udp = ?local_enr.udp(),
tcp = ?local_enr.tcp(),
udp4_socket = ?local_enr.udp_socket(),
network_id = ?local_enr.network_identity(),
"ENR Initialised",
);
@ -158,6 +159,7 @@ impl Discovery {
ip = ?bootnode_enr.ip(),
udp = ?bootnode_enr.udp(),
tcp = ?bootnode_enr.tcp(),
network_id = ?bootnode_enr.network_identity(),
"Adding node to routing table",
);
let repr = bootnode_enr.to_string();
@ -205,13 +207,37 @@ impl Discovery {
match result {
Ok(enr) => {
debug!(
multiaddr = %original_addr.to_string(),
node_id = %enr.node_id(),
peer_id = %enr.peer_id(),
ip = ?enr.ip(),
udp = ?enr.udp(),
tcp = ?enr.tcp(),
"Adding node to routing table",
network_id = ?enr.network_identity(),
"Adding bootnode to routing table",
);
// check network identity in bootnode ENR if required
if !config.disable_enr_network_id {
match enr.network_identity() {
Some(Ok(id)) => {
if id != config.network_id {
error!(bootnode=?id, local=?config.network_id, "Bootnode network identity mismatch");
continue;
}
}
Some(Err(err)) => {
error!(?err, "Failed to decode bootnode network identity");
continue;
}
None => {
error!("Bootnode has no network identity");
continue;
}
}
}
// add bootnode into routing table
let _ = discv5.add_enr(enr).map_err(|e| {
error!(
addr = %original_addr.to_string(),
@ -401,10 +427,16 @@ impl Discovery {
// Generate a random target node id.
let random_node = NodeId::random();
// only discover nodes with same network identity
let local_network_id = self.network_globals.network_id();
let predicate = move |enr: &Enr| -> bool {
matches!(enr.network_identity(), Some(Ok(id)) if id == local_network_id)
};
// Build the future
let query_future = self
.discv5
.find_node_predicate(random_node, Box::new(|_| true), target_peers)
.find_node_predicate(random_node, Box::new(predicate), target_peers)
.map(|v| QueryResult {
query_type: query,
result: v,

View File

@ -25,6 +25,7 @@ pub mod types;
pub use config::gossip_max_size;
use std::net::SocketAddr;
use std::time::Instant;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use shared_types::TxID;
@ -73,8 +74,6 @@ impl<'de> Deserialize<'de> for PeerIdSerialized {
pub use crate::types::{error, Enr, GossipTopic, NetworkGlobals, PubsubMessage};
pub use prometheus_client;
pub use behaviour::{BehaviourEvent, Gossipsub, PeerRequestId, Request, Response};
pub use config::Config as NetworkConfig;
pub use discovery::{CombinedKeyExt, EnrExt};
@ -94,11 +93,16 @@ pub use peer_manager::{
};
pub use service::{load_private_key, Context, Libp2pEvent, Service, NETWORK_KEY_FILENAME};
/// Defines the current P2P protocol version.
/// - v1: Broadcast FindFile & AnnounceFile messages in the whole network, which made the network too heavy.
/// - v2: Publish NewFile to neighbors only and announce file via RPC message.
pub const PROTOCOL_VERSION: [u8; 3] = [0, 2, 0];
/// Application level requests sent to the network.
#[derive(Debug, Clone, Copy)]
pub enum RequestId {
Router,
Sync(SyncId),
Router(Instant),
Sync(Instant, SyncId),
}
#[derive(Debug, Clone, Copy)]
@ -155,3 +159,10 @@ pub enum NetworkMessage {
udp_socket: Option<SocketAddr>,
},
}
/// Sending half of the channel that carries [`NetworkMessage`]s.
pub type NetworkSender = channel::metrics::Sender<NetworkMessage>;
/// Receiving half of the channel that carries [`NetworkMessage`]s.
pub type NetworkReceiver = channel::metrics::Receiver<NetworkMessage>;
/// Creates the sender/receiver pair for [`NetworkMessage`]s.
///
/// The pair comes from `channel::metrics::unbounded_channel`, labelled `"network"`.
pub fn new_network_channel() -> (NetworkSender, NetworkReceiver) {
channel::metrics::unbounded_channel("network")
}

View File

@ -3,10 +3,9 @@
//! Currently supported strategies:
//! - UPnP
use crate::{NetworkConfig, NetworkMessage};
use crate::{NetworkConfig, NetworkMessage, NetworkSender};
use if_addrs::get_if_addrs;
use std::net::{IpAddr, SocketAddr, SocketAddrV4};
use tokio::sync::mpsc;
/// Configuration required to construct the UPnP port mappings.
pub struct UPnPConfig {
@ -36,10 +35,7 @@ impl UPnPConfig {
}
/// Attempts to construct external port mappings with UPnP.
pub fn construct_upnp_mappings(
config: UPnPConfig,
network_send: mpsc::UnboundedSender<NetworkMessage>,
) {
pub fn construct_upnp_mappings(config: UPnPConfig, network_send: NetworkSender) {
info!("UPnP Attempting to initialise routes");
match igd::search_gateway(Default::default()) {
Err(e) => info!(error = %e, "UPnP not available"),

View File

@ -1,3 +1,8 @@
use std::time::Duration;
use duration_str::deserialize_duration;
use serde::{Deserialize, Serialize};
/// The time in seconds between re-status's peers.
pub const DEFAULT_STATUS_INTERVAL: u64 = 300;
@ -11,9 +16,14 @@ pub const DEFAULT_PING_INTERVAL_INBOUND: u64 = 20;
pub const DEFAULT_TARGET_PEERS: usize = 50;
/// Configurations for the PeerManager.
#[derive(Debug)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
/* Peer count related configurations */
/// The heartbeat performs regular updates such as updating reputations and performing discovery
/// requests. This defines the interval in seconds.
#[serde(deserialize_with = "deserialize_duration")]
pub heartbeat_interval: Duration,
/// Whether discovery is enabled.
pub discovery_enabled: bool,
/// Whether metrics are enabled.
@ -35,6 +45,7 @@ pub struct Config {
impl Default for Config {
fn default() -> Self {
Config {
heartbeat_interval: Duration::from_secs(30),
discovery_enabled: true,
metrics_enabled: false,
target_peer_count: DEFAULT_TARGET_PEERS,

View File

@ -30,10 +30,6 @@ use std::net::IpAddr;
pub mod config;
mod network_behaviour;
/// The heartbeat performs regular updates such as updating reputations and performing discovery
/// requests. This defines the interval in seconds.
const HEARTBEAT_INTERVAL: u64 = 30;
/// This is used in the pruning logic. We avoid pruning peers on sync-committees if doing so would
/// lower our peer count below this number. Instead we favour a non-uniform distribution of subnet
/// peers.
@ -105,6 +101,7 @@ impl PeerManager {
network_globals: Arc<NetworkGlobals>,
) -> error::Result<Self> {
let config::Config {
heartbeat_interval,
discovery_enabled,
metrics_enabled,
target_peer_count,
@ -114,7 +111,7 @@ impl PeerManager {
} = cfg;
// Set up the peer manager heartbeat interval
let heartbeat = tokio::time::interval(tokio::time::Duration::from_secs(HEARTBEAT_INTERVAL));
let heartbeat = tokio::time::interval(heartbeat_interval);
Ok(PeerManager {
network_globals,
@ -460,6 +457,7 @@ impl PeerManager {
Protocol::Goodbye => PeerAction::LowToleranceError,
Protocol::Status => PeerAction::LowToleranceError,
Protocol::DataByHash => PeerAction::MidToleranceError,
Protocol::AnnounceFile => PeerAction::MidToleranceError,
Protocol::GetChunks => PeerAction::MidToleranceError,
},
},
@ -474,6 +472,7 @@ impl PeerManager {
Protocol::Goodbye => return,
Protocol::Status => PeerAction::LowToleranceError,
Protocol::DataByHash => return,
Protocol::AnnounceFile => return,
Protocol::GetChunks => return,
}
}
@ -488,6 +487,7 @@ impl PeerManager {
Protocol::Goodbye => return,
Protocol::Status => return,
Protocol::DataByHash => PeerAction::MidToleranceError,
Protocol::AnnounceFile => PeerAction::MidToleranceError,
Protocol::GetChunks => PeerAction::MidToleranceError,
},
},

View File

@ -3,13 +3,15 @@ use crate::{
multiaddr::{Multiaddr, Protocol},
Enr, Gossipsub, PeerId,
};
use duration_str::deserialize_duration;
use peer_info::{ConnectionDirection, PeerConnectionStatus, PeerInfo};
use rand::seq::SliceRandom;
use score::{PeerAction, ReportSource, Score, ScoreState};
use std::cmp::Ordering;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::net::{IpAddr, SocketAddr};
use std::time::Instant;
use std::{cmp::Ordering, time::Duration};
use sync_status::SyncStatus;
pub mod client;
@ -17,21 +19,41 @@ pub mod peer_info;
pub mod score;
pub mod sync_status;
/// Max number of disconnected nodes to remember.
const MAX_DC_PEERS: usize = 500;
/// The maximum number of banned nodes to remember.
pub const MAX_BANNED_PEERS: usize = 1000;
/// We ban an IP if there are more than `BANNED_PEERS_PER_IP_THRESHOLD` banned peers with this IP.
const BANNED_PEERS_PER_IP_THRESHOLD: usize = 5;
/// Relative factor of peers that are allowed to have a negative gossipsub score without penalizing
/// them in lighthouse.
const ALLOWED_NEGATIVE_GOSSIPSUB_FACTOR: f32 = 0.1;
/// The time we allow peers to be in the dialing state in our PeerDb before we revert them to a
/// disconnected state.
const DIAL_TIMEOUT: u64 = 15;
/// Tunable limits and timeouts used by the `PeerDB`.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input falls back to
/// the value produced by the `Default` implementation.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(default)]
pub struct PeerDBConfig {
/// The maximum number of disconnected nodes to remember.
pub max_disconnected_peers: usize,
/// The maximum number of banned nodes to remember.
pub max_banned_peers: usize,
/// We ban an IP if there are more than `banned_peers_per_ip_threshold` banned peers with this IP.
pub banned_peers_per_ip_threshold: usize,
/// Relative factor of peers that are allowed to have a negative gossipsub score without penalizing them in lighthouse.
pub allowed_negative_gossipsub_factor: f32,
/// The time we allow peers to be in the dialing state in our PeerDb before we revert them to a disconnected state.
///
/// NOTE(review): the field is spelled `dail_timeout` (presumably meant "dial"). The name also
/// acts as the serde key, so renaming it would change the configuration format.
/// NOTE(review): only deserialization is customised (`deserialize_duration`); serialization
/// uses serde's default `Duration` representation — confirm that a round trip is not required.
#[serde(deserialize_with = "deserialize_duration")]
pub dail_timeout: Duration,
}
impl Default for PeerDBConfig {
fn default() -> Self {
Self {
max_disconnected_peers: 500,
max_banned_peers: 1000,
banned_peers_per_ip_threshold: 5,
allowed_negative_gossipsub_factor: 0.1,
dail_timeout: Duration::from_secs(15),
}
}
}
/// Storage of known peers, their reputation and information
pub struct PeerDB {
config: PeerDBConfig,
/// The collection of known connected peers, their status and reputation
peers: HashMap<PeerId, PeerInfo>,
/// The number of disconnected nodes in the database.
@ -41,13 +63,14 @@ pub struct PeerDB {
}
impl PeerDB {
pub fn new(trusted_peers: Vec<PeerId>) -> Self {
pub fn new(config: PeerDBConfig, trusted_peers: Vec<PeerId>) -> Self {
// Initialize the peers hashmap with trusted peers
let peers = trusted_peers
.into_iter()
.map(|peer_id| (peer_id, PeerInfo::trusted_peer_info()))
.collect();
Self {
config,
disconnected_peers: 0,
banned_peers_count: BannedPeersCount::default(),
peers,
@ -316,9 +339,7 @@ impl PeerDB {
.iter()
.filter_map(|(peer_id, info)| {
if let PeerConnectionStatus::Dialing { since } = info.connection_status() {
if (*since) + std::time::Duration::from_secs(DIAL_TIMEOUT)
< std::time::Instant::now()
{
if (*since) + self.config.dail_timeout < std::time::Instant::now() {
return Some(*peer_id);
}
}
@ -422,7 +443,7 @@ impl PeerDB {
peers.sort_unstable_by(|(.., s1), (.., s2)| s2.partial_cmp(s1).unwrap_or(Ordering::Equal));
let mut to_ignore_negative_peers =
(target_peers as f32 * ALLOWED_NEGATIVE_GOSSIPSUB_FACTOR).ceil() as usize;
(target_peers as f32 * self.config.allowed_negative_gossipsub_factor).ceil() as usize;
for (peer_id, info, score) in peers {
let previous_state = info.score_state();
@ -946,11 +967,11 @@ impl PeerDB {
let excess_peers = self
.banned_peers_count
.banned_peers()
.saturating_sub(MAX_BANNED_PEERS);
.saturating_sub(self.config.max_banned_peers);
let mut unbanned_peers = Vec::with_capacity(excess_peers);
// Remove excess banned peers
while self.banned_peers_count.banned_peers() > MAX_BANNED_PEERS {
while self.banned_peers_count.banned_peers() > self.config.max_banned_peers {
if let Some((to_drop, unbanned_ips)) = if let Some((id, info, _)) = self
.peers
.iter()
@ -982,7 +1003,7 @@ impl PeerDB {
}
// Remove excess disconnected peers
while self.disconnected_peers > MAX_DC_PEERS {
while self.disconnected_peers > self.config.max_disconnected_peers {
if let Some(to_drop) = self
.peers
.iter()
@ -1210,7 +1231,7 @@ mod tests {
}
fn get_db() -> PeerDB {
PeerDB::new(vec![])
PeerDB::new(PeerDBConfig::default(), vec![])
}
#[test]
@ -1265,7 +1286,7 @@ mod tests {
use std::collections::BTreeMap;
let mut peer_list = BTreeMap::new();
for id in 0..MAX_DC_PEERS + 1 {
for id in 0..pdb.config.max_disconnected_peers + 1 {
let new_peer = PeerId::random();
pdb.connect_ingoing(&new_peer, "/ip4/0.0.0.0".parse().unwrap(), None);
peer_list.insert(id, new_peer);
@ -1276,11 +1297,15 @@ mod tests {
pdb.inject_disconnect(p);
// Allow the timing to update correctly
}
assert_eq!(pdb.disconnected_peers, MAX_DC_PEERS);
assert_eq!(pdb.disconnected_peers, pdb.config.max_disconnected_peers);
assert_eq!(pdb.disconnected_peers, pdb.disconnected_peers().count());
// Only the oldest peer should have been removed
for (id, peer_id) in peer_list.iter().rev().take(MAX_DC_PEERS) {
for (id, peer_id) in peer_list
.iter()
.rev()
.take(pdb.config.max_disconnected_peers)
{
println!("Testing id {}", id);
assert!(
pdb.peer_info(peer_id).is_some(),
@ -1301,7 +1326,7 @@ mod tests {
use std::collections::BTreeMap;
let mut peer_list = BTreeMap::new();
for id in 0..MAX_DC_PEERS + 20 {
for id in 0..pdb.config.max_disconnected_peers + 20 {
let new_peer = PeerId::random();
pdb.connect_ingoing(&new_peer, "/ip4/0.0.0.0".parse().unwrap(), None);
peer_list.insert(id, new_peer);
@ -1314,7 +1339,7 @@ mod tests {
println!("{}", pdb.disconnected_peers);
peer_list.clear();
for id in 0..MAX_DC_PEERS + 20 {
for id in 0..pdb.config.max_disconnected_peers + 20 {
let new_peer = PeerId::random();
pdb.connect_ingoing(&new_peer, "/ip4/0.0.0.0".parse().unwrap(), None);
peer_list.insert(id, new_peer);
@ -1345,7 +1370,7 @@ mod tests {
fn test_disconnected_are_bounded() {
let mut pdb = get_db();
for _ in 0..MAX_DC_PEERS + 1 {
for _ in 0..pdb.config.max_disconnected_peers + 1 {
let p = PeerId::random();
pdb.connect_ingoing(&p, "/ip4/0.0.0.0".parse().unwrap(), None);
}
@ -1356,14 +1381,14 @@ mod tests {
}
assert_eq!(pdb.disconnected_peers, pdb.disconnected_peers().count());
assert_eq!(pdb.disconnected_peers, MAX_DC_PEERS);
assert_eq!(pdb.disconnected_peers, pdb.config.max_disconnected_peers);
}
#[test]
fn test_banned_are_bounded() {
let mut pdb = get_db();
for _ in 0..MAX_BANNED_PEERS + 1 {
for _ in 0..pdb.config.max_banned_peers + 1 {
let p = PeerId::random();
pdb.connect_ingoing(&p, "/ip4/0.0.0.0".parse().unwrap(), None);
}
@ -1374,7 +1399,10 @@ mod tests {
pdb.inject_disconnect(&p);
}
assert_eq!(pdb.banned_peers_count.banned_peers(), MAX_BANNED_PEERS);
assert_eq!(
pdb.banned_peers_count.banned_peers(),
pdb.config.max_banned_peers
);
}
#[test]
@ -1908,7 +1936,7 @@ mod tests {
#[allow(clippy::float_cmp)]
fn test_trusted_peers_score() {
let trusted_peer = PeerId::random();
let mut pdb: PeerDB = PeerDB::new(vec![trusted_peer]);
let mut pdb: PeerDB = PeerDB::new(PeerDBConfig::default(), vec![trusted_peer]);
pdb.connect_ingoing(&trusted_peer, "/ip4/0.0.0.0".parse().unwrap(), None);

View File

@ -159,6 +159,7 @@ impl Encoder<OutboundRequest> for SSZSnappyOutboundCodec {
OutboundRequest::Goodbye(req) => req.as_ssz_bytes(),
OutboundRequest::Ping(req) => req.as_ssz_bytes(),
OutboundRequest::DataByHash(req) => req.hashes.as_ssz_bytes(),
OutboundRequest::AnnounceFile(req) => req.as_ssz_bytes(),
OutboundRequest::GetChunks(req) => req.as_ssz_bytes(),
};
// SSZ encoded bytes should be within `max_packet_size`
@ -346,6 +347,9 @@ fn handle_v1_request(
Protocol::DataByHash => Ok(Some(InboundRequest::DataByHash(DataByHashRequest {
hashes: VariableList::from_ssz_bytes(decoded_buffer)?,
}))),
Protocol::AnnounceFile => Ok(Some(InboundRequest::AnnounceFile(
FileAnnouncement::from_ssz_bytes(decoded_buffer)?,
))),
Protocol::GetChunks => Ok(Some(InboundRequest::GetChunks(
GetChunksRequest::from_ssz_bytes(decoded_buffer)?,
))),
@ -373,6 +377,10 @@ fn handle_v1_response(
Protocol::DataByHash => Ok(Some(RPCResponse::DataByHash(Box::new(
ZgsData::from_ssz_bytes(decoded_buffer)?,
)))),
// This case should be unreachable as `AnnounceFile` has no response.
Protocol::AnnounceFile => Err(RPCError::InvalidData(
"AnnounceFile RPC message has no valid response".to_string(),
)),
Protocol::GetChunks => Ok(Some(RPCResponse::Chunks(
ChunkArrayWithProof::from_ssz_bytes(decoded_buffer)?,
))),
@ -391,7 +399,9 @@ mod tests {
use std::io::Write;
fn status_message() -> StatusMessage {
StatusMessage { data: 1 }
StatusMessage {
data: Default::default(),
}
}
fn ping_message() -> Ping {
@ -560,7 +570,10 @@ mod tests {
assert_eq!(stream_identifier.len(), 10);
// Status message is 84 bytes uncompressed. `max_compressed_len` is 32 + 84 + 84/6 = 130.
let status_message_bytes = StatusMessage { data: 1 }.as_ssz_bytes();
let status_message_bytes = StatusMessage {
data: Default::default(),
}
.as_ssz_bytes();
let mut uvi_codec: Uvi<usize> = Uvi::default();
let mut dst = BytesMut::with_capacity(1024);

View File

@ -9,7 +9,7 @@ use ssz_types::{
use std::ops::Deref;
use strum::IntoStaticStr;
pub type Hash256 = ethereum_types::H256;
use shared_types::{ChunkArrayWithProof, TxID};
use shared_types::{ChunkArrayWithProof, NetworkIdentity, TxID};
pub use ssz_types::{typenum, typenum::Unsigned, BitList, BitVector, FixedVector};
@ -71,7 +71,7 @@ impl ToString for ErrorType {
/// The STATUS request/response handshake message.
#[derive(Encode, Decode, Clone, Debug, PartialEq, Eq)]
pub struct StatusMessage {
pub data: u64,
pub data: NetworkIdentity,
}
/// The PING request/response message.
@ -178,6 +178,14 @@ pub struct DataByHashRequest {
pub hashes: VariableList<Hash256, MaxRequestBlocks>,
}
/// The payload of the `AnnounceFile` RPC request.
///
/// The request is one-way: the `AnnounceFile` protocol defines no response.
#[derive(Encode, Decode, Clone, Debug, PartialEq, Eq)]
pub struct FileAnnouncement {
/// Identifier (`TxID`) of the transaction being announced.
pub tx_id: TxID,
/// NOTE(review): presumably the announcer's total number of shards — confirm.
pub num_shard: usize,
/// NOTE(review): presumably the announcer's shard index — confirm.
pub shard_id: usize,
}
/// Request a chunk array from a peer.
#[derive(Encode, Decode, Clone, Debug, PartialEq, Eq)]
pub struct GetChunksRequest {

View File

@ -118,6 +118,7 @@ impl<Id: ReqId> RPC<Id> {
.n_every(Protocol::Status, 5, Duration::from_secs(15))
.one_every(Protocol::Goodbye, Duration::from_secs(10))
.n_every(Protocol::DataByHash, 128, Duration::from_secs(10))
.n_every(Protocol::AnnounceFile, 256, Duration::from_secs(10))
.n_every(Protocol::GetChunks, 4096, Duration::from_secs(10))
.build()
.expect("Configuration parameters are valid");

View File

@ -34,6 +34,7 @@ pub enum OutboundRequest {
Goodbye(GoodbyeReason),
Ping(Ping),
DataByHash(DataByHashRequest),
AnnounceFile(FileAnnouncement),
GetChunks(GetChunksRequest),
}
@ -72,6 +73,11 @@ impl OutboundRequest {
Version::V1,
Encoding::SSZSnappy,
)],
OutboundRequest::AnnounceFile(_) => vec![ProtocolId::new(
Protocol::AnnounceFile,
Version::V1,
Encoding::SSZSnappy,
)],
OutboundRequest::GetChunks(_) => vec![ProtocolId::new(
Protocol::GetChunks,
Version::V1,
@ -89,6 +95,7 @@ impl OutboundRequest {
OutboundRequest::Goodbye(_) => 0,
OutboundRequest::Ping(_) => 1,
OutboundRequest::DataByHash(req) => req.hashes.len() as u64,
OutboundRequest::AnnounceFile(_) => 0,
OutboundRequest::GetChunks(_) => 1,
}
}
@ -100,6 +107,7 @@ impl OutboundRequest {
OutboundRequest::Goodbye(_) => Protocol::Goodbye,
OutboundRequest::Ping(_) => Protocol::Ping,
OutboundRequest::DataByHash(_) => Protocol::DataByHash,
OutboundRequest::AnnounceFile(_) => Protocol::AnnounceFile,
OutboundRequest::GetChunks(_) => Protocol::GetChunks,
}
}
@ -114,6 +122,7 @@ impl OutboundRequest {
OutboundRequest::Status(_) => unreachable!(),
OutboundRequest::Goodbye(_) => unreachable!(),
OutboundRequest::Ping(_) => unreachable!(),
OutboundRequest::AnnounceFile(_) => unreachable!(),
OutboundRequest::GetChunks(_) => unreachable!(),
}
}
@ -170,6 +179,9 @@ impl std::fmt::Display for OutboundRequest {
OutboundRequest::DataByHash(req) => {
write!(f, "Data by hash: {:?}", req)
}
OutboundRequest::AnnounceFile(req) => {
write!(f, "AnnounceFile: {:?}", req)
}
OutboundRequest::GetChunks(req) => {
write!(f, "GetChunks: {:?}", req)
}

View File

@ -91,6 +91,8 @@ pub enum Protocol {
/// TODO
DataByHash,
/// The file announce protocol.
AnnounceFile,
/// The Chunk sync protocol.
GetChunks,
}
@ -115,6 +117,7 @@ impl std::fmt::Display for Protocol {
Protocol::Goodbye => "goodbye",
Protocol::Ping => "ping",
Protocol::DataByHash => "data_by_hash",
Protocol::AnnounceFile => "announce_file",
Protocol::GetChunks => "get_chunks",
};
f.write_str(repr)
@ -155,6 +158,7 @@ impl UpgradeInfo for RPCProtocol {
ProtocolId::new(Protocol::Goodbye, Version::V1, Encoding::SSZSnappy),
ProtocolId::new(Protocol::Ping, Version::V1, Encoding::SSZSnappy),
ProtocolId::new(Protocol::DataByHash, Version::V1, Encoding::SSZSnappy),
ProtocolId::new(Protocol::AnnounceFile, Version::V1, Encoding::SSZSnappy),
ProtocolId::new(Protocol::GetChunks, Version::V1, Encoding::SSZSnappy),
]
}
@ -216,6 +220,10 @@ impl ProtocolId {
// TODO
RpcLimits::new(1, *DATA_BY_HASH_REQUEST_MAX)
}
Protocol::AnnounceFile => RpcLimits::new(
<FileAnnouncement as Encode>::ssz_fixed_len(),
<FileAnnouncement as Encode>::ssz_fixed_len(),
),
Protocol::GetChunks => RpcLimits::new(
<GetChunksRequest as Encode>::ssz_fixed_len(),
<GetChunksRequest as Encode>::ssz_fixed_len(),
@ -243,6 +251,7 @@ impl ProtocolId {
<ZgsData as Encode>::ssz_fixed_len(),
),
Protocol::AnnounceFile => RpcLimits::new(0, 0), // AnnounceFile request has no response
Protocol::GetChunks => RpcLimits::new(*CHUNKS_RESPONSE_MIN, *CHUNKS_RESPONSE_MAX),
}
}
@ -325,6 +334,7 @@ pub enum InboundRequest {
Goodbye(GoodbyeReason),
Ping(Ping),
DataByHash(DataByHashRequest),
AnnounceFile(FileAnnouncement),
GetChunks(GetChunksRequest),
}
@ -363,6 +373,11 @@ impl InboundRequest {
Version::V1,
Encoding::SSZSnappy,
)],
InboundRequest::AnnounceFile(_) => vec![ProtocolId::new(
Protocol::AnnounceFile,
Version::V1,
Encoding::SSZSnappy,
)],
InboundRequest::GetChunks(_) => vec![ProtocolId::new(
Protocol::GetChunks,
Version::V1,
@ -380,6 +395,7 @@ impl InboundRequest {
InboundRequest::Goodbye(_) => 0,
InboundRequest::DataByHash(req) => req.hashes.len() as u64,
InboundRequest::Ping(_) => 1,
InboundRequest::AnnounceFile(_) => 0,
InboundRequest::GetChunks(_) => 1,
}
}
@ -391,6 +407,7 @@ impl InboundRequest {
InboundRequest::Goodbye(_) => Protocol::Goodbye,
InboundRequest::Ping(_) => Protocol::Ping,
InboundRequest::DataByHash(_) => Protocol::DataByHash,
InboundRequest::AnnounceFile(_) => Protocol::AnnounceFile,
InboundRequest::GetChunks(_) => Protocol::GetChunks,
}
}
@ -405,6 +422,7 @@ impl InboundRequest {
InboundRequest::Status(_) => unreachable!(),
InboundRequest::Goodbye(_) => unreachable!(),
InboundRequest::Ping(_) => unreachable!(),
InboundRequest::AnnounceFile(_) => unreachable!(),
InboundRequest::GetChunks(_) => unreachable!(),
}
}
@ -523,6 +541,9 @@ impl std::fmt::Display for InboundRequest {
InboundRequest::DataByHash(req) => {
write!(f, "Data by hash: {:?}", req)
}
InboundRequest::AnnounceFile(req) => {
write!(f, "Announce File: {:?}", req)
}
InboundRequest::GetChunks(req) => {
write!(f, "Get Chunks: {:?}", req)
}

View File

@ -68,6 +68,8 @@ pub struct RPCRateLimiter {
status_rl: Limiter<PeerId>,
/// DataByHash rate limiter.
data_by_hash_rl: Limiter<PeerId>,
/// AnnounceFile rate limiter.
announce_file_rl: Limiter<PeerId>,
/// GetChunks rate limiter.
get_chunks_rl: Limiter<PeerId>,
}
@ -91,6 +93,8 @@ pub struct RPCRateLimiterBuilder {
status_quota: Option<Quota>,
/// Quota for the DataByHash protocol.
data_by_hash_quota: Option<Quota>,
/// Quota for the AnnounceFile protocol.
announce_file_quota: Option<Quota>,
/// Quota for the GetChunks protocol.
get_chunks_quota: Option<Quota>,
}
@ -109,6 +113,7 @@ impl RPCRateLimiterBuilder {
Protocol::Status => self.status_quota = q,
Protocol::Goodbye => self.goodbye_quota = q,
Protocol::DataByHash => self.data_by_hash_quota = q,
Protocol::AnnounceFile => self.announce_file_quota = q,
Protocol::GetChunks => self.get_chunks_quota = q,
}
self
@ -145,6 +150,9 @@ impl RPCRateLimiterBuilder {
let data_by_hash_quota = self
.data_by_hash_quota
.ok_or("DataByHash quota not specified")?;
let announce_file_quota = self
.announce_file_quota
.ok_or("AnnounceFile quota not specified")?;
let get_chunks_quota = self
.get_chunks_quota
.ok_or("GetChunks quota not specified")?;
@ -154,6 +162,7 @@ impl RPCRateLimiterBuilder {
let status_rl = Limiter::from_quota(status_quota)?;
let goodbye_rl = Limiter::from_quota(goodbye_quota)?;
let data_by_hash_rl = Limiter::from_quota(data_by_hash_quota)?;
let announce_file_rl = Limiter::from_quota(announce_file_quota)?;
let get_chunks_rl = Limiter::from_quota(get_chunks_quota)?;
// check for peers to prune every 30 seconds, starting in 30 seconds
@ -166,6 +175,7 @@ impl RPCRateLimiterBuilder {
status_rl,
goodbye_rl,
data_by_hash_rl,
announce_file_rl,
get_chunks_rl,
init_time: Instant::now(),
})
@ -210,6 +220,7 @@ impl RPCRateLimiter {
Protocol::Status => &mut self.status_rl,
Protocol::Goodbye => &mut self.goodbye_rl,
Protocol::DataByHash => &mut self.data_by_hash_rl,
Protocol::AnnounceFile => &mut self.announce_file_rl,
Protocol::GetChunks => &mut self.get_chunks_rl,
};
check(limiter)

View File

@ -4,7 +4,7 @@ use crate::discovery::enr;
use crate::multiaddr::Protocol;
use crate::rpc::{GoodbyeReason, RPCResponseErrorCode, ReqId};
use crate::types::{error, GossipKind};
use crate::{EnrExt, NetworkMessage};
use crate::{EnrExt, NetworkSender};
use crate::{NetworkConfig, NetworkGlobals, PeerAction, ReportSource};
use futures::prelude::*;
use libp2p::core::{
@ -21,7 +21,6 @@ use std::io::prelude::*;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc::UnboundedSender;
use crate::peer_manager::{MIN_OUTBOUND_ONLY_FACTOR, PEER_EXCESS_FACTOR, PRIORITY_PEER_EXCESS};
@ -60,7 +59,7 @@ pub struct Context<'a> {
impl<AppReqId: ReqId> Service<AppReqId> {
pub async fn new(
executor: task_executor::TaskExecutor,
network_sender: UnboundedSender<NetworkMessage>,
network_sender: NetworkSender,
ctx: Context<'_>,
) -> error::Result<(Arc<NetworkGlobals>, Keypair, Self)> {
trace!("Libp2p Service starting");
@ -84,6 +83,8 @@ impl<AppReqId: ReqId> Service<AppReqId> {
.iter()
.map(|x| PeerId::from(x.clone()))
.collect(),
config.peer_db,
config.network_id.clone(),
));
// try and construct UPnP port mappings if required.

View File

@ -1,9 +1,11 @@
//! A collection of variables that are accessible outside of the network thread itself.
use crate::peer_manager::peerdb::PeerDB;
use crate::peer_manager::peerdb::PeerDBConfig;
use crate::Client;
use crate::EnrExt;
use crate::{Enr, GossipTopic, Multiaddr, PeerId};
use parking_lot::RwLock;
use shared_types::NetworkIdentity;
use std::collections::HashSet;
use std::sync::atomic::{AtomicU16, Ordering};
@ -22,18 +24,29 @@ pub struct NetworkGlobals {
pub peers: RwLock<PeerDB>,
/// The current gossipsub topic subscriptions.
pub gossipsub_subscriptions: RwLock<HashSet<GossipTopic>>,
/// The id of the storage network.
pub network_id: RwLock<NetworkIdentity>,
}
impl NetworkGlobals {
pub fn new(enr: Enr, tcp_port: u16, udp_port: u16, trusted_peers: Vec<PeerId>) -> Self {
pub fn new(
enr: Enr,
tcp_port: u16,
udp_port: u16,
trusted_peers: Vec<PeerId>,
peer_db_config: PeerDBConfig,
network_id: NetworkIdentity,
) -> Self {
NetworkGlobals {
local_enr: RwLock::new(enr.clone()),
peer_id: RwLock::new(enr.peer_id()),
listen_multiaddrs: RwLock::new(Vec::new()),
listen_port_tcp: AtomicU16::new(tcp_port),
listen_port_udp: AtomicU16::new(udp_port),
peers: RwLock::new(PeerDB::new(trusted_peers)),
peers: RwLock::new(PeerDB::new(peer_db_config, trusted_peers)),
gossipsub_subscriptions: RwLock::new(HashSet::new()),
network_id: RwLock::new(network_id),
}
}
@ -63,6 +76,10 @@ impl NetworkGlobals {
self.listen_port_udp.load(Ordering::Relaxed)
}
/// Returns a clone of the storage network identity held by these globals.
pub fn network_id(&self) -> NetworkIdentity {
// Take the read lock only long enough to clone the value out.
self.network_id.read().clone()
}
/// Returns the number of libp2p connected peers.
pub fn connected_peers(&self) -> usize {
self.peers.read().connected_peer_ids().count()
@ -95,6 +112,13 @@ impl NetworkGlobals {
let enr_key: discv5::enr::CombinedKey =
discv5::enr::CombinedKey::from_libp2p(&keypair).unwrap();
let enr = discv5::enr::EnrBuilder::new("v4").build(&enr_key).unwrap();
NetworkGlobals::new(enr, 9000, 9000, vec![])
NetworkGlobals::new(
enr,
9000,
9000,
vec![],
Default::default(),
Default::default(),
)
}
}

View File

@ -7,7 +7,7 @@ pub type Enr = discv5::enr::Enr<discv5::enr::CombinedKey>;
pub use globals::NetworkGlobals;
pub use pubsub::{
AnnounceChunks, AnnounceFile, AnnounceShardConfig, FindChunks, FindFile, HasSignature,
AnnounceChunks, AnnounceFile, AnnounceShardConfig, FindChunks, FindFile, HasSignature, NewFile,
PubsubMessage, SignedAnnounceChunks, SignedAnnounceFile, SignedAnnounceShardConfig,
SignedMessage, SnappyTransform,
};

View File

@ -114,9 +114,22 @@ impl ssz::Decode for WrappedPeerId {
}
}
/// Published when a file has been uploaded, or has finished syncing from other peers.
#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)]
pub struct NewFile {
/// Identifier (`TxID`) of the transaction the message refers to.
pub tx_id: TxID,
/// NOTE(review): presumably the publisher's total number of shards — confirm.
pub num_shard: usize,
/// NOTE(review): presumably the publisher's shard index — confirm.
pub shard_id: usize,
/// NOTE(review): presumably the publish time; unit and epoch are not visible here — confirm.
pub timestamp: u32,
}
#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)]
pub struct FindFile {
pub tx_id: TxID,
pub num_shard: usize,
pub shard_id: usize,
/// Indicates whether to publish to neighbor nodes only.
pub neighbors_only: bool,
pub timestamp: u32,
}
@ -130,7 +143,7 @@ pub struct FindChunks {
#[derive(Debug, Clone, PartialEq, Eq, Hash, Encode, Decode)]
pub struct AnnounceFile {
pub tx_id: TxID,
pub tx_ids: Vec<TxID>,
pub num_shard: usize,
pub shard_id: usize,
pub peer_id: WrappedPeerId,
@ -200,12 +213,15 @@ pub type SignedAnnounceFile = SignedMessage<AnnounceFile>;
pub type SignedAnnounceShardConfig = SignedMessage<AnnounceShardConfig>;
pub type SignedAnnounceChunks = SignedMessage<AnnounceChunks>;
type SignedAnnounceFiles = Vec<SignedAnnounceFile>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PubsubMessage {
ExampleMessage(u64),
NewFile(NewFile),
FindFile(FindFile),
FindChunks(FindChunks),
AnnounceFile(SignedAnnounceFile),
AnnounceFile(Vec<SignedAnnounceFile>),
AnnounceShardConfig(SignedAnnounceShardConfig),
AnnounceChunks(SignedAnnounceChunks),
}
@ -281,6 +297,7 @@ impl PubsubMessage {
pub fn kind(&self) -> GossipKind {
match self {
PubsubMessage::ExampleMessage(_) => GossipKind::Example,
PubsubMessage::NewFile(_) => GossipKind::NewFile,
PubsubMessage::FindFile(_) => GossipKind::FindFile,
PubsubMessage::FindChunks(_) => GossipKind::FindChunks,
PubsubMessage::AnnounceFile(_) => GossipKind::AnnounceFile,
@ -307,6 +324,9 @@ impl PubsubMessage {
GossipKind::Example => Ok(PubsubMessage::ExampleMessage(
u64::from_ssz_bytes(data).map_err(|e| format!("{:?}", e))?,
)),
GossipKind::NewFile => Ok(PubsubMessage::NewFile(
NewFile::from_ssz_bytes(data).map_err(|e| format!("{:?}", e))?,
)),
GossipKind::FindFile => Ok(PubsubMessage::FindFile(
FindFile::from_ssz_bytes(data).map_err(|e| format!("{:?}", e))?,
)),
@ -314,7 +334,8 @@ impl PubsubMessage {
FindChunks::from_ssz_bytes(data).map_err(|e| format!("{:?}", e))?,
)),
GossipKind::AnnounceFile => Ok(PubsubMessage::AnnounceFile(
SignedAnnounceFile::from_ssz_bytes(data).map_err(|e| format!("{:?}", e))?,
SignedAnnounceFiles::from_ssz_bytes(data)
.map_err(|e| format!("{:?}", e))?,
)),
GossipKind::AnnounceChunks => Ok(PubsubMessage::AnnounceChunks(
SignedAnnounceChunks::from_ssz_bytes(data)
@ -338,6 +359,7 @@ impl PubsubMessage {
// messages for us.
match &self {
PubsubMessage::ExampleMessage(data) => data.as_ssz_bytes(),
PubsubMessage::NewFile(data) => data.as_ssz_bytes(),
PubsubMessage::FindFile(data) => data.as_ssz_bytes(),
PubsubMessage::FindChunks(data) => data.as_ssz_bytes(),
PubsubMessage::AnnounceFile(data) => data.as_ssz_bytes(),
@ -353,6 +375,9 @@ impl std::fmt::Display for PubsubMessage {
PubsubMessage::ExampleMessage(msg) => {
write!(f, "Example message: {}", msg)
}
PubsubMessage::NewFile(msg) => {
write!(f, "NewFile message: {:?}", msg)
}
PubsubMessage::FindFile(msg) => {
write!(f, "FindFile message: {:?}", msg)
}

View File

@ -8,13 +8,15 @@ use strum::AsRefStr;
pub const TOPIC_PREFIX: &str = "eth2";
pub const SSZ_SNAPPY_ENCODING_POSTFIX: &str = "ssz_snappy";
pub const EXAMPLE_TOPIC: &str = "example";
pub const NEW_FILE_TOPIC: &str = "new_file";
pub const FIND_FILE_TOPIC: &str = "find_file";
pub const FIND_CHUNKS_TOPIC: &str = "find_chunks";
pub const ANNOUNCE_FILE_TOPIC: &str = "announce_file";
pub const ANNOUNCE_CHUNKS_TOPIC: &str = "announce_chunks";
pub const ANNOUNCE_SHARD_CONFIG_TOPIC: &str = "announce_shard_config";
pub const CORE_TOPICS: [GossipKind; 4] = [
pub const CORE_TOPICS: [GossipKind; 5] = [
GossipKind::NewFile,
GossipKind::FindFile,
GossipKind::FindChunks,
GossipKind::AnnounceFile,
@ -37,6 +39,7 @@ pub struct GossipTopic {
#[strum(serialize_all = "snake_case")]
pub enum GossipKind {
Example,
NewFile,
FindFile,
FindChunks,
AnnounceFile,
@ -77,6 +80,7 @@ impl GossipTopic {
let kind = match topic_parts[2] {
EXAMPLE_TOPIC => GossipKind::Example,
NEW_FILE_TOPIC => GossipKind::NewFile,
FIND_FILE_TOPIC => GossipKind::FindFile,
FIND_CHUNKS_TOPIC => GossipKind::FindChunks,
ANNOUNCE_FILE_TOPIC => GossipKind::AnnounceFile,
@ -106,6 +110,7 @@ impl From<GossipTopic> for String {
let kind = match topic.kind {
GossipKind::Example => EXAMPLE_TOPIC,
GossipKind::NewFile => NEW_FILE_TOPIC,
GossipKind::FindFile => FIND_FILE_TOPIC,
GossipKind::FindChunks => FIND_CHUNKS_TOPIC,
GossipKind::AnnounceFile => ANNOUNCE_FILE_TOPIC,
@ -125,6 +130,7 @@ impl std::fmt::Display for GossipTopic {
let kind = match self.kind {
GossipKind::Example => EXAMPLE_TOPIC,
GossipKind::NewFile => NEW_FILE_TOPIC,
GossipKind::FindFile => FIND_FILE_TOPIC,
GossipKind::FindChunks => FIND_CHUNKS_TOPIC,
GossipKind::AnnounceFile => ANNOUNCE_FILE_TOPIC,

View File

@ -1,6 +1,7 @@
#![cfg(test)]
use libp2p::gossipsub::GossipsubConfigBuilder;
use network::new_network_channel;
use network::Enr;
use network::EnrExt;
use network::Multiaddr;
@ -22,7 +23,6 @@ pub mod swarm;
type ReqId = usize;
use tempfile::Builder as TempBuilder;
use tokio::sync::mpsc::unbounded_channel;
#[allow(unused)]
pub struct Libp2pInstance(LibP2PService<ReqId>, exit_future::Signal);
@ -72,7 +72,7 @@ pub async fn build_libp2p_instance(rt: Weak<Runtime>, boot_nodes: Vec<Enr>) -> L
let (shutdown_tx, _) = futures::channel::mpsc::channel(1);
let executor = task_executor::TaskExecutor::new(rt, exit, shutdown_tx);
let libp2p_context = network::Context { config: &config };
let (sender, _) = unbounded_channel();
let (sender, _) = new_network_channel();
Libp2pInstance(
LibP2PService::new(executor, sender, libp2p_context)
.await

View File

@ -9,7 +9,7 @@ use common::{
swarm,
};
use network::{
peer_manager::{self, config::Config, PeerManagerEvent},
peer_manager::{config::Config, peerdb::PeerDBConfig, PeerManagerEvent},
NetworkGlobals, PeerAction, PeerInfo, PeerManager, ReportSource,
};
@ -101,7 +101,7 @@ async fn banned_peers_consistency() {
};
let excess_banned_peers = 15;
let peers_to_ban = peer_manager::peerdb::MAX_BANNED_PEERS + excess_banned_peers;
let peers_to_ban = PeerDBConfig::default().max_banned_peers + excess_banned_peers;
// Build all the dummy peers needed.
let (mut swarm_pool, peers) = {

View File

@ -23,10 +23,14 @@ fn test_status_rpc() {
let (mut sender, mut receiver) = common::build_node_pair(Arc::downgrade(&rt)).await;
// Dummy STATUS RPC message
let rpc_request = Request::Status(StatusMessage { data: 2 });
let rpc_request = Request::Status(StatusMessage {
data: Default::default(),
});
// Dummy STATUS RPC message
let rpc_response = Response::Status(StatusMessage { data: 3 });
let rpc_response = Response::Status(StatusMessage {
data: Default::default(),
});
// build the sender future
let sender_future = async {

View File

@ -11,4 +11,8 @@ anyhow = "1.0.86"
tokio = "1.37.0"
rand = "0.8.5"
task_executor = { path = "../../common/task_executor" }
tracing = "0.1.40"
tracing = "0.1.40"
ethereum-types = "0.14.1"
contract-interface = { path = "../../common/contract-interface" }
ethers = "^2"
zgs_spec = { path = "../../common/spec" }

View File

@ -1,40 +1,64 @@
use anyhow::Result;
use anyhow::{bail, Result};
use contract_interface::ChunkLinearReward;
use ethereum_types::Address;
use ethers::prelude::{Http, Provider};
use ethers::providers::{HttpRateLimitRetryPolicy, RetryClient, RetryClientBuilder};
use miner::MinerMessage;
use rand::Rng;
use std::cmp::Ordering;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use storage::config::{ShardConfig, SHARD_CONFIG_KEY};
use storage::log_store::log_manager::{DATA_DB_KEY, PORA_CHUNK_SIZE};
use storage_async::Store;
use task_executor::TaskExecutor;
use tokio::sync::{broadcast, mpsc};
use tracing::{debug, info};
use tracing::{debug, error, info};
use zgs_spec::SECTORS_PER_PRICING;
// Start pruning when the db directory size exceeds 0.9 * limit.
const PRUNE_THRESHOLD: f32 = 0.9;
const FIRST_REWARDABLE_CHUNK_KEY: &str = "first_rewardable_chunk";
const CHUNKS_PER_PRICING: u64 = (SECTORS_PER_PRICING / PORA_CHUNK_SIZE) as u64;
#[derive(Debug)]
pub struct PrunerConfig {
pub shard_config: ShardConfig,
pub db_path: PathBuf,
pub max_num_chunks: usize,
pub max_num_sectors: usize,
pub check_time: Duration,
pub batch_size: usize,
pub batch_wait_time: Duration,
pub rpc_endpoint_url: String,
pub reward_address: Address,
pub rate_limit_retries: u32,
pub timeout_retries: u32,
pub initial_backoff: u64,
}
impl PrunerConfig {
fn start_prune_size(&self) -> u64 {
(self.max_num_chunks as f32 * PRUNE_THRESHOLD) as u64
(self.max_num_sectors as f32 * PRUNE_THRESHOLD) as u64
}
}
pub struct Pruner {
config: PrunerConfig,
first_rewardable_chunk: u64,
first_tx_seq: u64,
store: Arc<Store>,
sender: mpsc::UnboundedSender<PrunerMessage>,
miner_sender: Option<broadcast::Sender<MinerMessage>>,
reward_contract: ChunkLinearReward<Arc<Provider<RetryClient<Http>>>>,
}
impl Pruner {
@ -47,12 +71,30 @@ impl Pruner {
if let Some(shard_config) = get_shard_config(store.as_ref()).await? {
config.shard_config = shard_config;
}
let (first_rewardable_chunk, first_tx_seq) = get_first_rewardable_chunk(store.as_ref())
.await?
.unwrap_or((0, 0));
let provider = Arc::new(Provider::new(
RetryClientBuilder::default()
.rate_limit_retries(config.rate_limit_retries)
.timeout_retries(config.timeout_retries)
.initial_backoff(Duration::from_millis(config.initial_backoff))
.build(
Http::from_str(&config.rpc_endpoint_url)?,
Box::new(HttpRateLimitRetryPolicy),
),
));
let reward_contract = ChunkLinearReward::new(config.reward_address, Arc::new(provider));
let (tx, rx) = mpsc::unbounded_channel();
let pruner = Pruner {
config,
first_rewardable_chunk,
first_tx_seq,
store,
sender: tx,
miner_sender,
reward_contract,
};
pruner.put_shard_config().await?;
executor.spawn(
@ -66,21 +108,43 @@ impl Pruner {
pub async fn start(mut self) -> Result<()> {
loop {
// Check shard config update and prune unneeded data.
if let Some(delete_list) = self.maybe_update().await? {
info!(new_config = ?self.config.shard_config, "new shard config");
self.put_shard_config().await?;
let mut batch = Vec::with_capacity(self.config.batch_size);
let mut iter = delete_list.peekable();
while let Some(index) = iter.next() {
batch.push(index);
if batch.len() == self.config.batch_size || iter.peek().is_none() {
debug!(start = batch.first(), end = batch.last(), "prune batch");
self.store.remove_chunks_batch(&batch).await?;
batch = Vec::with_capacity(self.config.batch_size);
tokio::time::sleep(self.config.batch_wait_time).await;
self.prune_in_batch(delete_list).await?;
}
// Check no reward chunks and prune.
match self.reward_contract.first_rewardable_chunk().call().await {
Ok(new_first_rewardable) => {
if let Some(no_reward_list) = self
.maybe_forward_first_rewardable(new_first_rewardable)
.await?
{
info!(
?new_first_rewardable,
"first rewardable chunk moves forward, start pruning"
);
self.prune_tx(
self.first_rewardable_chunk * SECTORS_PER_PRICING as u64,
new_first_rewardable * SECTORS_PER_PRICING as u64,
)
.await?;
self.prune_in_batch(no_reward_list).await?;
self.first_rewardable_chunk = new_first_rewardable;
self.put_first_rewardable_chunk_index(
self.first_rewardable_chunk,
self.first_tx_seq,
)
.await?;
}
}
}
e => {
error!("handle reward contract read fails, e={:?}", e);
}
};
tokio::time::sleep(self.config.check_time).await;
}
}
@ -92,7 +156,9 @@ impl Pruner {
config = ?self.config.shard_config,
"maybe_update"
);
if current_size >= self.config.start_prune_size() {
if current_size < self.config.start_prune_size() {
Ok(None)
} else {
// Update config and generate delete list should be done in a single lock to ensure
// the list is complete.
let config = &mut self.config.shard_config;
@ -108,13 +174,77 @@ impl Pruner {
config.num_shard *= 2;
// Generate delete list
let flow_len = self.store.get_context().await?.1;
let flow_len = self
.store
.get_context()
.await?
.1
.div_ceil(PORA_CHUNK_SIZE as u64);
let start_index = old_shard_id + (!rand_bit) as usize * old_num_shard;
return Ok(Some(Box::new(
Ok(Some(Box::new(
(start_index as u64..flow_len).step_by(config.num_shard),
)));
)))
}
Ok(None)
}
async fn maybe_forward_first_rewardable(
&mut self,
new_first_rewardable: u64,
) -> Result<Option<Box<dyn Send + Iterator<Item = u64>>>> {
match self.first_rewardable_chunk.cmp(&new_first_rewardable) {
Ordering::Less => Ok(Some(Box::new(
self.first_rewardable_chunk * CHUNKS_PER_PRICING
..new_first_rewardable * CHUNKS_PER_PRICING,
))),
Ordering::Equal => Ok(None),
Ordering::Greater => {
error!(
"Unexpected first_rewardable_chunk revert: old={} new={}",
self.first_rewardable_chunk, new_first_rewardable
);
Ok(None)
}
}
}
/// Removes the chunks yielded by `to_prune` from the store in batches.
///
/// A batch is flushed once it holds `config.batch_size` indices or the
/// iterator is exhausted. After every flush the task sleeps for
/// `config.batch_wait_time` before continuing.
async fn prune_in_batch(&self, to_prune: Box<dyn Send + Iterator<Item = u64>>) -> Result<()> {
    let batch_size = self.config.batch_size;
    let mut pending = to_prune.peekable();
    let mut batch = Vec::with_capacity(batch_size);

    while let Some(index) = pending.next() {
        batch.push(index);

        // Keep accumulating while the batch is not full and more indices follow.
        if batch.len() != batch_size && pending.peek().is_some() {
            continue;
        }

        debug!(start = batch.first(), end = batch.last(), "prune batch");
        self.store.remove_chunks_batch(&batch).await?;
        batch.clear();
        tokio::time::sleep(self.config.batch_wait_time).await;
    }

    Ok(())
}
/// Marks transactions as pruned when their start entry index lies in
/// `[start_sector, end_sector)`, walking forward from `self.first_tx_seq`.
///
/// Stops at the first transaction that starts at or after `end_sector`.
/// If the transaction at `first_tx_seq` is not in the store yet, waits 60
/// seconds and retries.
///
/// # Errors
/// Fails if a store call fails, or if a transaction starts before
/// `start_sector`.
async fn prune_tx(&mut self, start_sector: u64, end_sector: u64) -> Result<()> {
    loop {
        let Some(tx) = self.store.get_tx_by_seq_number(self.first_tx_seq).await? else {
            // Wait for `first_tx_seq` to be processed.
            tokio::time::sleep(Duration::from_secs(60)).await;
            continue;
        };

        let tx_start = tx.start_entry_index();
        if tx_start >= end_sector {
            // This tx and every later one start past the pruned range.
            break;
        }
        if tx_start < start_sector {
            bail!(
                "prune tx out of range: tx={:?}, start={} end={}",
                tx,
                start_sector,
                end_sector
            );
        }

        // If a part of the tx data is pruned, we mark the tx as pruned.
        self.store.prune_tx(tx.seq).await?;
        self.first_tx_seq += 1;
    }

    Ok(())
}
async fn put_shard_config(&self) -> Result<()> {
@ -127,13 +257,35 @@ impl Pruner {
.update_shard_config(self.config.shard_config)
.await;
self.store
.set_config_encoded(&SHARD_CONFIG_KEY, &self.config.shard_config)
.set_config_encoded(&SHARD_CONFIG_KEY, &self.config.shard_config, DATA_DB_KEY)
.await
}
/// Persists the pair `(new_first_rewardable_chunk, new_first_tx_seq)` under
/// `FIRST_REWARDABLE_CHUNK_KEY`, passing `DATA_DB_KEY` to the store so the
/// value can be read back by `get_first_rewardable_chunk`.
///
/// Returns whatever error `set_config_encoded` reports.
async fn put_first_rewardable_chunk_index(
&self,
new_first_rewardable_chunk: u64,
new_first_tx_seq: u64,
) -> Result<()> {
self.store
.set_config_encoded(
&FIRST_REWARDABLE_CHUNK_KEY,
// Stored as a tuple: (first rewardable chunk, first tx seq).
&(new_first_rewardable_chunk, new_first_tx_seq),
DATA_DB_KEY,
)
.await
}
}
async fn get_shard_config(store: &Store) -> Result<Option<ShardConfig>> {
store.get_config_decoded(&SHARD_CONFIG_KEY).await
store
.get_config_decoded(&SHARD_CONFIG_KEY, DATA_DB_KEY)
.await
}
/// Loads the persisted `(first rewardable chunk, first tx seq)` pair stored
/// under `FIRST_REWARDABLE_CHUNK_KEY` (looked up with `DATA_DB_KEY`).
///
/// Returns `Ok(None)` when the store has no decoded value for the key.
async fn get_first_rewardable_chunk(store: &Store) -> Result<Option<(u64, u64)>> {
store
.get_config_decoded(&FIRST_REWARDABLE_CHUNK_KEY, DATA_DB_KEY)
.await
}
#[derive(Debug)]

View File

@ -24,6 +24,7 @@ rand = "0.8.5"
serde = { version = "1.0.137", features = ["derive"] }
duration-str = "0.5.1"
public-ip = "0.2"
metrics = { workspace = true }
[dev-dependencies]
channel = { path = "../../common/channel" }

117
node/router/src/batcher.rs Normal file
View File

@ -0,0 +1,117 @@
use std::{
collections::VecDeque,
sync::Arc,
time::{Duration, Instant},
};
use ::metrics::{Histogram, Sample};
/// `Batcher` is used to handle data in batch, when `capacity` or `timeout` matches.
pub(crate) struct Batcher<T> {
/// Items buffered for the current batch.
items: VecDeque<T>,
/// Time at which the first item of the current batch was added; `None`
/// while the buffer is empty.
earliest_time: Option<Instant>,
/// Number of buffered items at which `add` returns the whole batch.
capacity: usize,
/// Age of the earliest buffered item at which `expire` returns the batch.
timeout: Duration,
/// Histogram updated with the size of every batch that is handed out.
metrics_batch_size: Arc<dyn Histogram>,
}
impl<T> Batcher<T> {
    /// Creates an empty batcher that hands out a batch once `capacity` items
    /// are buffered or the oldest buffered item is at least `timeout` old.
    ///
    /// `name` is the metric name registered in the `router_batcher_size` group.
    pub fn new(capacity: usize, timeout: Duration, name: &str) -> Self {
        let metrics_batch_size =
            Sample::ExpDecay(0.015).register_with_group("router_batcher_size", name, 1024);

        Self {
            items: VecDeque::with_capacity(capacity),
            earliest_time: None,
            capacity,
            timeout,
            metrics_batch_size,
        }
    }

    /// Empties the buffer and returns its items in insertion order, or `None`
    /// when nothing is buffered. Records the batch size in the histogram.
    fn remove_all(&mut self) -> Option<Vec<T>> {
        if self.items.is_empty() {
            return None;
        }

        self.metrics_batch_size.update(self.items.len() as u64);
        self.earliest_time = None;

        Some(self.items.drain(..).collect())
    }

    /// Buffers `value`; returns the full batch once `capacity` is reached.
    pub fn add(&mut self, value: T) -> Option<Vec<T>> {
        self.add_with_time(value, Instant::now())
    }

    /// Same as [`Batcher::add`], with the current time supplied by the caller.
    fn add_with_time(&mut self, value: T, now: Instant) -> Option<Vec<T>> {
        // Items are appended, so draining the queue yields insertion order.
        self.items.push_back(value);

        // Only the first item of a batch sets the batch start time.
        self.earliest_time = self.earliest_time.or(Some(now));

        if self.items.len() >= self.capacity {
            // cache is full
            self.remove_all()
        } else {
            // cache if not full
            None
        }
    }

    /// Returns the buffered batch if its oldest item has timed out.
    pub fn expire(&mut self) -> Option<Vec<T>> {
        self.expire_with_time(Instant::now())
    }

    /// Same as [`Batcher::expire`], with the current time supplied by the caller.
    fn expire_with_time(&mut self, now: Instant) -> Option<Vec<T>> {
        let batch_started = self.earliest_time?;

        if now.duration_since(batch_started) >= self.timeout {
            self.remove_all()
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
    use std::time::{Duration, Instant};

    use super::Batcher;

    /// Reaching `capacity` hands out all items in insertion order.
    #[test]
    fn test_add() {
        let mut batcher = Batcher::<usize>::new(3, Duration::from_secs(10), "test");

        for value in [1, 2] {
            assert!(batcher.add(value).is_none());
        }

        assert_eq!(batcher.add(3), Some(vec![1, 2, 3]));
        assert!(batcher.items.is_empty());
    }

    /// Nothing expires before the timeout; afterwards the whole batch does.
    #[test]
    fn test_expire() {
        let mut batcher = Batcher::<usize>::new(5, Duration::from_secs(10), "test");

        let base = Instant::now();
        let at = |secs: u64| base + Duration::from_secs(secs);

        // enqueue: 1, 2, 3, 4
        for (value, secs) in [(1, 1), (2, 2), (3, 4), (4, 5)] {
            assert!(batcher.add_with_time(value, at(secs)).is_none());
        }

        // expire None
        assert!(batcher.expire_with_time(at(6)).is_none());

        // expire all
        assert_eq!(batcher.expire_with_time(at(13)), Some(vec![1, 2, 3, 4]));
        assert!(batcher.items.is_empty());
    }
}

View File

@ -1,14 +1,16 @@
#[macro_use]
extern crate tracing;
mod batcher;
mod libp2p_event_handler;
mod metrics;
mod peer_manager;
mod service;
use duration_str::deserialize_duration;
use network::Multiaddr;
use serde::Deserialize;
use std::time::Duration;
use std::{net::IpAddr, time::Duration};
pub use crate::service::RouterService;
@ -23,6 +25,17 @@ pub struct Config {
pub max_idle_outgoing_peers: usize,
pub libp2p_nodes: Vec<Multiaddr>,
pub private_ip_enabled: bool,
pub check_announced_ip: bool,
pub public_address: Option<IpAddr>,
// batcher
/// Timeout to publish messages in batch
#[serde(deserialize_with = "deserialize_duration")]
pub batcher_timeout: Duration,
/// Number of files in an announcement
pub batcher_file_capacity: usize,
/// Number of announcements in a pubsub message
pub batcher_announcement_capacity: usize,
}
impl Default for Config {
@ -34,6 +47,12 @@ impl Default for Config {
max_idle_outgoing_peers: 20,
libp2p_nodes: vec![],
private_ip_enabled: false,
check_announced_ip: false,
public_address: None,
batcher_timeout: Duration::from_secs(1),
batcher_file_capacity: 1,
batcher_announcement_capacity: 1,
}
}
}

View File

@ -1,11 +1,12 @@
use std::net::IpAddr;
use std::time::Instant;
use std::{ops::Neg, sync::Arc};
use chunk_pool::ChunkPoolMessage;
use file_location_cache::FileLocationCache;
use network::multiaddr::Protocol;
use network::types::{AnnounceShardConfig, SignedAnnounceShardConfig};
use network::Multiaddr;
use network::rpc::methods::FileAnnouncement;
use network::types::{AnnounceShardConfig, NewFile, SignedAnnounceShardConfig};
use network::{
rpc::StatusMessage,
types::{
@ -15,29 +16,43 @@ use network::{
Keypair, MessageAcceptance, MessageId, NetworkGlobals, NetworkMessage, PeerId, PeerRequestId,
PublicKey, PubsubMessage, Request, RequestId, Response,
};
use shared_types::{bytes_to_chunks, timestamp_now, TxID};
use network::{Multiaddr, NetworkSender, PeerAction, ReportSource};
use shared_types::{bytes_to_chunks, timestamp_now, NetworkIdentity, TxID};
use storage::config::ShardConfig;
use storage_async::Store;
use sync::{SyncMessage, SyncSender};
use tokio::sync::mpsc::UnboundedSender;
use tokio::sync::{mpsc, RwLock};
use crate::batcher::Batcher;
use crate::metrics;
use crate::peer_manager::PeerManager;
use crate::Config;
lazy_static::lazy_static! {
pub static ref FIND_FILE_TIMEOUT: chrono::Duration = chrono::Duration::minutes(2);
pub static ref ANNOUNCE_FILE_TIMEOUT: chrono::Duration = chrono::Duration::minutes(2);
pub static ref ANNOUNCE_SHARD_CONFIG_TIMEOUT: chrono::Duration = chrono::Duration::minutes(2);
pub static ref TOLERABLE_DRIFT: chrono::Duration = chrono::Duration::seconds(5);
/// Timeout to publish NewFile message to neighbor nodes.
pub static ref NEW_FILE_TIMEOUT: chrono::Duration = chrono::Duration::seconds(30);
/// Timeout to publish FindFile message to neighbor nodes.
pub static ref FIND_FILE_NEIGHBORS_TIMEOUT: chrono::Duration = chrono::Duration::seconds(30);
/// Timeout to publish FindFile message in the whole network.
pub static ref FIND_FILE_TIMEOUT: chrono::Duration = chrono::Duration::minutes(5);
pub static ref ANNOUNCE_FILE_TIMEOUT: chrono::Duration = chrono::Duration::minutes(5);
pub static ref ANNOUNCE_SHARD_CONFIG_TIMEOUT: chrono::Duration = chrono::Duration::minutes(5);
pub static ref TOLERABLE_DRIFT: chrono::Duration = chrono::Duration::seconds(10);
}
#[allow(deprecated)]
fn duration_since(timestamp: u32) -> chrono::Duration {
fn duration_since(timestamp: u32, metric: Arc<dyn ::metrics::Histogram>) -> chrono::Duration {
let timestamp = i64::from(timestamp);
let timestamp = chrono::NaiveDateTime::from_timestamp_opt(timestamp, 0).expect("should fit");
let now = chrono::Utc::now().naive_utc();
now.signed_duration_since(timestamp)
let timestamp = chrono::DateTime::from_timestamp(timestamp, 0).expect("should fit");
let now = chrono::Utc::now();
let duration = now.signed_duration_since(timestamp);
let num_secs = duration.num_seconds();
if num_secs > 0 {
metric.update(num_secs as u64);
}
duration
}
fn peer_id_to_public_key(peer_id: &PeerId) -> Result<PublicKey, String> {
@ -73,7 +88,7 @@ pub struct Libp2pEventHandler {
/// A collection of global variables, accessible outside of the network service.
network_globals: Arc<NetworkGlobals>,
/// A channel to the router service.
network_send: mpsc::UnboundedSender<NetworkMessage>,
network_send: NetworkSender,
/// A channel to the syncing service.
sync_send: SyncSender,
/// A channel to the RPC chunk pool service.
@ -86,6 +101,10 @@ pub struct Libp2pEventHandler {
file_location_cache: Arc<FileLocationCache>,
/// All connected peers.
peers: Arc<RwLock<PeerManager>>,
/// Files to announce in batch
file_batcher: RwLock<Batcher<TxID>>,
/// Announcements to publish in batch
announcement_batcher: RwLock<Batcher<SignedAnnounceFile>>,
}
impl Libp2pEventHandler {
@ -93,7 +112,7 @@ impl Libp2pEventHandler {
pub fn new(
config: Config,
network_globals: Arc<NetworkGlobals>,
network_send: mpsc::UnboundedSender<NetworkMessage>,
network_send: NetworkSender,
sync_send: SyncSender,
chunk_pool_send: UnboundedSender<ChunkPoolMessage>,
local_keypair: Keypair,
@ -101,6 +120,18 @@ impl Libp2pEventHandler {
file_location_cache: Arc<FileLocationCache>,
peers: Arc<RwLock<PeerManager>>,
) -> Self {
let file_batcher = RwLock::new(Batcher::new(
config.batcher_file_capacity,
config.batcher_timeout,
"file",
));
let announcement_batcher = RwLock::new(Batcher::new(
config.batcher_announcement_capacity,
config.batcher_timeout,
"announcement",
));
Self {
config,
network_globals,
@ -111,6 +142,8 @@ impl Libp2pEventHandler {
store,
file_location_cache,
peers,
file_batcher,
announcement_batcher,
}
}
@ -139,14 +172,18 @@ impl Libp2pEventHandler {
}
pub fn send_status(&self, peer_id: PeerId) {
let status_message = StatusMessage { data: 123 }; // dummy status message
let status_message = StatusMessage {
data: self.network_globals.network_id(),
};
debug!(%peer_id, ?status_message, "Sending Status request");
self.send_to_network(NetworkMessage::SendRequest {
peer_id,
request_id: RequestId::Router,
request_id: RequestId::Router(Instant::now()),
request: Request::Status(status_message),
});
metrics::LIBP2P_SEND_STATUS.mark(1);
}
pub async fn on_peer_connected(&self, peer_id: PeerId, outgoing: bool) {
@ -155,12 +192,16 @@ impl Libp2pEventHandler {
if outgoing {
self.send_status(peer_id);
self.send_to_sync(SyncMessage::PeerConnected { peer_id });
metrics::LIBP2P_HANDLE_PEER_CONNECTED_OUTGOING.mark(1);
} else {
metrics::LIBP2P_HANDLE_PEER_CONNECTED_INCOMING.mark(1);
}
}
/// Handles a peer disconnection: removes `peer_id` from the peer manager,
/// forwards a `PeerDisconnected` message to the sync service and marks the
/// disconnect metric.
pub async fn on_peer_disconnected(&self, peer_id: PeerId) {
self.peers.write().await.remove(&peer_id);
self.send_to_sync(SyncMessage::PeerDisconnected { peer_id });
metrics::LIBP2P_HANDLE_PEER_DISCONNECTED.mark(1);
}
pub async fn on_rpc_request(
@ -174,6 +215,7 @@ impl Libp2pEventHandler {
match request {
Request::Status(status) => {
self.on_status_request(peer_id, request_id, status);
metrics::LIBP2P_HANDLE_STATUS_REQUEST.mark(1);
}
Request::GetChunks(request) => {
self.send_to_sync(SyncMessage::RequestChunks {
@ -181,6 +223,26 @@ impl Libp2pEventHandler {
request_id,
request,
});
metrics::LIBP2P_HANDLE_GET_CHUNKS_REQUEST.mark(1);
}
Request::AnnounceFile(announcement) => {
match ShardConfig::new(announcement.shard_id, announcement.num_shard) {
Ok(v) => {
self.file_location_cache.insert_peer_config(peer_id, v);
self.send_to_sync(SyncMessage::AnnounceFile {
peer_id,
request_id,
announcement,
});
}
Err(_) => self.send_to_network(NetworkMessage::ReportPeer {
peer_id,
action: PeerAction::Fatal,
source: ReportSource::RPC,
msg: "Invalid shard config in AnnounceFile RPC message",
}),
}
}
Request::DataByHash(_) => {
// ignore
@ -191,7 +253,10 @@ impl Libp2pEventHandler {
fn on_status_request(&self, peer_id: PeerId, request_id: PeerRequestId, status: StatusMessage) {
debug!(%peer_id, ?status, "Received Status request");
let status_message = StatusMessage { data: 456 }; // dummy status message
let network_id = self.network_globals.network_id();
let status_message = StatusMessage {
data: network_id.clone(),
};
debug!(%peer_id, ?status_message, "Sending Status response");
self.send_to_network(NetworkMessage::SendResponse {
@ -199,6 +264,12 @@ impl Libp2pEventHandler {
id: request_id,
response: Response::Status(status_message),
});
self.on_status_message(peer_id, status, network_id);
}
/// Handles a Status response from `peer_id` by passing it, together with the
/// local network id, to `on_status_message`.
fn on_status_response(&self, peer_id: PeerId, status: StatusMessage) {
let network_id = self.network_globals.network_id();
self.on_status_message(peer_id, status, network_id);
}
pub async fn on_rpc_response(
@ -212,10 +283,22 @@ impl Libp2pEventHandler {
match response {
Response::Status(status_message) => {
debug!(%peer_id, ?status_message, "Received Status response");
match request_id {
RequestId::Router(since) => {
metrics::LIBP2P_HANDLE_STATUS_RESPONSE.mark(1);
metrics::LIBP2P_HANDLE_STATUS_RESPONSE_LATENCY.update_since(since);
}
_ => unreachable!("All status response belong to router"),
}
self.on_status_response(peer_id, status_message);
}
Response::Chunks(response) => {
let request_id = match request_id {
RequestId::Sync(sync_id) => sync_id,
RequestId::Sync(since, sync_id) => {
metrics::LIBP2P_HANDLE_GET_CHUNKS_RESPONSE.mark(1);
metrics::LIBP2P_HANDLE_GET_CHUNKS_RESPONSE_LATENCY.update_since(since);
sync_id
}
_ => unreachable!("All Chunks responses belong to sync"),
};
@ -235,12 +318,16 @@ impl Libp2pEventHandler {
self.peers.write().await.update(&peer_id);
// Check if the failed RPC belongs to sync
if let RequestId::Sync(request_id) = request_id {
if let RequestId::Sync(since, request_id) = request_id {
self.send_to_sync(SyncMessage::RpcError {
peer_id,
request_id,
});
metrics::LIBP2P_HANDLE_RESPONSE_ERROR_LATENCY.update_since(since);
}
metrics::LIBP2P_HANDLE_RESPONSE_ERROR.mark(1);
}
pub async fn on_pubsub_message(
@ -254,27 +341,119 @@ impl Libp2pEventHandler {
match message {
PubsubMessage::ExampleMessage(_) => MessageAcceptance::Ignore,
PubsubMessage::FindFile(msg) => self.on_find_file(msg).await,
PubsubMessage::FindChunks(msg) => self.on_find_chunks(msg).await,
PubsubMessage::AnnounceFile(msg) => self.on_announce_file(propagation_source, msg),
PubsubMessage::AnnounceChunks(msg) => self.on_announce_chunks(propagation_source, msg),
PubsubMessage::NewFile(msg) => {
metrics::LIBP2P_HANDLE_PUBSUB_NEW_FILE.mark(1);
self.on_new_file(propagation_source, msg).await
}
PubsubMessage::FindFile(msg) => {
metrics::LIBP2P_HANDLE_PUBSUB_FIND_FILE.mark(1);
self.on_find_file(propagation_source, msg).await
}
PubsubMessage::FindChunks(msg) => {
metrics::LIBP2P_HANDLE_PUBSUB_FIND_CHUNKS.mark(1);
self.on_find_chunks(msg).await
}
PubsubMessage::AnnounceFile(msgs) => {
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE.mark(1);
for msg in msgs {
match self.on_announce_file(propagation_source, msg) {
MessageAcceptance::Reject => return MessageAcceptance::Reject,
MessageAcceptance::Ignore => return MessageAcceptance::Ignore,
_ => {}
}
}
MessageAcceptance::Accept
}
PubsubMessage::AnnounceChunks(msg) => {
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_CHUNKS.mark(1);
self.on_announce_chunks(propagation_source, msg)
}
PubsubMessage::AnnounceShardConfig(msg) => {
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_SHARD.mark(1);
self.on_announce_shard_config(propagation_source, msg)
}
}
}
async fn get_listen_addr_or_add(&self) -> Option<Multiaddr> {
/// Handles the `NewFile` pubsub message `msg` that was published by peer `from`.
///
/// Returns `Reject` when the announced shard config is invalid and `Ignore`
/// in every other case, including after the message has been handed to the
/// sync service.
async fn on_new_file(&self, from: PeerId, msg: NewFile) -> MessageAcceptance {
    // verify timestamp
    let elapsed = duration_since(
        msg.timestamp,
        metrics::LIBP2P_HANDLE_PUBSUB_NEW_FILE_LATENCY.clone(),
    );
    let timestamp_ok = elapsed >= TOLERABLE_DRIFT.neg() && elapsed <= *NEW_FILE_TIMEOUT;
    if !timestamp_ok {
        debug!(d = ?elapsed, ?msg, "Invalid timestamp, ignoring NewFile message");
        metrics::LIBP2P_HANDLE_PUBSUB_NEW_FILE_TIMEOUT.mark(1);
        self.send_to_network(NetworkMessage::ReportPeer {
            peer_id: from,
            action: PeerAction::LowToleranceError,
            source: ReportSource::Gossipsub,
            msg: "Received out of date NewFile message",
        });
        return MessageAcceptance::Ignore;
    }

    // verify announced shard config
    let Ok(announced_shard_config) = ShardConfig::new(msg.shard_id, msg.num_shard) else {
        return MessageAcceptance::Reject;
    };

    // ignore if shard config mismatch
    let local_shard_config = self.store.get_store().get_shard_config();
    if !local_shard_config.intersect(&announced_shard_config) {
        return MessageAcceptance::Ignore;
    }

    // ignore if already exists
    match self.store.check_tx_completed(msg.tx_id.seq).await {
        Ok(false) => {}
        Ok(true) => return MessageAcceptance::Ignore,
        Err(err) => {
            warn!(?err, tx_seq = %msg.tx_id.seq, "Failed to check tx completed");
            return MessageAcceptance::Ignore;
        }
    }

    // ignore if already pruned
    match self.store.check_tx_pruned(msg.tx_id.seq).await {
        Ok(false) => {}
        Ok(true) => return MessageAcceptance::Ignore,
        Err(err) => {
            warn!(?err, tx_seq = %msg.tx_id.seq, "Failed to check tx pruned");
            return MessageAcceptance::Ignore;
        }
    }

    // notify sync layer to handle in advance
    self.send_to_sync(SyncMessage::NewFile { from, msg });

    MessageAcceptance::Ignore
}
async fn construct_announced_ip(&self) -> Option<Multiaddr> {
// public address configured
if let Some(ip) = self.config.public_address {
let mut addr = Multiaddr::empty();
addr.push(ip.into());
addr.push(Protocol::Tcp(self.network_globals.listen_port_tcp()));
return Some(addr);
}
// public listen address
if let Some(addr) = self.get_listen_addr() {
return Some(addr);
}
// auto detect public IP address
let ipv4_addr = public_ip::addr_v4().await?;
let mut addr = Multiaddr::empty();
addr.push(Protocol::Ip4(ipv4_addr));
addr.push(Protocol::Tcp(self.network_globals.listen_port_tcp()));
addr.push(Protocol::P2p(self.network_globals.local_peer_id().into()));
self.network_globals
.listen_multiaddrs
@ -326,16 +505,23 @@ impl Libp2pEventHandler {
false
}
pub async fn construct_announce_file_message(&self, tx_id: TxID) -> Option<PubsubMessage> {
pub async fn construct_announce_file_message(
&self,
tx_ids: Vec<TxID>,
) -> Option<SignedAnnounceFile> {
if tx_ids.is_empty() {
return None;
}
let peer_id = *self.network_globals.peer_id.read();
let addr = self.get_listen_addr_or_add().await?;
let addr = self.construct_announced_ip().await?;
let timestamp = timestamp_now();
let shard_config = self.store.get_store().flow().get_shard_config();
let shard_config = self.store.get_store().get_shard_config();
let msg = AnnounceFile {
tx_id,
tx_ids,
num_shard: shard_config.num_shard,
shard_id: shard_config.shard_id,
peer_id: peer_id.into(),
@ -346,14 +532,14 @@ impl Libp2pEventHandler {
let mut signed = match SignedMessage::sign_message(msg, &self.local_keypair) {
Ok(signed) => signed,
Err(e) => {
error!(%tx_id.seq, %e, "Failed to sign AnnounceFile message");
error!(%e, "Failed to sign AnnounceFile message");
return None;
}
};
signed.resend_timestamp = timestamp;
Some(PubsubMessage::AnnounceFile(signed))
Some(signed)
}
pub async fn construct_announce_shard_config_message(
@ -361,7 +547,7 @@ impl Libp2pEventHandler {
shard_config: ShardConfig,
) -> Option<PubsubMessage> {
let peer_id = *self.network_globals.peer_id.read();
let addr = self.get_listen_addr_or_add().await?;
let addr = self.construct_announced_ip().await?;
let timestamp = timestamp_now();
let msg = AnnounceShardConfig {
@ -385,45 +571,95 @@ impl Libp2pEventHandler {
Some(PubsubMessage::AnnounceShardConfig(signed))
}
async fn on_find_file(&self, msg: FindFile) -> MessageAcceptance {
let FindFile { tx_id, timestamp } = msg;
async fn on_find_file(&self, from: PeerId, msg: FindFile) -> MessageAcceptance {
let FindFile {
tx_id, timestamp, ..
} = msg;
// verify timestamp
let d = duration_since(timestamp);
if d < TOLERABLE_DRIFT.neg() || d > *FIND_FILE_TIMEOUT {
debug!(%timestamp, "Invalid timestamp, ignoring FindFile message");
let d = duration_since(
timestamp,
metrics::LIBP2P_HANDLE_PUBSUB_FIND_FILE_LATENCY.clone(),
);
let timeout = if msg.neighbors_only {
*FIND_FILE_NEIGHBORS_TIMEOUT
} else {
*FIND_FILE_TIMEOUT
};
if d < TOLERABLE_DRIFT.neg() || d > timeout {
debug!(%timestamp, ?d, "Invalid timestamp, ignoring FindFile message");
metrics::LIBP2P_HANDLE_PUBSUB_FIND_FILE_TIMEOUT.mark(1);
if msg.neighbors_only {
self.send_to_network(NetworkMessage::ReportPeer {
peer_id: from,
action: PeerAction::LowToleranceError,
source: ReportSource::Gossipsub,
msg: "Received out of date FindFile message",
});
}
return MessageAcceptance::Ignore;
}
// verify announced shard config
let announced_shard_config = match ShardConfig::new(msg.shard_id, msg.num_shard) {
Ok(v) => v,
Err(_) => return MessageAcceptance::Reject,
};
// handle on shard config mismatch
let my_shard_config = self.store.get_store().get_shard_config();
if !my_shard_config.intersect(&announced_shard_config) {
return if msg.neighbors_only {
MessageAcceptance::Ignore
} else {
MessageAcceptance::Accept
};
}
// check if we have it
if matches!(self.store.check_tx_completed(tx_id.seq).await, Ok(true)) {
if let Ok(Some(tx)) = self.store.get_tx_by_seq_number(tx_id.seq).await {
if tx.id() == tx_id {
debug!(?tx_id, "Found file locally, responding to FindFile query");
trace!(?tx_id, "Found file locally, responding to FindFile query");
return match self.construct_announce_file_message(tx_id).await {
Some(msg) => {
self.publish(msg);
MessageAcceptance::Ignore
}
// propagate FindFile query to other nodes
None => MessageAcceptance::Accept,
};
if msg.neighbors_only {
// announce file via RPC to avoid flooding pubsub message
self.send_to_network(NetworkMessage::SendRequest {
peer_id: from,
request: Request::AnnounceFile(FileAnnouncement {
tx_id,
num_shard: my_shard_config.num_shard,
shard_id: my_shard_config.shard_id,
}),
request_id: RequestId::Router(Instant::now()),
});
} else if self.publish_file(tx_id).await.is_some() {
metrics::LIBP2P_HANDLE_PUBSUB_FIND_FILE_STORE.mark(1);
return MessageAcceptance::Ignore;
}
}
}
}
// do not forward to whole network if only find file from neighbor nodes
if msg.neighbors_only {
return MessageAcceptance::Ignore;
}
// try from cache
if let Some(mut msg) = self.file_location_cache.get_one(tx_id) {
debug!(?tx_id, "Found file in cache, responding to FindFile query");
trace!(?tx_id, "Found file in cache, responding to FindFile query");
msg.resend_timestamp = timestamp_now();
self.publish(PubsubMessage::AnnounceFile(msg));
self.publish_announcement(msg).await;
metrics::LIBP2P_HANDLE_PUBSUB_FIND_FILE_CACHE.mark(1);
return MessageAcceptance::Ignore;
}
// propagate FindFile query to other nodes
metrics::LIBP2P_HANDLE_PUBSUB_FIND_FILE_FORWARD.mark(1);
MessageAcceptance::Accept
}
@ -434,7 +670,7 @@ impl Libp2pEventHandler {
index_end: u64,
) -> Option<PubsubMessage> {
let peer_id = *self.network_globals.peer_id.read();
let addr = self.get_listen_addr_or_add().await?;
let addr = self.construct_announced_ip().await?;
let timestamp = timestamp_now();
let msg = AnnounceChunks {
@ -467,9 +703,12 @@ impl Libp2pEventHandler {
}
// verify timestamp
let d = duration_since(msg.timestamp);
let d = duration_since(
msg.timestamp,
metrics::LIBP2P_HANDLE_PUBSUB_FIND_CHUNKS_LATENCY.clone(),
);
if d < TOLERABLE_DRIFT.neg() || d > *FIND_FILE_TIMEOUT {
debug!(%msg.timestamp, "Invalid timestamp, ignoring FindFile message");
debug!(%msg.timestamp, ?d, "Invalid timestamp, ignoring FindChunks message");
return MessageAcceptance::Ignore;
}
@ -503,7 +742,7 @@ impl Libp2pEventHandler {
_ => return MessageAcceptance::Accept,
};
debug!(?msg, "Found chunks to respond FindChunks message");
trace!(?msg, "Found chunks to respond FindChunks message");
match self
.construct_announce_chunks_message(msg.tx_id, msg.index_start, msg.index_end)
@ -535,10 +774,14 @@ impl Libp2pEventHandler {
None => return false,
};
metrics::LIBP2P_VERIFY_ANNOUNCED_IP.mark(1);
let seen_ips: Vec<IpAddr> = match self.network_globals.peers.read().peer_info(peer_id) {
Some(v) => v.seen_ip_addresses().collect(),
None => {
debug!(%announced_ip, "Failed to verify announced IP address, no peer info found");
// ignore file announcement from un-seen peers
trace!(%announced_ip, "Failed to verify announced IP address, no peer info found");
metrics::LIBP2P_VERIFY_ANNOUNCED_IP_UNSEEN.mark(1);
return false;
}
};
@ -546,7 +789,9 @@ impl Libp2pEventHandler {
if seen_ips.iter().any(|x| *x == announced_ip) {
true
} else {
debug!(%announced_ip, ?seen_ips, "Failed to verify announced IP address, mismatch with seen ips");
// ignore file announcement if announced IP and seen IP mismatch
trace!(%announced_ip, ?seen_ips, "Failed to verify announced IP address, mismatch with seen ips");
metrics::LIBP2P_VERIFY_ANNOUNCED_IP_MISMATCH.mark(1);
false
}
}
@ -556,6 +801,9 @@ impl Libp2pEventHandler {
propagation_source: PeerId,
msg: SignedAnnounceFile,
) -> MessageAcceptance {
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_ANNOUNCEMENTS.mark(1);
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_FILES.mark(msg.tx_ids.len());
// verify message signature
if !verify_signature(&msg, &msg.peer_id, propagation_source) {
return MessageAcceptance::Reject;
@ -568,23 +816,41 @@ impl Libp2pEventHandler {
}
// verify announced ip address if required
if !self.config.private_ip_enabled && !self.verify_announced_address(&msg.peer_id, &addr) {
if !self.config.private_ip_enabled
&& self.config.check_announced_ip
&& !self.verify_announced_address(&msg.peer_id, &addr)
{
return MessageAcceptance::Reject;
}
// verify announced shard config
let announced_shard_config = match ShardConfig::new(msg.shard_id, msg.num_shard) {
Ok(v) => v,
Err(_) => return MessageAcceptance::Reject,
};
// propagate gossip to peers
let d = duration_since(msg.resend_timestamp);
let d = duration_since(
msg.resend_timestamp,
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_LATENCY.clone(),
);
if d < TOLERABLE_DRIFT.neg() || d > *ANNOUNCE_FILE_TIMEOUT {
debug!(%msg.resend_timestamp, "Invalid resend timestamp, ignoring AnnounceFile message");
debug!(%msg.resend_timestamp, ?d, "Invalid resend timestamp, ignoring AnnounceFile message");
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_TIMEOUT.mark(1);
return MessageAcceptance::Ignore;
}
// notify sync layer
self.send_to_sync(SyncMessage::AnnounceFileGossip {
tx_id: msg.tx_id,
peer_id: msg.peer_id.clone().into(),
addr,
});
// notify sync layer if shard config matches
let my_shard_config = self.store.get_store().get_shard_config();
if my_shard_config.intersect(&announced_shard_config) {
for tx_id in msg.tx_ids.iter() {
self.send_to_sync(SyncMessage::AnnounceFileGossip {
tx_id: *tx_id,
peer_id: msg.peer_id.clone().into(),
addr: addr.clone(),
});
}
}
// insert message to cache
self.file_location_cache.insert(msg);
@ -609,14 +875,20 @@ impl Libp2pEventHandler {
}
// verify announced ip address if required
if !self.config.private_ip_enabled && !self.verify_announced_address(&msg.peer_id, &addr) {
if !self.config.private_ip_enabled
&& self.config.check_announced_ip
&& !self.verify_announced_address(&msg.peer_id, &addr)
{
return MessageAcceptance::Reject;
}
// propagate gossip to peers
let d = duration_since(msg.resend_timestamp);
let d = duration_since(
msg.resend_timestamp,
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_SHARD_LATENCY.clone(),
);
if d < TOLERABLE_DRIFT.neg() || d > *ANNOUNCE_SHARD_CONFIG_TIMEOUT {
debug!(%msg.resend_timestamp, "Invalid resend timestamp, ignoring AnnounceShardConfig message");
debug!(%msg.resend_timestamp, ?d, "Invalid resend timestamp, ignoring AnnounceShardConfig message");
return MessageAcceptance::Ignore;
}
@ -655,14 +927,20 @@ impl Libp2pEventHandler {
}
// verify announced ip address if required
if !self.config.private_ip_enabled && !self.verify_announced_address(&msg.peer_id, &addr) {
if !self.config.private_ip_enabled
&& self.config.check_announced_ip
&& !self.verify_announced_address(&msg.peer_id, &addr)
{
return MessageAcceptance::Reject;
}
// propagate gossip to peers
let d = duration_since(msg.resend_timestamp);
let d = duration_since(
msg.resend_timestamp,
metrics::LIBP2P_HANDLE_PUBSUB_ANNOUNCE_CHUNKS_LATENCY.clone(),
);
if d < TOLERABLE_DRIFT.neg() || d > *ANNOUNCE_FILE_TIMEOUT {
debug!(%msg.resend_timestamp, "Invalid resend timestamp, ignoring AnnounceChunks message");
debug!(%msg.resend_timestamp, ?d, "Invalid resend timestamp, ignoring AnnounceChunks message");
return MessageAcceptance::Ignore;
}
@ -671,6 +949,56 @@ impl Libp2pEventHandler {
MessageAcceptance::Accept
}
/// Validates the network identity carried by a peer's `StatusMessage`.
///
/// `status.data` is compared against the expected `network_id`. When they
/// match nothing happens. When they differ, a warning is logged and a
/// `NetworkMessage::ReportPeer` with `PeerAction::Fatal` is sent to the
/// network service for `peer_id`.
fn on_status_message(
    &self,
    peer_id: PeerId,
    status: StatusMessage,
    network_id: NetworkIdentity,
) {
    // Compatible peer: nothing to report.
    if status.data == network_id {
        return;
    }

    warn!(%peer_id, ?network_id, ?status.data, "Report peer with incompatible network id");

    let report = NetworkMessage::ReportPeer {
        peer_id,
        action: PeerAction::Fatal,
        source: ReportSource::Gossipsub,
        msg: "Incompatible network id in StatusMessage",
    };
    self.send_to_network(report);
}
/// Queues `tx_id` in the file batcher and, if that completes a batch,
/// signs an announcement for the batch and forwards it to
/// `publish_announcement`.
///
/// Returns:
/// - `Some(true)` if an announcement batch was published to the network,
/// - `Some(false)` if the file (or the resulting announcement) was only
///   queued for a later batch,
/// - `None` if the announcement message could not be constructed
///   (`construct_announce_file_message` returned `None`).
async fn publish_file(&self, tx_id: TxID) -> Option<bool> {
    // Take the batch in its own statement so the write guard on
    // `file_batcher` is dropped here. As a `match` scrutinee temporary the
    // guard would stay alive across the awaits below.
    let maybe_batch = self.file_batcher.write().await.add(tx_id);

    match maybe_batch {
        Some(batch) => {
            let announcement = self.construct_announce_file_message(batch).await?;
            Some(self.publish_announcement(announcement).await)
        }
        // Batch not full yet; nothing to publish.
        None => Some(false),
    }
}
/// Adds a signed file announcement to the announcement batcher.
///
/// If the batcher hands back a batch, it is published as a single
/// `PubsubMessage::AnnounceFile` and `true` is returned. Otherwise the
/// announcement stays queued in the batcher and `false` is returned.
async fn publish_announcement(&self, announcement: SignedAnnounceFile) -> bool {
    if let Some(ready) = self.announcement_batcher.write().await.add(announcement) {
        self.publish(PubsubMessage::AnnounceFile(ready));
        true
    } else {
        false
    }
}
/// Publish expired file announcements.
///
/// First expires the file batcher: any pending tx ids it returns are signed
/// into an announcement and pushed into the announcement batcher. Then the
/// announcement batcher is expired and whatever it returns is published as a
/// `PubsubMessage::AnnounceFile`.
pub async fn expire_batcher(&self) {
    // Each batch is taken in its own `let` so the batcher's write guard is
    // dropped at the end of that statement. As an `if let` scrutinee
    // temporary the guard would stay alive across the awaits in the body.
    let expired_files = self.file_batcher.write().await.expire();
    if let Some(batch) = expired_files {
        // A `None` here means the announcement could not be constructed;
        // the batch is dropped in that case, as before.
        if let Some(announcement) = self.construct_announce_file_message(batch).await {
            self.publish_announcement(announcement).await;
        }
    }

    let expired_announcements = self.announcement_batcher.write().await.expire();
    if let Some(batch) = expired_announcements {
        self.publish(PubsubMessage::AnnounceFile(batch));
    }
}
}
#[cfg(test)]
@ -682,10 +1010,12 @@ mod tests {
use network::{
discovery::{CombinedKey, ConnectionId},
discv5::enr::EnrBuilder,
new_network_channel,
rpc::{GetChunksRequest, StatusMessage, SubstreamId},
types::FindFile,
CombinedKeyExt, Keypair, MessageAcceptance, MessageId, Multiaddr, NetworkGlobals,
NetworkMessage, PeerId, PubsubMessage, Request, RequestId, Response, SyncId,
NetworkMessage, NetworkReceiver, PeerId, PubsubMessage, Request, RequestId, Response,
SyncId,
};
use shared_types::{timestamp_now, ChunkArray, ChunkArrayWithProof, FlowRangeProof, TxID};
use storage::{
@ -707,8 +1037,8 @@ mod tests {
runtime: TestRuntime,
network_globals: Arc<NetworkGlobals>,
keypair: Keypair,
network_send: mpsc::UnboundedSender<NetworkMessage>,
network_recv: mpsc::UnboundedReceiver<NetworkMessage>,
network_send: NetworkSender,
network_recv: NetworkReceiver,
sync_send: SyncSender,
sync_recv: SyncReceiver,
chunk_pool_send: mpsc::UnboundedSender<ChunkPoolMessage>,
@ -722,9 +1052,10 @@ mod tests {
fn default() -> Self {
let runtime = TestRuntime::default();
let (network_globals, keypair) = Context::new_network_globals();
let (network_send, network_recv) = mpsc::unbounded_channel();
let (sync_send, sync_recv) = channel::Channel::unbounded();
let (network_send, network_recv) = new_network_channel();
let (sync_send, sync_recv) = channel::Channel::unbounded("test");
let (chunk_pool_send, _chunk_pool_recv) = mpsc::unbounded_channel();
let store = LogManager::memorydb(LogConfig::default()).unwrap();
Self {
runtime,
@ -762,7 +1093,14 @@ mod tests {
let keypair = Keypair::generate_secp256k1();
let enr_key = CombinedKey::from_libp2p(&keypair).unwrap();
let enr = EnrBuilder::new("v4").build(&enr_key).unwrap();
let network_globals = NetworkGlobals::new(enr, 30000, 30000, vec![]);
let network_globals = NetworkGlobals::new(
enr,
30000,
30000,
vec![],
Default::default(),
Default::default(),
);
let listen_addr: Multiaddr = "/ip4/127.0.0.1/tcp/30000".parse().unwrap();
network_globals.listen_multiaddrs.write().push(listen_addr);
@ -779,7 +1117,7 @@ mod tests {
}) => {
assert_eq!(peer_id, expected_peer_id);
assert!(matches!(request, Request::Status(..)));
assert!(matches!(request_id, RequestId::Router))
assert!(matches!(request_id, RequestId::Router(..)))
}
Ok(_) => panic!("Unexpected network message type received"),
Err(e) => panic!("No network message received: {:?}", e),
@ -791,7 +1129,7 @@ mod tests {
Ok(NetworkMessage::Publish { messages }) => {
assert_eq!(messages.len(), 1);
assert!(
matches!(&messages[0], PubsubMessage::AnnounceFile(file) if file.tx_id == expected_tx_id)
matches!(&messages[0], PubsubMessage::AnnounceFile(files) if files[0].tx_ids[0] == expected_tx_id)
);
}
Ok(_) => panic!("Unexpected network message type received"),
@ -876,7 +1214,9 @@ mod tests {
let alice = PeerId::random();
let req_id = (ConnectionId::new(4), SubstreamId(12));
let request = Request::Status(StatusMessage { data: 412 });
let request = Request::Status(StatusMessage {
data: Default::default(),
});
handler.on_rpc_request(alice, req_id, request).await;
match ctx.network_recv.try_recv() {
@ -943,7 +1283,7 @@ mod tests {
handler
.on_rpc_response(
alice,
RequestId::Sync(SyncId::SerialSync { tx_id: id }),
RequestId::Sync(Instant::now(), SyncId::SerialSync { tx_id: id }),
Response::Chunks(data.clone()),
)
.await;
@ -971,7 +1311,10 @@ mod tests {
let alice = PeerId::random();
let id = TxID::random_hash(555);
handler
.on_rpc_error(alice, RequestId::Sync(SyncId::SerialSync { tx_id: id }))
.on_rpc_error(
alice,
RequestId::Sync(Instant::now(), SyncId::SerialSync { tx_id: id }),
)
.await;
match ctx.sync_recv.try_recv() {
@ -994,7 +1337,13 @@ mod tests {
) -> MessageAcceptance {
let (alice, bob) = (PeerId::random(), PeerId::random());
let id = MessageId::new(b"dummy message");
let message = PubsubMessage::FindFile(FindFile { tx_id, timestamp });
let message = PubsubMessage::FindFile(FindFile {
tx_id,
num_shard: 1,
shard_id: 0,
neighbors_only: false,
timestamp,
});
handler.on_pubsub_message(alice, bob, &id, message).await
}
@ -1076,18 +1425,13 @@ mod tests {
let tx_id = TxID::random_hash(412);
// change signed message
let message = match handler
.construct_announce_file_message(tx_id)
let mut file = handler
.construct_announce_file_message(vec![tx_id])
.await
.unwrap()
{
PubsubMessage::AnnounceFile(mut file) => {
let malicious_addr: Multiaddr = "/ip4/127.0.0.38/tcp/30000".parse().unwrap();
file.inner.at = malicious_addr.into();
PubsubMessage::AnnounceFile(file)
}
_ => panic!("Unexpected pubsub message type"),
};
.unwrap();
let malicious_addr: Multiaddr = "/ip4/127.0.0.38/tcp/30000".parse().unwrap();
file.inner.at = malicious_addr.into();
let message = PubsubMessage::AnnounceFile(vec![file]);
// failed to verify signature
let result = handler.on_pubsub_message(alice, bob, &id, message).await;
@ -1103,7 +1447,11 @@ mod tests {
let (alice, bob) = (PeerId::random(), PeerId::random());
let id = MessageId::new(b"dummy message");
let tx = TxID::random_hash(412);
let message = handler.construct_announce_file_message(tx).await.unwrap();
let message = handler
.construct_announce_file_message(vec![tx])
.await
.unwrap();
let message = PubsubMessage::AnnounceFile(vec![message]);
// succeeded to handle
let result = handler.on_pubsub_message(alice, bob, &id, message).await;

View File

@ -0,0 +1,82 @@
use std::sync::Arc;
use metrics::{register_meter, register_meter_with_group, Histogram, Meter, Sample};
// Meters and histograms with `router_`-prefixed metric names, declared as
// lazily initialised statics.
//
// Conventions visible in this block:
// - `register_meter(name)` registers a stand-alone meter.
// - `register_meter_with_group(group, name)` registers several meters under
//   one group name (e.g. "qps", "timeout", "store" under the find_file group).
// - Histograms are created with `Sample::ExpDecay(0.015)` and a trailing
//   argument of 1024 (presumably the sample reservoir size - TODO confirm
//   against the `metrics` crate).
lazy_static::lazy_static! {
// service
// One meter for every routed `NetworkMessage`, plus one per message variant.
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE: Arc<dyn Meter> = register_meter("router_service_route_network_message");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_SEND_REQUEST: Arc<dyn Meter> = register_meter("router_service_route_network_message_send_request");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_SEND_RESPONSE: Arc<dyn Meter> = register_meter("router_service_route_network_message_send_response");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_SEND_ERROR_RESPONSE: Arc<dyn Meter> = register_meter("router_service_route_network_message_send_error_response");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_PUBLISH: Arc<dyn Meter> = register_meter("router_service_route_network_message_publish");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_REPORT_PEER: Arc<dyn Meter> = register_meter("router_service_route_network_message_report_peer");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_GOODBYE_PEER: Arc<dyn Meter> = register_meter("router_service_route_network_message_goodbye_peer");
// NOTE(review): "DAIL" is a misspelling of "DIAL". It is left untouched
// because these identifiers are used by the router service and the metric
// name strings may be consumed externally - rename only in a coordinated change.
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER: Arc<dyn Meter> = register_meter_with_group("router_service_route_network_message_dail_peer", "all");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER_ALREADY: Arc<dyn Meter> = register_meter_with_group("router_service_route_network_message_dail_peer", "already");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER_NEW_OK: Arc<dyn Meter> = register_meter_with_group("router_service_route_network_message_dail_peer", "ok");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER_NEW_FAIL: Arc<dyn Meter> = register_meter_with_group("router_service_route_network_message_dail_peer", "fail");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_ANNOUNCE_LOCAL_FILE: Arc<dyn Meter> = register_meter("router_service_route_network_message_announce_local_file");
pub static ref SERVICE_ROUTE_NETWORK_MESSAGE_UPNP: Arc<dyn Meter> = register_meter("router_service_route_network_message_upnp");
// Heartbeat: number of expired peers, and how many disconnects succeeded or failed.
pub static ref SERVICE_EXPIRED_PEERS: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register("router_service_expired_peers", 1024);
pub static ref SERVICE_EXPIRED_PEERS_DISCONNECT_OK: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register("router_service_expired_peers_disconnect_ok", 1024);
pub static ref SERVICE_EXPIRED_PEERS_DISCONNECT_FAIL: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register("router_service_expired_peers_disconnect_fail", 1024);
// libp2p_event_handler
// libp2p_event_handler: peer connection
pub static ref LIBP2P_HANDLE_PEER_CONNECTED_OUTGOING: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_peer_connected", "outgoing");
pub static ref LIBP2P_HANDLE_PEER_CONNECTED_INCOMING: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_peer_connected", "incoming");
pub static ref LIBP2P_HANDLE_PEER_DISCONNECTED: Arc<dyn Meter> = register_meter("router_libp2p_handle_peer_disconnected");
// libp2p_event_handler: status
pub static ref LIBP2P_SEND_STATUS: Arc<dyn Meter> = register_meter("router_libp2p_send_status");
pub static ref LIBP2P_HANDLE_STATUS_REQUEST: Arc<dyn Meter> = register_meter("router_libp2p_handle_status_request");
pub static ref LIBP2P_HANDLE_STATUS_RESPONSE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_status_response", "qps");
pub static ref LIBP2P_HANDLE_STATUS_RESPONSE_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_status_response", "latency", 1024);
// libp2p_event_handler: get chunks
pub static ref LIBP2P_HANDLE_GET_CHUNKS_REQUEST: Arc<dyn Meter> = register_meter("router_libp2p_handle_get_chunks_request");
pub static ref LIBP2P_HANDLE_GET_CHUNKS_RESPONSE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_get_chunks_response", "qps");
pub static ref LIBP2P_HANDLE_GET_CHUNKS_RESPONSE_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_get_chunks_response", "latency", 1024);
// libp2p_event_handler: rpc errors
pub static ref LIBP2P_HANDLE_RESPONSE_ERROR: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_response_error", "qps");
pub static ref LIBP2P_HANDLE_RESPONSE_ERROR_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_response_error", "latency", 1024);
// libp2p_event_handler: new file
pub static ref LIBP2P_HANDLE_PUBSUB_NEW_FILE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_new_file", "qps");
pub static ref LIBP2P_HANDLE_PUBSUB_NEW_FILE_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_pubsub_new_file", "latency", 1024);
pub static ref LIBP2P_HANDLE_PUBSUB_NEW_FILE_TIMEOUT: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_new_file", "timeout");
// libp2p_event_handler: find & announce file
// FindFile outcomes: "timeout" (stale timestamp), "store" (answered from the
// local store), "cache" (answered from the file location cache),
// "forward" (query propagated to other nodes).
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_FILE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_find_file", "qps");
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_FILE_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_pubsub_find_file", "latency", 1024);
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_FILE_TIMEOUT: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_find_file", "timeout");
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_FILE_STORE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_find_file", "store");
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_FILE_CACHE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_find_file", "cache");
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_FILE_FORWARD: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_find_file", "forward");
// AnnounceFile: "announcements" is marked once per received message, "files"
// by the number of tx ids the message carries.
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_announce_file", "qps");
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_pubsub_announce_file", "latency", 1024);
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_TIMEOUT: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_announce_file", "timeout");
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_ANNOUNCEMENTS: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_announce_file", "announcements");
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_FILE_FILES: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_announce_file", "files");
// libp2p_event_handler: find & announce chunks
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_CHUNKS: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_find_chunks", "qps");
pub static ref LIBP2P_HANDLE_PUBSUB_FIND_CHUNKS_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_pubsub_find_chunks", "latency", 1024);
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_CHUNKS: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_announce_chunks", "qps");
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_CHUNKS_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_pubsub_announce_chunks", "latency", 1024);
// libp2p_event_handler: announce shard config
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_SHARD: Arc<dyn Meter> = register_meter_with_group("router_libp2p_handle_pubsub_announce_shard", "qps");
pub static ref LIBP2P_HANDLE_PUBSUB_ANNOUNCE_SHARD_LATENCY: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register_with_group("router_libp2p_handle_pubsub_announce_shard", "latency", 1024);
// libp2p_event_handler: verify IP address
// "unseen": no peer info found for the announcing peer; "mismatch": the
// announced IP is not among the IPs seen for that peer.
pub static ref LIBP2P_VERIFY_ANNOUNCED_IP: Arc<dyn Meter> = register_meter("router_libp2p_verify_announced_ip");
pub static ref LIBP2P_VERIFY_ANNOUNCED_IP_UNSEEN: Arc<dyn Meter> = register_meter("router_libp2p_verify_announced_ip_unseen");
pub static ref LIBP2P_VERIFY_ANNOUNCED_IP_MISMATCH: Arc<dyn Meter> = register_meter("router_libp2p_verify_announced_ip_mismatch");
// batcher
pub static ref BATCHER_ANNOUNCE_FILE_SIZE: Arc<dyn Histogram> = Sample::ExpDecay(0.015).register("router_batcher_announce_file_size", 1024);
}

View File

@ -1,3 +1,4 @@
use crate::metrics;
use crate::Config;
use crate::{libp2p_event_handler::Libp2pEventHandler, peer_manager::PeerManager};
use chunk_pool::ChunkPoolMessage;
@ -5,10 +6,11 @@ use file_location_cache::FileLocationCache;
use futures::{channel::mpsc::Sender, prelude::*};
use miner::MinerMessage;
use network::{
BehaviourEvent, Keypair, Libp2pEvent, NetworkGlobals, NetworkMessage, RequestId,
Service as LibP2PService, Swarm,
types::NewFile, BehaviourEvent, Keypair, Libp2pEvent, NetworkGlobals, NetworkMessage,
NetworkReceiver, NetworkSender, PubsubMessage, RequestId, Service as LibP2PService, Swarm,
};
use pruner::PrunerMessage;
use shared_types::timestamp_now;
use std::sync::Arc;
use storage::log_store::Store as LogStore;
use storage_async::Store;
@ -29,7 +31,7 @@ pub struct RouterService {
network_globals: Arc<NetworkGlobals>,
/// The receiver channel for Zgs to communicate with the network service.
network_recv: mpsc::UnboundedReceiver<NetworkMessage>,
network_recv: NetworkReceiver,
/// The receiver channel for Zgs to communicate with the pruner service.
pruner_recv: Option<mpsc::UnboundedReceiver<PrunerMessage>>,
@ -43,6 +45,8 @@ pub struct RouterService {
/// Stores potentially created UPnP mappings to be removed on shutdown. (TCP port and UDP
/// port).
upnp_mappings: (Option<u16>, Option<u16>),
store: Arc<dyn LogStore>,
}
impl RouterService {
@ -51,8 +55,8 @@ impl RouterService {
executor: task_executor::TaskExecutor,
libp2p: LibP2PService<RequestId>,
network_globals: Arc<NetworkGlobals>,
network_recv: mpsc::UnboundedReceiver<NetworkMessage>,
network_send: mpsc::UnboundedSender<NetworkMessage>,
network_recv: NetworkReceiver,
network_send: NetworkSender,
sync_send: SyncSender,
_miner_send: Option<broadcast::Sender<MinerMessage>>,
chunk_pool_send: UnboundedSender<ChunkPoolMessage>,
@ -62,7 +66,6 @@ impl RouterService {
local_keypair: Keypair,
config: Config,
) {
let store = Store::new(store, executor.clone());
let peers = Arc::new(RwLock::new(PeerManager::new(config.clone())));
// create the network service and spawn the task
@ -80,11 +83,12 @@ impl RouterService {
sync_send,
chunk_pool_send,
local_keypair,
store,
Store::new(store.clone(), executor.clone()),
file_location_cache,
peers,
),
upnp_mappings: (None, None),
store,
};
// spawn service
@ -94,7 +98,8 @@ impl RouterService {
}
async fn main(mut self, mut shutdown_sender: Sender<ShutdownReason>) {
let mut heartbeat = interval(self.config.heartbeat_interval);
let mut heartbeat_service = interval(self.config.heartbeat_interval);
let mut heartbeat_batcher = interval(self.config.batcher_timeout);
loop {
tokio::select! {
@ -106,8 +111,11 @@ impl RouterService {
Some(msg) = Self::try_recv(&mut self.pruner_recv) => self.on_pruner_msg(msg).await,
// heartbeat
_ = heartbeat.tick() => self.on_heartbeat().await,
// heartbeat for service
_ = heartbeat_service.tick() => self.on_heartbeat().await,
// heartbeat for expire file batcher
_ = heartbeat_batcher.tick() => self.libp2p_event_handler.expire_batcher().await,
}
}
}
@ -224,6 +232,8 @@ impl RouterService {
) {
trace!(?msg, "Received new message");
metrics::SERVICE_ROUTE_NETWORK_MESSAGE.mark(1);
match msg {
NetworkMessage::SendRequest {
peer_id,
@ -231,6 +241,7 @@ impl RouterService {
request_id,
} => {
self.libp2p.send_request(peer_id, request_id, request);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_SEND_REQUEST.mark(1);
}
NetworkMessage::SendResponse {
peer_id,
@ -238,6 +249,7 @@ impl RouterService {
id,
} => {
self.libp2p.send_response(peer_id, id, response);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_SEND_RESPONSE.mark(1);
}
NetworkMessage::SendErrorResponse {
peer_id,
@ -246,6 +258,7 @@ impl RouterService {
reason,
} => {
self.libp2p.respond_with_error(peer_id, id, error, reason);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_SEND_ERROR_RESPONSE.mark(1);
}
NetworkMessage::Publish { messages } => {
if self.libp2p.swarm.connected_peers().next().is_none() {
@ -257,7 +270,7 @@ impl RouterService {
break;
}
Err(err) => {
debug!(address = %multiaddr, error = ?err, "Could not connect to peer")
debug!(address = %multiaddr, error = ?err, "Could not connect to peer");
}
};
}
@ -275,46 +288,64 @@ impl RouterService {
"Sending pubsub messages",
);
self.libp2p.swarm.behaviour_mut().publish(messages);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_PUBLISH.mark(1);
}
NetworkMessage::ReportPeer {
peer_id,
action,
source,
msg,
} => self.libp2p.report_peer(&peer_id, action, source, msg),
} => {
self.libp2p.report_peer(&peer_id, action, source, msg);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_REPORT_PEER.mark(1);
}
NetworkMessage::GoodbyePeer {
peer_id,
reason,
source,
} => self.libp2p.goodbye_peer(&peer_id, reason, source),
} => {
self.libp2p.goodbye_peer(&peer_id, reason, source);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_GOODBYE_PEER.mark(1);
}
NetworkMessage::DialPeer { address, peer_id } => {
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER.mark(1);
if self.libp2p.swarm.is_connected(&peer_id) {
self.libp2p_event_handler
.send_to_sync(SyncMessage::PeerConnected { peer_id });
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER_ALREADY.mark(1);
} else {
match Swarm::dial(&mut self.libp2p.swarm, address.clone()) {
Ok(()) => debug!(%address, "Dialing libp2p peer"),
Ok(()) => {
debug!(%address, "Dialing libp2p peer");
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER_NEW_OK.mark(1);
}
Err(err) => {
info!(%address, error = ?err, "Failed to dial peer");
self.libp2p_event_handler
.send_to_sync(SyncMessage::DailFailed { peer_id, err });
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_DAIL_PEER_NEW_FAIL.mark(1);
}
};
}
}
NetworkMessage::AnnounceLocalFile { tx_id } => {
if let Some(msg) = self
.libp2p_event_handler
.construct_announce_file_message(tx_id)
.await
{
self.libp2p_event_handler.publish(msg);
}
let shard_config = self.store.get_shard_config();
let msg = PubsubMessage::NewFile(NewFile {
tx_id,
num_shard: shard_config.num_shard,
shard_id: shard_config.shard_id,
timestamp: timestamp_now(),
});
self.libp2p.swarm.behaviour_mut().publish(vec![msg]);
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_ANNOUNCE_LOCAL_FILE.mark(1);
}
NetworkMessage::UPnPMappingEstablished {
tcp_socket,
udp_socket,
} => {
metrics::SERVICE_ROUTE_NETWORK_MESSAGE_UPNP.mark(1);
self.upnp_mappings = (tcp_socket.map(|s| s.port()), udp_socket.map(|s| s.port()));
// If there is an external TCP port update, modify our local ENR.
if let Some(tcp_socket) = tcp_socket {
@ -362,16 +393,30 @@ impl RouterService {
async fn on_heartbeat(&mut self) {
let expired_peers = self.peers.write().await.expired_peers();
trace!("heartbeat, expired peers = {:?}", expired_peers.len());
let num_expired_peers = expired_peers.len() as u64;
metrics::SERVICE_EXPIRED_PEERS.update(num_expired_peers);
if num_expired_peers > 0 {
debug!(%num_expired_peers, "Heartbeat, remove expired peers")
}
let mut num_succeeded = 0;
let mut num_failed = 0;
for peer_id in expired_peers {
// async operation, once peer disconnected, swarm event `PeerDisconnected`
// will be polled to handle in advance.
match self.libp2p.swarm.disconnect_peer_id(peer_id) {
Ok(_) => debug!(%peer_id, "Peer expired and disconnect it"),
Err(_) => error!(%peer_id, "Peer expired but failed to disconnect"),
Ok(_) => {
debug!(%peer_id, "Peer expired and disconnect it");
num_succeeded += 1;
}
Err(_) => {
debug!(%peer_id, "Peer expired but failed to disconnect");
num_failed += 1;
}
}
}
metrics::SERVICE_EXPIRED_PEERS_DISCONNECT_OK.update(num_succeeded);
metrics::SERVICE_EXPIRED_PEERS_DISCONNECT_FAIL.update(num_failed);
}
}

View File

@ -26,3 +26,4 @@ storage-async = { path = "../storage-async" }
merkle_light = { path = "../../common/merkle_light" }
merkle_tree = { path = "../../common/merkle_tree"}
futures-channel = "^0.3"
metrics = { workspace = true }

View File

@ -1,8 +1,8 @@
use crate::types::{LocationInfo, NetworkInfo, PeerInfo};
use jsonrpsee::core::RpcResult;
use jsonrpsee::proc_macros::rpc;
use std::collections::HashMap;
use sync::FileSyncInfo;
use std::collections::{BTreeMap, HashMap};
use sync::{FileSyncInfo, SyncServiceState};
#[rpc(server, client, namespace = "admin")]
pub trait Rpc {
@ -27,6 +27,9 @@ pub trait Rpc {
#[method(name = "terminateSync")]
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<bool>;
#[method(name = "getSyncServiceState")]
async fn get_sync_service_state(&self) -> RpcResult<SyncServiceState>;
#[method(name = "getSyncStatus")]
async fn get_sync_status(&self, tx_seq: u64) -> RpcResult<String>;
@ -40,5 +43,15 @@ pub trait Rpc {
async fn get_peers(&self) -> RpcResult<HashMap<String, PeerInfo>>;
#[method(name = "getFileLocation")]
async fn get_file_location(&self, tx_seq: u64) -> RpcResult<Option<Vec<LocationInfo>>>;
async fn get_file_location(
&self,
tx_seq: u64,
all_shards: bool,
) -> RpcResult<Option<Vec<LocationInfo>>>;
#[method(name = "getMetrics")]
async fn get_metrics(
&self,
maybe_prefix: Option<String>,
) -> RpcResult<BTreeMap<String, String>>;
}

View File

@ -4,11 +4,12 @@ use crate::{error, Context};
use futures::prelude::*;
use jsonrpsee::core::async_trait;
use jsonrpsee::core::RpcResult;
use metrics::{DEFAULT_GROUPING_REGISTRY, DEFAULT_REGISTRY};
use network::{multiaddr::Protocol, Multiaddr};
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::net::IpAddr;
use storage::config::all_shards_available;
use sync::{FileSyncInfo, SyncRequest, SyncResponse};
use sync::{FileSyncInfo, SyncRequest, SyncResponse, SyncServiceState};
use task_executor::ShutdownReason;
pub struct RpcServerImpl {
@ -119,6 +120,17 @@ impl RpcServer for RpcServerImpl {
}
}
async fn get_sync_service_state(&self) -> RpcResult<SyncServiceState> {
info!("admin_getSyncServiceState()");
let response = self.ctx.request_sync(SyncRequest::SyncState).await?;
match response {
SyncResponse::SyncState { state } => Ok(state),
_ => Err(error::internal_error("unexpected response type")),
}
}
#[tracing::instrument(skip(self), err)]
async fn get_sync_status(&self, tx_seq: u64) -> RpcResult<String> {
info!("admin_getSyncStatus({tx_seq})");
@ -185,7 +197,13 @@ impl RpcServer for RpcServerImpl {
.collect())
}
async fn get_file_location(&self, tx_seq: u64) -> RpcResult<Option<Vec<LocationInfo>>> {
async fn get_file_location(
&self,
tx_seq: u64,
all_shards: bool,
) -> RpcResult<Option<Vec<LocationInfo>>> {
info!("admin_getFileLocation()");
let tx = match self.ctx.log_store.get_tx_by_seq_number(tx_seq).await? {
Some(tx) => tx,
None => {
@ -221,10 +239,48 @@ impl RpcServer for RpcServerImpl {
shard_config: shard_config.unwrap(),
})
.collect();
if all_shards_available(info.iter().map(|info| info.shard_config).collect()) {
if !all_shards || all_shards_available(info.iter().map(|info| info.shard_config).collect())
{
Ok(Some(info))
} else {
Ok(None)
}
}
/// Handles `admin_getMetrics`: collects metrics into a name-sorted map.
///
/// Each entry maps a metric name to the string `"<type> <value>"`. Metrics
/// from the grouping registry are reported under `"<group>.<metric>"`.
///
/// When `maybe_prefix` is `Some`, only metrics whose (full) name starts with
/// that prefix are returned; `None` returns everything.
async fn get_metrics(
    &self,
    maybe_prefix: Option<String>,
) -> RpcResult<BTreeMap<String, String>> {
    let prefix = maybe_prefix.as_deref();
    let mut result = BTreeMap::new();

    // Stand-alone metrics are keyed by their registered name.
    for (name, metric) in DEFAULT_REGISTRY.read().get_all() {
        if prefix.map_or(true, |p| name.starts_with(p)) {
            let rendered = format!("{} {}", metric.get_type(), metric.get_value());
            result.insert(name.clone(), rendered);
        }
    }

    // Grouped metrics are keyed by "<group>.<metric>", and the prefix filter
    // is applied to that combined name.
    for (group_name, metrics) in DEFAULT_GROUPING_REGISTRY.read().get_all() {
        for (metric_name, metric) in metrics.iter() {
            let name = format!("{}.{}", group_name, metric_name);
            if prefix.map_or(true, |p| name.starts_with(p)) {
                let rendered = format!("{} {}", metric.get_type(), metric.get_value());
                result.insert(name, rendered);
            }
        }
    }

    Ok(result)
}
}

View File

@ -1,11 +1,27 @@
use std::net::SocketAddr;
use std::{net::SocketAddr, str::FromStr};
#[derive(Clone)]
use serde::{Deserialize, Serialize};
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
pub enabled: bool,
pub listen_address: SocketAddr,
pub listen_address_admin: Option<SocketAddr>,
pub listen_address_admin: SocketAddr,
pub chunks_per_segment: usize,
pub max_request_body_size: u32,
pub max_cache_file_size: usize,
}
impl Default for Config {
    /// Default RPC configuration: enabled, public endpoint on `0.0.0.0:5678`,
    /// admin endpoint on `127.0.0.1:5679`, 1024 chunks per segment, and the
    /// size limits noted inline.
    fn default() -> Self {
        // Both literals are valid socket addresses, so parsing cannot fail.
        let listen_address = SocketAddr::from_str("0.0.0.0:5678").unwrap();
        let listen_address_admin = SocketAddr::from_str("127.0.0.1:5679").unwrap();

        Self {
            enabled: true,
            listen_address,
            listen_address_admin,
            chunks_per_segment: 1024,
            max_request_body_size: 100 * 1024 * 1024, // 100MB
            max_cache_file_size: 10 * 1024 * 1024,    // 10MB
        }
    }
}

View File

@ -17,16 +17,13 @@ use file_location_cache::FileLocationCache;
use futures::channel::mpsc::Sender;
use jsonrpsee::core::RpcResult;
use jsonrpsee::http_server::{HttpServerBuilder, HttpServerHandle};
use network::NetworkGlobals;
use network::NetworkMessage;
use network::{NetworkGlobals, NetworkMessage, NetworkSender};
use std::error::Error;
use std::net::SocketAddr;
use std::sync::Arc;
use storage_async::Store;
use sync::{SyncRequest, SyncResponse, SyncSender};
use task_executor::ShutdownReason;
use tokio::sync::broadcast;
use tokio::sync::mpsc::UnboundedSender;
use zgs::RpcServer as ZgsRpcServer;
use zgs_miner::MinerMessage;
@ -43,7 +40,7 @@ pub struct Context {
pub config: RPCConfig,
pub file_location_cache: Arc<FileLocationCache>,
pub network_globals: Arc<NetworkGlobals>,
pub network_send: UnboundedSender<NetworkMessage>,
pub network_send: NetworkSender,
pub sync_send: SyncSender,
pub chunk_pool: Arc<MemoryChunkPool>,
pub log_store: Arc<Store>,
@ -69,9 +66,10 @@ impl Context {
pub async fn run_server(
ctx: Context,
) -> Result<(HttpServerHandle, Option<HttpServerHandle>), Box<dyn Error>> {
let handles = match ctx.config.listen_address_admin {
Some(listen_addr_private) => run_server_public_private(ctx, listen_addr_private).await?,
None => (run_server_all(ctx).await?, None),
let handles = if ctx.config.listen_address.port() != ctx.config.listen_address_admin.port() {
run_server_public_private(ctx).await?
} else {
(run_server_all(ctx).await?, None)
};
info!("Server started");
@ -107,7 +105,6 @@ async fn run_server_all(ctx: Context) -> Result<HttpServerHandle, Box<dyn Error>
/// Run 2 RPC servers (public & private) for different namespace RPCs.
async fn run_server_public_private(
ctx: Context,
listen_addr_private: SocketAddr,
) -> Result<(HttpServerHandle, Option<HttpServerHandle>), Box<dyn Error>> {
// public rpc
let zgs = (zgs::RpcServerImpl { ctx: ctx.clone() }).into_rpc();
@ -127,7 +124,7 @@ async fn run_server_public_private(
.start(zgs)?;
let handle_private = server_builder(ctx.clone())
.build(listen_addr_private)
.build(ctx.config.listen_address_admin)
.await?
.start(admin)?;

View File

@ -7,7 +7,8 @@ use merkle_tree::RawLeafSha3Algorithm;
use network::Multiaddr;
use serde::{Deserialize, Serialize};
use shared_types::{
compute_padded_chunk_size, compute_segment_size, DataRoot, FileProof, Transaction, CHUNK_SIZE,
compute_padded_chunk_size, compute_segment_size, DataRoot, FileProof, NetworkIdentity,
Transaction, CHUNK_SIZE,
};
use std::collections::HashSet;
use std::hash::Hasher;
@ -28,6 +29,8 @@ pub struct Status {
pub connected_peers: usize,
pub log_sync_height: u64,
pub log_sync_block: H256,
pub next_tx_seq: u64,
pub network_identity: NetworkIdentity,
}
#[derive(Serialize, Deserialize)]
@ -50,6 +53,8 @@ pub struct FileInfo {
pub finalized: bool,
pub is_cached: bool,
pub uploaded_seg_num: usize,
/// Whether file is pruned, in which case `finalized` will be `false`.
pub pruned: bool,
}
#[derive(Debug, Serialize, Deserialize)]

View File

@ -1,8 +1,8 @@
use crate::types::{FileInfo, Segment, SegmentWithProof, Status};
use jsonrpsee::core::RpcResult;
use jsonrpsee::proc_macros::rpc;
use shared_types::{DataRoot, FlowProof};
use storage::config::ShardConfig;
use shared_types::{DataRoot, FlowProof, TxSeqOrRoot};
use storage::{config::ShardConfig, H256};
#[rpc(server, client, namespace = "zgs")]
pub trait Rpc {
@ -12,9 +12,23 @@ pub trait Rpc {
#[method(name = "uploadSegment")]
async fn upload_segment(&self, segment: SegmentWithProof) -> RpcResult<()>;
#[method(name = "uploadSegmentByTxSeq")]
async fn upload_segment_by_tx_seq(
&self,
segment: SegmentWithProof,
tx_seq: u64,
) -> RpcResult<()>;
#[method(name = "uploadSegments")]
async fn upload_segments(&self, segments: Vec<SegmentWithProof>) -> RpcResult<()>;
#[method(name = "uploadSegmentsByTxSeq")]
async fn upload_segments_by_tx_seq(
&self,
segments: Vec<SegmentWithProof>,
tx_seq: u64,
) -> RpcResult<()>;
#[method(name = "downloadSegment")]
async fn download_segment(
&self,
@ -23,6 +37,14 @@ pub trait Rpc {
end_index: usize,
) -> RpcResult<Option<Segment>>;
#[method(name = "downloadSegmentByTxSeq")]
async fn download_segment_by_tx_seq(
&self,
tx_seq: u64,
start_index: usize,
end_index: usize,
) -> RpcResult<Option<Segment>>;
#[method(name = "downloadSegmentWithProof")]
async fn download_segment_with_proof(
&self,
@ -30,6 +52,16 @@ pub trait Rpc {
index: usize,
) -> RpcResult<Option<SegmentWithProof>>;
#[method(name = "downloadSegmentWithProofByTxSeq")]
async fn download_segment_with_proof_by_tx_seq(
&self,
tx_seq: u64,
index: usize,
) -> RpcResult<Option<SegmentWithProof>>;
#[method(name = "checkFileFinalized")]
async fn check_file_finalized(&self, tx_seq_or_root: TxSeqOrRoot) -> RpcResult<Option<bool>>;
#[method(name = "getFileInfo")]
async fn get_file_info(&self, data_root: DataRoot) -> RpcResult<Option<FileInfo>>;
@ -45,4 +77,7 @@ pub trait Rpc {
sector_index: u64,
flow_root: Option<DataRoot>,
) -> RpcResult<FlowProof>;
#[method(name = "getFlowContext")]
async fn get_flow_context(&self) -> RpcResult<(H256, u64)>;
}

View File

@ -5,10 +5,11 @@ use crate::Context;
use chunk_pool::{FileID, SegmentInfo};
use jsonrpsee::core::async_trait;
use jsonrpsee::core::RpcResult;
use shared_types::{DataRoot, FlowProof, Transaction, CHUNK_SIZE};
use shared_types::{DataRoot, FlowProof, Transaction, TxSeqOrRoot, CHUNK_SIZE};
use std::fmt::{Debug, Formatter, Result};
use storage::config::ShardConfig;
use storage::try_option;
use storage::log_store::tx_store::TxStatus;
use storage::{try_option, H256};
pub struct RpcServerImpl {
pub ctx: Context,
@ -26,10 +27,14 @@ impl RpcServer for RpcServerImpl {
.get_sync_progress()?
.unwrap_or_default();
let next_tx_seq = self.ctx.log_store.get_store().next_tx_seq();
Ok(Status {
connected_peers: self.ctx.network_globals.connected_peers(),
log_sync_height: sync_progress.0,
log_sync_block: sync_progress.1,
next_tx_seq,
network_identity: self.ctx.network_globals.network_id(),
})
}
@ -38,6 +43,16 @@ impl RpcServer for RpcServerImpl {
self.put_segment(segment).await
}
/// Handles `zgs_uploadSegmentByTxSeq`.
///
/// Looks up the transaction with sequence number `tx_seq` in the log store
/// and hands the segment, together with the lookup result (which may be
/// `None`), to `put_segment_with_maybe_tx`.
async fn upload_segment_by_tx_seq(
    &self,
    segment: SegmentWithProof,
    tx_seq: u64,
) -> RpcResult<()> {
    info!(%tx_seq, index = %segment.index, "zgs_uploadSegmentByTxSeq");

    let store = &self.ctx.log_store;
    let maybe_tx = store.get_tx_by_seq_number(tx_seq).await?;

    self.put_segment_with_maybe_tx(segment, maybe_tx).await
}
async fn upload_segments(&self, segments: Vec<SegmentWithProof>) -> RpcResult<()> {
let root = match segments.first() {
None => return Ok(()),
@ -53,6 +68,23 @@ impl RpcServer for RpcServerImpl {
Ok(())
}
/// Handles `zgs_uploadSegmentsByTxSeq`.
///
/// The transaction for `tx_seq` is looked up once, then every segment is
/// stored in order via `put_segment_with_maybe_tx`. Processing stops at the
/// first segment that fails, and that error is returned.
async fn upload_segments_by_tx_seq(
    &self,
    segments: Vec<SegmentWithProof>,
    tx_seq: u64,
) -> RpcResult<()> {
    let indices = SegmentIndexArray::new(&segments);
    info!(%tx_seq, ?indices, "zgs_uploadSegmentsByTxSeq");

    let maybe_tx = self.ctx.log_store.get_tx_by_seq_number(tx_seq).await?;

    for segment in segments {
        // Each call takes the lookup result by value, hence the clone.
        let tx = maybe_tx.clone();
        self.put_segment_with_maybe_tx(segment, tx).await?;
    }

    Ok(())
}
async fn download_segment(
&self,
data_root: DataRoot,
@ -61,34 +93,26 @@ impl RpcServer for RpcServerImpl {
) -> RpcResult<Option<Segment>> {
info!(%data_root, %start_index, %end_index, "zgs_downloadSegment");
if start_index >= end_index {
return Err(error::invalid_params("end_index", "invalid chunk index"));
}
if end_index - start_index > self.ctx.config.chunks_per_segment {
return Err(error::invalid_params(
"end_index",
format!(
"exceeds maximum chunks {}",
self.ctx.config.chunks_per_segment
),
));
}
let tx_seq = try_option!(
self.ctx
.log_store
.get_tx_seq_by_data_root(&data_root)
.await?
);
let segment = try_option!(
self.ctx
.log_store
.get_chunks_by_tx_and_index_range(tx_seq, start_index, end_index)
.await?
);
Ok(Some(Segment(segment.data)))
self.get_segment_by_tx_seq(tx_seq, start_index, end_index)
.await
}
/// Handles `zgs_downloadSegmentByTxSeq`.
///
/// Logs the request and delegates to `get_segment_by_tx_seq`, which performs
/// the index-range validation and the actual chunk lookup.
async fn download_segment_by_tx_seq(
    &self,
    tx_seq: u64,
    start_index: usize,
    end_index: usize,
) -> RpcResult<Option<Segment>> {
    info!(%tx_seq, %start_index, %end_index, "zgs_downloadSegmentByTxSeq");

    let maybe_segment = self
        .get_segment_by_tx_seq(tx_seq, start_index, end_index)
        .await?;
    Ok(maybe_segment)
}
async fn download_segment_with_proof(
@ -100,40 +124,44 @@ impl RpcServer for RpcServerImpl {
let tx = try_option!(self.ctx.log_store.get_tx_by_data_root(&data_root).await?);
// validate index
let chunks_per_segment = self.ctx.config.chunks_per_segment;
let (num_segments, last_segment_size) =
SegmentWithProof::split_file_into_segments(tx.size as usize, chunks_per_segment)?;
self.get_segment_with_proof_by_tx(tx, index).await
}
if index >= num_segments {
return Err(error::invalid_params("index", "index out of bound"));
}
async fn download_segment_with_proof_by_tx_seq(
&self,
tx_seq: u64,
index: usize,
) -> RpcResult<Option<SegmentWithProof>> {
info!(%tx_seq, %index, "zgs_downloadSegmentWithProofByTxSeq");
// calculate chunk start and end index
let start_index = index * chunks_per_segment;
let end_index = if index == num_segments - 1 {
// last segment without padding chunks by flow
start_index + last_segment_size / CHUNK_SIZE
} else {
start_index + chunks_per_segment
let tx = try_option!(self.ctx.log_store.get_tx_by_seq_number(tx_seq).await?);
self.get_segment_with_proof_by_tx(tx, index).await
}
async fn check_file_finalized(&self, tx_seq_or_root: TxSeqOrRoot) -> RpcResult<Option<bool>> {
debug!(?tx_seq_or_root, "zgs_checkFileFinalized");
let seq = match tx_seq_or_root {
TxSeqOrRoot::TxSeq(v) => v,
TxSeqOrRoot::Root(v) => {
try_option!(self.ctx.log_store.get_tx_seq_by_data_root(&v).await?)
}
};
let segment = try_option!(
self.ctx
.log_store
.get_chunks_with_proof_by_tx_and_index_range(tx.seq, start_index, end_index, None)
.await?
);
let proof = tx.compute_segment_proof(&segment, chunks_per_segment)?;
Ok(Some(SegmentWithProof {
root: data_root,
data: segment.chunks.data,
index,
proof,
file_size: tx.size as usize,
}))
if self.ctx.log_store.check_tx_completed(seq).await? {
Ok(Some(true))
} else if self
.ctx
.log_store
.get_tx_by_seq_number(seq)
.await?
.is_some()
{
Ok(Some(false))
} else {
Ok(None)
}
}
async fn get_file_info(&self, data_root: DataRoot) -> RpcResult<Option<FileInfo>> {
@ -154,7 +182,7 @@ impl RpcServer for RpcServerImpl {
async fn get_shard_config(&self) -> RpcResult<ShardConfig> {
debug!("zgs_getShardConfig");
let shard_config = self.ctx.log_store.get_store().flow().get_shard_config();
let shard_config = self.ctx.log_store.get_store().get_shard_config();
Ok(shard_config)
}
@ -171,6 +199,10 @@ impl RpcServer for RpcServerImpl {
assert_eq!(proof.left_proof, proof.right_proof);
Ok(proof.right_proof)
}
/// Handles `zgs_getFlowContext`: returns the `(H256, u64)` pair reported by
/// the log store's `get_context`.
async fn get_flow_context(&self) -> RpcResult<(H256, u64)> {
    let context = self.ctx.log_store.get_context().await?;
    Ok(context)
}
}
impl RpcServerImpl {
@ -195,6 +227,10 @@ impl RpcServerImpl {
));
}
if self.ctx.log_store.check_tx_pruned(tx.seq).await? {
return Err(error::invalid_params("root", "already pruned"));
}
Ok(false)
} else {
//Check whether file is small enough to cache in the system
@ -210,7 +246,17 @@ impl RpcServerImpl {
}
async fn get_file_info_by_tx(&self, tx: Transaction) -> RpcResult<FileInfo> {
let finalized = self.ctx.log_store.check_tx_completed(tx.seq).await?;
let (finalized, pruned) = match self
.ctx
.log_store
.get_store()
.get_tx_status(TxSeqOrRoot::TxSeq(tx.seq))?
{
Some(TxStatus::Finalized) => (true, false),
Some(TxStatus::Pruned) => (false, true),
None => (false, false),
};
let (uploaded_seg_num, is_cached) = match self
.ctx
.chunk_pool
@ -219,7 +265,7 @@ impl RpcServerImpl {
{
Some(v) => v,
_ => (
if finalized {
if finalized || pruned {
let chunks_per_segment = self.ctx.config.chunks_per_segment;
let (num_segments, _) = SegmentWithProof::split_file_into_segments(
tx.size as usize,
@ -238,21 +284,36 @@ impl RpcServerImpl {
finalized,
is_cached,
uploaded_seg_num,
pruned,
})
}
async fn put_segment(&self, segment: SegmentWithProof) -> RpcResult<()> {
debug!(root = %segment.root, index = %segment.index, "putSegment");
self.ctx.chunk_pool.validate_segment_size(&segment.data)?;
let maybe_tx = self
.ctx
.log_store
.get_tx_by_data_root(&segment.root)
.await?;
let mut need_cache = false;
self.put_segment_with_maybe_tx(segment, maybe_tx).await
}
async fn put_segment_with_maybe_tx(
&self,
segment: SegmentWithProof,
maybe_tx: Option<Transaction>,
) -> RpcResult<()> {
self.ctx.chunk_pool.validate_segment_size(&segment.data)?;
if let Some(tx) = &maybe_tx {
if tx.data_merkle_root != segment.root {
return Err(error::internal_error("data root and tx seq not match"));
}
}
let mut need_cache = false;
if self
.ctx
.chunk_pool
@ -290,6 +351,77 @@ impl RpcServerImpl {
}
Ok(())
}
async fn get_segment_by_tx_seq(
&self,
tx_seq: u64,
start_index: usize,
end_index: usize,
) -> RpcResult<Option<Segment>> {
if start_index >= end_index {
return Err(error::invalid_params("end_index", "invalid chunk index"));
}
if end_index - start_index > self.ctx.config.chunks_per_segment {
return Err(error::invalid_params(
"end_index",
format!(
"exceeds maximum chunks {}",
self.ctx.config.chunks_per_segment
),
));
}
let segment = try_option!(
self.ctx
.log_store
.get_chunks_by_tx_and_index_range(tx_seq, start_index, end_index)
.await?
);
Ok(Some(Segment(segment.data)))
}
/// Loads segment number `index` of the file described by `tx`, together with
/// its proof.
///
/// # Errors
/// Returns an invalid-params error when `index` is not smaller than the
/// number of segments of the file; errors from splitting the file, reading
/// the store, or computing the proof are propagated.
///
/// Returns `Ok(None)` when the log store has no data for the chunk range.
async fn get_segment_with_proof_by_tx(
    &self,
    tx: Transaction,
    index: usize,
) -> RpcResult<Option<SegmentWithProof>> {
    let chunks_per_segment = self.ctx.config.chunks_per_segment;
    let file_size = tx.size as usize;

    // validate index
    let (num_segments, last_segment_size) =
        SegmentWithProof::split_file_into_segments(file_size, chunks_per_segment)?;
    if index >= num_segments {
        return Err(error::invalid_params("index", "index out of bound"));
    }

    // calculate chunk start and end index
    let start_index = index * chunks_per_segment;
    let chunks_in_segment = if index + 1 == num_segments {
        // last segment without padding chunks by flow
        last_segment_size / CHUNK_SIZE
    } else {
        chunks_per_segment
    };
    let end_index = start_index + chunks_in_segment;

    let segment = try_option!(
        self.ctx
            .log_store
            .get_chunks_with_proof_by_tx_and_index_range(tx.seq, start_index, end_index, None)
            .await?
    );
    let proof = tx.compute_segment_proof(&segment, chunks_per_segment)?;

    Ok(Some(SegmentWithProof {
        root: tx.data_merkle_root,
        data: segment.chunks.data,
        index,
        proof,
        file_size,
    }))
}
}
enum SegmentIndex {

View File

@ -18,3 +18,6 @@ tracing = "0.1.35"
typenum = "1.15.0"
serde = { version = "1.0.137", features = ["derive"] }
chrono = "0.4.19"
[dev-dependencies]
serde_json = "1.0.82"

View File

@ -4,16 +4,18 @@ use anyhow::{anyhow, bail, Error};
use append_merkle::{
AppendMerkleTree, Proof as RawProof, RangeProof as RawRangeProof, Sha3Algorithm,
};
use ethereum_types::{H256, U256};
use ethereum_types::{Address, H256, U256};
use merkle_light::merkle::MerkleTree;
use merkle_light::proof::Proof as RawFileProof;
use merkle_light::{hash::Algorithm, merkle::next_pow2};
use merkle_tree::RawLeafSha3Algorithm;
use serde::de::Visitor;
use serde::{Deserialize, Serialize};
use ssz::Encode;
use ssz_derive::{Decode as DeriveDecode, Encode as DeriveEncode};
use std::fmt;
use std::hash::Hasher;
use std::str::FromStr;
use tiny_keccak::{Hasher as KeccakHasher, Keccak};
use tracing::debug;
@ -111,12 +113,16 @@ impl Transaction {
1 << (depth - 1)
}
pub fn num_entries(&self) -> usize {
self.merkle_nodes.iter().fold(0, |size, &(depth, _)| {
/// Total number of entries covered by a list of `(depth, root)` merkle
/// nodes: the sum of `num_entries_of_node(depth)` over every node.
pub fn num_entries_of_list(merkle_nodes: &[(usize, DataRoot)]) -> usize {
    merkle_nodes
        .iter()
        .map(|&(depth, _)| Transaction::num_entries_of_node(depth))
        .sum()
}
pub fn num_entries(&self) -> usize {
Self::num_entries_of_list(&self.merkle_nodes)
}
pub fn hash(&self) -> H256 {
let bytes = self.as_ssz_bytes();
let mut h = Keccak::v256();
@ -362,7 +368,115 @@ impl TryFrom<FileProof> for FlowProof {
if lemma.len() != value.path.len() + 2 {
Err(anyhow!("invalid file proof"))
} else {
Ok(Self::new(lemma, value.path))
Self::new(lemma, value.path)
}
}
}
/// Identity of the network a node takes part in: the blockchain, the Flow
/// contract deployed on it, and the P2P protocol version.
///
/// Serde uses camelCase field names for this type.
#[derive(
DeriveEncode, DeriveDecode, Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize,
)]
#[serde(rename_all = "camelCase")]
pub struct NetworkIdentity {
/// The chain id of the blockchain network.
pub chain_id: u64,
/// The address of the deployed Flow contract on the blockchain.
pub flow_address: Address,
/// P2P network protocol version.
pub p2p_protocol_version: ProtocolVersion,
}
/// Three-component version number (`major`, `minor`, `build`), used as the
/// P2P protocol version inside `NetworkIdentity`.
#[derive(
DeriveEncode, DeriveDecode, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize,
)]
#[serde(rename_all = "camelCase")]
pub struct ProtocolVersion {
/// Major version component.
pub major: u8,
/// Minor version component.
pub minor: u8,
/// Build version component.
pub build: u8,
}
/// Refers to a transaction either by its sequence number or by its data root.
///
/// The serde implementations in this file are untagged: a sequence number is
/// (de)serialized as a plain unsigned integer and a root as a hex string.
#[derive(Debug)]
pub enum TxSeqOrRoot {
/// Transaction sequence number.
TxSeq(u64),
/// Data root of the file.
Root(DataRoot),
}
impl Serialize for TxSeqOrRoot {
    /// Serializes the wrapped value directly, without a variant tag: a
    /// sequence number becomes an unsigned 64-bit integer, and a root uses
    /// the root type's own serialization.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        match *self {
            Self::TxSeq(seq) => serializer.serialize_u64(seq),
            Self::Root(ref root) => root.serialize(serializer),
        }
    }
}
impl<'de> Deserialize<'de> for TxSeqOrRoot {
    /// Deserializes from whatever the input holds, letting
    /// `TxSeqOrRootVisitor` decide between a sequence number and a root.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let visitor = TxSeqOrRootVisitor;
        deserializer.deserialize_any(visitor)
    }
}
/// Serde visitor that turns an unsigned integer into `TxSeqOrRoot::TxSeq`
/// and a string into `TxSeqOrRoot::Root`.
struct TxSeqOrRootVisitor;

impl<'de> Visitor<'de> for TxSeqOrRootVisitor {
    type Value = TxSeqOrRoot;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("an u64 integer or a hex64 value")
    }

    /// An unsigned integer is taken as a transaction sequence number.
    fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(TxSeqOrRoot::TxSeq(v))
    }

    /// A string is parsed as an `H256` root; a parse failure becomes a
    /// custom deserialization error.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        H256::from_str(v)
            .map(TxSeqOrRoot::Root)
            .map_err(E::custom)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips both `TxSeqOrRoot` variants through JSON.
    #[test]
    fn test_tx_seq_or_root_serde() {
        let hash_str = "0xa906f46f8b9f15908dbee7adc5492ff30779c3abe114ccdb7079ecdcb72eb855";
        let hash = H256::from_str(hash_str).unwrap();
        let hash_quoted = format!("\"{}\"", hash_str);

        // serialize tx seq
        let seq_json = serde_json::to_string(&TxSeqOrRoot::TxSeq(666)).unwrap();
        assert_eq!(seq_json, "666".to_string());

        // serialize root
        let root_json = serde_json::to_string(&TxSeqOrRoot::Root(hash)).unwrap();
        assert_eq!(root_json, hash_quoted);

        // deserialize tx seq
        let parsed_seq = serde_json::from_str::<TxSeqOrRoot>("777").unwrap();
        assert!(matches!(parsed_seq, TxSeqOrRoot::TxSeq(777)));

        // deserialize root
        let parsed_root = serde_json::from_str::<TxSeqOrRoot>(hash_quoted.as_str()).unwrap();
        assert!(matches!(parsed_root, TxSeqOrRoot::Root(v) if v == hash));
    }
}

View File

@ -1,12 +1,14 @@
use clap::{arg, command, Command};
pub fn cli_app<'a>() -> Command<'a> {
pub fn cli_app() -> Command {
command!()
.arg(arg!(-c --config <FILE> "Sets a custom config file"))
.arg(arg!(--"log-config-file" [FILE] "Sets log configuration file (Default: log_config)"))
.arg(arg!(--"miner-key" [KEY] "Sets miner private key (Default: None)"))
.arg(
arg!(--"blockchain-rpc-endpoint" [URL] "Sets blockchain RPC endpoint (Default: http://127.0.0.1:8545)")
)
.arg(arg!(--"db-max-num-chunks" [NUM] "Sets the max number of chunks to store in db (Default: None)"))
.allow_external_subcommands(true)
.version(zgs_version::VERSION)
}

View File

@ -2,10 +2,10 @@ use super::{Client, RuntimeContext};
use chunk_pool::{ChunkPoolMessage, Config as ChunkPoolConfig, MemoryChunkPool};
use file_location_cache::FileLocationCache;
use log_entry_sync::{LogSyncConfig, LogSyncEvent, LogSyncManager};
use miner::{MineService, MinerConfig, MinerMessage};
use miner::{MineService, MinerConfig, MinerMessage, ShardConfig};
use network::{
self, Keypair, NetworkConfig, NetworkGlobals, NetworkMessage, RequestId,
Service as LibP2PService,
self, new_network_channel, Keypair, NetworkConfig, NetworkGlobals, NetworkReceiver,
NetworkSender, RequestId, Service as LibP2PService,
};
use pruner::{Pruner, PrunerConfig, PrunerMessage};
use router::RouterService;
@ -27,15 +27,12 @@ macro_rules! require {
}
struct NetworkComponents {
send: mpsc::UnboundedSender<NetworkMessage>,
send: NetworkSender,
globals: Arc<NetworkGlobals>,
keypair: Keypair,
// note: these will be owned by the router service
owned: Option<(
LibP2PService<RequestId>,
mpsc::UnboundedReceiver<NetworkMessage>,
)>,
owned: Option<(LibP2PService<RequestId>, NetworkReceiver)>,
}
struct SyncComponents {
@ -110,8 +107,12 @@ impl ClientBuilder {
/// Initializes RocksDB storage.
pub fn with_rocksdb_store(mut self, config: &StorageConfig) -> Result<Self, String> {
let store = Arc::new(
LogManager::rocksdb(LogConfig::default(), &config.db_dir)
.map_err(|e| format!("Unable to start RocksDB store: {:?}", e))?,
LogManager::rocksdb(
config.log_config.clone(),
config.db_dir.join("flow_db"),
config.db_dir.join("data_db"),
)
.map_err(|e| format!("Unable to start RocksDB store: {:?}", e))?,
);
self.store = Some(store.clone());
@ -140,7 +141,7 @@ impl ClientBuilder {
let service_context = network::Context { config };
// construct communication channel
let (send, recv) = mpsc::unbounded_channel::<NetworkMessage>();
let (send, recv) = new_network_channel();
// launch libp2p service
let (globals, keypair, libp2p) =
@ -214,6 +215,16 @@ impl ClientBuilder {
Ok(self)
}
/// Forwards `config` to the async store's `update_shard_config` and returns
/// the builder unchanged.
///
/// # Errors
/// Returns an error when the async store has not been set up yet. This used
/// to panic; the function already returns `Result`, so a missing dependency
/// is now reported to the caller like other builder steps do.
pub async fn with_shard(self, config: ShardConfig) -> Result<Self, String> {
    let store = self
        .async_store
        .as_ref()
        .ok_or_else(|| "shard requires async_store".to_string())?;

    store.update_shard_config(config).await;

    Ok(self)
}
/// Starts the networking stack.
pub fn with_router(mut self, router_config: router::Config) -> Result<Self, String> {
let executor = require!("router", self, runtime_context).clone().executor;

View File

@ -48,7 +48,7 @@ macro_rules! build_config{
let mut config = RawConfiguration::default();
// read from config file
if let Some(config_file) = matches.value_of("config") {
if let Some(config_file) = matches.get_one::<String>("config") {
let config_value = std::fs::read_to_string(config_file)
.map_err(|e| format!("failed to read configuration file: {:?}", e))?
.parse::<toml::Value>()
@ -67,7 +67,7 @@ macro_rules! build_config{
// read from command line
$(
#[allow(unused_variables)]
if let Some(value) = matches.value_of(underscore_to_hyphen!(stringify!($name))) {
if let Some(value) = matches.get_one::<String>(&underscore_to_hyphen!(stringify!($name))) {
if_not_vector!($($type)+, THEN {
config.$name = if_option!($($type)+,
THEN{ Some(value.parse().map_err(|_| concat!("Invalid ", stringify!($name)).to_owned())?) }

View File

@ -2,14 +2,17 @@
use crate::ZgsConfig;
use ethereum_types::{H256, U256};
use ethers::prelude::{Http, Middleware, Provider};
use log_entry_sync::{CacheConfig, ContractAddress, LogSyncConfig};
use miner::MinerConfig;
use network::NetworkConfig;
use network::{EnrExt, NetworkConfig};
use pruner::PrunerConfig;
use rpc::RPCConfig;
use shared_types::{NetworkIdentity, ProtocolVersion};
use std::net::IpAddr;
use std::sync::Arc;
use std::time::Duration;
use storage::config::ShardConfig;
use storage::log_store::log_manager::LogConfig;
use storage::StorageConfig;
impl ZgsConfig {
@ -25,6 +28,27 @@ impl ZgsConfig {
network_config.libp2p_port = self.network_libp2p_port;
network_config.disable_discovery = self.network_disable_discovery;
network_config.discovery_port = self.network_discovery_port;
let flow_address = self
.log_contract_address
.parse::<ContractAddress>()
.map_err(|e| format!("Unable to parse log_contract_address: {:?}", e))?;
let provider = Provider::<Http>::try_from(&self.blockchain_rpc_endpoint)
.map_err(|e| format!("Can not parse blockchain endpoint: {:?}", e))?;
let chain_id = provider
.get_chainid()
.await
.map_err(|e| format!("Unable to get chain id: {:?}", e))?
.as_u64();
let local_network_id = NetworkIdentity {
chain_id,
flow_address,
p2p_protocol_version: ProtocolVersion {
major: network::PROTOCOL_VERSION[0],
minor: network::PROTOCOL_VERSION[1],
build: network::PROTOCOL_VERSION[2],
},
};
network_config.network_id = local_network_id.clone();
if !self.network_disable_discovery {
network_config.enr_tcp_port = Some(self.network_enr_tcp_port);
@ -60,7 +84,13 @@ impl ZgsConfig {
.collect::<Result<_, _>>()
.map_err(|e| format!("Unable to parse network_libp2p_nodes: {:?}", e))?;
network_config.discv5_config.table_filter = |_| true;
network_config.discv5_config.table_filter = if self.discv5_disable_enr_network_id {
Arc::new(|_| true)
} else {
Arc::new(
move |enr| matches!(enr.network_identity(), Some(Ok(id)) if id == local_network_id),
)
};
network_config.discv5_config.request_timeout =
Duration::from_secs(self.discv5_request_timeout_secs);
network_config.discv5_config.query_peer_timeout =
@ -74,38 +104,19 @@ impl ZgsConfig {
network_config.target_peers = self.network_target_peers;
network_config.private = self.network_private;
network_config.peer_db = self.network_peer_db;
network_config.peer_manager = self.network_peer_manager;
network_config.disable_enr_network_id = self.discv5_disable_enr_network_id;
Ok(network_config)
}
pub fn storage_config(&self) -> Result<StorageConfig, String> {
let mut log_config = LogConfig::default();
log_config.flow.merkle_node_cache_capacity = self.merkle_node_cache_capacity;
Ok(StorageConfig {
db_dir: self.db_dir.clone().into(),
})
}
pub fn rpc_config(&self) -> Result<RPCConfig, String> {
let listen_address = self
.rpc_listen_address
.parse::<std::net::SocketAddr>()
.map_err(|e| format!("Unable to parse rpc_listen_address: {:?}", e))?;
let listen_address_admin = if self.rpc_listen_address_admin.is_empty() {
None
} else {
Some(
self.rpc_listen_address_admin
.parse::<std::net::SocketAddr>()
.map_err(|e| format!("Unable to parse rpc_listen_address_admin: {:?}", e))?,
)
};
Ok(RPCConfig {
enabled: self.rpc_enabled,
listen_address,
listen_address_admin,
max_request_body_size: self.max_request_body_size,
chunks_per_segment: self.rpc_chunks_per_segment,
max_cache_file_size: self.rpc_max_cache_file_size,
log_config,
})
}
@ -134,6 +145,8 @@ impl ZgsConfig {
self.default_finalized_block_count,
self.remove_finalized_block_interval_minutes,
self.watch_loop_wait_time_ms,
self.force_log_sync_from_start_block_number,
Duration::from_secs(self.blockchain_rpc_timeout_secs),
))
}
@ -168,6 +181,7 @@ impl ZgsConfig {
let submission_gas = self.miner_submission_gas.map(U256::from);
let cpu_percentage = self.miner_cpu_percentage;
let iter_batch = self.mine_iter_batch_size;
let context_query_seconds = self.mine_context_query_seconds;
let shard_config = self.shard_config()?;
@ -180,7 +194,11 @@ impl ZgsConfig {
submission_gas,
cpu_percentage,
iter_batch,
context_query_seconds,
shard_config,
self.rate_limit_retries,
self.timeout_retries,
self.initial_backoff,
))
}
@ -197,26 +215,42 @@ impl ZgsConfig {
pub fn router_config(&self, network_config: &NetworkConfig) -> Result<router::Config, String> {
let mut router_config = self.router.clone();
router_config.libp2p_nodes = network_config.libp2p_nodes.to_vec();
if router_config.public_address.is_none() {
if let Some(addr) = &self.network_enr_address {
router_config.public_address = Some(addr.parse().unwrap());
}
}
Ok(router_config)
}
pub fn pruner_config(&self) -> Result<Option<PrunerConfig>, String> {
if let Some(max_num_chunks) = self.db_max_num_chunks {
if let Some(max_num_sectors) = self.db_max_num_sectors {
let shard_config = self.shard_config()?;
let reward_address = self
.reward_contract_address
.parse::<ContractAddress>()
.map_err(|e| format!("Unable to parse reward_contract_address: {:?}", e))?;
Ok(Some(PrunerConfig {
shard_config,
db_path: self.db_dir.clone().into(),
max_num_chunks,
max_num_sectors,
check_time: Duration::from_secs(self.prune_check_time_s),
batch_size: self.prune_batch_size,
batch_wait_time: Duration::from_millis(self.prune_batch_wait_time_ms),
rpc_endpoint_url: self.blockchain_rpc_endpoint.clone(),
reward_address,
rate_limit_retries: self.rate_limit_retries,
timeout_retries: self.timeout_retries,
initial_backoff: self.initial_backoff,
}))
} else {
Ok(None)
}
}
fn shard_config(&self) -> Result<ShardConfig, String> {
ShardConfig::new(&self.shard_position)
pub fn shard_config(&self) -> Result<ShardConfig, String> {
self.shard_position.clone().try_into()
}
}

View File

@ -28,12 +28,14 @@ build_config! {
(discv5_report_discovered_peers, (bool), false)
(discv5_disable_packet_filter, (bool), false)
(discv5_disable_ip_limit, (bool), false)
(discv5_disable_enr_network_id, (bool), false)
// log sync
(blockchain_rpc_endpoint, (String), "http://127.0.0.1:8545".to_string())
(log_contract_address, (String), "".to_string())
(log_sync_start_block_number, (u64), 0)
(confirmation_block_count, (u64), 6)
(force_log_sync_from_start_block_number, (bool), false)
(confirmation_block_count, (u64), 3)
(log_page_size, (u64), 999)
(max_cache_data_size, (usize), 100 * 1024 * 1024) // 100 MB
(cache_tx_seq_ttl, (usize), 500)
@ -47,13 +49,7 @@ build_config! {
(remove_finalized_block_interval_minutes, (u64), 30)
(watch_loop_wait_time_ms, (u64), 500)
// rpc
(rpc_enabled, (bool), true)
(rpc_listen_address, (String), "0.0.0.0:5678".to_string())
(rpc_listen_address_admin, (String), "127.0.0.1:5679".to_string())
(max_request_body_size, (u32), 100*1024*1024) // 100MB
(rpc_chunks_per_segment, (usize), 1024)
(rpc_max_cache_file_size, (usize), 10*1024*1024) //10MB
(blockchain_rpc_timeout_secs, (u64), 120)
// chunk pool
(chunk_pool_write_window_size, (usize), 4)
@ -63,10 +59,11 @@ build_config! {
// db
(db_dir, (String), "db".to_string())
(db_max_num_chunks, (Option<usize>), None)
(db_max_num_sectors, (Option<usize>), None)
(prune_check_time_s, (u64), 60)
(prune_batch_size, (usize), 1024)
(prune_batch_size, (usize), 16 * 1024)
(prune_batch_wait_time_ms, (u64), 1000)
(merkle_node_cache_capacity, (usize), 32 * 1024 * 1024)
// misc
(log_config_file, (String), "log_config".to_string())
@ -79,7 +76,10 @@ build_config! {
(miner_submission_gas, (Option<u64>), None)
(miner_cpu_percentage, (u64), 100)
(mine_iter_batch_size, (usize), 100)
(reward_contract_address, (String), "".to_string())
(shard_position, (Option<String>), None)
(mine_context_query_seconds, (u64), 5)
}
#[derive(Debug, Default, Deserialize)]
@ -87,6 +87,12 @@ build_config! {
pub struct ZgsConfig {
pub raw_conf: RawConfiguration,
/// Network peer db config, configured by [network_peer_db] section by `config` crate.
pub network_peer_db: network::peer_manager::peerdb::PeerDBConfig,
/// Network peer manager config, configured by [network_peer_manager] section by `config` crate.
pub network_peer_manager: network::peer_manager::config::Config,
// router config, configured by [router] section by `config` crate.
pub router: router::Config,
@ -95,6 +101,12 @@ pub struct ZgsConfig {
// file location cache config, configured by [file_location_cache] section by `config` crate.
pub file_location_cache: file_location_cache::Config,
// rpc config, configured by [rpc] section by `config` crate.
pub rpc: rpc::RPCConfig,
// metrics config, configured by [metrics] section by `config` crate.
pub metrics: metrics::MetricsConfiguration,
}
impl Deref for ZgsConfig {

View File

@ -20,10 +20,13 @@ pub fn configure(log_level_file: &str, log_directory: &str, executor: TaskExecut
let handle = builder.reload_handle();
builder.init();
let level_file = log_level_file.to_string();
let level_file = log_level_file.trim_end().to_string();
// load config synchronously
let mut config = std::fs::read_to_string(&level_file).unwrap_or_default();
let mut config = std::fs::read_to_string(&level_file)
.unwrap_or_default()
.trim_end()
.to_string();
let _ = handle.reload(&config);
// periodically check for config changes
@ -38,8 +41,14 @@ pub fn configure(log_level_file: &str, log_directory: &str, executor: TaskExecut
interval.tick().await;
let new_config = match tokio::fs::read_to_string(&level_file).await {
Ok(c) if c == config => continue,
Ok(c) => c,
Ok(c) => {
let nc = c.trim_end().to_string();
if nc == config {
continue;
} else {
nc
}
}
Err(e) => {
println!("Unable to read log file {}: {:?}", level_file, e);
continue;

View File

@ -13,11 +13,11 @@ use std::error::Error;
async fn start_node(context: RuntimeContext, config: ZgsConfig) -> Result<Client, String> {
let network_config = config.network_config().await?;
let storage_config = config.storage_config()?;
let rpc_config = config.rpc_config()?;
let log_sync_config = config.log_sync_config()?;
let miner_config = config.mine_config()?;
let router_config = config.router_config(&network_config)?;
let pruner_config = config.pruner_config()?;
let shard_config = config.shard_config()?;
ClientBuilder::default()
.with_runtime_context(context)
@ -31,9 +31,11 @@ async fn start_node(context: RuntimeContext, config: ZgsConfig) -> Result<Client
.await?
.with_miner(miner_config)
.await?
.with_shard(shard_config)
.await?
.with_pruner(pruner_config)
.await?
.with_rpc(rpc_config, config.chunk_pool_config()?)
.with_rpc(config.rpc, config.chunk_pool_config()?)
.await?
.with_router(router_config)?
.build()
@ -60,6 +62,7 @@ fn main() -> Result<(), Box<dyn Error>> {
// CLI, config, and logs
let matches = cli::cli_app().get_matches();
let config = ZgsConfig::parse(&matches)?;
metrics::initialize(config.metrics.clone());
log::configure(
&config.log_config_file,
&config.log_directory,

View File

@ -10,4 +10,5 @@ storage = { path = "../storage" }
task_executor = { path = "../../common/task_executor" }
tokio = { version = "1.19.2", features = ["sync"] }
tracing = "0.1.35"
eth2_ssz = "0.4.0"
eth2_ssz = "0.4.0"
backtrace = "0.3"

Some files were not shown because too many files have changed in this diff Show More