diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..bdaab28 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..14a4e65 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: 
actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/.gitignore b/.gitignore index 94845ce..55495a6 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,6 @@ target/ # PyCharm .idea/ + +.vscode/ +.venv/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d7b170f..0000000 --- a/.travis.yml +++ /dev/null @@ -1,25 +0,0 @@ -sudo: required - -language: - - python - -python: - - "3.3" - - "3.4" - - "3.5" - - "3.6" - -before_install: - - ./travis.sh - -install: - - pip install -r requirements.txt - - pip install coveralls - -script: - - coverage run --append --include='blockchain_parser/*' --omit='*/tests/*' setup.py test - -after_success: - - if [[ $TRAVIS_PYTHON_VERSION == '3.6' ]]; then - coveralls; - fi diff --git a/README.md b/README.md index c57701b..ff333df 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# bitcoin-blockchain-parser [![Build Status](https://travis-ci.org/alecalve/python-bitcoin-blockchain-parser.svg?branch=master)](https://travis-ci.org/alecalve/python-bitcoin-blockchain-parser) [![Coverage Status](https://coveralls.io/repos/alecalve/python-bitcoin-blockchain-parser/badge.svg?branch=master&service=github)](https://coveralls.io/github/alecalve/python-bitcoin-blockchain-parser?branch=master) -This Python 3 library provides a parser for the raw data stored by bitcoind. 
+# bitcoin-blockchain-parser ![Build Status](https://github.com/alecalve/python-bitcoin-blockchain-parser/actions/workflows/python-package.yml/badge.svg) [![Coverage Status](https://coveralls.io/repos/alecalve/python-bitcoin-blockchain-parser/badge.svg?branch=master&service=github)](https://coveralls.io/github/alecalve/python-bitcoin-blockchain-parser?branch=master) +This Python 3 library provides a parser for the raw data stored by bitcoind. ## Features - Detects outputs types @@ -8,6 +8,52 @@ This Python 3 library provides a parser for the raw data stored by bitcoind. - Supports SegWit - Supports ordered block parsing +## Installing + +Whether installing using Pip or from source, plyvel requires leveldb development libraries for LevelDB >1.2.X. + +On Linux, install libleveldb-dev + +``` +sudo apt-get install libleveldb-dev +``` + +### Using pip + +``` +pip install blockchain-parser +``` + +### Using source + +Requirements : python-bitcoinlib, plyvel, coverage for tests + + +Install dependencies contained in `requirements.txt`: +``` +pip install -r requirements.txt +``` + +Then, just run +``` +python setup.py install +``` + +## Developing + +First, set up a virtualenv and install dependencies: + +``` +virtualenv -p python3 .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +Run the test suite by launching +``` +./tests.sh +``` + ## Examples Below are two basic examples for parsing the blockchain. More examples are available in the examples directory. @@ -17,10 +63,10 @@ Below are two basic examples for parsing the blockchain. More examples are avail This blockchain parser parses raw blocks saved in Bitcoin Core's `.blk` file format. Bitcoin Core does not guarantee that these blocks are saved in order. 
If your application does not require that blocks are parsed in order, the `Blockchain.get_unordered_blocks(...)` method can be used: ```python -import os +import os from blockchain_parser.blockchain import Blockchain -# Instantiate the Blockchain by giving the path to the directory +# Instantiate the Blockchain by giving the path to the directory # containing the .blk files created by bitcoind blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks')) for block in blockchain.get_unordered_blocks(): @@ -34,12 +80,13 @@ for block in blockchain.get_unordered_blocks(): If maintaining block order is necessary for your application, you should use the `Blockchain.get_ordered_blocks(...)` method. This method uses Bitcoin Core's LevelDB index to locate ordered block data in it's `.blk` files. ```python -import os +import os from blockchain_parser.blockchain import Blockchain # To get the blocks ordered by height, you need to provide the path of the # `index` directory (LevelDB index) being maintained by bitcoind. It contains # .ldb files and is present inside the `blocks` directory. +blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks')) for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), end=1000): print("height=%d block=%s" % (block.height, block.hash)) ``` @@ -51,7 +98,7 @@ for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks print("height=%d block=%s" % (block.height, block.hash)) ``` -Building the LevelDB index can take a while which can make iterative development and debugging challenging. For this reason, `Blockchain.get_ordered_blocks(...)` supports caching the LevelDB index database using [pickle](https://docs.python.org/3.6/library/pickle.html). To use a cache simply pass `cache=filename` to the ordered blocks method. If the cached file does not exist it will be created for faster parsing the next time the method is run. 
If the cached file already exists it will be used instead of re-parsing the LevelDB database. +Building the LevelDB index can take a while which can make iterative development and debugging challenging. For this reason, `Blockchain.get_ordered_blocks(...)` supports caching the LevelDB index database using [pickle](https://docs.python.org/3.6/library/pickle.html). To use a cache simply pass `cache=filename` to the ordered blocks method. If the cached file does not exist it will be created for faster parsing the next time the method is run. If the cached file already exists it will be used instead of re-parsing the LevelDB database. ```python for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), cache='index-cache.pickle'): @@ -60,29 +107,3 @@ for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks **NOTE**: You must manually/programmatically delete the cache file in order to rebuild the cache. Don't forget to do this each time you would like to re-parse the blockchain with a higher block height than the first time you saved the cache file as the new blocks will not be included in the cache. -## Installing - -Requirements : python-bitcoinlib, plyvel, coverage for tests - -plyvel requires leveldb development libraries for LevelDB >1.2.X - -On Linux, install libleveldb-dev - -``` -sudo apt-get install libleveldb-dev -``` - -Then, just run -``` -python setup.py install -``` - -## Tests - -Run the test suite by lauching -``` -./tests.sh -``` - - - diff --git a/blockchain_parser/__init__.py b/blockchain_parser/__init__.py index 4f76129..75da190 100644 --- a/blockchain_parser/__init__.py +++ b/blockchain_parser/__init__.py @@ -9,4 +9,4 @@ # modified, propagated, or distributed except according to the terms contained # in the LICENSE file. 
-__version__ = "0.1.5" +__version__ = "0.1.6" diff --git a/blockchain_parser/block.py b/blockchain_parser/block.py index f00db41..15caf22 100644 --- a/blockchain_parser/block.py +++ b/blockchain_parser/block.py @@ -46,7 +46,7 @@ class Block(object): Represents a Bitcoin block, contains its header and its transactions. """ - def __init__(self, raw_hex, height=None): + def __init__(self, raw_hex, height=None, blk_file=None): self.hex = raw_hex self._hash = None self._transactions = None @@ -54,6 +54,7 @@ def __init__(self, raw_hex, height=None): self._n_transactions = None self.size = len(raw_hex) self.height = height + self.blk_file = blk_file def __repr__(self): return "Block(%s)" % self.hash diff --git a/blockchain_parser/blockchain.py b/blockchain_parser/blockchain.py index 8a29bd1..55d8b41 100644 --- a/blockchain_parser/blockchain.py +++ b/blockchain_parser/blockchain.py @@ -16,9 +16,15 @@ import stat import plyvel +from blockchain_parser.transaction import Transaction +from blockchain_parser.index import DBTransactionIndex +from blockchain_parser import utils +from binascii import unhexlify +from binascii import hexlify from .block import Block from .index import DBBlockIndex from .utils import format_hash +from .block_header import BlockHeader # Constant separating blocks in the .blk files @@ -89,7 +95,7 @@ def get_unordered_blocks(self): """ for blk_file in get_files(self.path): for raw_block in get_blocks(blk_file): - yield Block(raw_block) + yield Block(raw_block, None, os.path.split(blk_file)[1]) def __getBlockIndexes(self, index): """There is no method of leveldb to close the db (and release the lock). 
def get_transaction(self, txid, db):
    """Return the block header and the transaction for a transaction id.

    The transaction is located through an index database and then read
    back from the matching .blk file. The result is similar in spirit to
    https://developer.bitcoin.org/reference/rpc/getrawtransaction.html

    :param txid: transaction id as a hexadecimal string
    :param db: opened key/value store exposing ``get(key)``; presumably a
        plyvel handle on bitcoind's transaction index -- confirm with the
        caller
    :return: ``[block_header, transaction]`` on success, ``None`` when
        the txid is not in ``db`` or no transaction could be parsed
    """
    # The index key is the byte b't' followed by the txid bytes in the
    # reverse order of their hexadecimal representation.
    key = b't' + bytes.fromhex(txid)[::-1]
    raw_index_entry = db.get(key)
    if raw_index_entry is None:
        # Unknown txid: report "not found" the same way as a failed parse
        # instead of handing None to the index decoder.
        return None

    tx_idx = DBTransactionIndex(utils.format_hash(key), raw_index_entry)
    blk_file = os.path.join(self.path, "blk%05d.dat" % tx_idx.blockfile_no)
    raw_block = get_block(blk_file, tx_idx.file_offset)

    # The first 80 bytes are handed to the header parser, everything after
    # them is searched for the transaction.
    header_data = raw_block[:80]
    transaction_data = raw_block[80:]

    # The header does not depend on the slice width, so parse it once.
    # NOTE(review): the exception types raised by the parsers are not
    # visible here, hence the broad handlers; narrow them once known.
    try:
        block_header = BlockHeader.from_hex(header_data)
    except Exception:
        return None

    start = tx_idx.block_offset
    # The transaction length is not known up front: retry with slice
    # widths doubling from 1024 bytes (1 KiB) up to 536870912 bytes
    # (512 MiB) until the parser accepts the data.
    for exponent in range(20):
        end = start + (1024 << exponent)
        try:
            transaction = Transaction.from_hex(transaction_data[start:end])
        except Exception:
            if end >= len(transaction_data):
                # The slice already held all remaining data; a wider
                # window would only repeat the same failure.
                break
            continue
        return [block_header, transaction]

    return None