Skip to content

Commit

Permalink
Merge pull request #58 from 0x26res/add-benchmark
Browse files Browse the repository at this point in the history
Add benchmark
  • Loading branch information
0x26res authored Oct 15, 2024
2 parents 03d4d1e + 8db608e commit 58a498a
Show file tree
Hide file tree
Showing 15 changed files with 224 additions and 189 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ repos:
rev: 1.8.0
hooks:
- id: poetry-check
- repo: https://github.com/kynan/nbstripout
rev: 0.7.1
hooks:
- id: nbstripout
args:
- "--drop-empty-cells"
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 14 additions & 8 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,23 @@ Note: the ci script is autogenerated:
maturin generate-ci github > .github/workflows/release.yml
```

## Benchmarking

Make sure to install the release version of ptars.
The locally built version is much slower.

```shell
pytest python/test/benchmark --benchmark-name=short --benchmark-columns=mean --benchmark-sort=name
```

## TODO

- [ ] arrow to proto
- [ ] repeated messages
- [ ] more generic
- [ ] finish arrow to proto
- [ ] handle repeated messages
- [ ] more generic code
- [ ] add rust unit tests
- [ ] publish package
- [ ] add configuration for enums
- [ ] maps
- [ ] timestamp, date, wrapped types, duration
- [ ] reuse protarrow tests
- [ ] add configuration for enums, timestamp, date, wrapped types, duration
- [ ] reuse protarrow tests and random generator

## Resources

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ all:
env:
test -d env || python3 -m venv env
. env/bin/activate && \
python -m pip install maturin pytest pyarrow googleapis-common-protos protobuf grpcio-tools
python -m pip install maturin pytest pyarrow googleapis-common-protos protobuf grpcio-tools protarrow


.PHONY: develop
Expand All @@ -16,7 +16,7 @@ develop: env

.PHONY: test
test: develop
. env/bin/activate && RUST_BACKTRACE=1 python -m pytest python/test/ && cargo test
. env/bin/activate && RUST_BACKTRACE=1 python -m pytest python/test/unit && cargo test

.PHONY: build
build: env
Expand Down
33 changes: 32 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# ptars

[![Ruff][ruff-image]][ruff-url]
[![PyPI Version][pypi-image]][pypi-url]
[![Python Version][versions-image]][versions-url]
[![Github Stars][stars-image]][stars-url]
Expand All @@ -10,6 +11,7 @@
[![Downloads][downloads-month-image]][downloads-month-url]
[![Code style: black][codestyle-image]][codestyle-url]
[![snyk][snyk-image]][snyk-url]
![Size][repo-size-url]

Protobuf to Arrow, using Rust

Expand Down Expand Up @@ -64,6 +66,32 @@ messages_back: list[SearchRequest] = [
]
```

## Benchmark against protarrow

[Ptars](https://github.com/0x26res/ptars) is a Rust implementation of
[protarrow](https://github.com/tradewelltech/protarrow),
which is implemented in plain Python.
It is:

- marginally faster when converting from proto to arrow.
- about 3 times faster when converting from arrow to proto.

```benchmark
---- benchmark 'to_arrow': 2 tests ----
Name (time in ms) Mean
---------------------------------------
protarrow_to_arrow 8.6582 (1.18)
ptars_to_arrow 7.3336 (1.0)
---------------------------------------
---- benchmark 'to_proto': 2 tests -----
Name (time in ms) Mean
----------------------------------------
ptars_to_proto 6.4088 (1.0)
protarrow_to_proto 21.5594 (3.36)
----------------------------------------
```

[pypi-image]: https://img.shields.io/pypi/v/ptars
[pypi-url]: https://pypi.org/project/ptars/
[build-image]: https://github.com/0x26res/ptars/actions/workflows/ci.yaml/badge.svg
Expand All @@ -81,6 +109,9 @@ messages_back: list[SearchRequest] = [
[downloads-month-image]: https://pepy.tech/badge/ptars/month
[downloads-month-url]: https://static.pepy.tech/badge/ptars/month
[codestyle-image]: https://img.shields.io/badge/code%20style-black-000000.svg
[codestyle-url]: https://github.com/ambv/black
[codestyle-url]: https://github.com/astral-sh/ruff
[snyk-image]: https://snyk.io/advisor/python/ptars/badge.svg
[snyk-url]: https://snyk.io/advisor/python/ptars
[ruff-image]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
[ruff-url]: https://github.com/astral-sh/ruff
[repo-size-url]: https://img.shields.io/github/repo-size/0x26res/ptars
102 changes: 40 additions & 62 deletions notebooks/demonstrator.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,45 @@
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "3e7e8195-a6da-4da0-bbdb-dfe5ae752e3e",
"execution_count": null,
"id": "0",
"metadata": {},
"outputs": [],
"source": [
"%cd ../"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import secrets\n",
"\n",
"import protarrow\n",
"from demonstartor_pb2 import SearchRequest\n",
"\n",
"import ptars"
"import ptars\n",
"from ptars_protos.benchmark_pb2 import BenchmarkMessage"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "83c97629-5553-4760-b2f8-3c3b911d11bb",
"execution_count": null,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
"pool = ptars.HandlerPool()\n",
"handler = pool.get_for_message(SearchRequest.DESCRIPTOR)"
"handler = pool.get_for_message(BenchmarkMessage.DESCRIPTOR)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "34757cb6-9882-47d5-8689-68593f98415f",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -42,13 +52,13 @@
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a2192b04-19ee-4ed4-8937-c49b5cdbc6c0",
"execution_count": null,
"id": "4",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" SearchRequest(\n",
" BenchmarkMessage(\n",
" query=secrets.token_urlsafe(random.randint(0, STRING_SIZE)),\n",
" page_number=random.randint(MIN_INT, MAX_INT),\n",
" result_per_page=random.randint(MIN_INT, MAX_INT),\n",
Expand All @@ -61,49 +71,33 @@
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7d200de8-f1d1-49bb-a7a6-2488a7c07c41",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7.71 ms ± 148 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"outputs": [],
"source": [
"%%timeit\n",
"protarrow.messages_to_record_batch(\n",
" [SearchRequest.FromString(p) for p in payloads],\n",
" SearchRequest,\n",
" [BenchmarkMessage.FromString(p) for p in payloads],\n",
" BenchmarkMessage,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "5f986916-ff38-448d-a12a-2eca5e04254a",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7.02 ms ± 20.7 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"outputs": [],
"source": [
"%%timeit\n",
"handler.list_to_record_batch(payloads)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "c7dfa27d-c24a-4d34-90df-717dfd3bcc6d",
"execution_count": null,
"id": "7",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -112,42 +106,26 @@
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b881d056-0dc6-4790-8a91-a7d3ec5061ac",
"execution_count": null,
"id": "8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6.5 ms ± 22.7 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"outputs": [],
"source": [
"%%timeit\n",
"handler.record_batch_to_array(record_batch)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "fc0d9019-3f4e-4df4-a083-47fb35807e8b",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"19.7 ms ± 125 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"outputs": [],
"source": [
"%%timeit\n",
"[\n",
" m.SerializeToString()\n",
" for m in protarrow.record_batch_to_messages(record_batch, SearchRequest)\n",
" for m in protarrow.record_batch_to_messages(record_batch, BenchmarkMessage)\n",
"]"
]
}
Expand All @@ -168,7 +146,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
"version": "3.11.6"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 58a498a

Please sign in to comment.