From 0d33c9b606297003483c4f08a147482c27f2f4f4 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Mon, 2 May 2022 23:15:48 +0200
Subject: [PATCH 01/10] refactor worker

---
 worker.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/worker.go b/worker.go
index 80feb52..6ead7fb 100644
--- a/worker.go
+++ b/worker.go
@@ -8,12 +8,6 @@ import (
 	"time"
 )

-type operation[Resource any] struct {
-	resourceKey string
-	run         func(Resource)
-	result      chan error
-}
-
 type worker[Resource any] struct {
 	goRoutineNumber    int
 	incomingOperations <-chan operation[Resource]
@@ -100,6 +94,12 @@ func (w *worker[Resource]) runOperation(_operation operation[Resource]) {
 	_operation.run(_batch.resource)
 }

+type operation[Resource any] struct {
+	resourceKey string
+	run         func(Resource)
+	result      chan error
+}
+
 type batch[Resource any] struct {
 	ctx      context.Context
 	key      string

From de61dc91b23aa3bf43b78df2063ebf3397d49ed8 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 09:45:33 +0200
Subject: [PATCH 02/10] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 08aefb4..feb6028 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,10 @@

 ## What it can be used for?

-To speed up application performance **without** sacrificing *data consistency* or *data durability* or making source code/architecture complex.
+To speed up application performance **without** sacrificing *data consistency* and *data durability* or making source code/architecture complex.

 The **batch** package simplifies writing Go applications that process incoming requests (HTTP, GRPC etc.) in a batch manner:
-instead of processing each request separately, group incoming requests to a batch and run whole group at once.
+instead of processing each request separately, they group incoming requests to a batch and run whole group at once.
 This method of processing can significantly speed up the application and reduce the consumption of disk, network or CPU.

 The **batch** package can be used to write any type of *servers* that handle thousands of requests per second.
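The README text changed above describes grouping concurrent requests into one batch per resource. The toy program below is not part of this patch series; it is a minimal sketch of what that grouping looks like from the caller's side, using only the calls that appear later in this series (StartProcessor, Options with MinDuration and LoadResource, Run, Stop). The counter type and the "player-1" key are illustrative assumptions.

```go
// Hypothetical example (not from the repository): many concurrent operations on
// the same key are applied sequentially to one in-memory resource inside a batch.
package main

import (
	"context"
	"log"
	"sync"
	"time"

	"github.com/elgopher/batch"
)

// counter is a stand-in for an application resource; the real type is up to the caller.
type counter struct{ value int }

func main() {
	processor := batch.StartProcessor(batch.Options[*counter]{
		MinDuration: 100 * time.Millisecond, // how long operations are grouped before the batch ends
		LoadResource: func(ctx context.Context, resourceKey string) (*counter, error) {
			// In a real server this would read the resource from a database.
			return &counter{}, nil
		},
	})
	defer processor.Stop()

	var wg sync.WaitGroup
	for i := 0; i < 1000; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// All 1000 operations share the key, so they run one after another
			// on a single *counter instance and finish together with the batch.
			if err := processor.Run("player-1", func(c *counter) {
				c.value++
			}); err != nil {
				log.Println(err)
			}
		}()
	}
	wg.Wait()
}
```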
From 30d9b4d8d958bf252aefdde24bc2f4cf63bd9722 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 10:39:45 +0200
Subject: [PATCH 03/10] Add benchmark

---
 batch_bench_test.go | 47 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 batch_bench_test.go

diff --git a/batch_bench_test.go b/batch_bench_test.go
new file mode 100644
index 0000000..f4e68bc
--- /dev/null
+++ b/batch_bench_test.go
@@ -0,0 +1,47 @@
+package batch_test
+
+import (
+	"strconv"
+	"sync"
+	"testing"
+
+	"github.com/elgopher/batch"
+	"github.com/stretchr/testify/require"
+)
+
+func BenchmarkProcessor_Run(b *testing.B) {
+	resources := []int{
+		1, 8, 64, 512, 4096, 32768, 262144, 2097152,
+	}
+
+	for _, resourceCount := range resources {
+		b.Run(strconv.Itoa(resourceCount), func(b *testing.B) {
+			b.ReportAllocs()
+			b.ResetTimer()
+
+			processor := batch.StartProcessor(batch.Options[empty]{})
+			defer processor.Stop()
+
+			var allOperationsFinished sync.WaitGroup
+			allOperationsFinished.Add(b.N)
+
+			b.ResetTimer()
+
+			for i := 0; i < b.N; i++ {
+				key := strconv.Itoa(i % resourceCount)
+				go func() {
+					// when
+					err := processor.Run(key, operation)
+					require.NoError(b, err)
+					allOperationsFinished.Done()
+				}()
+			}
+
+			b.StopTimer()
+
+			allOperationsFinished.Wait()
+		})
+	}
+}
+
+func operation(empty) {}

From 44de6ca04e8a769f58105d3ff50dcaf28cb3a375 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 13:07:18 +0200
Subject: [PATCH 04/10] Use single deadline

context.Context deadline might be slightly out of sync with batch deadline.
---
 worker.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/worker.go b/worker.go
index 6ead7fb..00e013e 100644
--- a/worker.go
+++ b/worker.go
@@ -69,9 +69,10 @@ func (w *worker[Resource]) endAllBatches() {
 func (w *worker[Resource]) runOperation(_operation operation[Resource]) {
 	_batch, found := w.batchByResourceKey[_operation.resourceKey]
 	if !found {
-		ctx, _ := context.WithTimeout(context.Background(), w.maxDuration)
-
 		now := time.Now()
+		deadline := now.Add(w.minDuration)
+
+		ctx, _ := context.WithDeadline(context.Background(), deadline)

 		resource, err := w.loadResource(ctx, _operation.resourceKey)
 		if err != nil {
@@ -83,7 +84,7 @@ func (w *worker[Resource]) runOperation(_operation operation[Resource]) {
 			ctx:      ctx,
 			key:      _operation.resourceKey,
 			resource: resource,
-			deadline: now.Add(w.minDuration),
+			deadline: deadline,
 		}
 		w.batchByResourceKey[_operation.resourceKey] = _batch
 		w.batchByDeadline = append(w.batchByDeadline, _batch)
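The commit above switches from context.WithTimeout plus a separately computed batch deadline to a single deadline value shared by both, so the two instants cannot drift apart. The snippet below is a stand-alone illustration of that pattern using only the standard library; it is not code from the repository, and minDuration is an arbitrary value chosen for the example.

```go
// Stand-alone sketch: compute one deadline and reuse it for both the context
// and any bookkeeping, instead of letting WithTimeout derive its own instant.
package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	minDuration := 100 * time.Millisecond

	// Single source of truth for the deadline.
	deadline := time.Now().Add(minDuration)

	ctx, cancel := context.WithDeadline(context.Background(), deadline)
	defer cancel()

	// Both values now refer to exactly the same instant.
	ctxDeadline, _ := ctx.Deadline()
	fmt.Println(ctxDeadline.Equal(deadline)) // true
}
```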
From 93ba355bd510f2f8372f83dd6cae449a30d1ae27 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 13:45:03 +0200
Subject: [PATCH 05/10] Do not retain context.Context in batch

And always run cancel()
---
 worker.go | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/worker.go b/worker.go
index 00e013e..d85b16b 100644
--- a/worker.go
+++ b/worker.go
@@ -49,17 +49,25 @@ func (w *worker[Resource]) endBatchesAfterDeadline() {
 			return
 		}

-		err := w.saveResource(_batch.ctx, _batch.key, _batch.resource)
+		ctx, cancel := context.WithDeadline(context.Background(), _batch.deadline)
+
+		err := w.saveResource(ctx, _batch.key, _batch.resource)
 		_batch.publishResult(err)
 		delete(w.batchByResourceKey, _batch.key)
 		w.batchByDeadline = w.batchByDeadline[1:]
+
+		cancel()
 	}
 }

 func (w *worker[Resource]) endAllBatches() {
 	for key, _batch := range w.batchByResourceKey {
-		err := w.saveResource(_batch.ctx, key, _batch.resource)
+		ctx, cancel := context.WithDeadline(context.Background(), _batch.deadline)
+
+		err := w.saveResource(ctx, key, _batch.resource)
 		_batch.publishResult(err)
+
+		cancel()
 	}

 	w.batchByResourceKey = map[string]*batch[Resource]{}
@@ -72,7 +80,8 @@ func (w *worker[Resource]) runOperation(_operation operation[Resource]) {
 		now := time.Now()
 		deadline := now.Add(w.minDuration)

-		ctx, _ := context.WithDeadline(context.Background(), deadline)
+		ctx, cancel := context.WithDeadline(context.Background(), deadline)
+		defer cancel()

 		resource, err := w.loadResource(ctx, _operation.resourceKey)
 		if err != nil {
@@ -81,11 +90,11 @@ func (w *worker[Resource]) runOperation(_operation operation[Resource]) {
 		}

 		_batch = &batch[Resource]{
-			ctx:      ctx,
 			key:      _operation.resourceKey,
 			resource: resource,
 			deadline: deadline,
 		}
+
 		w.batchByResourceKey[_operation.resourceKey] = _batch
 		w.batchByDeadline = append(w.batchByDeadline, _batch)
 	}
@@ -102,7 +111,6 @@ type operation[Resource any] struct {
 }

 type batch[Resource any] struct {
-	ctx      context.Context
 	key      string
 	resource Resource
 	results  []chan error

From 6fd8dfdb819b9715e71038ff48d7316f0e84e284 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 14:06:47 +0200
Subject: [PATCH 06/10] [refactor] Simplify worker code

---
 worker.go | 80 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/worker.go b/worker.go
index d85b16b..9918239 100644
--- a/worker.go
+++ b/worker.go
@@ -49,25 +49,23 @@ func (w *worker[Resource]) endBatchesAfterDeadline() {
 			return
 		}

-		ctx, cancel := context.WithDeadline(context.Background(), _batch.deadline)
+		w.endBatch(_batch)

-		err := w.saveResource(ctx, _batch.key, _batch.resource)
-		_batch.publishResult(err)
-		delete(w.batchByResourceKey, _batch.key)
+		delete(w.batchByResourceKey, _batch.resourceKey)
 		w.batchByDeadline = w.batchByDeadline[1:]
-
-		cancel()
 	}
 }

-func (w *worker[Resource]) endAllBatches() {
-	for key, _batch := range w.batchByResourceKey {
-		ctx, cancel := context.WithDeadline(context.Background(), _batch.deadline)
-
-		err := w.saveResource(ctx, key, _batch.resource)
-		_batch.publishResult(err)
+func (w *worker[Resource]) endBatch(_batch *batch[Resource]) {
+	ctx, cancel := context.WithDeadline(context.Background(), _batch.deadline)
+	err := w.saveResource(ctx, _batch.resourceKey, _batch.resource)
+	_batch.publishResult(err)
+	cancel()
+}

-		cancel()
+func (w *worker[Resource]) endAllBatches() {
+	for _, _batch := range w.batchByDeadline {
+		w.endBatch(_batch)
 	}

 	w.batchByResourceKey = map[string]*batch[Resource]{}
@@ -77,33 +75,45 @@ func (w *worker[Resource]) runOperation(_operation operation[Resource]) {
 	_batch, found := w.batchByResourceKey[_operation.resourceKey]
 	if !found {
-		now := time.Now()
-		deadline := now.Add(w.minDuration)
-
-		ctx, cancel := context.WithDeadline(context.Background(), deadline)
-		defer cancel()
-
-		resource, err := w.loadResource(ctx, _operation.resourceKey)
+		var err error
+		_batch, err = w.newBatch(_operation.resourceKey)
 		if err != nil {
 			_operation.result <- err
 			return
 		}

-		_batch = &batch[Resource]{
-			key:      _operation.resourceKey,
-			resource: resource,
-			deadline: deadline,
-		}
-
-		w.batchByResourceKey[_operation.resourceKey] = _batch
-		w.batchByDeadline = append(w.batchByDeadline, _batch)
+		w.addBatch(_batch)
 	}

-	_batch.results = append(_batch.results, _operation.result)
+	_batch.operationResults = append(_batch.operationResults, _operation.result)
 	_operation.run(_batch.resource)
 }

+func (w *worker[Resource]) newBatch(resourceKey string) (*batch[Resource], error) {
+	now := time.Now()
+	deadline := now.Add(w.minDuration)
+
+	ctx, cancel := context.WithDeadline(context.Background(), deadline)
+	defer cancel()
+
+	resource, err := w.loadResource(ctx, resourceKey)
+	if err != nil {
+		return nil, err
+	}
+
+	return &batch[Resource]{
+		resourceKey: resourceKey,
+		resource:    resource,
+		deadline:    deadline,
+	}, nil
+}
+
+func (w *worker[Resource]) addBatch(b *batch[Resource]) {
+	w.batchByResourceKey[b.resourceKey] = b
+	w.batchByDeadline = append(w.batchByDeadline, b)
+}
+
 type operation[Resource any] struct {
 	resourceKey string
 	run         func(Resource)
 	result      chan error
@@ -111,14 +121,14 @@ type operation[Resource any] struct {
 }

 type batch[Resource any] struct {
-	key      string
-	resource Resource
-	results  []chan error
-	deadline time.Time
+	resourceKey      string
+	resource         Resource
+	operationResults []chan error
+	deadline         time.Time
 }

 func (b *batch[Resource]) publishResult(result error) {
-	for _, c := range b.results {
-		c <- result
+	for _, r := range b.operationResults {
+		r <- result
 	}
 }
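The two commits above stop storing a context inside the batch and make sure every context created with WithDeadline gets its CancelFunc called. The snippet below is not library code; it is a minimal stand-alone illustration of that cancel discipline around a save-style callback. The save function, key and values are placeholders invented for the example.

```go
// Stand-alone sketch: every context.WithDeadline is paired with its cancel(),
// even when the work finishes before the deadline, so the context's internal
// timer and goroutine are released promptly.
package main

import (
	"context"
	"fmt"
	"time"
)

// save is a stand-in for a SaveResource-style callback.
func save(ctx context.Context, key, value string) error {
	select {
	case <-time.After(10 * time.Millisecond): // pretend to write to a database
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func endBatch(deadline time.Time, key, value string) error {
	ctx, cancel := context.WithDeadline(context.Background(), deadline)
	defer cancel() // always runs, matching the pattern introduced above

	return save(ctx, key, value)
}

func main() {
	err := endBatch(time.Now().Add(time.Second), "player-1", "state")
	fmt.Println(err) // <nil>
}
```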
From 05a50f650f98a6d5467ff5e50588a61d5cfbe1a4 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 16:12:24 +0200
Subject: [PATCH 07/10] Update README.md

---
 README.md | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index feb6028..f8cb3cf 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,8 @@ Thanks to this small library, you can create relatively simple code without the
 Normally a web application is using following pattern to modify data in the database:

 1. **Load resource** from database. Resource is some portion of data
-such as record, document etc. Lock the entire resource pessimistically
-or optimistically (by reading version number).
+such as set of records from relational database, document from Document-oriented database or value from KV store.
+Lock the entire resource pessimistically or optimistically (by reading version number).
 2. **Apply change** to data
 3. **Save resource** to database. Release the pessimistic lock. Or run atomic update with version check (optimistic lock).

@@ -37,7 +37,7 @@ Because a single resource is loaded and saved thousands of times per second
 we can instead:

 1. Load the resource **once** (let's say once per second)
-2. Execute all the requests from this period of time on an already loaded resource. Run them all sequentially.
+2. Execute all the requests from this period of time on an already loaded resource. Run them all sequentially to keep things simple and data consistent.
 3. Save the resource and send responses to all clients if data was stored successfully.

 Such solution could improve the performance by a factor of 1000. And resource is still stored in a consistent state.
@@ -51,14 +51,15 @@ processor := batch.StartProcessor(
 	batch.Options[*YourResource]{ // YourResource is your own Go struct
 		MinDuration: 100 * time.Millisecond,
 		LoadResource: func(ctx context.Context, resourceKey string) (*YourResource, error){
-			// resourceKey uniquely identifies the resource
-			...
+			// resourceKey uniquely identifies the resource
+			...
 		},
 		SaveResource: ...,
 	},
 )

-// Following code is run from http/grpc handler:
+// And use the processor inside http/grpc handler or technology-agnostic service.
+// ResourceKey can be taken from request parameter.
 err := s.BatchProcessor.Run(resourceKey, func(r *YourResource) {
 	// Here you put the code which will executed sequentially inside batch
 })
@@ -72,18 +73,19 @@ For real-life example see [example web application](_example).
 # Add batch to your Go module:
 go get github.com/elgopher/batch
 ```
-Please note that at least **Go 1.18** is required.
+Please note that at least **Go 1.18** is required. The package is using generics, which was added in 1.18.

 ## Scaling out

-Single Go http server is able to handle up to tens of thousands of requests per second on a commodity hardware. This is a lot, but very often you also need:
+Single Go http server is able to handle up to tens of thousands of requests per second on a commodity hardware.
+This is a lot, but very often you also need:

 * high availability (if one server goes down you want other to handle the traffic)
-* you want to handle hundred thousands or millions of requests per second
+* you want to handle hundred-thousands or millions of requests per second

 For both cases you need to deploy **multiple servers** and put a **load balancer** in front of them.
 Please note though, that you have to carefully configure the load balancing algorithm.
-Round-robin is not an option here, because sooner or later you will have problems with locking
+_Round-robin_ is not an option here, because sooner or later you will have problems with locking
 (multiple server instances will run batches on the same resource).
 Ideal solution is to route requests based on parameters or URL.
 For example some http parameter could be a resource key. You can instruct load balancer
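The "Scaling out" section touched above recommends routing requests by a parameter such as the resource key instead of round-robin, so that one resource is always handled by the same server instance. The sketch below is not part of the repository; it is a hypothetical illustration of that routing idea with backend addresses invented for the example.

```go
// Hypothetical sketch: pick the backend from a hash of the resource key, so all
// requests for one resource land on the same server instance.
package main

import (
	"fmt"
	"hash/fnv"
)

// backendFor maps a resource key to one of the configured backends.
func backendFor(resourceKey string, backends []string) string {
	h := fnv.New32a()
	h.Write([]byte(resourceKey))
	idx := int(h.Sum32() % uint32(len(backends)))
	return backends[idx]
}

func main() {
	backends := []string{"10.0.0.1:8080", "10.0.0.2:8080", "10.0.0.3:8080"}

	// The same key always resolves to the same backend.
	fmt.Println(backendFor("player-1", backends))
	fmt.Println(backendFor("player-1", backends))
	fmt.Println(backendFor("player-2", backends))
}
```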
From c813a0ccfbf569cd4ee6bc319445a8ea54246f18 Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 17:25:02 +0200
Subject: [PATCH 08/10] Add logo

---
 README.md |  1 +
 logo.svg  | 14 ++++++++++++++
 2 files changed, 15 insertions(+)
 create mode 100644 logo.svg

diff --git a/README.md b/README.md
index f8cb3cf..425da4a 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 [![Go Report Card](https://goreportcard.com/badge/github.com/elgopher/batch)](https://goreportcard.com/report/github.com/elgopher/batch)
 [![codecov](https://codecov.io/gh/elgopher/batch/branch/master/graph/badge.svg)](https://codecov.io/gh/elgopher/batch)
 [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
+[logo.svg image markup not preserved in this extract]

 ## What it can be used for?

diff --git a/logo.svg b/logo.svg
new file mode 100644
index 0000000..db3adf1
--- /dev/null
+++ b/logo.svg
@@ -0,0 +1,14 @@
+[14 lines of SVG markup not preserved in this extract]
\ No newline at end of file

From 41694dc2092ce604e71eb37b3b68bd27ae3dc67a Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 18:01:08 +0200
Subject: [PATCH 09/10] Change logo color

---
 logo.svg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/logo.svg b/logo.svg
index db3adf1..58d4cd5 100644
--- a/logo.svg
+++ b/logo.svg
@@ -4,7 +4,7 @@
 [SVG markup not preserved; one line changed to alter the logo color]
From 0b2e5308f0290e9f03b9330ab743793ea79e307e Mon Sep 17 00:00:00 2001
From: Jacek Olszak
Date: Tue, 3 May 2022 18:56:19 +0200
Subject: [PATCH 10/10] Fix example in README.md

Example is using missing variable `s`.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 425da4a..481b82f 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ processor := batch.StartProcessor(

 // And use the processor inside http/grpc handler or technology-agnostic service.
 // ResourceKey can be taken from request parameter.
-err := s.BatchProcessor.Run(resourceKey, func(r *YourResource) {
+err := processor.Run(resourceKey, func(r *YourResource) {
 	// Here you put the code which will executed sequentially inside batch
 })
 ```
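The final commit corrects the README snippet to call Run on the processor created earlier. Below is a compilable sketch, not taken from the repository, that puts that corrected snippet into a complete HTTP handler. YourResource, the "key" query parameter and the server wiring are illustrative assumptions; StartProcessor, Options (MinDuration, LoadResource), Run and Stop are the calls shown in the README and benchmark earlier in this series.

```go
// Hypothetical end-to-end usage of the corrected example: an HTTP handler takes
// the resource key from a request parameter and runs its change inside a batch.
package main

import (
	"context"
	"log"
	"net/http"
	"time"

	"github.com/elgopher/batch"
)

type YourResource struct {
	Counter int
}

func main() {
	processor := batch.StartProcessor(batch.Options[*YourResource]{
		MinDuration: 100 * time.Millisecond,
		LoadResource: func(ctx context.Context, resourceKey string) (*YourResource, error) {
			// Load the resource identified by resourceKey from your database here.
			return &YourResource{}, nil
		},
	})
	defer processor.Stop()

	http.HandleFunc("/increment", func(w http.ResponseWriter, r *http.Request) {
		resourceKey := r.URL.Query().Get("key") // resource key taken from a request parameter

		err := processor.Run(resourceKey, func(res *YourResource) {
			// Code here is executed sequentially inside the batch.
			res.Counter++
		})
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.WriteHeader(http.StatusOK)
	})

	log.Println(http.ListenAndServe(":8080", nil))
}
```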