From f9dac0742374940f88100eb47838e902e3b51eb8 Mon Sep 17 00:00:00 2001 From: Balearica Date: Sun, 20 Apr 2025 20:45:05 -0700 Subject: [PATCH 1/7] Update api.md --- docs/api.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/api.md b/docs/api.md index cc3f82d7d..2cb0ee039 100644 --- a/docs/api.md +++ b/docs/api.md @@ -34,8 +34,12 @@ - `langs` a string to indicate the languages traineddata to download, multiple languages are specified using an array (['eng', 'chi_sim']) - `oem` a enum to indicate the OCR Engine Mode you use - `options` an object of customized options - - `corePath` path to a directory containing **both** `tesseract-core.wasm.js` and `tesseract-core-simd.wasm.js` from [Tesseract.js-core](https://www.npmjs.com/package/tesseract.js-core) package - - Setting `corePath` to a specific `.js` file is **strongly discouraged.** To provide the best performance on all devices, Tesseract.js needs to be able to pick between `tesseract-core.wasm.js` and `tesseract-core-simd.wasm.js`. See [this issue](https://github.com/naptha/tesseract.js/issues/735) for more detail. + - `corePath` path to a directory containing **all of** the following files from [Tesseract.js-core](https://www.npmjs.com/package/tesseract.js-core) package: + - `tesseract-core.wasm.js` + - `tesseract-core-simd.wasm.js` + - `tesseract-core-lstm.wasm.js` + - `tesseract-core-simd-lstm.wasm.js` + - Some code snippets found online set `corePath` to a specific `.js` file. This is **strongly discouraged.** To provide the best performance and lowest network usage, Tesseract.js needs to be able to pick between builds. - `langPath` path for downloading traineddata, do not include `/` at the end of the path - `workerPath` path for downloading worker script - `dataPath` path for saving traineddata in WebAssembly file system, not common to modify From d82eb05039c9879f5563c3bb0487e8f5683884fd Mon Sep 17 00:00:00 2001 From: Balearica Date: Wed, 27 Aug 2025 22:40:44 -0700 Subject: [PATCH 2/7] Fixed type error per #1031 (#1032) --- src/index.d.ts | 6 +++--- tests/recognize.test.mjs | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/index.d.ts b/src/index.d.ts index 1f5a9c809..18e95fce1 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -41,7 +41,7 @@ declare namespace Tesseract { load_number_dawg: string load_bigram_dawg: string } - + type LoggerMessage = { jobId: string progress: number @@ -49,7 +49,7 @@ declare namespace Tesseract { userJobId: string workerId: string } - + interface WorkerOptions { corePath: string langPath: string @@ -166,7 +166,7 @@ declare namespace Tesseract { interface RowAttributes { ascenders: number; descenders: number; - row_height: number; + rowHeight: number; } interface Bbox { x0: number; diff --git a/tests/recognize.test.mjs b/tests/recognize.test.mjs index 692e0872e..e11d34fde 100644 --- a/tests/recognize.test.mjs +++ b/tests/recognize.test.mjs @@ -318,6 +318,21 @@ describe('recognize()', () => { expect(blocks[0].paragraphs[0].lines[0].words[0].text).to.be('繁體'); expect(blocks[0].paragraphs[0].lines[0].text).to.be('繁體 中 文 測試\n'); }).timeout(TIMEOUT); + + it('should report RowAttributes', async () => { + await worker.reinitialize('eng'); + const { data: { blocks } } = await worker.recognize(`${IMAGE_PATH}/testocr.png`, {}, { blocks: true }); + const firstLine = blocks[0].paragraphs[0].lines[0]; + + expect(firstLine.rowAttributes).to.be.an('object'); + expect(firstLine.rowAttributes.ascenders).to.be.a('number'); + expect(firstLine.rowAttributes.descenders).to.be.a('number'); + expect(firstLine.rowAttributes.rowHeight).to.be.a('number'); + + expect(firstLine.rowAttributes.ascenders).to.be.greaterThan(0); + expect(firstLine.rowAttributes.descenders).to.be.greaterThan(0); + expect(firstLine.rowAttributes.rowHeight).to.be.greaterThan(0); + }).timeout(TIMEOUT); }); describe('should support layout blocks (json) output', () => { From fb2bd4d4b1ce72c1855c504564116459380b7491 Mon Sep 17 00:00:00 2001 From: hata6502 <7702653+hata6502@users.noreply.github.com> Date: Sun, 26 Oct 2025 02:22:57 +0900 Subject: [PATCH 3/7] Add GIF format to documentation (#1035) --- docs/image-format.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/image-format.md b/docs/image-format.md index 8f72f8d1a..b647ed420 100644 --- a/docs/image-format.md +++ b/docs/image-format.md @@ -2,7 +2,7 @@ The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below. -Support Image Formats: **bmp, jpg, png, pbm, webp** +Support Image Formats: **bmp, jpg, png, pbm, webp, gif \[non-animated\]**. For browser and Node, supported data types are: - string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp) @@ -15,4 +15,4 @@ For browser only, supported data types are: For Node only, supported data types are: - string containing a path to local image -Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported. \ No newline at end of file +Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported. From 9c6fc3c775434de6e3375bfc9dea1cff2d488161 Mon Sep 17 00:00:00 2001 From: Balearica Date: Sat, 25 Oct 2025 21:19:03 -0700 Subject: [PATCH 4/7] Fixed types per #1033 (#1036) --- src/index.d.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/index.d.ts b/src/index.d.ts index 18e95fce1..2c9689db3 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -161,7 +161,6 @@ declare namespace Tesseract { y0: number; x1: number; y1: number; - has_baseline: boolean; } interface RowAttributes { ascenders: number; From 0676b7897f90e30bbf9e9d6c77f74e09aba19516 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 25 Oct 2025 21:19:31 -0700 Subject: [PATCH 5/7] Bump tmp from 0.2.3 to 0.2.4 (#1029) Bumps [tmp](https://github.com/raszi/node-tmp) from 0.2.3 to 0.2.4. - [Changelog](https://github.com/raszi/node-tmp/blob/master/CHANGELOG.md) - [Commits](https://github.com/raszi/node-tmp/compare/v0.2.3...v0.2.4) --- updated-dependencies: - dependency-name: tmp dependency-version: 0.2.4 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/package-lock.json b/package-lock.json index 11cbe9ff0..225f559ae 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9292,9 +9292,9 @@ "dev": true }, "node_modules/tmp": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz", - "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==", + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.4.tgz", + "integrity": "sha512-UdiSoX6ypifLmrfQ/XfiawN6hkjSBpCjhKxxZcWlUUmoXLaCKQU0bx4HF/tdDK2uzRuchf1txGvrWBzYREssoQ==", "dev": true, "engines": { "node": ">=14.14" @@ -17005,9 +17005,9 @@ "dev": true }, "tmp": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz", - "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==", + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.4.tgz", + "integrity": "sha512-UdiSoX6ypifLmrfQ/XfiawN6hkjSBpCjhKxxZcWlUUmoXLaCKQU0bx4HF/tdDK2uzRuchf1txGvrWBzYREssoQ==", "dev": true }, "to-regex-range": { From a20d9ff04579e2fb604d4edad3c3bf1bb543ac69 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 25 Oct 2025 21:22:34 -0700 Subject: [PATCH 6/7] Bump form-data from 4.0.2 to 4.0.4 (#1026) Bumps [form-data](https://github.com/form-data/form-data) from 4.0.2 to 4.0.4. - [Release notes](https://github.com/form-data/form-data/releases) - [Changelog](https://github.com/form-data/form-data/blob/master/CHANGELOG.md) - [Commits](https://github.com/form-data/form-data/compare/v4.0.2...v4.0.4) --- updated-dependencies: - dependency-name: form-data dependency-version: 4.0.4 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package-lock.json | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/package-lock.json b/package-lock.json index 225f559ae..8b72fe5d4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4630,14 +4630,15 @@ } }, "node_modules/form-data": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz", - "integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", + "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", "dev": true, "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", "mime-types": "^2.1.12" }, "engines": { @@ -13625,14 +13626,15 @@ } }, "form-data": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz", - "integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", + "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", "dev": true, "requires": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", "mime-types": "^2.1.12" } }, From 50a3fd2cb8511194d5d8bbc245938716d4134411 Mon Sep 17 00:00:00 2001 From: Frazer Smith Date: Sun, 26 Oct 2025 04:24:58 +0000 Subject: [PATCH 7/7] Added Node.js v24 to Actions (#1021) --- .github/workflows/node.js.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 547222d94..2657ce255 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: - node-version: [14.x, 16.x, 18.x, 20.x, 22.x] + node-version: [14.x, 16.x, 18.x, 20.x, 22.x, 24.x] steps: - uses: actions/checkout@v3