Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2e6c1bc

Browse files
🐛 fix: Fix text split
1 parent 5939f0a commit 2e6c1bc

File tree

5 files changed

+46
-31
lines changed

5 files changed

+46
-31
lines changed

package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
"dependencies": {
6969
"@babel/runtime": "^7.26.0",
7070
"lodash-es": "^4.17.21",
71+
"markdown-to-txt": "^2.0.1",
7172
"query-string": "^9.1.1",
7273
"react-error-boundary": "^4.1.2",
7374
"remark-gfm": "^3.0.1",
@@ -80,7 +81,7 @@
8081
},
8182
"devDependencies": {
8283
"@commitlint/cli": "^19.6.0",
83-
"@lobehub/i18n-cli": "^1.20.0",
84+
"@lobehub/i18n-cli": "^1.20.1",
8485
"@lobehub/lint": "^1.24.4",
8586
"@types/lodash-es": "^4.17.12",
8687
"@types/node": "^20.17.7",
@@ -93,7 +94,7 @@
9394
"commitlint": "^19.6.0",
9495
"concurrently": "^9.1.0",
9596
"dumi": "^2.4.14",
96-
"dumi-theme-lobehub": "^1.10.6",
97+
"dumi-theme-lobehub": "^1.10.8",
9798
"eslint": "^8.57.1",
9899
"father": "^4.5.1",
99100
"husky": "^9.1.7",
Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import { markdownToTxt } from 'markdown-to-txt';
2+
13
const toHalfWidthAndCleanSpace = (str: string): string => {
2-
return str
4+
return markdownToTxt(str)
35
.replaceAll(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 0xFE_E0))
46
.replaceAll('\u3000', ' ')
57
.replaceAll('。', '.')
@@ -22,32 +24,42 @@ const toHalfWidthAndCleanSpace = (str: string): string => {
2224
.replaceAll(/\s+/g, ' ');
2325
};
2426

25-
export const splitTextIntoSegments = (text: string, maxChars: number = 100): string[] => {
27+
export const splitTextIntoSegments = (text: string, chunkSize: number = 100): string[] => {
2628
text = toHalfWidthAndCleanSpace(text);
2729

28-
const sentences = text.match(/[^!.;?]+[!.;?]+/g) || [];
29-
const segments: string[] = [];
30-
let currentSegment = '';
30+
const chunks: string[] = [];
31+
const paragraphs = text.split('\n');
32+
let currentChunk = '';
3133

32-
sentences.forEach((sentence) => {
33-
if ((currentSegment + sentence).length > maxChars) {
34-
if (currentSegment.length > 0) {
35-
segments.push(currentSegment.trim());
36-
currentSegment = '';
37-
}
38-
if (sentence.length > maxChars) {
39-
segments.push(sentence.trim());
40-
} else {
41-
currentSegment = sentence;
34+
function addChunk(chunk: string) {
35+
if (chunk.trim()) {
36+
chunks.push(chunk.trim());
37+
}
38+
}
39+
40+
for (const paragraph of paragraphs) {
41+
if (currentChunk.length + paragraph.length + 1 > chunkSize && currentChunk.length > 0) {
42+
addChunk(currentChunk);
43+
currentChunk = '';
44+
}
45+
46+
if (paragraph.length > chunkSize) {
47+
const sentences = paragraph.match(/[^!.?]+[!.?]+/g) || [paragraph];
48+
for (const sentence of sentences) {
49+
if (currentChunk.length + sentence.length + 1 > chunkSize && currentChunk.length > 0) {
50+
addChunk(currentChunk);
51+
currentChunk = '';
52+
}
53+
currentChunk += (currentChunk ? ' ' : '') + sentence.trim();
4254
}
4355
} else {
44-
currentSegment += sentence;
56+
currentChunk += (currentChunk ? '\n' : '') + paragraph;
4557
}
46-
});
58+
}
4759

48-
if (currentSegment.length > 0) {
49-
segments.push(currentSegment.trim());
60+
if (currentChunk) {
61+
addChunk(currentChunk);
5062
}
5163

52-
return segments.filter(Boolean);
64+
return chunks;
5365
};

src/react/useEdgeSpeech/demos/index.tsx

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,19 @@ import { Button, Input } from 'antd';
55
import { Volume2 } from 'lucide-react';
66
import { Flexbox } from 'react-layout-kit';
77

8-
import { EDGE_SPEECH_BACKEND_URL } from '../../_util/api';
98
import { genLevaOptions } from '../../_util/leva';
109

1110
const defaultText = '这是一段使用 Edge Speech 的语音演示';
1211

1312
export default () => {
1413
const store = useCreateStore();
1514

16-
const api: any = useControls(
17-
{
18-
serviceUrl: EDGE_SPEECH_BACKEND_URL,
19-
},
20-
{ store },
21-
);
15+
// const api: any = useControls(
16+
// {
17+
// serviceUrl: EDGE_SPEECH_BACKEND_URL,
18+
// },
19+
// { store },
20+
// );
2221

2322
const options: any = useControls(
2423
{
@@ -31,9 +30,10 @@ export default () => {
3130
);
3231

3332
const { setText, isGlobalLoading, start, stop, audio } = useEdgeSpeech(defaultText, {
34-
api,
33+
// api,
3534
options,
3635
});
36+
3737
return (
3838
<StoryBook levaStore={store}>
3939
<Flexbox gap={8}>

src/react/useEdgeSpeech/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ export const useEdgeSpeech = (defaultText: string, init: EdgeSpeechOptions) => {
1616
options.voice,
1717
text,
1818
async (segmentText: string) => {
19+
console.log(segmentText);
1920
const instance = new EdgeSpeechTTS({ ...api, locale });
2021
const res = await instance.create({ input: segmentText, options });
2122
setResponse(res);

src/react/useTTS/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ export const useTTS = (
4848
}, [handleReset]);
4949

5050
const { isLoading, error, mutate } = useSWR(
51-
shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]] : null,
51+
shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]].join('-') : null,
5252
async () => await fetchTTS(textArray[index]),
5353
{
5454
onError: (err, ...rest) => {
@@ -81,6 +81,7 @@ export const useTTS = (
8181

8282
useEffect(() => {
8383
const texts = splitTextIntoSegments(text);
84+
8485
handleReset(texts);
8586
return () => {
8687
handleReset();

0 commit comments

Comments
 (0)