🐛 fix: Fix text splitting (strip Markdown, chunk by paragraph then sentence)

canisminor1990 · canisminor1990 · commit 2e6c1bc35d48 · 2024-11-26T01:14:50.000+08:00
diff --git a/package.json b/package.json
@@ -68,6 +68,7 @@
   "dependencies": {
     "@babel/runtime": "^7.26.0",
     "lodash-es": "^4.17.21",
+    "markdown-to-txt": "^2.0.1",
     "query-string": "^9.1.1",
     "react-error-boundary": "^4.1.2",
     "remark-gfm": "^3.0.1",
@@ -80,7 +81,7 @@
   },
   "devDependencies": {
     "@commitlint/cli": "^19.6.0",
-    "@lobehub/i18n-cli": "^1.20.0",
+    "@lobehub/i18n-cli": "^1.20.1",
     "@lobehub/lint": "^1.24.4",
     "@types/lodash-es": "^4.17.12",
     "@types/node": "^20.17.7",
@@ -93,7 +94,7 @@
     "commitlint": "^19.6.0",
     "concurrently": "^9.1.0",
     "dumi": "^2.4.14",
-    "dumi-theme-lobehub": "^1.10.6",
+    "dumi-theme-lobehub": "^1.10.8",
     "eslint": "^8.57.1",
     "father": "^4.5.1",
     "husky": "^9.1.7",
diff --git a/src/core/utils/splitTextIntoSegments.ts b/src/core/utils/splitTextIntoSegments.ts
@@ -1,5 +1,7 @@
+import { markdownToTxt } from 'markdown-to-txt';
+
 const toHalfWidthAndCleanSpace = (str: string): string => {
-  return str
+  return markdownToTxt(str)
     .replaceAll(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 0xFE_E0))
     .replaceAll('\u3000', ' ')
     .replaceAll('。', '.')
@@ -22,32 +24,59 @@ const toHalfWidthAndCleanSpace = (str: string): string => {
     .replaceAll(/\s+/g, ' ');
 };
 
-export const splitTextIntoSegments = (text: string, maxChars: number = 100): string[] => {
+/**
+ * Normalizes `text` with `toHalfWidthAndCleanSpace` and splits it into trimmed,
+ * non-empty chunks, packing whole paragraphs (or, for a paragraph longer than
+ * `chunkSize`, whole sentences) into a chunk until the limit would be exceeded.
+ *
+ * A single sentence longer than `chunkSize` is kept intact, so a chunk can exceed
+ * the limit.
+ *
+ * NOTE(review): the visible tail of `toHalfWidthAndCleanSpace` collapses every
+ * whitespace run, newlines included, into one space, so `split('\n')` likely
+ * yields a single paragraph — confirm whether paragraph breaks should survive.
+ *
+ * @param text - Raw (possibly Markdown) text to split.
+ * @param chunkSize - Soft upper bound on chunk length, in characters.
+ * @returns The chunks in their original order.
+ */
+export const splitTextIntoSegments = (text: string, chunkSize: number = 100): string[] => {
   text = toHalfWidthAndCleanSpace(text);
 
-  const sentences = text.match(/[^!.;?]+[!.;?]+/g) || [];
-  const segments: string[] = [];
-  let currentSegment = '';
+  const chunks: string[] = [];
+  const paragraphs = text.split('\n');
+  let currentChunk = '';
 
-  sentences.forEach((sentence) => {
-    if ((currentSegment + sentence).length > maxChars) {
-      if (currentSegment.length > 0) {
-        segments.push(currentSegment.trim());
-        currentSegment = '';
-      }
-      if (sentence.length > maxChars) {
-        segments.push(sentence.trim());
-      } else {
-        currentSegment = sentence;
+  function addChunk(chunk: string) {
+    if (chunk.trim()) {
+      chunks.push(chunk.trim());
+    }
+  }
+
+  for (const paragraph of paragraphs) {
+    if (currentChunk.length + paragraph.length + 1 > chunkSize && currentChunk.length > 0) {
+      addChunk(currentChunk);
+      currentChunk = '';
+    }
+
+    if (paragraph.length > chunkSize) {
+      // `[!.?]*` (not `+`) keeps a trailing fragment that lacks closing punctuation.
+      const sentences = paragraph.match(/[^!.?]+[!.?]*/g) || [paragraph];
+      for (const sentence of sentences) {
+        if (currentChunk.length + sentence.length + 1 > chunkSize && currentChunk.length > 0) {
+          addChunk(currentChunk);
+          currentChunk = '';
+        }
+        currentChunk += (currentChunk ? ' ' : '') + sentence.trim();
       }
     } else {
-      currentSegment += sentence;
+      currentChunk += (currentChunk ? '\n' : '') + paragraph;
     }
-  });
+  }
 
-  if (currentSegment.length > 0) {
-    segments.push(currentSegment.trim());
+  if (currentChunk) {
+    addChunk(currentChunk);
   }
 
-  return segments.filter(Boolean);
+  return chunks;
 };
diff --git a/src/react/useEdgeSpeech/demos/index.tsx b/src/react/useEdgeSpeech/demos/index.tsx
@@ -5,20 +5,19 @@ import { Button, Input } from 'antd';
 import { Volume2 } from 'lucide-react';
 import { Flexbox } from 'react-layout-kit';
 
-import { EDGE_SPEECH_BACKEND_URL } from '../../_util/api';
 import { genLevaOptions } from '../../_util/leva';
 
 const defaultText = '这是一段使用 Edge Speech 的语音演示';
 
 export default () => {
   const store = useCreateStore();
 
-  const api: any = useControls(
-    {
-      serviceUrl: EDGE_SPEECH_BACKEND_URL,
-    },
-    { store },
-  );
+  // const api: any = useControls(
+  //   {
+  //     serviceUrl: EDGE_SPEECH_BACKEND_URL,
+  //   },
+  //   { store },
+  // );
 
   const options: any = useControls(
     {
@@ -31,9 +30,10 @@ export default () => {
   );
 
   const { setText, isGlobalLoading, start, stop, audio } = useEdgeSpeech(defaultText, {
-    api,
+    // api,
     options,
   });
+
   return (
     <StoryBook levaStore={store}>
       <Flexbox gap={8}>
diff --git a/src/react/useEdgeSpeech/index.ts b/src/react/useEdgeSpeech/index.ts
@@ -16,6 +16,7 @@ export const useEdgeSpeech = (defaultText: string, init: EdgeSpeechOptions) => {
     options.voice,
     text,
     async (segmentText: string) => {
+      console.log(segmentText);
       const instance = new EdgeSpeechTTS({ ...api, locale });
       const res = await instance.create({ input: segmentText, options });
       setResponse(res);
diff --git a/src/react/useTTS/index.ts b/src/react/useTTS/index.ts
@@ -48,7 +48,7 @@ export const useTTS = (
   }, [handleReset]);
 
   const { isLoading, error, mutate } = useSWR(
-    shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]] : null,
+    shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]].join('-') : null,
     async () => await fetchTTS(textArray[index]),
     {
       onError: (err, ...rest) => {
@@ -81,6 +81,7 @@ export const useTTS = (
 
   useEffect(() => {
     const texts = splitTextIntoSegments(text);
+
     handleReset(texts);
     return () => {
       handleReset();