From 13d96ed2b305e8b4921e03293c693b0c3e36bcd3 Mon Sep 17 00:00:00 2001 From: CanisMinor Date: Wed, 8 Nov 2023 14:32:53 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=20feat:=20Update=20README.md=20wi?= =?UTF-8?q?th=20new=20environment=20variables=20and=20their=20descriptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The README.md file was modified to include a table with new environment variables and their descriptions. Additionally, one variable was renamed and its description was updated. Changes made in the code include: - Configuration items and functions related to the Microsoft Speech API and the Edge Speech Service - Handling requests for the Azure Speech Service - Implementing functions for handling Azure Speech requests and getting allowed origins - Fetching speech synthesis data from a Microsoft Speech service by setting up the necessary configurations, sending a POST request, retrieving audio data, decoding it, and returning an audio buffer source node. This commit adds important information to the README.md file and improves the code related to the Microsoft Speech API and the Azure Speech Service. 
--- README.md | 21 +++---- api/azure-speech.ts | 15 +++++ api/microsoft-speech.ts | 30 +++------- package.json | 8 ++- src/const/api.ts | 3 + src/server.ts | 2 + {lib => src/server}/cors.ts | 0 src/server/getAllowOrigins.ts | 15 +++++ src/server/handleAzureSpeechRequest.ts | 52 +++++++++++++++++ src/server/handleMicrosoftSpeechRequest.ts | 32 +++++++++++ src/services/fetchAzureSpeech.ts | 65 +++++++--------------- src/services/fetchMicrosoftSpeech.ts | 20 ------- 12 files changed, 162 insertions(+), 101 deletions(-) create mode 100644 api/azure-speech.ts create mode 100644 src/server.ts rename {lib => src/server}/cors.ts (100%) create mode 100644 src/server/getAllowOrigins.ts create mode 100644 src/server/handleAzureSpeechRequest.ts create mode 100644 src/server/handleMicrosoftSpeechRequest.ts diff --git a/README.md b/README.md index e3b7c59..f1c9683 100644 --- a/README.md +++ b/README.md @@ -86,16 +86,17 @@ Click button below to deploy your private plugins' gateway. This project provides some additional configuration items set with environment variables: -| Environment Variable | Description | Example | -| -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | -| `OPENAI_API_KEY` | This is the API key you apply on the OpenAI account page | `sk-xxxxxx...xxxxxx` | -| `OPENAI_PROXY_URL` | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL | `https://api.chatanywhere.cn/v1`
The default value is
`https://api.openai.com/v1` | -| `AZURE_SPEECH_KEY` | This is the API key of Azure Speech Service | | -| `AZURE_SPEECH_REGION` | This is the region of Azure Speech Service | | -| `MICROSOFT_SPEECH_PROXY_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | | -| `MICROSOFT_SPEECH_ALLOW_ORIGINS` | Allow origins , string or string array | | -| `EDDGE_API_TOKEN` | This is the API key of Edge Speech Service | `6A5AA1D4EAFF4E9FB37E23D68491D6F4` | -| `EDDGE_PROXY_URL` | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL | `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1` | +| Environment Variable | Description | Default | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- | +| `ALLOW_ORIGINS` | Allowed origins: a single origin or a comma-separated list of origins | | +| `OPENAI_API_KEY` | This is the API key you apply on the OpenAI account page | `sk-xxxxxx...xxxxxx` | +| `OPENAI_PROXY_URL` | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL | `https://api.openai.com/v1` | +| `AZURE_SPEECH_KEY` | This is the API key of Azure Speech Service | | +| `AZURE_SPEECH_REGION` | This is the region of Azure Speech Service | | +| `AZURE_SPEECH_PROXY_URL` | If you manually configure the Azure Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/azure-speech` | +| `MICROSOFT_SPEECH_PROXY_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/microsoft-speech` | +| 
`EDDGE_API_TOKEN` | This is the API key of Edge Speech Service | | +| `EDDGE_PROXY_URL` | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL | |
diff --git a/api/azure-speech.ts b/api/azure-speech.ts new file mode 100644 index 0000000..d59cd67 --- /dev/null +++ b/api/azure-speech.ts @@ -0,0 +1,15 @@ +import cors from '../src/server/cors'; +import { getAllowOrigins } from '../src/server/getAllowOrigins'; +import { handleAzureSpeechRequest } from '../src/server/handleAzureSpeechRequest'; + +export const config = { + runtime: 'edge', +}; + +export default async (req: Request) => { + if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); + const origin = getAllowOrigins(req); + if (!origin) return new Response('Origin Not Allowed', { status: 403 }); + const res = await handleAzureSpeechRequest(req); + return cors(req, res, { methods: ['POST'], origin }); +}; diff --git a/api/microsoft-speech.ts b/api/microsoft-speech.ts index b2588df..f8e4818 100644 --- a/api/microsoft-speech.ts +++ b/api/microsoft-speech.ts @@ -1,31 +1,15 @@ -import cors from '../lib/cors'; +import cors from '../src/server/cors'; +import { getAllowOrigins } from '../src/server/getAllowOrigins'; +import { handleMicrosoftSpeechRequest } from '../src/server/handleMicrosoftSpeechRequest'; export const config = { runtime: 'edge', }; -const API = - 'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak'; - -const MICROSOFT_SPEECH_ALLOW_ORIGINS = - process.env?.MICROSOFT_SPEECH_ALLOW_ORIGINS?.split(',') || undefined; - export default async (req: Request) => { if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); - - let origin = '*'; - - if (MICROSOFT_SPEECH_ALLOW_ORIGINS) { - const reqOrigin = req.headers.get('origin'); - if (reqOrigin && MICROSOFT_SPEECH_ALLOW_ORIGINS.includes(reqOrigin)) { - origin = reqOrigin; - } else { - return new Response('Origin Not Allowed', { status: 403 }); - } - } - - const res = await fetch(API, { body: req.body, headers: req.headers, method: 'POST' }); - const newResponse = new Response(res.body, res); - - 
return cors(req, newResponse, { methods: ['POST'], origin }); + const origin = getAllowOrigins(req); + if (!origin) return new Response('Origin Not Allowed', { status: 403 }); + const res = await handleMicrosoftSpeechRequest(req); + return cors(req, res, { methods: ['POST'], origin }); }; diff --git a/package.json b/package.json index 2d6b50b..80c4762 100644 --- a/package.json +++ b/package.json @@ -13,13 +13,15 @@ "license": "MIT", "author": "LobeHub ", "sideEffects": false, - "main": "dist/index.js", - "types": "dist/index.d.ts", + "main": "es/index.js", + "module": "es/index.js", + "types": "es/index.d.ts", "files": [ - "dist" + "es" ], "scripts": { "build": "father build", + "build:server": "tsc server.ts --declaration", "ci": "npm run lint && npm run type-check", "dev": "father dev", "docs:build": "dumi build", diff --git a/src/const/api.ts b/src/const/api.ts index 4fe3a92..339f050 100644 --- a/src/const/api.ts +++ b/src/const/api.ts @@ -1,7 +1,10 @@ import urlJoin from 'url-join'; +export const MICROSOFT_SPPECH_URL = + 'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak'; export const MICROSOFT_SPEECH_PROXY_URL = process.env.MICROSOFT_SPEECH_PROXY_URL || '/api/microsoft-speech'; +export const AZURE_SPEECH_PROXY_URL = process.env.AZURE_SPEECH_PROXY_URL || '/api/azure-speech'; export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || ''; export const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION || ''; export const OPENAI_API_KEY = process.env.OPENAI_API_KEY || ''; diff --git a/src/server.ts b/src/server.ts new file mode 100644 index 0000000..4c232a9 --- /dev/null +++ b/src/server.ts @@ -0,0 +1,2 @@ +export { handleAzureSpeechRequest } from './server/handleAzureSpeechRequest'; +export { handleMicrosoftSpeechRequest } from './server/handleMicrosoftSpeechRequest'; diff --git a/lib/cors.ts b/src/server/cors.ts similarity index 100% rename from lib/cors.ts rename to src/server/cors.ts diff --git 
a/src/server/getAllowOrigins.ts b/src/server/getAllowOrigins.ts new file mode 100644 index 0000000..3fccfdf --- /dev/null +++ b/src/server/getAllowOrigins.ts @@ -0,0 +1,15 @@ +const ALLOW_ORIGINS = process.env?.ALLOW_ORIGINS?.split(',') || undefined; + +export const getAllowOrigins = (req: Request) => { + let origin = '*'; + + if (ALLOW_ORIGINS) { + const reqOrigin = req.headers.get('origin'); + if (reqOrigin && ALLOW_ORIGINS.includes(reqOrigin)) { + origin = reqOrigin; + } else { + return; + } + } + return origin; +}; diff --git a/src/server/handleAzureSpeechRequest.ts b/src/server/handleAzureSpeechRequest.ts new file mode 100644 index 0000000..2e79d29 --- /dev/null +++ b/src/server/handleAzureSpeechRequest.ts @@ -0,0 +1,52 @@ +import { + AudioConfig, + PropertyId, + ResultReason, + SpeechConfig, + SpeechSynthesisOutputFormat, + SpeechSynthesisResult, + SpeechSynthesizer, +} from 'microsoft-cognitiveservices-speech-sdk'; + +import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '@/const/api'; + +const fetchAzureSpeech = async (ssml: string, { api }: any): Promise => { + const key = api.key || AZURE_SPEECH_KEY; + const region = api.key || AZURE_SPEECH_REGION; + const speechConfig = SpeechConfig.fromSubscription(key, region); + speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true'); + speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus; + + const audioConfig = AudioConfig.fromDefaultSpeakerOutput(); + const synthesizer: SpeechSynthesizer | null = new SpeechSynthesizer(speechConfig, audioConfig); + + const completeCb = async ( + result: SpeechSynthesisResult, + resolve: (value: ArrayBuffer) => void, + ) => { + if (result.reason === ResultReason.SynthesizingAudioCompleted) { + const audioData = result.audioData; + resolve(audioData); + } + synthesizer.close(); + }; + + const errCb = (err: string, reject: (err?: any) => void) => { + reject(err); + synthesizer.close(); + }; + + return new 
Promise((resolve, reject) => { + synthesizer.speakSsmlAsync( + ssml, + (result) => completeCb(result, resolve), + (err) => errCb(err, reject), + ); + }); +}; + +export const handleAzureSpeechRequest = async (req: Request) => { + const { ssml, ...options } = req.body as any; + const data = await fetchAzureSpeech(ssml, options); + return new Response(data); +}; diff --git a/src/server/handleMicrosoftSpeechRequest.ts b/src/server/handleMicrosoftSpeechRequest.ts new file mode 100644 index 0000000..fd5c381 --- /dev/null +++ b/src/server/handleMicrosoftSpeechRequest.ts @@ -0,0 +1,32 @@ +import { v4 as uuidv4 } from 'uuid'; + +import { MICROSOFT_SPPECH_URL } from '@/const/api'; + +export const handleMicrosoftSpeechRequest = async (req: Request) => { + const DEFAULT_HEADERS = new Headers({ + 'accept': '*/*', + 'accept-language': 'zh-CN,zh;q=0.9', + 'authority': 'southeastasia.api.speech.microsoft.com', + 'content-type': 'application/json', + 'customvoiceconnectionid': uuidv4(), + 'origin': 'https://speech.microsoft.com', + 'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-site', + 'user-agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + }); + + const res = await fetch(MICROSOFT_SPPECH_URL, { + body: req.body, + headers: DEFAULT_HEADERS, + method: 'POST', + // @ts-ignore + responseType: 'arraybuffer', + }); + + return new Response(res.body, res); +}; diff --git a/src/services/fetchAzureSpeech.ts b/src/services/fetchAzureSpeech.ts index 6aeba1d..12a1fc7 100644 --- a/src/services/fetchAzureSpeech.ts +++ b/src/services/fetchAzureSpeech.ts @@ -1,14 +1,4 @@ -import { - AudioConfig, - PropertyId, - ResultReason, - SpeechConfig, - SpeechSynthesisOutputFormat, - SpeechSynthesisResult, - SpeechSynthesizer, -} from 
'microsoft-cognitiveservices-speech-sdk'; - -import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '@/const/api'; +import { AZURE_SPEECH_PROXY_URL } from '@/const/api'; import { type SsmlOptions, genSSML } from '@/utils/genSSML'; export interface AzureSpeechOptions extends SsmlOptions { @@ -18,45 +8,30 @@ export interface AzureSpeechOptions extends SsmlOptions { }; } -// 纯文本生成语音 export const fetchAzureSpeech = async ( text: string, { api, ...options }: AzureSpeechOptions, ): Promise => { - const key = api.key || AZURE_SPEECH_KEY; - const region = api.key || AZURE_SPEECH_REGION; - const speechConfig = SpeechConfig.fromSubscription(key, region); - speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true'); - speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus; - - const audioConfig = AudioConfig.fromDefaultSpeakerOutput(); - const synthesizer: SpeechSynthesizer | null = new SpeechSynthesizer(speechConfig, audioConfig); + const data = JSON.stringify({ + api, + ssml: genSSML(text, options), + }); - const completeCb = async ( - result: SpeechSynthesisResult, - resolve: (value: AudioBufferSourceNode) => void, - ) => { - if (result.reason === ResultReason.SynthesizingAudioCompleted) { - const audioData = result.audioData; - const audioContext = new AudioContext(); - const audioBufferSource = audioContext.createBufferSource(); - audioBufferSource.buffer = await audioContext.decodeAudioData(audioData); - audioBufferSource.connect(audioContext.destination); - resolve(audioBufferSource); - } - synthesizer.close(); - }; + const response: Response = await fetch(AZURE_SPEECH_PROXY_URL, { + body: data, + method: 'POST', + // @ts-ignore + responseType: 'arraybuffer', + }); - const errCb = (err: string, reject: (err?: any) => void) => { - reject(err); - synthesizer.close(); - }; + if (!response.ok) { + throw new Error('Network response was not ok'); + } - return new Promise((resolve, reject) => { - 
synthesizer.speakSsmlAsync( - genSSML(text, options), - (result) => completeCb(result, resolve), - (err) => errCb(err, reject), - ); - }); + const audioData = await response.arrayBuffer(); + const audioContext = new AudioContext(); + const audioBufferSource = audioContext.createBufferSource(); + audioBufferSource.buffer = await audioContext.decodeAudioData(audioData); + audioBufferSource.connect(audioContext.destination); + return audioBufferSource; }; diff --git a/src/services/fetchMicrosoftSpeech.ts b/src/services/fetchMicrosoftSpeech.ts index e303c92..30769c1 100644 --- a/src/services/fetchMicrosoftSpeech.ts +++ b/src/services/fetchMicrosoftSpeech.ts @@ -1,5 +1,3 @@ -import { v4 as uuidv4 } from 'uuid'; - import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api'; import { type SsmlOptions } from '@/utils/genSSML'; import { genSSML } from '@/utils/genSSML'; @@ -21,26 +19,8 @@ export const fetchMicrosoftSpeech = async ( ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3', }); - const DEFAULT_HEADERS = new Headers({ - 'accept': '*/*', - 'accept-language': 'zh-CN,zh;q=0.9', - 'authority': 'southeastasia.api.speech.microsoft.com', - 'content-type': 'application/json', - 'customvoiceconnectionid': uuidv4(), - 'origin': 'https://speech.microsoft.com', - 'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"Windows"', - 'sec-fetch-dest': 'empty', - 'sec-fetch-mode': 'cors', - 'sec-fetch-site': 'same-site', - 'user-agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', - }); - const response: Response = await fetch(api || MICROSOFT_SPEECH_PROXY_URL, { body: data, - headers: DEFAULT_HEADERS, method: 'POST', // @ts-ignore responseType: 'arraybuffer', From 10592aab4dccf69554d8e7c0c8b302045e04f547 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 8 Nov 2023 06:34:48 +0000 Subject: [PATCH 2/2] :bookmark: 
chore(release): v1.3.0 [skip ci] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## [Version 1.3.0](https://github.com/lobehub/lobe-tts/compare/v1.2.0...v1.3.0) Released on **2023-11-08** #### ✨ Features - **misc**: Update README.md with new environment variables and their descriptions.
Improvements and Fixes #### What's improved * **misc**: Update README.md with new environment variables and their descriptions ([13d96ed](https://github.com/lobehub/lobe-tts/commit/13d96ed))
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9805b3c..203b07d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,31 @@ # Changelog +## [Version 1.3.0](https://github.com/lobehub/lobe-tts/compare/v1.2.0...v1.3.0) + +Released on **2023-11-08** + +#### ✨ Features + +- **misc**: Update README.md with new environment variables and their descriptions. + +
+ +
+Improvements and Fixes + +#### What's improved + +- **misc**: Update README.md with new environment variables and their descriptions ([13d96ed](https://github.com/lobehub/lobe-tts/commit/13d96ed)) + +
+ +
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +
+ ## [Version 1.2.0](https://github.com/lobehub/lobe-tts/compare/v1.1.0...v1.2.0) Released on **2023-11-07** diff --git a/package.json b/package.json index 80c4762..fd2c432 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/tts", - "version": "1.2.0", + "version": "1.3.0", "description": "A high-quality & reliable TTS React Hooks library", "homepage": "https://github.com/lobehub/lobe-tts", "bugs": {