From 3be872e34bc203e821ce04c7343666bc1478150e Mon Sep 17 00:00:00 2001 From: Marco Stocchi Date: Sat, 15 Mar 2025 09:56:55 +0100 Subject: [PATCH] llama-tts : add '-o' option * added -o option to specify an output file name * llama-tts returns ENOENT in case of file write error note : PR #12042 is closed as superseded by this one. --- common/arg.cpp | 2 +- examples/tts/tts.cpp | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 240c699a2cf76..b6bfe6f89bead 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1889,7 +1889,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, const std::string & value) { params.out_file = value; } - ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS})); add_opt(common_arg( {"-ofreq", "--output-frequency"}, "N", string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), diff --git a/examples/tts/tts.cpp b/examples/tts/tts.cpp index c658f3182f4c2..d953cadd62dcf 100644 --- a/examples/tts/tts.cpp +++ b/examples/tts/tts.cpp @@ -87,11 +87,11 @@ struct wav_header { uint32_t data_size; }; -static void save_wav16(const std::string & fname, const std::vector & data, int sample_rate) { +static bool save_wav16(const std::string & fname, const std::vector & data, int sample_rate) { std::ofstream file(fname, std::ios::binary); if (!file) { - LOG_ERR("%s: Failed to open file '%s' for writing", __func__, fname.c_str()); - return; + LOG_ERR("%s: Failed to open file '%s' for writing.\n", __func__, fname.c_str()); + return false; } wav_header header; @@ -108,7 +108,7 @@ static void save_wav16(const std::string & fname, const std::vector & dat file.write(reinterpret_cast(&pcm_sample), sizeof(pcm_sample)); } - file.close(); 
+ return file.good(); } static void fill_hann_window(int length, bool periodic, float * output) { @@ -536,6 +536,7 @@ static std::string audio_data_from_speaker(json speaker, const outetts_version t int main(int argc, char ** argv) { common_params params; + params.out_file = "output.wav"; params.prompt = ""; params.n_predict = 4096; @@ -1060,8 +1061,6 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14 } #endif - const std::string fname = "output.wav"; - const int n_sr = 24000; // sampling rate // zero out first 0.25 seconds @@ -1072,11 +1071,15 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14 LOG_INF("%s: time for spectral ops: %.3f ms\n", __func__, (ggml_time_us() - t_spec_start) / 1000.0f); LOG_INF("%s: total time: %.3f ms\n", __func__, (ggml_time_us() - t_main_start) / 1000.0f); - save_wav16(fname, audio, n_sr); + int retval = 0; - LOG_INF("%s: audio written to file '%s'\n", __func__, fname.c_str()); + if (save_wav16(params.out_file, audio, n_sr)) { + LOG_INF("%s: audio written to file '%s'\n", __func__, params.out_file.c_str()); + } else { + retval = ENOENT; + } llama_backend_free(); - return 0; + return retval; }