diff --git a/examples/codex/backtranslation.py b/examples/codex/backtranslation.py new file mode 100644 index 0000000000..ca0a3e6ccd --- /dev/null +++ b/examples/codex/backtranslation.py @@ -0,0 +1,187 @@ +import openai +from smokey import Smokey +from typing import List, Union + + +def get_candidates( + prompt: str, + stop: List[str], + temperature: float, + priming_prefix: str, + engine: str, + n: int = 5, +) -> List[str]: + """ + Generate N candidate completions based on the prompt, generated with a specific temperature. + + :param prompt: The prompt to start the conversation with. + :param stop: A list of tokens that indicate the end of the generation. + :param temperature: The temperature of the generation. + :param priming_prefix: The prefix to use for the priming. + :param engine: The engine to use for the generation. + :param n: The number of completions to generate. + :return: A list of completions. + """ + response = openai.Completion.create( + engine=engine, + prompt=prompt, + temperature=temperature, + max_tokens=150, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + stop=stop, + n=n, + ) + responses = [priming_prefix + choice.text for choice in response.choices] + return responses + + +def rindex(lst: List, value: str) -> int: + """ + Return the index of the last occurence of a value in a list. + + :param lst: The list to search in. + :param value: The value to search for. + :return: The index of the last occurence of the value. 
+ """ + try: + return len(lst) - lst[::-1].index(value) - 1 + except ValueError: + raise ValueError(f"Answer start token `{value}` not found in the eval template") + + +def eval_candidate( + candidate_answer: str, + original_instruction: str, + eval_template: str, + answer_start_token: str, + engine: str, +) -> float: + """ + Evaluate a candidate answer by calculating the average log probability + of the original instruction, given the candidate answer with a specific + evaluation template, aimed at reconstructing the original instruction. + + :param candidate_answer: The candidate answer to evaluate. + :param original_instruction: The original instruction. + :param eval_template: The template to use for the evaluation. + :param answer_start_token: The token to use to indicate the start of the answer. + :param engine: The engine to use for the evaluation. + :return: The evaluation of the candidate answer. + """ + response = openai.Completion.create( + engine=engine, + prompt=eval_template.format(candidate_answer, original_instruction), + temperature=0, + max_tokens=0, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + logprobs=1, + echo=True, + ) + + answer_start = rindex( + response["choices"][0]["logprobs"]["tokens"], answer_start_token + ) + logprobs = response["choices"][0]["logprobs"]["token_logprobs"][answer_start + 1 :] + return sum(logprobs) / len(logprobs) + + +def backtranslation( + prompt_template: str, + additional_info: str, + instruction: str, + eval_template: str, + priming_prefix: str = "SELECT", + stop1: List[str] = ["#", ";"], + answer_start_token: str = "--", + n: int = 5, + temperature: float = 0.5, + return_all_results: bool = False, + engine: str = "davinci-codex", +) -> Union[str, List[str, float]]: + """ + Generate a number of SQL queries given a natural language instruction, + and pick the best one based on the average log probability of explaining the + candidate SQL query with the exact original instruction, when prompted for + a 
natural language explanation of the candidate SQL query. + + :param prompt_template: The template to use for the prompt to generate SQL. + :param additional_info: Additional information to include in the prompt + (SQL Tables, and their properties). + :param instruction: The instruction in natural language. + :param eval_template: The template to use for the evaluation. + :param priming_prefix: The prefix to use for the priming of the SQL query. + :param stop1: A list of tokens that indicate the end of the generation. + :param answer_start_token: The token to use to indicate the start of the + natural answer. + :param n: The number of candidates to generate. + :param temperature: The temperature of the generation. + :param return_all_results: Whether to return all results or just the best one. + :param engine: The engine to use for the generation and evaluation. + :return: The best SQL query, or a list of all scored generated SQL queries. + """ + prompt_template = prompt_template.format( + additional_info, instruction, priming_prefix + ) + + candidates = [] + responses = get_candidates( + prompt_template, stop1, temperature, priming_prefix, engine=engine, n=n + ) + for i in range(n): + quality = eval_candidate( + responses[i], + instruction, + eval_template, + answer_start_token, + engine=engine, + ) + candidates.append((responses[i], quality)) + + candidates.sort(key=lambda x: x[1], reverse=True) + if return_all_results: + return candidates + return candidates[0][0] + + +def main( + nl_query: str = "Return the name of each department that had more than 10 employees in June 2021", + eval_template: str = "{};\n-- Explanation of the above query in human readable format\n-- {}", + table_definitions: str = "# Employee(id, name, department_id)\n# Department(id, name, address)\n# Salary_Payments(id, employee_id, amount, date)\n", + prompt_template: str = "### Postgres SQL tables, with their properties:\n#\n{}#\n### {}\n{}", + n: int = 3, + temperature: float = 0.3, + 
engine: str = "davinci-codex", +): + """ + Generate a number of SQL queries given a natural language instruction, + and pick the best one based on the highest backtranslation score. + + :param nl_query: The natural language query. + :param eval_template: The template to use for the evaluation. + :param table_definitions: The definitions of the tables used in the query. + :param prompt_template: The template to use for the prompt to generate SQL. + :param n: The number of candidates to generate. + :param temperature: The temperature of the generation. + :param engine: The engine to use for the generation and evaluation. + :return: The best SQL query, or a list of all scored generated SQL queries. + """ + + result = backtranslation( + prompt_template, + table_definitions, + nl_query, + eval_template, + priming_prefix="SELECT", + temperature=temperature, + n=n, + engine=engine, + ) + print(result) + + +if __name__ == "__main__": + Smokey(main) diff --git a/examples/finetuning/answers-with-ft.py b/examples/finetuning/answers-with-ft.py new file mode 100644 index 0000000000..672f0e4b40 --- /dev/null +++ b/examples/finetuning/answers-with-ft.py @@ -0,0 +1,142 @@ +import openai +import argparse + + +def create_context( + question, search_file_id, max_len=1800, search_model="ada", max_rerank=10 +): + """ + Create a context for a question by finding the most similar context from the search file. 
+ :param question: The question + :param search_file_id: The file id of the search file + :param max_len: The maximum length of the returned context (in tokens) + :param search_model: The search model to use + :param max_rerank: The maximum number of reranking + :return: The context + """ + results = openai.Engine(search_model).search( + search_model=search_model, + query=question, + max_rerank=max_rerank, + file=search_file_id, + return_metadata=True, + ) + returns = [] + cur_len = 0 + for result in results["data"]: + cur_len += int(result["metadata"]) + 4 + if cur_len > max_len: + break + returns.append(result["text"]) + return "\n\n###\n\n".join(returns) + + +def answer_question( + search_file_id="", + fine_tuned_qa_model="", + question="Which country won the European Football championship in 2021?", + max_len=1800, + search_model="ada", + max_rerank=10, + debug=False, + stop_sequence=["\n", "."], + max_tokens=100, +): + """ + Answer a question based on the most similar context from the search file, using your fine-tuned model. 
+ :param question: The question + :param fine_tuned_qa_model: The fine tuned QA model + :param search_file_id: The file id of the search file + :param max_len: The maximum length of the returned context (in tokens) + :param search_model: The search model to use + :param max_rerank: The maximum number of reranking + :param debug: Whether to output debug information + :param stop_sequence: The stop sequence for Q&A model + :param max_tokens: The maximum number of tokens to return + :return: The answer + """ + context = create_context( + question, + search_file_id, + max_len=max_len, + search_model=search_model, + max_rerank=max_rerank, + ) + if debug: + print("Context:\n" + context) + print("\n\n") + try: + response = openai.Completion.create( + model=fine_tuned_qa_model, + prompt=f"Answer the question based on the context below\n\nText: {context}\n\n---\n\nQuestion: {question}\nAnswer:", + temperature=0, + max_tokens=max_tokens, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + stop=stop_sequence, + ) + return response["choices"][0]["text"] + except Exception as e: + print(e) + return "" + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Rudimentary functionality of the answers endpoint with a fine-tuned Q&A model.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--search_file_id", help="Search file id", required=True, type=str + ) + parser.add_argument( + "--fine_tuned_qa_model", help="Fine-tuned QA model id", required=True, type=str + ) + parser.add_argument( + "--question", help="Question to answer", required=True, type=str + ) + parser.add_argument( + "--max_len", + help="Maximum length of the returned context (in tokens)", + default=1800, + type=int, + ) + parser.add_argument( + "--search_model", help="Search model to use", default="ada", type=str + ) + parser.add_argument( + "--max_rerank", + help="Maximum number of reranking for the search", + default=10, + type=int, + ) + 
parser.add_argument( + "--debug", help="Print debug information (context used)", action="store_true" + ) + parser.add_argument( + "--stop_sequence", + help="Stop sequences for the Q&A model", + default=["\n", "."], + nargs="+", + type=str, + ) + parser.add_argument( + "--max_tokens", + help="Maximum number of tokens to return", + default=100, + type=int, + ) + args = parser.parse_args() + response = answer_question( + search_file_id=args.search_file_id, + fine_tuned_qa_model=args.fine_tuned_qa_model, + question=args.question, + max_len=args.max_len, + search_model=args.search_model, + max_rerank=args.max_rerank, + debug=args.debug, + stop_sequence=args.stop_sequence, + max_tokens=args.max_tokens, + ) + print(f"Answer:{response}") diff --git a/examples/finetuning/finetuning-classification.ipynb b/examples/finetuning/finetuning-classification.ipynb index f6b886a494..ff576ba35a 100644 --- a/examples/finetuning/finetuning-classification.ipynb +++ b/examples/finetuning/finetuning-classification.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "source": [ "from sklearn.datasets import fetch_20newsgroups\n", "import pandas as pd\n", @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "source": [ "print(sports_dataset['data'][0])" ], @@ -75,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "source": [ "sports_dataset.target_names[sports_dataset['target'][0]]\n" ], @@ -88,14 +88,14 @@ ] }, "metadata": {}, - "execution_count": 5 + "execution_count": 3 } ], "metadata": {} }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "source": [ "len_all, len_baseball, len_hockey = len(sports_dataset.data), len([e for e in sports_dataset.target if e == 0]), len([e for e in sports_dataset.target if e == 1])\n", "print(f\"Total examples: {len_all}, Baseball examples: {len_baseball}, Hockey examples: {len_hockey}\")" @@ -128,7 +128,7 @@ }, { 
"cell_type": "code", - "execution_count": 10, + "execution_count": 5, "source": [ "import pandas as pd\n", "\n", @@ -204,7 +204,7 @@ ] }, "metadata": {}, - "execution_count": 10 + "execution_count": 5 } ], "metadata": {} @@ -218,9 +218,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "source": [ - "df.to_json(\"sport1.jsonl\", orient='records', lines=True)" + "df.to_json(\"sport2.jsonl\", orient='records', lines=True)" ], "outputs": [], "metadata": {} @@ -235,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "source": [ "!pip install --upgrade openai" ], @@ -244,9 +244,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "source": [ - "!openai tools fine_tunes.prepare_data -f sport1.jsonl -q" + "!openai tools fine_tunes.prepare_data -f sport2.jsonl -q" ], "outputs": [ { @@ -259,21 +259,28 @@ "- Based on your data it seems like you're trying to fine-tune a model for classification\n", "- For classification, we recommend you try one of the faster and cheaper models, such as `ada`. You should also set the `--no_packing` parameter when fine-tuning\n", "- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training\n", + "- There are 11 examples that are very long. These are rows: [134, 200, 281, 320, 404, 595, 704, 838, 1113, 1139, 1174]\n", + "For conditional generation, and for classification the examples shouldn't be longer than 2048 tokens.\n", "- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://beta.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. 
If you intend to do open-ended generation, then you should leave the prompts empty\n", "- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. See https://beta.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details\n", "\n", "Based on the analysis we will perform the following actions:\n", - "- [Recommended] Add a suffix separator `\\n\\n###\\n\\n` to all prompts [Y/n]: Y- [Recommended] Add a whitespace character to the beginning of the completion [Y/n]: Y- [Recommended] Would you like to split into training and validation set? [Y/n]: Y\n", + "- [Recommended] Remove 11 long examples [Y/n]: Y\n", + "- [Recommended] Add a suffix separator `\\n\\n###\\n\\n` to all prompts [Y/n]: Y\n", + "- [Recommended] Add a whitespace character to the beginning of the completion [Y/n]: Y\n", + "- [Recommended] Would you like to split into training and validation set? [Y/n]: Y\n", + "\n", "\n", "Your data will be written to a new JSONL file. Proceed [Y/n]: Y\n", - "Wrote modified files to `sport1_prepared_train.jsonl` and `sport1_prepared_valid.jsonl`\n", + "\n", + "Wrote modified files to `sport2_prepared_train.jsonl` and `sport2_prepared_valid.jsonl`\n", "Feel free to take a look!\n", "\n", "Now use that file when fine-tuning:\n", - "> openai api fine_tunes.create -t \"sport1_prepared_train.jsonl\" -v \"sport1_prepared_valid.jsonl\" --no_packing\n", + "> openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --no_packing --compute_classification_metrics --classification_positive_class \" baseball\"\n", "\n", "After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `\\n\\n###\\n\\n` for the model to start generating completions, rather than continuing with the prompt.\n", - "Once your model starts training, it'll approximately take 31.06 minutes. 
Queue will approximately take half an hour per job ahead of you.\n" + "Once your model starts training, it'll approximately take 30.8 minutes to train a `curie` model, and less for `ada` and `babbage`. Queue will approximately take half an hour per job ahead of you.\n" ] } ], @@ -294,44 +301,46 @@ "cell_type": "markdown", "source": [ "## Fine-tuning\n", - "The tool suggests we run the following command to train the dataset. We specifically add `-m ada` to fine-tune a cheaper and faster ada model, which is usually comperable in performance to slower and more expensive models on classification use cases. Since this is a classification task, we would like to know what the generalization performance on the provided validation set is for our classification use case. We add `--compute_classification_metrics --classification_positive_class \" hockey\"` in order to compute the classification metrics." + "The tool suggests we run the following command to train the dataset. Since this is a classification task, we would like to know what the generalization performance on the provided validation set is for our classification use case. The tool suggests adding `--compute_classification_metrics --classification_positive_class \" baseball\"` in order to compute the classification metrics. Classification performs better with the `--no_packing` hyperparameter.\n", + "\n", + "We can simply copy the suggested command from the CLI tool. We specifically add `-m ada` to fine-tune a cheaper and faster ada model, which is usually comparable in performance to slower and more expensive models on classification use cases.
" ], "metadata": {} }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "source": [ - "!openai api fine_tunes.create -t \"sport1_prepared_train.jsonl\" -v \"sport1_prepared_valid.jsonl\" --no_packing -m ada --compute_classification_metrics --classification_positive_class \" hockey\"" + "!openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --no_packing --compute_classification_metrics --classification_positive_class \" baseball\" -m ada" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Upload progress: 100%|████████████████████| 1.76M/1.76M [00:00<00:00, 1.85Mit/s]\n", - "Uploaded file from sport1_prepared_train.jsonl: file-6TJY51ApcI0YzumClqdpyhjk\n", - "Upload progress: 100%|███████████████████████| 395k/395k [00:00<00:00, 754kit/s]\n", - "Uploaded file from sport1_prepared_valid.jsonl: file-7jmZYAJHneAuzVGlauejsas9\n", - "Created fine-tune: ft-T4UkKqMbMM1Eu56q8ks6g8u5\n", + "Upload progress: 100%|████████████████████| 1.52M/1.52M [00:00<00:00, 1.81Mit/s]\n", + "Uploaded file from sport2_prepared_train.jsonl: file-Dxx2xJqyjcwlhfDHpZdmCXlF\n", + "Upload progress: 100%|███████████████████████| 388k/388k [00:00<00:00, 507kit/s]\n", + "Uploaded file from sport2_prepared_valid.jsonl: file-Mvb8YAeLnGdneSAFcfiVcgcN\n", + "Created fine-tune: ft-2zaA7qi0rxJduWQpdvOvmGn3\n", "Streaming events until fine-tuning is complete...\n", "\n", "(Ctrl-C will interrupt the stream, but not cancel the fine-tune)\n", - "[2021-07-26 12:13:52] Created fine-tune: ft-T4UkKqMbMM1Eu56q8ks6g8u5\n", - "[2021-07-26 12:13:57] Fine-tune enqueued. 
Queue number: 0\n", - "[2021-07-26 12:14:00] Fine-tune started\n", - "[2021-07-26 12:16:56] Completed epoch 1/4\n", - "[2021-07-26 12:18:37] Completed epoch 2/4\n", - "[2021-07-26 12:20:29] Completed epoch 3/4\n", - "[2021-07-26 12:22:31] Completed epoch 4/4\n", - "[2021-07-26 12:24:02] Uploaded model: ada:ft-openai-internal-2021-07-26-11-24-00\n", - "[2021-07-26 12:24:06] Uploaded result file: file-ForZ3pSAQ6db7bxmMJhw6GEo\n", - "[2021-07-26 12:24:07] Fine-tune succeeded\n", + "[2021-07-30 13:15:50] Created fine-tune: ft-2zaA7qi0rxJduWQpdvOvmGn3\n", + "[2021-07-30 13:15:52] Fine-tune enqueued. Queue number: 0\n", + "[2021-07-30 13:15:56] Fine-tune started\n", + "[2021-07-30 13:18:55] Completed epoch 1/4\n", + "[2021-07-30 13:20:47] Completed epoch 2/4\n", + "[2021-07-30 13:22:40] Completed epoch 3/4\n", + "[2021-07-30 13:24:31] Completed epoch 4/4\n", + "[2021-07-30 13:26:22] Uploaded model: ada:ft-openai-2021-07-30-12-26-20\n", + "[2021-07-30 13:26:27] Uploaded result file: file-6Ki9RqLQwkChGsr9CHcr1ncg\n", + "[2021-07-30 13:26:28] Fine-tune succeeded\n", "\n", "Job complete! Status: succeeded 🎉\n", "Try out your fine-tuned model:\n", "\n", - "openai api completions.create -m ada:ft-openai-internal-2021-07-26-11-24-00 -p \n" + "openai api completions.create -m ada:ft-openai-2021-07-30-12-26-20 -p \n" ] } ], @@ -340,7 +349,7 @@ { "cell_type": "markdown", "source": [ - "The model is successfully trained in about ten minutes. We can see the model name is `ada:ft-openai-internal-2021-07-26-11-24-00`, which we can use for doing inference." + "The model is successfully trained in about ten minutes. We can see the model name is `ada:ft-openai-2021-07-30-12-26-20`, which we can use for doing inference." 
], "metadata": {} }, @@ -354,16 +363,16 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "source": [ - "!openai api fine_tunes.results -i ft-T4UkKqMbMM1Eu56q8ks6g8u5 > result.csv" + "!openai api fine_tunes.results -i ft-2zaA7qi0rxJduWQpdvOvmGn3 > result.csv" ], "outputs": [], "metadata": {} }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 11, "source": [ "results = pd.read_csv('result.csv')\n", "results[results['classification/accuracy'].notnull()].tail(1)" @@ -374,19 +383,19 @@ "data": { "text/plain": [ " step elapsed_tokens elapsed_examples training_loss \\\n", - "926 927 3108476 3708 0.022579 \n", + "929 930 3027688 3720 0.044408 \n", "\n", " training_sequence_accuracy training_token_accuracy \\\n", - "926 1.0 1.0 \n", + "929 1.0 1.0 \n", "\n", " classification/accuracy classification/precision classification/recall \\\n", - "926 0.995833 1.0 0.991667 \n", + "929 0.991597 0.983471 1.0 \n", "\n", " classification/auroc classification/auprc classification/f1.0 \\\n", - "926 0.99875 0.998909 0.995816 \n", + "929 1.0 1.0 0.991667 \n", "\n", " validation_loss validation_sequence_accuracy validation_token_accuracy \n", - "926 NaN NaN NaN " + "929 NaN NaN NaN " ], "text/html": [ "
\n", @@ -426,19 +435,19 @@ " \n", " \n", " \n", - " 926\n", - " 927\n", - " 3108476\n", - " 3708\n", - " 0.022579\n", + " 929\n", + " 930\n", + " 3027688\n", + " 3720\n", + " 0.044408\n", + " 1.0\n", + " 1.0\n", + " 0.991597\n", + " 0.983471\n", " 1.0\n", " 1.0\n", - " 0.995833\n", " 1.0\n", " 0.991667\n", - " 0.99875\n", - " 0.998909\n", - " 0.995816\n", " NaN\n", " NaN\n", " NaN\n", @@ -449,7 +458,7 @@ ] }, "metadata": {}, - "execution_count": 17 + "execution_count": 11 } ], "metadata": {} @@ -463,7 +472,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "source": [ "results[results['classification/accuracy'].notnull()]['classification/accuracy'].plot()" ], @@ -476,7 +485,7 @@ ] }, "metadata": {}, - "execution_count": 18 + "execution_count": 12 }, { "output_type": "display_data", @@ -484,7 +493,7 @@ "text/plain": [ "
" ], - "image/png": "" + "image/png": "" }, "metadata": { "needs_background": "light" @@ -503,9 +512,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 13, "source": [ - "test = pd.read_json('sport1_prepared_valid.jsonl', lines=True)\n", + "test = pd.read_json('sport2_prepared_valid.jsonl', lines=True)\n", "test.head()" ], "outputs": [ @@ -575,16 +584,23 @@ ] }, "metadata": {}, - "execution_count": 19 + "execution_count": 13 } ], "metadata": {} }, + { + "cell_type": "markdown", + "source": [ + "We need to use the same separator following the prompt which we used during fine-tuning. In this case it is `\\n\\n###\\n\\n`. Since we're concerned with classification, we want the temperature to be as low as possible, and we only require one token completion to determine the prediction of the model." + ], + "metadata": {} + }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "source": [ - "ft_model = 'ada:ft-openai-internal-2021-07-26-11-24-00'\n", + "ft_model = 'ada:ft-openai-2021-07-30-12-26-20'\n", "res = openai.Completion.create(model=ft_model, prompt=test['prompt'][0] + '\\n\\n###\\n\\n', max_tokens=1, temperature=0)\n", "res['choices'][0]['text']\n" ], @@ -597,7 +613,7 @@ ] }, "metadata": {}, - "execution_count": 30 + "execution_count": 14 } ], "metadata": {} @@ -611,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 15, "source": [ "res = openai.Completion.create(model=ft_model, prompt=test['prompt'][0] + '\\n\\n###\\n\\n', max_tokens=1, temperature=0, logprobs=2)\n", "res['choices'][0]['logprobs']['top_logprobs'][0]" @@ -621,14 +637,14 @@ "output_type": "execute_result", "data": { "text/plain": [ - " JSON: {\n", - " \" baseball\": -6.3311357,\n", - " \" hockey\": -0.0018503045\n", + " JSON: {\n", + " \" baseball\": -7.6311407,\n", + " \" hockey\": -0.0006307676\n", "}" ] }, "metadata": {}, - "execution_count": 29 + "execution_count": 15 } ], "metadata": {} @@ -650,7 +666,7 @@ }, { 
"cell_type": "code", - "execution_count": 28, + "execution_count": 16, "source": [ "sample_hockey_tweet = \"\"\"Thank you to the \n", "@Canes\n", @@ -669,14 +685,14 @@ ] }, "metadata": {}, - "execution_count": 28 + "execution_count": 16 } ], "metadata": {} }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 17, "source": [ "sample_baseball_tweet=\"\"\"BREAKING: The Tampa Bay Rays are finalizing a deal to acquire slugger Nelson Cruz from the Minnesota Twins, sources tell ESPN.\"\"\"\n", "res = openai.Completion.create(model=ft_model, prompt=sample_baseball_tweet + '\\n\\n###\\n\\n', max_tokens=1, temperature=0, logprobs=2)\n", @@ -691,17 +707,10 @@ ] }, "metadata": {}, - "execution_count": 31 + "execution_count": 17 } ], "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [], - "outputs": [], - "metadata": {} } ], "metadata": { diff --git a/openai/cli.py b/openai/cli.py index 2b57ab0bd2..6d67e2b0ef 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -201,6 +201,7 @@ def create(cls, args): resp = openai.File.create( file=open(args.file), purpose=args.purpose, + model=args.model, ) print(resp) @@ -669,6 +670,11 @@ def help(args): help="Why are you uploading this file? (see https://beta.openai.com/docs/api-reference/ for purposes)", required=True, ) + sub.add_argument( + "-m", + "--model", + help="Model for search indexing (e.g. 'ada'). 
Only meaningful if --purpose is 'search'.", + ) sub.set_defaults(func=File.create) sub = subparsers.add_parser("files.get") diff --git a/openai/http_client.py b/openai/http_client.py index a25bbd0053..bef8c48dd6 100644 --- a/openai/http_client.py +++ b/openai/http_client.py @@ -5,9 +5,9 @@ import threading import time from typing import Any, Dict +from urllib.parse import urlparse import requests -from urllib.parse import urlparse import openai from openai import error, util @@ -265,7 +265,12 @@ def _handle_request_error(self, e): err = "%s: %s" % (type(e).__name__, str(e)) # Retry only timeout and connect errors; similar to urllib3 Retry elif isinstance( - e, (requests.exceptions.Timeout, requests.exceptions.ConnectionError) + e, + ( + requests.exceptions.Timeout, + requests.exceptions.ConnectionError, + requests.exceptions.ChunkedEncodingError, + ), ): msg = ( "Unexpected error communicating with OpenAI. " diff --git a/openai/validators.py b/openai/validators.py index 2a900cefd5..181aacb7dd 100644 --- a/openai/validators.py +++ b/openai/validators.py @@ -1,6 +1,7 @@ import os import sys import pandas as pd +import numpy as np from typing import NamedTuple, Optional, Callable, Any @@ -567,7 +568,7 @@ def apply_necessary_remediation(df, remediation): def accept_suggestion(input_text, auto_accept): sys.stdout.write(input_text) if auto_accept: - sys.stdout.write("Y") + sys.stdout.write("Y\n") return True return input().lower() != "n" @@ -638,6 +639,26 @@ def get_classification_hyperparams(df): return n_classes, pos_class +def get_batch_size_suggestion(df, no_packing): + """ + Suggest the batch size based on the number of examples after packing optionally is applied. 
+ """ + n_examples, n_characters = ( + len(df), + df.completion.str.len().sum() + df.prompt.str.len().sum(), + ) + BATCH_SIZE_TO_N_EXAMPLES_RATIO = 0.002 + BATCH_SIZE_TO_N_CHARACTERS_RATIO = BATCH_SIZE_TO_N_EXAMPLES_RATIO / 10_000 + + if no_packing: + batch_size = BATCH_SIZE_TO_N_EXAMPLES_RATIO * n_examples + else: + batch_size = BATCH_SIZE_TO_N_CHARACTERS_RATIO * n_characters + batch_size = 2 ** int(np.log2(batch_size)) + batch_size_suggestion = f" --batch_size {batch_size}" + return batch_size_suggestion + + def write_out_file(df, fname, any_remediations, auto_accept): """ This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file. @@ -653,11 +674,14 @@ def write_out_file(df, fname, any_remediations, auto_accept): if accept_suggestion(input_text, auto_accept): split = True - classification_params = "" - if ft_format == "classification" or ( + no_packing = ft_format == "classification" or ( ft_format == "conditional generation" and len(df) < 1000 - ): - classification_params = " --no_packing" + ) + additional_params = "" + if no_packing: + additional_params = " --no_packing" + additional_params += get_batch_size_suggestion(df, no_packing) + common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n") common_completion_suffix_new_line_handled = common_completion_suffix.replace( "\n", "\\n" @@ -672,7 +696,7 @@ def write_out_file(df, fname, any_remediations, auto_accept): if not any_remediations: sys.stdout.write( - f'\nYou can use your file for fine-tuning:\n> openai api fine_tunes.create -t "{fname}"{classification_params}\n\nAfter you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt.{optional_ending_string}\n' + f'\nYou can use your file for fine-tuning:\n> openai api fine_tunes.create -t 
"{fname}"{additional_params}\n\nAfter you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt.{optional_ending_string}\n' ) estimate_fine_tuning_time(df) @@ -692,13 +716,11 @@ def write_out_file(df, fname, any_remediations, auto_accept): ) n_classes, pos_class = get_classification_hyperparams(df) - classification_params += " --compute_classification_metrics" + additional_params += " --compute_classification_metrics" if n_classes == 2: - classification_params += ( - f' --classification_positive_class "{pos_class}"' - ) + additional_params += f' --classification_positive_class "{pos_class}"' else: - classification_params += f" --classification_n_classes {n_classes}" + additional_params += f" --classification_n_classes {n_classes}" else: assert len(fnames) == 1 df[["prompt", "completion"]].to_json( @@ -714,7 +736,7 @@ def write_out_file(df, fname, any_remediations, auto_accept): else f"After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt." 
) sys.stdout.write( - f'\nWrote modified file{files_string}`\nFeel free to take a look!\n\nNow use that file when fine-tuning:\n> openai api fine_tunes.create -t "{fnames[0]}"{valid_string}{classification_params}\n\n{separator_reminder}{optional_ending_string}\n' + f'\nWrote modified file{files_string}`\nFeel free to take a look!\n\nNow use that file when fine-tuning:\n> openai api fine_tunes.create -t "{fnames[0]}"{valid_string}{additional_params}\n\n{separator_reminder}{optional_ending_string}\n' ) estimate_fine_tuning_time(df) else: diff --git a/openai/version.py b/openai/version.py index 96dd69e4f3..d3e0bd4aa1 100644 --- a/openai/version.py +++ b/openai/version.py @@ -1 +1 @@ -VERSION = "0.10.2" +VERSION = "0.10.3"