|
81 | 81 | "cell_type": "markdown",
|
82 | 82 | "metadata": {},
|
83 | 83 | "source": [
|
84 |
| - "For code search models we use babbage-code-search-code to obtain embeddings for code snippets, and code-search-text to embed natural language queries." |
| 84 | + "For code search models, we use code-search-{model}-code-001 (e.g. code-search-babbage-code-001) to obtain embeddings for code snippets, and code-search-{model}-text-001 to embed natural language queries." |
85 | 85 | ]
|
86 | 86 | },
|
87 | 87 | {
|
|
188 | 188 | "from openai.embeddings_utils import get_embedding\n",
|
189 | 189 | "\n",
|
190 | 190 | "df = pd.DataFrame(all_funcs)\n",
|
191 |
| - "df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='babbage-code-search-code'))\n", |
| 191 | + "df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='code-search-babbage-code-001'))\n", |
192 | 192 | "df['filepath'] = df['filepath'].apply(lambda x: x.replace(code_root, \"\"))\n",
|
193 | 193 | "df.to_csv(\"output/code_search_openai-python.csv\", index=False)\n",
|
194 | 194 | "df.head()"
|
|
234 | 234 | "from openai.embeddings_utils import cosine_similarity\n",
|
235 | 235 | "\n",
|
236 | 236 | "def search_functions(df, code_query, n=3, pprint=True, n_lines=7):\n",
|
237 |
| - " embedding = get_embedding(code_query, engine='babbage-code-search-text')\n", |
| 237 | + " embedding = get_embedding(code_query, engine='code-search-babbage-text-001')\n", |
238 | 238 | " df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))\n",
|
239 | 239 | "\n",
|
240 | 240 | " res = df.sort_values('similarities', ascending=False).head(n)\n",
|
|
0 commit comments