Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit eabf01f

Browse files
ted-at-openai/update-embedding-examples (openai#67)
* updates embeddings examples
* updates README with new example notebook using embeddings for recommendation
1 parent c93af95 commit eabf01f

12 files changed

+33588
-26
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ Examples of how to use embeddings are shared in the following Jupyter notebooks:
100100
- [Semantic text search using embeddings](https://github.com/openai/openai-python/blob/main/examples/embeddings/Semantic_text_search_using_embeddings.ipynb)
101101
- [User and product embeddings](https://github.com/openai/openai-python/blob/main/examples/embeddings/User_and_product_embeddings.ipynb)
102102
- [Zero-shot classification using embeddings](https://github.com/openai/openai-python/blob/main/examples/embeddings/Zero-shot_classification.ipynb)
103+
- [Recommendation using embeddings](https://github.com/openai/openai-python/blob/main/examples/embeddings/Recommendation.ipynb)
103104

104105
For more information on embeddings and the types of embeddings OpenAI offers, read the [embeddings guide](https://beta.openai.com/docs/guides/embeddings) in the OpenAI documentation.
105106

examples/embeddings/Clustering.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
"\n",
3232
"\n",
3333
"df = pd.read_csv('output/embedded_1k_reviews.csv')\n",
34-
"df['babbage_similarity'] = df.babbage_similarity.apply(eval).apply(np.array)\n",
34+
"df['text-similarity-babbage-001'] = df.babbage_similarity.apply(eval).apply(np.array)\n",
3535
"matrix = np.vstack(df.babbage_similarity.values)\n",
3636
"matrix.shape"
3737
]
@@ -253,7 +253,7 @@
253253
"name": "python",
254254
"nbconvert_exporter": "python",
255255
"pygments_lexer": "ipython3",
256-
"version": "3.7.3"
256+
"version": "3.9.9"
257257
},
258258
"orig_nbformat": 4
259259
},

examples/embeddings/Code_search.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
"cell_type": "markdown",
8282
"metadata": {},
8383
"source": [
84-
"For code search models we use babbage-code-search-code to obtain embeddings for code snippets, and code-search-text to embed natural language queries."
84+
"For code search models we use code-search-{model}-code to obtain embeddings for code snippets, and code-search-{model}-text to embed natural language queries."
8585
]
8686
},
8787
{
@@ -188,7 +188,7 @@
188188
"from openai.embeddings_utils import get_embedding\n",
189189
"\n",
190190
"df = pd.DataFrame(all_funcs)\n",
191-
"df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='babbage-code-search-code'))\n",
191+
"df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='code-search-babbage-code-001'))\n",
192192
"df['filepath'] = df['filepath'].apply(lambda x: x.replace(code_root, \"\"))\n",
193193
"df.to_csv(\"output/code_search_openai-python.csv\", index=False)\n",
194194
"df.head()"
@@ -234,7 +234,7 @@
234234
"from openai.embeddings_utils import cosine_similarity\n",
235235
"\n",
236236
"def search_functions(df, code_query, n=3, pprint=True, n_lines=7):\n",
237-
" embedding = get_embedding(code_query, engine='babbage-code-search-text')\n",
237+
" embedding = get_embedding(code_query, engine='code-search-babbage-text-001')\n",
238238
" df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))\n",
239239
"\n",
240240
" res = df.sort_values('similarities', ascending=False).head(n)\n",

examples/embeddings/Get_embeddings.ipynb

+2-3
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,12 @@
5050
"from tenacity import retry, wait_random_exponential, stop_after_attempt\n",
5151
"\n",
5252
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
53-
"def get_embedding(text, engine=\"text-similarity-davinci-001\"):\n",
54-
"\n",
53+
"def get_embedding(text: str, engine=\"text-similarity-davinci-001\") -> List[float]:\n",
5554
"\n",
5655
" # replace newlines, which can negatively affect performance.\n",
5756
" text = text.replace(\"\\n\", \" \")\n",
5857
"\n",
59-
" return openai.Embedding.create(input=[text], engine=engine)['data'][0]['embedding']\n",
58+
" return openai.Embedding.create(input=[text], engine=engine)[\"data\"][0][\"embedding\"]\n",
6059
"\n",
6160
"embedding = get_embedding(\"Sample query text goes here\", engine=\"text-search-ada-query-001\")\n",
6261
"print(len(embedding))"

examples/embeddings/Obtain_dataset.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@
159159
"from openai.embeddings_utils import get_embedding\n",
160160
"\n",
161161
"# This will take just under 10 minutes\n",
162-
"df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='babbage-similarity'))\n",
163-
"df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, engine='babbage-search-document'))\n",
162+
"df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-similarity-babbage-001'))\n",
163+
"df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-search-babbage-doc-001'))\n",
164164
"df.to_csv('output/embedded_1k_reviews.csv')"
165165
]
166166
}
@@ -183,7 +183,7 @@
183183
"name": "python",
184184
"nbconvert_exporter": "python",
185185
"pygments_lexer": "ipython3",
186-
"version": "3.7.3"
186+
"version": "3.9.9"
187187
},
188188
"orig_nbformat": 4
189189
},

0 commit comments

Comments
 (0)