updates embedding examples with new embedding model
This commit is contained in:
committed by
Ted Sanders
parent
7de3d50816
commit
fd181ec78f
@ -11,6 +11,14 @@
|
||||
"We will combine the review summary and review text into a single combined text. The model will encode this combined text and it will output a single vector embedding."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To run this notebook, you will need to install: pandas, openai, transformers, plotly, matplotlib, scikit-learn, torch (a dependency of transformers), torchvision, and scipy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@ -131,7 +139,7 @@
|
||||
"\n",
|
||||
"# remove reviews that are too long\n",
|
||||
"df['n_tokens'] = df.combined.apply(lambda x: len(tokenizer.encode(x)))\n",
|
||||
"df = df[df.n_tokens<2000].tail(1_000)\n",
|
||||
"df = df[df.n_tokens<8000].tail(1_000)\n",
|
||||
"len(df)"
|
||||
]
|
||||
},
|
||||
@ -148,20 +156,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"from openai.embeddings_utils import get_embedding\n",
|
||||
"# Ensure you have your API key set in your environment per the README: https://github.com/openai/openai-python#usage\n",
|
||||
"\n",
|
||||
"# This will take just under 10 minutes\n",
|
||||
"df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-similarity-babbage-001'))\n",
|
||||
"df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-search-babbage-doc-001'))\n",
|
||||
"# This will take between 5 and 10 minutes\n",
|
||||
"df['ada_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))\n",
|
||||
"df['ada_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))\n",
|
||||
"df.to_csv('data/fine_food_reviews_with_embeddings_1k.csv')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.9 ('openai')",
|
||||
"display_name": "openai-cookbook",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "openai-cookbook"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@ -173,12 +183,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.9"
|
||||
"version": "3.9.6"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97"
|
||||
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
Reference in New Issue
Block a user