updates embedding examples with new embedding model

2022-12-13 17:28:39 -06:00
parent 7de3d50816
commit fd181ec78f
12 changed files with 12387 additions and 12390 deletions
--- a/examples/Regression_using_embeddings.ipynb
+++ b/examples/Regression_using_embeddings.ipynb
@@ -20,7 +20,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Babbage similarity embedding performance on 1k Amazon reviews: mse=0.39, mae=0.38\n"
+      "Ada similarity embedding performance on 1k Amazon reviews: mse=0.60, mae=0.51\n"
     ]
    }
   ],
@@ -32,11 +32,13 @@
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "\n",
-    "datafile_path = \"https://cdn.openai.com/API/examples/data/fine_food_reviews_with_embeddings_1k.csv\"  # for your convenience, we precomputed the embeddings\n",
-    "df = pd.read_csv(datafile_path)\n",
-    "df[\"babbage_similarity\"] = df.babbage_similarity.apply(eval).apply(np.array)\n",
+    "# If you have not run the \"Obtain_dataset.ipynb\" notebook, you can download the datafile from here: https://cdn.openai.com/API/examples/data/fine_food_reviews_with_embeddings_1k.csv\n",
+    "datafile_path = \"./data/fine_food_reviews_with_embeddings_1k.csv\"\n",
    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(list(df.babbage_similarity.values), df.Score, test_size=0.2, random_state=42)\n",
+    "df = pd.read_csv(datafile_path)\n",
+    "df[\"ada_similarity\"] = df.ada_similarity.apply(eval).apply(np.array)\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(list(df.ada_similarity.values), df.Score, test_size=0.2, random_state=42)\n",
    "\n",
    "rfr = RandomForestRegressor(n_estimators=100)\n",
    "rfr.fit(X_train, y_train)\n",
@@ -45,7 +47,7 @@
    "mse = mean_squared_error(y_test, preds)\n",
    "mae = mean_absolute_error(y_test, preds)\n",
    "\n",
-    "print(f\"Babbage similarity embedding performance on 1k Amazon reviews: mse={mse:.2f}, mae={mae:.2f}\")\n"
+    "print(f\"Ada similarity embedding performance on 1k Amazon reviews: mse={mse:.2f}, mae={mae:.2f}\")\n"
   ]
  },
  {
@@ -57,7 +59,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Dummy mean prediction performance on Amazon reviews: mse=1.81, mae=1.08\n"
+      "Dummy mean prediction performance on Amazon reviews: mse=1.73, mae=1.03\n"
     ]
    }
   ],
@@ -70,10 +72,11 @@
   ]
  },
  {
+   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "We can see that the embeddings are able to predict the scores with an average error of 0.39 per score prediction. This is roughly equivalent to predicting 2 out of 3 reviews perfectly, and 1 out of three reviews by a one star error."
+    "We can see that the embeddings are able to predict the scores with a mean absolute error of 0.51 per score prediction. This is roughly equivalent to predicting half of the reviews perfectly, and the other half with a one-star error."
   ]
  },
  {
@@ -86,9 +89,9 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3.9.9 ('openai')",
+   "display_name": "openai-cookbook",
   "language": "python",
-   "name": "python3"
+   "name": "openai-cookbook"
  },
  "language_info": {
   "codemirror_mode": {
@@ -100,7 +103,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.9.6"
  },
  "orig_nbformat": 4,
  "vscode": {