Initial commit of vector database example with new embeddings

2023-01-05 01:54:46 -08:00
5 changed files with 1219 additions and 3 deletions
--- a/examples/Code_search.ipynb
+++ b/examples/Code_search.ipynb
@ -84,6 +84,13 @@
    "print(\"Total number of functions extracted:\", len(all_funcs))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For code search models we use code-search-{model}-code to obtain embeddings for code snippets, and code-search-{model}-text to embed natural language queries."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
--- a/examples/How_to_handle_rate_limits.ipynb
+++ b/examples/How_to_handle_rate_limits.ipynb
@ -95,9 +95,11 @@
    "\n",
    "### Requesting a rate limit increase\n",
    "\n",
-    "If you'd like your organization's rate limit increased, please fill out the following form:\n",
+    "If you'd like your organization's rate limit increased, please feel free to reach out to <support@openai.com> with the following information:\n",
    "\n",
-        "- [OpenAI Rate Limit Increase Request form](https://forms.gle/56ZrwXXoxAN1yt6i9)\n"
+    "- The model(s) you need increased limits on\n",
+    "- The estimated rate of requests\n",
+    "- The reason for the increase"
   ]
  },
  {
--- a/examples/Obtain_dataset.ipynb
+++ b/examples/Obtain_dataset.ipynb
@ -162,7 +162,7 @@
    "\n",
    "# This will take just between 5 and 10 minutes\n",
    "df['ada_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))\n",
-    "df['ada_search'] = df['ada_similarity']\n",
+    "df['ada_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))\n",
    "df.to_csv('data/fine_food_reviews_with_embeddings_1k.csv')"
   ]
  }
--- a/examples/vector_databases/Vector_db_introduction.ipynb
+++ b/examples/vector_databases/Vector_db_introduction.ipynb
--- a/examples/vector_databases/weaviate/docker-compose.yaml
+++ b/examples/vector_databases/weaviate/docker-compose.yaml
@ -0,0 +1,20 @@
+version: '3.4'
+services:
+  weaviate:  # vector database service; vectorization is delegated to t2v-transformers below
+    image: semitechnologies/weaviate:1.14.0
+    restart: on-failure:0
+    ports:
+      - "8080:8080"  # host:container; quoted so the mapping is always read as a string
+    environment:
+      QUERY_DEFAULTS_LIMIT: '20'  # environment values are strings; quoted to avoid implicit typing
+      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
+      PERSISTENCE_DATA_PATH: "./data"  # NOTE(review): no volume is declared here for this path - confirm persistence needs
+      DEFAULT_VECTORIZER_MODULE: text2vec-transformers
+      ENABLE_MODULES: text2vec-transformers
+      TRANSFORMERS_INFERENCE_API: http://t2v-transformers:8080  # hostname is the service name defined below
+      CLUSTER_HOSTNAME: 'node1'
+  t2v-transformers:  # inference container addressed by TRANSFORMERS_INFERENCE_API above
+    image: semitechnologies/transformers-inference:sentence-transformers-msmarco-distilroberta-base-v2
+    environment:
+      ENABLE_CUDA: '0'  # set to '1' to enable
+      # NVIDIA_VISIBLE_DEVICES: all # enable if running with CUDA