{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "98de82f6-2dc9-4d27-a5d8-d07ae04b496c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/peportier/miniforge3/envs/RAG_ENV/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "/Users/peportier/miniforge3/envs/RAG_ENV/lib/python3.9/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", " _torch_pytree._register_pytree_node(\n" ] } ], "source": [ "from embedding import EmbeddingModel" ] }, { "cell_type": "code", "execution_count": 2, "id": "37408a48-ce90-4176-bc9f-b71ebc22a178", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-01-03 11:13:53,279 - INFO - Load pretrained SentenceTransformer: intfloat/multilingual-e5-large\n", "/Users/peportier/miniforge3/envs/RAG_ENV/lib/python3.9/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", " _torch_pytree._register_pytree_node(\n", "2024-01-03 11:13:56,891 - INFO - Use pytorch device: cpu\n", "2024-01-03 11:13:56,894 - INFO - Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n", "2024-01-03 11:13:56,990 - INFO - 4a06529f5f.txt : Start\n", "Batches: 0%| | 0/1 [00:00 512). 
Running this sequence through the model will result in indexing errors\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 1.93it/s]\n", "2024-01-03 11:13:58,189 - INFO - 4aac6081e0.txt : Done\n", "2024-01-03 11:13:58,189 - INFO - 4a5736d002.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 5.89it/s]\n", "2024-01-03 11:13:58,365 - INFO - 4a5736d002.txt : Done\n", "2024-01-03 11:13:58,366 - INFO - 3d159cbe89.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 1.63it/s]\n", "2024-01-03 11:13:58,988 - INFO - 3d159cbe89.txt : Done\n", "2024-01-03 11:13:58,989 - INFO - 3f3e46760c.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 6.07it/s]\n", "2024-01-03 11:13:59,159 - INFO - 3f3e46760c.txt : Done\n", "2024-01-03 11:13:59,160 - INFO - 3ced86d1db.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 2.12it/s]\n", "2024-01-03 11:13:59,640 - INFO - 3ced86d1db.txt : Done\n", "2024-01-03 11:13:59,641 - INFO - 3bbe30b18a.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:01<00:00, 1.46s/it]\n", "2024-01-03 11:14:01,116 - INFO - 3bbe30b18a.txt : Done\n", "2024-01-03 11:14:01,116 - INFO - 3dbfdeb28e.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:01<00:00, 1.17s/it]\n", "2024-01-03 11:14:02,299 - INFO - 3dbfdeb28e.txt : Done\n", "2024-01-03 11:14:02,299 - INFO - 4adf02d48f.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 1.71it/s]\n", "2024-01-03 11:14:02,895 - INFO - 4adf02d48f.txt : Done\n", "2024-01-03 11:14:02,896 - INFO - 3c25273538.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:02<00:00, 2.02s/it]\n", "2024-01-03 11:14:04,940 - INFO - 3c25273538.txt : Done\n", "2024-01-03 11:14:04,940 - INFO - 4aeb967bdb.txt : Start\n", "Batches: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 
2.00it/s]\n", "2024-01-03 11:14:05,449 - INFO - 4aeb967bdb.txt : Done\n" ] } ], "source": [ "# Settings for the embedding run. In the recorded output, model_name is the\n", "# identifier reported as loaded as a pretrained SentenceTransformer.\n", "model_name = \"intfloat/multilingual-e5-large\"\n", "# NOTE(review): presumably the on-disk Chroma store and the collection inside it;\n", "# confirm against EmbeddingModel in embedding.py.\n", "chromadb_path = \"./chromadbtest\"\n", "collection_name = \"cera\"\n", "\n", "# Folder handed to embed_folder; the recorded run logs a Start/Done pair for\n", "# each .txt file it processes.\n", "folder_path = \"./docs/test\"\n", "\n", "embedding_model = EmbeddingModel(model_name, chromadb_path, collection_name)\n", "embedding_model.embed_folder(folder_path)" ] }, { "cell_type": "code", "execution_count": null, "id": "2acd9c49-5676-4e72-9eff-f6fb8ffa94fe", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "RAG_ENV", "language": "python", "name": "rag_env" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }