rag/embedding_script.py

12 lines
438 B
Python
Raw Normal View History

2024-01-17 14:50:28 -05:00
from embedding2 import EmbeddingModel
# Script body: configure an EmbeddingModel and run embed_folder over the
# HTML and text folders listed below.

# Model identifier passed as the first argument to EmbeddingModel.
# A previously used alternative was 'intfloat/multilingual-e5-large'.
# NOTE(review): switching back presumably also requires flipping the
# mulitlingual_e5 flag below -- confirm against embedding2.
model_name = 'dangvantuan/sentence-camembert-large'

# Storage path and collection name passed to EmbeddingModel.
chromadb_path = './chromadb'
collection_name = 'cera'

# Input folders passed to embed_folder (HTML first, then text).
html_folder_path = '../scrapcera/htmls/'
txt_folder_path = '../scrapcera/docs/'

# The keyword 'mulitlingual_e5' is kept exactly as spelled here because it
# has to match the parameter name declared by EmbeddingModel.
embedding_model = EmbeddingModel(
    model_name,
    chromadb_path,
    collection_name,
    mulitlingual_e5=False,
)
embedding_model.embed_folder(html_folder_path, txt_folder_path)