# rag/embedding_script.py
#
# 12 lines
# 438 B
# Python

from embedding2 import EmbeddingModel
# Driver script: configure an EmbeddingModel and run embed_folder on the
# HTML and text folders below. Everything runs at module top level.

# Storage / collection settings handed to EmbeddingModel.
# NOTE(review): the names suggest a ChromaDB store on disk -- confirm against
# embedding2.EmbeddingModel.
collection_name = "cera"
chromadb_path = "./chromadb"

# Input folders handed to embed_folder (paths are relative to the working
# directory the script is launched from).
txt_folder_path = "../scrapcera/docs/"
html_folder_path = "../scrapcera/htmls/"

# Model identifier handed to EmbeddingModel.
# Alternative kept for reference: 'intfloat/multilingual-e5-large'
# (presumably to be paired with mulitlingual_e5=True -- TODO confirm).
model_name = "dangvantuan/sentence-camembert-large"

# NOTE: the keyword really is spelled "mulitlingual_e5" at this call site; it
# has to match the parameter name declared in embedding2, so do not "fix" it
# here without changing the definition as well.
embedding_model = EmbeddingModel(
    model_name,
    chromadb_path,
    collection_name,
    mulitlingual_e5=False,
)
embedding_model.embed_folder(html_folder_path, txt_folder_path)