202401172050

This commit is contained in:
2024-01-17 20:50:28 +01:00
parent efaa188404
commit 8f78f6c656
12 changed files with 1575 additions and 1248 deletions

11
embedding_script.py Normal file
View File

@@ -0,0 +1,11 @@
# Driver script: configure an EmbeddingModel and run it over the scraped
# HTML and text folders. Everything below executes at import/run time.
from embedding2 import EmbeddingModel

# --- Embedding model ------------------------------------------------------
# Alternative checkpoint kept for reference:
# model_name = 'intfloat/multilingual-e5-large'
# NOTE(review): switching to the e5 checkpoint presumably also requires
# flipping the e5 flag passed to EmbeddingModel below -- confirm in embedding2.
model_name = "dangvantuan/sentence-camembert-large"

# --- Vector store ---------------------------------------------------------
# Collection name and storage path handed to EmbeddingModel.
collection_name = "cera"
chromadb_path = "./chromadb"

# --- Input folders --------------------------------------------------------
# Relative paths; presumably resolved against the current working
# directory, so run the script from its own folder -- TODO confirm.
txt_folder_path = "../scrapcera/docs/"
html_folder_path = "../scrapcera/htmls/"

embedding_model = EmbeddingModel(
    model_name,
    chromadb_path,
    collection_name,
    # NOTE(review): the keyword is spelled "mulitlingual_e5" (sic). It must
    # match the parameter name declared by EmbeddingModel in embedding2, so
    # do not "fix" the spelling here without changing it there as well.
    mulitlingual_e5=False,
)

# HTML folder first, text folder second -- the order embed_folder is given.
embedding_model.embed_folder(html_folder_path, txt_folder_path)