202401172050
This commit is contained in:
11
embedding_script.py
Normal file
11
embedding_script.py
Normal file
@@ -0,0 +1,11 @@
|
||||
# Script: build an EmbeddingModel for the 'cera' collection and run it over
# the scraped HTML and text folders. All work happens at module top level,
# so running (or importing) this file triggers the embedding pass.
from embedding2 import EmbeddingModel

# Model identifier handed to EmbeddingModel.
# Alternative kept for reference: 'intfloat/multilingual-e5-large'
# (presumably to be used together with mulitlingual_e5=True -- TODO confirm).
model_name = 'dangvantuan/sentence-camembert-large'

# Location and collection name passed to EmbeddingModel
# (presumably a ChromaDB persistence directory -- confirm in embedding2).
chromadb_path = './chromadb'
collection_name = 'cera'

# Input folders, relative to the current working directory.
html_folder_path = '../scrapcera/htmls/'
txt_folder_path = '../scrapcera/docs/'

# NOTE(review): the keyword 'mulitlingual_e5' looks like a misspelling of
# 'multilingual_e5'. It has to match the parameter name declared by
# EmbeddingModel in embedding2, so it is left as-is here; rename both sides
# together if the class is ever corrected.
embedding_model = EmbeddingModel(
    model_name,
    chromadb_path,
    collection_name,
    mulitlingual_e5=False,
)

# Process both folders in one call; what embed_folder does with each folder
# is defined in embedding2 and is not visible from this script.
embedding_model.embed_folder(html_folder_path, txt_folder_path)
|
||||
Reference in New Issue
Block a user