12 lines
438 B
Python
12 lines
438 B
Python
|
from embedding2 import EmbeddingModel
|
||
|
|
||
|
# --- Embedding run configuration -------------------------------------------
# Alternative model, currently disabled:
# model_name = 'intfloat/multilingual-e5-large'
# NOTE(review): switching to it presumably also requires flipping the
# `mulitlingual_e5` flag below -- confirm against embedding2.EmbeddingModel.
model_name = 'dangvantuan/sentence-camembert-large'
collection_name = 'cera'

# Passed to EmbeddingModel together with the collection name; presumably the
# on-disk location of the ChromaDB store -- TODO confirm in embedding2.
chromadb_path = './chromadb'

# The two folders handed to embed_folder(). Both are relative paths.
txt_folder_path = '../scrapcera/docs/'
html_folder_path = '../scrapcera/htmls/'

# --- Run ---------------------------------------------------------------------
# NOTE(review): the keyword is spelled `mulitlingual_e5` (sic). It has to match
# the parameter name declared by EmbeddingModel, so verify the spelling there
# before "correcting" it here.
embedding_model = EmbeddingModel(
    model_name,
    chromadb_path,
    collection_name,
    mulitlingual_e5=False,
)

# Argument order is (html folder, txt folder).
embedding_model.embed_folder(html_folder_path, txt_folder_path)
|