In [43]:
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
    set_global_tokenizer,
    load_index_from_storage,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.query_engine import CitationQueryEngine
from llama_index.prompts import PromptTemplate

from IPython.display import Markdown, display

from transformers import AutoTokenizer

import os

import chromadb

In [2]:
def messages_to_prompt(messages):
    """Render a sequence of chat messages as a single tagged prompt string.

    Every message whose ``role`` equals 'system', 'user' or 'assistant' becomes
    a ``<|role|>`` tag, a newline, the message content, ``</s>`` and a newline.
    Messages with any other role are skipped. When the rendered text does not
    already open with a system turn, an empty one is put in front, and an open
    assistant tag is always appended at the end.
    """
    known_roles = ("system", "user", "assistant")

    turns = []
    for msg in messages:
        # Compare with == (not a dict lookup) so role objects that merely
        # compare equal to these strings keep working.
        for role in known_roles:
            if msg.role == role:
                turns.append(f"<|{role}|>\n{msg.content}</s>\n")
                break

    rendered = "".join(turns)

    # Guarantee a leading system turn, inserting a blank one if necessary.
    system_tag = "<|system|>\n"
    if not rendered.startswith(system_tag):
        rendered = f"{system_tag}</s>\n{rendered}"

    # Leave the assistant turn open for the model to complete.
    return rendered + "<|assistant|>\n"

In [3]:
def completion_to_prompt(completion, system_prompt=None):
    """Wrap a bare completion request in system / user / assistant tags.

    Args:
        completion: Text placed in the user turn.
        system_prompt: Text for the system turn. Any falsy value (``None`` or
            an empty string) selects the built-in default instructions.

    Returns:
        The prompt string, ending with an open assistant tag. The system text
        is stripped of surrounding whitespace, and both the system and user
        texts are padded with a single space on each side.
    """
    default_system = (
        "You carefully provide accurate, factual, thoughtful, nuanced answers, and are brilliant at reasoning. "
        "If you think there might not be a correct answer, you say so. "
        "Don't be verbose in your answers, but do provide details and examples where it might help the explanation. "
    )
    system_text = (system_prompt or default_system).strip()
    return "".join(
        (
            f"<|system|>\n {system_text} </s>\n",
            f"<|user|>\n {completion} </s>\n",
            "<|assistant|>\n",
        )
    )

In [4]:
llm = LlamaCPP(
    # No download URL is given: the weights are read from a local GGUF file.
    model_url=None,
    # Path to the pre-downloaded model. Set the LLAMA_MODEL_PATH environment
    # variable to run this notebook on another machine; when it is unset the
    # original local path is used, so existing behaviour is unchanged.
    # Alternative model tried earlier:
    # model_path='/Users/peportier/llm/a/a/mistral-7b-openorca.Q4_K_M.gguf',
    model_path=os.environ.get(
        "LLAMA_MODEL_PATH",
        '/Users/peportier/llm/a/a/zephyr-7b-beta.Q5_K_M.gguf',
    ),
    temperature=0.1,
    max_new_tokens=1024,
    # Kept below 4096 to leave some wiggle room. The 4096 figure comes from
    # the llama2 example this cell was adapted from.
    # NOTE(review): confirm the context limit that applies to this Zephyr model.
    context_window=3900,
    # kwargs to pass to __call__()
    # https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__
    generate_kwargs={
        "temperature": 0.1,
        "mirostat_mode": 2,
    },
    # kwargs to pass to __init__()
    # https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__init__
    # set to at least 1 to use GPU
    model_kwargs={
        "n_gpu_layers": 1,
    },
    # Format inputs with the <|system|>/<|user|>/<|assistant|> helpers defined
    # in this notebook (this is not the Llama2 [INST] format).
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=False,
)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /Users/peportier/llm/a/a/zephyr-7b-beta.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 14336,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q5_K     [  409

In [5]:
# Register the encode method of the HuggingFaceH4/zephyr-7b-beta tokenizer
# as the llama_index global tokenizer.
model_name = "HuggingFaceH4/zephyr-7b-beta"
zephyr_tokenizer = AutoTokenizer.from_pretrained(model_name)
set_global_tokenizer(zephyr_tokenizer.encode)

In [6]:
# Embedding models tried earlier (kept for reference):
#   BAAI/bge-small-en-v1.5
#   sentence-transformers/all-MiniLM-L6-v2
embed_model_name = "sentence-transformers/distiluse-base-multilingual-cased-v1"
embed_model = HuggingFaceEmbedding(model_name=embed_model_name)

  _torch_pytree._register_pytree_node(


In [7]:
# Bundle the local LLM, the embedding model and the chunk size into one
# ServiceContext that is passed to the index cells.
service_settings = {
    "llm": llm,
    "embed_model": embed_model,
    "chunk_size": 512,
}
service_context = ServiceContext.from_defaults(**service_settings)

In [8]:
# Reuse the persisted index when ./index exists; otherwise build it from the
# documents under ./docs/env and persist it for the next run.
persist_dir = "./index"

if os.path.exists(persist_dir):
    stored_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(
        stored_context,
        service_context=service_context,
    )
else:
    documents = SimpleDirectoryReader("./docs/env").load_data()
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context
    )
    index.storage_context.persist(persist_dir=persist_dir)

In [None]:
# db = chromadb.PersistentClient(path="./chroma_db")
# chroma_collection = db.get_or_create_collection("env")
# vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
# storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
# # Create chroma index
# index = VectorStoreIndex.from_documents(
#     documents, storage_context=storage_context,
#     service_context=service_context
# )

In [54]:
# Question-answering prompt, French variant. It is defined here but the
# English variant is the one wrapped in a PromptTemplate below.
text_qa_template_str_fr = "\n".join([
    "<|system|>: Vous êtes un assistant IA qui répond à la question posée à la fin en utilisant le contexte suivant. Toutes les informations factuelles que vous utilisez pour répondre proviennent exclusivement du contexte. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse. Veuillez répondre exclusivement en français. </s>",
    "<|user|>: {context_str}",
    "Question: {query_str} </s>",
    "<|assistant|>:",
])

# Question-answering prompt, English variant.
text_qa_template_str_en = "\n".join([
    "<|system|>: You are an AI assistant who answers the question posed at the end using the following context. All the factual information you use to answer comes exclusively from the context, do not use prior knowledge. If you don't know the answer, just say you don't know, don't try to invent an answer. </s>",
    "<|user|>: {context_str}",
    "Question: {query_str} </s>",
    "<|assistant|>:",
])
text_qa_template = PromptTemplate(text_qa_template_str_en)

# Refinement prompt: shows the query, the existing answer and a further
# piece of context, and asks for an improved answer.
refine_template_str_en = "\n".join([
    "<|user|>: The original query is as follows: {query_str}",
    "We have provided an existing answer: {existing_answer}",
    "We have the opportunity to refine the existing answer (only if needed) with some more context below.",
    "------------",
    "{context_msg}",
    "------------",
    "Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer. </s>",
    "<|assistant|>:",
])
refine_template = PromptTemplate(refine_template_str_en)

In [63]:
# Build the query engine from the index, plugging in the custom QA and refine
# prompt templates defined in this notebook.
query_engine = index.as_query_engine(
    text_qa_template=text_qa_template,
    refine_template=refine_template,
    # "compact" is kept as a commented-out alternative to the "refine" mode.
    #response_mode="compact",
    response_mode="refine",
    # Five retrieved nodes per query.
    similarity_top_k=5,
    # Streaming is on; later cells consume answers with print_response_stream().
    streaming=True)

In [57]:
def display_prompt_dict(prompts_dict):
    """Show every prompt in ``prompts_dict``.

    For each ``key -> prompt`` pair this renders a Markdown header naming the
    key, prints the raw text returned by ``prompt.get_template()``, and then
    renders a Markdown spacer.
    """
    for key, prompt in prompts_dict.items():
        header = Markdown(f"**Prompt Key**: {key}<br>**Text:** <br>")
        display(header)
        # Plain print keeps the template's placeholders and tags unrendered.
        print(prompt.get_template())
        spacer = Markdown("<br><br>")
        display(spacer)

In [58]:
# Show the prompts the query engine reports through get_prompts().
engine_prompts = query_engine.get_prompts()
display_prompt_dict(engine_prompts)

**Prompt Key**: response_synthesizer:text_qa_template<br>**Text:** <br>

<|system|>: You are an AI assistant who answers the question posed at the end using the following context. All the factual information you use to answer comes exclusively from the context, do not use prior knowledge. If you don't know the answer, just say you don't know, don't try to invent an answer. </s>
<|user|>: {context_str}
Question: {query_str} </s>
<|assistant|>:


<br><br>

**Prompt Key**: response_synthesizer:refine_template<br>**Text:** <br>

<|user|>: The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer. </s>
<|assistant|>:


<br><br>

In [67]:
# Ask an English question and stream the answer as it is generated.
question = "What is a green premium?"
response = query_engine.query(question)
response.print_response_stream()

The green premium is a concept used to compare the additional cost of choosing a zero-carbon alternative over its fossil fuel counterpart. It helps in deciding which clean alternatives should be deployed now and where innovation is needed because the clean alternatives are not yet cheap enough. The calculation of green premiums involves various assumptions, and different well-informed people may arrive at different numbers.

In the context provided, we can see that while electric vehicles (EVs) have become more affordable in recent years due to a significant decrease in battery costs and government incentives, they still come with a modest Green Premium compared to their gas-powered counterparts. For example, the Chevrolet Bolt EV is $14,000 more expensive than the gas-powered Malibu before any tax credits, but when accounting for factors such as maintenance costs and fuel expenses, the Bolt will cost 10 cents more per mile driven.

It's essential to understand whether a given green te

In [70]:
%%time
response = query_engine.query('''Qu'est-ce que le "green premium". Réponds en français.''')
response.print_response_stream()

Le "green premium" désigne la différence de prix entre les technologies écologiques et leurs contreparties traditionnelles. Il s'agit de l'augmentation supplémentaire que les consommateurs doivent payer pour choisir des options écologiques plutôt que des solutions conventionnelles. Dans le cadre de cet document, l'auteur calcule ces différences de prix pour diverses technologies vertes et évalue si elles sont abordables suffisamment pour une adoption mondiale généralisée. L'auteur souligne l'importance d'avoir des primes basses pour permettre à tous les pays de respecter les normes environnementales sans coûts prohibitifs.

L'exemple du carburant jet d'avion illustre le calcul direct des primes vertes, mais lorsque nous appliquons ce calcul de manière plus générale, nous rencontrons un problème : nous n'avons pas actuellement une équivalent vert direct dans tous les cas. Il n'y a pas de ciment à faible émission de carbone (au moins non encore). Comment obtenir un sens du coût d'une sol

In [71]:
# List the page label and file name of every source node behind the latest
# response, one value per line.
for source in response.source_nodes:
    meta = source.metadata
    print("\n".join((meta["page_label"], meta["file_name"])))

65
How_to_Avoid_a_Climate_Disaster_for_9712958_compressed.pdf
64
How_to_Avoid_a_Climate_Disaster_for_9712958_compressed.pdf
66
How_to_Avoid_a_Climate_Disaster_for_9712958_compressed.pdf
67
How_to_Avoid_a_Climate_Disaster_for_9712958_compressed.pdf
150
How_to_Avoid_a_Climate_Disaster_for_9712958_compressed.pdf


In [61]:
# Query about carbon taxes and stream the generated answer.
question = "What is a carbon tax?"
response = query_engine.query(question)
response.print_response_stream()

A carbon tax is a policy tool that places a price on greenhouse gas emissions, specifically carbon dioxide (CO2). It aims to reduce greenhouse gas emissions by making high-carbon alternatives like traditional steel production and coal-based plastics more expensive. By doing so, governments can incentivize the adoption of low-carbon alternatives such as electric arc furnace (EAF) steel and zero-carbon plastics made from renewable sources or captured CO2. This policy tool sends a market signal to individuals and businesses to reduce greenhouse gas emissions and contributes to mitigating climate change while promoting the development of clean energy technology and breakthroughs that generate and store lots of zero-carbon electricity inexpensively. A carbon tax can also help ensure that the transition to a low-carbon economy is both effective and equitable as we pursue electrification as another way to reduce emissions. By making carbon-free things cheaper and carbon-emitting things more e

In [64]:
# Query about biospheric boundaries and stream the generated answer.
question = "What are critical biospheric boundaries?"
response = query_engine.query(question)
response.print_response_stream()

Critical biospheric boundaries refer to limits beyond which human activities could cause irreversible and potentially catastrophic environmental changes that threaten the stability and functioning of Earth's ecosystems. These boundaries include climate change, ocean acidification, loss of biodiversity, land use changes, freshwater consumption, introduction of novel entities (such as chemicals and genetically modified organisms), atmospheric aerosol loading, stratospheric ozone depletion, and global nitrogen and phosphorus cycles. It's crucial to maintain these boundaries within safe limits because they represent the most significant, persistent, existential concerns related to environmental change.

However, it's essential to remain skeptical and doubtful while continuously reassessing our understanding of environmental issues as some critical problems, such as acid rain and stratospheric ozone depletion, were not yet recognized 40 years ago. This list would have been different back th

In [None]:
# Second engine over the same index, built with CitationQueryEngine.
# NOTE(review): the custom text_qa/refine templates used for `query_engine`
# are not passed here — confirm that relying on the defaults is intended.
query_engine_cit = CitationQueryEngine.from_args(
    index,
    # Three retrieved nodes per query (the plain query engine uses five).
    similarity_top_k=3,
    # Same value as the chunk_size given to the ServiceContext.
    citation_chunk_size=512,
    streaming=True,
)

In [None]:
# Ask the citation engine the green-premium question and stream its answer.
question = "What is a green premium?"
response = query_engine_cit.query(question)
response.print_response_stream()

In [None]:
# Number of source nodes attached to the latest response.
len(response.source_nodes)

In [None]:
# Text of the first source node behind the latest response.
first_source = response.source_nodes[0]
print(first_source.node.get_text())

In [None]:
# Metadata of the first source node behind the latest response.
first_source = response.source_nodes[0]
print(first_source.node.metadata)

In [None]:
# Display the response object's response_txt attribute as the cell output.
response.response_txt

In [None]:
# Ask the citation engine about fossil fuels and food, streaming the answer.
question = "How are fossil fuels related to food production?"
response = query_engine_cit.query(question)
response.print_response_stream()

In [None]:
# Metadata, then text, of the second source node behind the latest response.
second_source = response.source_nodes[1].node
print(second_source.metadata)
print(second_source.get_text())