import os
import json
import time

import numpy as np

from lightrag import LightRAG
from lightrag.utils import EmbeddingFunc
from lightrag.llm import openai_complete_if_cache, openai_embedding

## For Upstage API
# please check if embedding_dim=4096 in lightrag.py and llm.py in the lightrag directory
async def llm_model_func(
    prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
    return await openai_complete_if_cache(
        "solar-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=os.getenv("UPSTAGE_API_KEY"),
        base_url="https://api.upstage.ai/v1/solar",
        **kwargs,
    )


async def embedding_func(texts: list[str]) -> np.ndarray:
    return await openai_embedding(
        texts,
        model="solar-embedding-1-large-query",
        api_key=os.getenv("UPSTAGE_API_KEY"),
        base_url="https://api.upstage.ai/v1/solar",
    )
## /For Upstage API


def insert_text(rag, file_path):
    # Load the pre-extracted unique contexts and insert them into the index,
    # retrying a few times in case the remote API call fails transiently.
    with open(file_path, mode="r") as f:
        unique_contexts = json.load(f)

    retries = 0
    max_retries = 3
    while retries < max_retries:
        try:
            rag.insert(unique_contexts)
            break
        except Exception as e:
            retries += 1
            print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
            time.sleep(10)
    if retries == max_retries:
        print("Insertion failed after exceeding the maximum number of retries")


cls = "mix"
WORKING_DIR = f"../{cls}"

if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=llm_model_func,
    embedding_func=EmbeddingFunc(
        embedding_dim=4096, max_token_size=8192, func=embedding_func
    ),
)

insert_text(rag, f"../datasets/unique_contexts/{cls}_unique_contexts.json")