lightrag-comments/examples/batch_eval.py

import re
import json
import jsonlines

from openai import OpenAI


def batch_eval(query_file, result1_file, result2_file, output_file_path):
    """Build pairwise-comparison prompts and submit them as an OpenAI batch job.

    For every question found in ``query_file`` the two candidate answers are
    placed into an LLM-as-judge prompt.  All requests are written to
    ``output_file_path`` in JSON Lines form, that file is uploaded with
    purpose ``"batch"``, and a batch targeting ``/v1/chat/completions`` is
    created from it.  Progress is reported with ``print``; nothing is returned.

    Args:
        query_file: Path to a text file; every line fragment matching
            ``- Question <number>: <text>`` contributes one question.
        result1_file: Path to a JSON file that is iterated item by item, each
            item being indexed with ``"result"`` to obtain "Answer 1".
        result2_file: Same layout as ``result1_file``; supplies "Answer 2".
        output_file_path: Path where the JSON Lines request file is written
            (overwritten if present) before being uploaded.

    Raises:
        OSError: If one of the input files cannot be opened, or the output
            file cannot be written.
        json.JSONDecodeError: If a result file is not valid JSON.
        KeyError: If an item of a result file has no ``"result"`` key.
    """
    client = OpenAI()

    with open(query_file, "r") as f:
        data = f.read()

    queries = re.findall(r"- Question \d+: (.+)", data)

    with open(result1_file, "r") as f:
        answers1 = [item["result"] for item in json.load(f)]

    with open(result2_file, "r") as f:
        answers2 = [item["result"] for item in json.load(f)]

    # zip() below stops at the shortest input, so make a length mismatch
    # visible instead of silently dropping the unmatched tail.
    if not (len(queries) == len(answers1) == len(answers2)):
        print(
            f"Warning: {len(queries)} queries, {len(answers1)} answers in "
            f"{result1_file} and {len(answers2)} answers in {result2_file}; "
            "only the first "
            f"{min(len(queries), len(answers1), len(answers2))} are evaluated."
        )

    requests = []
    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
        sys_prompt = """
        ---Role---
        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
        """

        # The JSON template lists all three criteria named in the
        # instructions; doubled braces are literal braces in the f-string.
        prompt = f"""
        You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.

        - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
        - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
        - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?

        For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.

        Here is the question:
        {query}

        Here are the two answers:

        **Answer 1:**
        {answer1}

        **Answer 2:**
        {answer2}

        Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.

        Output your evaluation in the following JSON format:

        {{
            "Comprehensiveness": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Provide explanation here]"
            }},
            "Diversity": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Provide explanation here]"
            }},
            "Empowerment": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Provide explanation here]"
            }},
            "Overall Winner": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
            }}
        }}
        """

        # One batch request per question; custom_id is 1-based so it lines up
        # with the "Question N" numbering order of the matched queries.
        request_data = {
            "custom_id": f"request-{i+1}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": sys_prompt},
                    {"role": "user", "content": prompt},
                ],
            },
        }

        requests.append(request_data)

    with jsonlines.open(output_file_path, mode="w") as writer:
        for request in requests:
            writer.write(request)

    print(f"Batch API requests written to {output_file_path}")

    # Upload inside a context manager so the handle is closed once the
    # upload call returns, even if it raises.
    with open(output_file_path, "rb") as batch_file:
        batch_input_file = client.files.create(file=batch_file, purpose="batch")
    batch_input_file_id = batch_input_file.id

    batch = client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": "nightly eval job"},
    )

    print(f"Batch {batch.id} has been created.")


if __name__ == "__main__":
    # batch_eval() has four required parameters, so take them from the
    # command line instead of calling it with no arguments.
    import argparse

    parser = argparse.ArgumentParser(
        description="Create an OpenAI batch job that compares two sets of answers."
    )
    parser.add_argument(
        "query_file", help="text file containing '- Question N: ...' lines"
    )
    parser.add_argument(
        "result1_file", help="JSON file whose items carry Answer 1 under 'result'"
    )
    parser.add_argument(
        "result2_file", help="JSON file whose items carry Answer 2 under 'result'"
    )
    parser.add_argument(
        "output_file_path", help="where to write the JSON Lines batch request file"
    )
    args = parser.parse_args()

    batch_eval(
        args.query_file,
        args.result1_file,
        args.result2_file,
        args.output_file_path,
    )
github官方项目 2024/11/12 commit： 2a13bf98331fa51babc3ac76b649068157981b57 2024-11-16 11:26:57 +08:00			`import re`
			`import json`
			`import jsonlines`

			`from openai import OpenAI`


			`def batch_eval(query_file, result1_file, result2_file, output_file_path):`
			`client = OpenAI()`

			`with open(query_file, "r") as f:`
			`data = f.read()`

			`queries = re.findall(r"- Question \d+: (.+)", data)`

			`with open(result1_file, "r") as f:`
			`answers1 = json.load(f)`
			`answers1 = [i["result"] for i in answers1]`

			`with open(result2_file, "r") as f:`
			`answers2 = json.load(f)`
			`answers2 = [i["result"] for i in answers2]`

			`requests = []`
			`for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):`
			`sys_prompt = """`
			`---Role---`
			`You are an expert tasked with evaluating two answers to the same question based on three criteria: Comprehensiveness, Diversity, and Empowerment.`
			`"""`

			`prompt = f"""`
			`You will evaluate two answers to the same question based on three criteria: Comprehensiveness, Diversity, and Empowerment.`

			`- Comprehensiveness: How much detail does the answer provide to cover all aspects and details of the question?`
			`- Diversity: How varied and rich is the answer in providing different perspectives and insights on the question?`
			`- Empowerment: How well does the answer help the reader understand and make informed judgments about the topic?`

			`For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.`

			`Here is the question:`
			`{query}`

			`Here are the two answers:`

			`Answer 1:`
			`{answer1}`

			`Answer 2:`
			`{answer2}`

			`Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.`

			`Output your evaluation in the following JSON format:`

			`{{`
			`"Comprehensiveness": {{`
			`"Winner": "[Answer 1 or Answer 2]",`
			`"Explanation": "[Provide explanation here]"`
			`}},`
			`"Empowerment": {{`
			`"Winner": "[Answer 1 or Answer 2]",`
			`"Explanation": "[Provide explanation here]"`
			`}},`
			`"Overall Winner": {{`
			`"Winner": "[Answer 1 or Answer 2]",`
			`"Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"`
			`}}`
			`}}`
			`"""`

			`request_data = {`
			`"custom_id": f"request-{i+1}",`
			`"method": "POST",`
			`"url": "/v1/chat/completions",`
			`"body": {`
			`"model": "gpt-4o-mini",`
			`"messages": [`
			`{"role": "system", "content": sys_prompt},`
			`{"role": "user", "content": prompt},`
			`],`
			`},`
			`}`

			`requests.append(request_data)`

			`with jsonlines.open(output_file_path, mode="w") as writer:`
			`for request in requests:`
			`writer.write(request)`

			`print(f"Batch API requests written to {output_file_path}")`

			`batch_input_file = client.files.create(`
			`file=open(output_file_path, "rb"), purpose="batch"`
			`)`
			`batch_input_file_id = batch_input_file.id`

			`batch = client.batches.create(`
			`input_file_id=batch_input_file_id,`
			`endpoint="/v1/chat/completions",`
			`completion_window="24h",`
			`metadata={"description": "nightly eval job"},`
			`)`

			`print(f"Batch {batch.id} has been created.")`


			`if __name__ == "__main__":`
			`batch_eval()`