call_llm.py
from google import genai
import os
import logging
import json
from datetime import datetime

# Configure logging
log_directory = os.getenv("LOG_DIR", "logs")
os.makedirs(log_directory, exist_ok=True)
log_file = os.path.join(log_directory, f"llm_calls_{datetime.now().strftime('%Y%m%d')}.log")

# Set up logger
logger = logging.getLogger("llm_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent propagation to the root logger
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)

# Simple cache configuration
cache_file = "llm_cache.json"

# By default, we use Google Gemini 2.5 Pro, as it shows great performance for code understanding
def call_llm(prompt: str, use_cache: bool = True) -> str:
    """Call the LLM with a prompt and return its text response, using a simple JSON file cache."""
    # Log the prompt
    logger.info(f"PROMPT: {prompt}")

    # Check cache if enabled
    if use_cache:
        # Load cache from disk
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, 'r') as f:
                    cache = json.load(f)
            except Exception:
                logger.warning("Failed to load cache, starting with empty cache")

        # Return from cache if it exists
        if prompt in cache:
            logger.info(f"RESPONSE: {cache[prompt]}")
            return cache[prompt]

    # Call the LLM if not in cache or cache is disabled
    client = genai.Client(
        vertexai=True,
        # TODO: change to your own project id and location
        project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
        location=os.getenv("GEMINI_LOCATION", "us-central1")
    )
    # You can comment out the client above and use an AI Studio API key instead:
    # client = genai.Client(
    #     api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
    # )
    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
    response = client.models.generate_content(
        model=model,
        contents=[prompt]
    )
    response_text = response.text

    # Log the response
    logger.info(f"RESPONSE: {response_text}")

    # Update cache if enabled
    if use_cache:
        # Reload the cache to avoid overwriting concurrent updates
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, 'r') as f:
                    cache = json.load(f)
            except Exception:
                pass

        # Add to cache and save
        cache[prompt] = response_text
        try:
            with open(cache_file, 'w') as f:
                json.dump(cache, f)
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")

    return response_text

# # Use Anthropic Claude 3.7 Sonnet Extended Thinking
# def call_llm(prompt, use_cache: bool = True):
#     from anthropic import Anthropic
#     client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY", "your-api-key"))
#     response = client.messages.create(
#         model="claude-3-7-sonnet-20250219",
#         max_tokens=21000,
#         thinking={
#             "type": "enabled",
#             "budget_tokens": 20000
#         },
#         messages=[
#             {"role": "user", "content": prompt}
#         ]
#     )
#     return response.content[1].text

# # Use OpenAI o1
# def call_llm(prompt, use_cache: bool = True):
#     from openai import OpenAI
#     client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))
#     r = client.chat.completions.create(
#         model="o1",
#         messages=[{"role": "user", "content": prompt}],
#         response_format={
#             "type": "text"
#         },
#         reasoning_effort="medium",
#         store=False
#     )
#     return r.choices[0].message.content

if __name__ == "__main__":
    test_prompt = "Hello, how are you?"

    # First call - should hit the API
    print("Making call...")
    response1 = call_llm(test_prompt, use_cache=False)
    print(f"Response: {response1}")
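
    # Illustrative addition (not part of the original script): exercise the cache path.
    # The call above used use_cache=False, so it did not write to llm_cache.json.
    # A cached call returns a previously stored response if one exists; otherwise it
    # calls the API and stores the result, so repeating it should skip the API entirely.
    response2 = call_llm(test_prompt, use_cache=True)
    print(f"Response (with cache enabled): {response2}")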