From 4f07bf7bec7e7d3d26ce17469aea417804fe5bf9 Mon Sep 17 00:00:00 2001 From: Markov Date: Sun, 22 Feb 2026 23:25:03 +0100 Subject: [PATCH] Add OpenAI, Gemini, xAI providers (all via httpx, no SDKs) --- config.example.json | 29 ++++- picogent/agent.py | 21 +++- picogent/config.py | 4 +- picogent/providers/__init__.py | 8 +- picogent/providers/gemini.py | 192 +++++++++++++++++++++++++++++++++ picogent/providers/openai.py | 187 ++++++++++++++++++++++++++++++++ 6 files changed, 433 insertions(+), 8 deletions(-) create mode 100644 picogent/providers/gemini.py create mode 100644 picogent/providers/openai.py diff --git a/config.example.json b/config.example.json index 8a07e2f..f4e74c0 100644 --- a/config.example.json +++ b/config.example.json @@ -1,9 +1,36 @@ { + "_comment": "Provider: anthropic | openai | gemini | xai (or any OpenAI-compatible)", + "provider": "anthropic", "model": "claude-sonnet-4-20250514", "api_key": "env:ANTHROPIC_API_KEY", + + "_examples": { + "openai": { + "provider": "openai", + "model": "gpt-4o", + "api_key": "env:OPENAI_API_KEY" + }, + "gemini": { + "provider": "gemini", + "model": "gemini-2.5-flash", + "api_key": "env:GEMINI_API_KEY" + }, + "xai": { + "provider": "xai", + "model": "grok-3", + "api_key": "env:XAI_API_KEY" + }, + "custom_openai_compatible": { + "provider": "openai", + "model": "my-model", + "api_key": "env:MY_API_KEY", + "base_url": "https://my-provider.com/v1" + } + }, + "max_tokens": 8192, "max_iterations": 20, "workspace": ".", "system_prompt": "You are a helpful coding assistant." 
-} \ No newline at end of file +} diff --git a/picogent/agent.py b/picogent/agent.py index 2ab842c..542e50d 100644 --- a/picogent/agent.py +++ b/picogent/agent.py @@ -10,6 +10,8 @@ from .config import Config from .session import Session from .context import ContextBuilder from .providers.anthropic import AnthropicProvider +from .providers.openai import OpenAIProvider +from .providers.gemini import GeminiProvider from .tools.registry import ToolRegistry from .tools.read import ReadTool from .tools.write import WriteTool @@ -26,10 +28,27 @@ class Agent: self.context_builder = ContextBuilder(config.workspace) # Initialize provider + base_url = getattr(config, 'base_url', None) if config.provider == "anthropic": self.provider = AnthropicProvider(config.api_key, config.model) + elif config.provider == "openai": + self.provider = OpenAIProvider( + config.api_key, config.model, + base_url=base_url or "https://api.openai.com/v1" + ) + elif config.provider == "xai": + self.provider = OpenAIProvider( + config.api_key, config.model, + base_url=base_url or "https://api.x.ai/v1" + ) + elif config.provider == "gemini": + self.provider = GeminiProvider(config.api_key, config.model) else: - raise ValueError(f"Unknown provider: {config.provider}") + # Assume OpenAI-compatible for unknown providers + self.provider = OpenAIProvider( + config.api_key, config.model, + base_url=base_url or "https://api.openai.com/v1" + ) # Initialize tools self.tool_registry = ToolRegistry() diff --git a/picogent/config.py b/picogent/config.py index 82455b8..1cc09d8 100644 --- a/picogent/config.py +++ b/picogent/config.py @@ -18,6 +18,7 @@ class Config: max_iterations: int = 20 workspace: str = "." system_prompt: str = "You are a helpful coding assistant." 
+ base_url: Optional[str] = None @classmethod def from_file(cls, config_path: str) -> "Config": @@ -43,7 +44,8 @@ class Config: max_tokens=data.get('max_tokens', 8192), max_iterations=data.get('max_iterations', 20), workspace=data.get('workspace', '.'), - system_prompt=data.get('system_prompt', 'You are a helpful coding assistant.') + system_prompt=data.get('system_prompt', 'You are a helpful coding assistant.'), + base_url=data.get('base_url', None) ) def to_dict(self) -> Dict[str, Any]: diff --git a/picogent/providers/__init__.py b/picogent/providers/__init__.py index 6804f81..b98b31b 100644 --- a/picogent/providers/__init__.py +++ b/picogent/providers/__init__.py @@ -1,8 +1,6 @@ -""" -PicoGent Providers Package -""" - from .base import BaseProvider from .anthropic import AnthropicProvider +from .openai import OpenAIProvider +from .gemini import GeminiProvider -__all__ = ["BaseProvider", "AnthropicProvider"] \ No newline at end of file +__all__ = ["BaseProvider", "AnthropicProvider", "OpenAIProvider", "GeminiProvider"] diff --git a/picogent/providers/gemini.py b/picogent/providers/gemini.py new file mode 100644 index 0000000..cf9d0e8 --- /dev/null +++ b/picogent/providers/gemini.py @@ -0,0 +1,192 @@ +""" +Google Gemini provider using httpx (REST API, no SDK) +""" + +import httpx +import json +import uuid +from typing import Dict, List, Any, Optional +from .base import BaseProvider + + +class GeminiProvider(BaseProvider): + """Google Gemini provider using generateContent REST API""" + + def __init__( + self, + api_key: str, + model: str = "gemini-2.5-flash", + ): + super().__init__(api_key, model) + self.base_url = "https://generativelanguage.googleapis.com/v1beta" + + def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert Anthropic-style tool defs to Gemini function declarations""" + declarations = [] + for tool in tools: + schema = tool.get("input_schema", {}) + # Gemini doesn't support 'additionalProperties' in some cases + 
clean_schema = {k: v for k, v in schema.items() if k != "additionalProperties"} + declarations.append({ + "name": tool["name"], + "description": tool.get("description", ""), + "parameters": clean_schema, + }) + return [{"functionDeclarations": declarations}] + + def _convert_messages( + self, messages: List[Dict[str, Any]], system_prompt: str + ) -> tuple[str, List[Dict[str, Any]]]: + """Convert Anthropic-style messages to Gemini contents format. + Returns (system_instruction, contents). + """ + contents: List[Dict[str, Any]] = [] + + for msg in messages: + role = msg.get("role") + + if role == "user": + content = msg.get("content") + if isinstance(content, str): + contents.append({ + "role": "user", + "parts": [{"text": content}], + }) + elif isinstance(content, list): + # Check for tool_result blocks + tool_results = [b for b in content if b.get("type") == "tool_result"] + if tool_results: + # tool_result blocks carry tool_use_id, not the function name; + # recover the name from the preceding assistant tool_use blocks. + id_to_name = {tu.get("id"): tu.get("name", "") for m in messages if m.get("role") == "assistant" and isinstance(m.get("content"), list) for tu in m["content"] if tu.get("type") == "tool_use"} + parts = [{ + "functionResponse": { + "name": tr.get("tool_name") or id_to_name.get(tr.get("tool_use_id"), "unknown"), + "response": {"result": tr.get("content", "")}, + }} for tr in tool_results] + contents.append({"role": "user", "parts": parts}) + else: + text = " ".join( + b.get("text", "") for b in content if b.get("type") == "text" + ) + if text: + contents.append({ + "role": "user", + "parts": [{"text": text}], + }) + + elif role == "assistant": + content = msg.get("content") + if isinstance(content, str): + contents.append({ + "role": "model", + "parts": [{"text": content}], + }) + elif isinstance(content, list): + parts = [] + for block in content: + if block.get("type") == "text": + text = block.get("text", "") + if text: + parts.append({"text": text}) + elif block.get("type") == "tool_use": + parts.append({ + "functionCall": { + "name": block.get("name", ""), + "args": block.get("input", {}), + } + }) + if parts: + contents.append({"role": "model", "parts": parts}) + + return system_prompt, contents + + async def generate_response( + self, + messages: List[Dict[str, Any]], + 
system_prompt: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_tokens: int = 8192, + ) -> Dict[str, Any]: + """Generate response using Gemini generateContent API""" + + system_instruction, contents = self._convert_messages(messages, system_prompt) + + payload: Dict[str, Any] = { + "contents": contents, + "generationConfig": { + "maxOutputTokens": max_tokens, + }, + } + + if system_instruction: + payload["systemInstruction"] = { + "parts": [{"text": system_instruction}] + } + + if tools: + payload["tools"] = self._convert_tools(tools) + + url = f"{self.base_url}/models/{self.model}:generateContent" + + async with httpx.AsyncClient() as client: + response = await client.post( + url, + json=payload, headers={"x-goog-api-key": self.api_key}, + timeout=120.0, + ) + + if response.status_code != 200: + raise Exception( + f"Gemini API error {response.status_code}: {response.text}" + ) + + result = response.json() + + # Parse candidates + candidates = result.get("candidates", []) + if not candidates: + return {"content": "", "usage": {}, "model": self.model} + + parts = candidates[0].get("content", {}).get("parts", []) + + text_parts = [] + tool_calls = [] + raw_content = [] + + for part in parts: + if "text" in part: + text_parts.append(part["text"]) + raw_content.append({"type": "text", "text": part["text"]}) + elif "functionCall" in part: + fc = part["functionCall"] + call_id = f"call_{uuid.uuid4().hex[:12]}" + tool_calls.append({ + "id": call_id, + "name": fc.get("name", ""), + "input": fc.get("args", {}), + }) + raw_content.append({ + "type": "tool_use", + "id": call_id, + "name": fc.get("name", ""), + "input": fc.get("args", {}), + }) + + # Usage info + usage_meta = result.get("usageMetadata", {}) + + response_data: Dict[str, Any] = { + "content": "\n".join(text_parts) if text_parts else "", + "usage": { + "input_tokens": usage_meta.get("promptTokenCount", 0), + "output_tokens": usage_meta.get("candidatesTokenCount", 0), + }, + "model": self.model, + } + + if tool_calls: + 
response_data["tool_calls"] = tool_calls + response_data["raw_content"] = raw_content + + return response_data diff --git a/picogent/providers/openai.py b/picogent/providers/openai.py new file mode 100644 index 0000000..58ceafd --- /dev/null +++ b/picogent/providers/openai.py @@ -0,0 +1,187 @@ +""" +OpenAI-compatible provider (ChatGPT, xAI/Grok, etc.) using httpx +""" + +import httpx +from typing import Dict, List, Any, Optional +from .base import BaseProvider + + +class OpenAIProvider(BaseProvider): + """OpenAI-compatible provider using direct API calls. + Works with: OpenAI, xAI (Grok), Azure OpenAI, any OpenAI-compatible API. + """ + + def __init__( + self, + api_key: str, + model: str = "gpt-4o", + base_url: str = "https://api.openai.com/v1", + ): + super().__init__(api_key, model) + self.base_url = base_url.rstrip("/") + + def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert Anthropic-style tool defs to OpenAI function calling format""" + openai_tools = [] + for tool in tools: + openai_tools.append({ + "type": "function", + "function": { + "name": tool["name"], + "description": tool.get("description", ""), + "parameters": tool.get("input_schema", {}), + }, + }) + return openai_tools + + def _convert_messages( + self, messages: List[Dict[str, Any]], system_prompt: str + ) -> List[Dict[str, Any]]: + """Convert Anthropic-style messages to OpenAI format""" + oai_messages: List[Dict[str, Any]] = [] + + # System prompt as first message + if system_prompt: + oai_messages.append({"role": "system", "content": system_prompt}) + + for msg in messages: + role = msg.get("role") + + # --- user message --- + if role == "user": + content = msg.get("content") + if isinstance(content, str): + oai_messages.append({"role": "user", "content": content}) + elif isinstance(content, list): + # Could contain tool_result blocks (Anthropic format) + tool_results = [b for b in content if b.get("type") == "tool_result"] + if tool_results: + for tr in 
tool_results: + oai_messages.append({ + "role": "tool", + "tool_call_id": tr.get("tool_use_id", ""), + "content": tr.get("content", ""), + }) + else: + # Plain text blocks + text = " ".join( + b.get("text", "") for b in content if b.get("type") == "text" + ) + if text: + oai_messages.append({"role": "user", "content": text}) + + # --- assistant message --- + elif role == "assistant": + content = msg.get("content") + if isinstance(content, str): + oai_messages.append({"role": "assistant", "content": content}) + elif isinstance(content, list): + text_parts = [] + tool_calls = [] + for block in content: + if block.get("type") == "text": + text_parts.append(block.get("text", "")) + elif block.get("type") == "tool_use": + import json + tool_calls.append({ + "id": block.get("id", ""), + "type": "function", + "function": { + "name": block.get("name", ""), + "arguments": json.dumps(block.get("input", {})), + }, + }) + assistant_msg: Dict[str, Any] = { + "role": "assistant", + "content": "\n".join(text_parts) if text_parts else None, + } + if tool_calls: + assistant_msg["tool_calls"] = tool_calls + oai_messages.append(assistant_msg) + + return oai_messages + + async def generate_response( + self, + messages: List[Dict[str, Any]], + system_prompt: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_tokens: int = 8192, + ) -> Dict[str, Any]: + """Generate response using OpenAI Chat Completions API""" + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + oai_messages = self._convert_messages(messages, system_prompt) + + payload: Dict[str, Any] = { + "model": self.model, + "max_tokens": max_tokens, + "messages": oai_messages, + } + + if tools: + payload["tools"] = self._convert_tools(tools) + + async with httpx.AsyncClient() as client: + response = await client.post( + f"{self.base_url}/chat/completions", + headers=headers, + json=payload, + timeout=120.0, + ) + + if response.status_code != 200: + raise Exception( + 
f"OpenAI API error {response.status_code}: {response.text}" + ) + + result = response.json() + choice = result.get("choices", [{}])[0] + message = choice.get("message", {}) + + text_content = message.get("content", "") or "" + oai_tool_calls = message.get("tool_calls", []) + + response_data: Dict[str, Any] = { + "content": text_content, + "usage": result.get("usage", {}), + "model": result.get("model", self.model), + } + + if oai_tool_calls: + import json + # Convert OpenAI tool_calls → our standard format + tool_calls = [] + # Also build Anthropic-style raw_content for session storage + raw_content = [] + if text_content: + raw_content.append({"type": "text", "text": text_content}) + + for tc in oai_tool_calls: + fn = tc.get("function", {}) + args_str = fn.get("arguments", "{}") + try: + args = json.loads(args_str) + except json.JSONDecodeError: + args = {} + tool_calls.append({ + "id": tc.get("id", ""), + "name": fn.get("name", ""), + "input": args, + }) + raw_content.append({ + "type": "tool_use", + "id": tc.get("id", ""), + "name": fn.get("name", ""), + "input": args, + }) + + response_data["tool_calls"] = tool_calls + response_data["raw_content"] = raw_content + + return response_data