Initial commit

commit 92f7ff7058

2 changed files with 467 additions and 0 deletions

README.md (new file, 72 lines)
@@ -0,0 +1,72 @@
# Ollama-OpenAI Proxy

This is a Go-based proxy server that enables applications designed to work with the Ollama API to interact seamlessly with an OpenAI-compatible endpoint. It translates and forwards requests and responses between the two APIs while applying custom transformations to model names and data formats.

**Note:** This is a pet project I use to forward requests to LiteLLM for use with Kerlig, which doesn’t support custom OpenAI endpoints. As this is a personal project, there might be issues and rough edges. Contributions and feedback are welcome!
## Features

- **Endpoint Proxying:**
  - `/v1/models` & `/v1/completions`: these endpoints are forwarded directly to the downstream OpenAI-compatible server.
  - `/api/tags`: queries the downstream `/v1/models` endpoint, transforms the response into the Ollama-style model list, and appends `:proxy` to model names if they don’t already contain a colon.
  - `/api/chat`: rewrites the request to the downstream `/v1/chat/completions` endpoint. It intercepts and transforms streaming NDJSON responses from the OpenAI format into the expected Ollama format, including stripping any trailing `:proxy` from model names.
  - `/api/pull` and other unknown endpoints: forwarded to a local Ollama instance running on `127.0.0.1:11505`.
- **Debug Logging:** when running in debug mode, the proxy logs:
  - every incoming request,
  - the outgoing `/api/chat` payload,
  - raw downstream streaming chunks and their transformed equivalents.
- **Model Name Handling** (see the example below):
  - For `/api/tags`, if a model ID does not contain a colon, the proxy appends `:proxy` to the name.
  - For other endpoints, any `:proxy` suffix in model names is stripped before forwarding.
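For illustration, if the downstream `/v1/models` list contains a model with ID `gpt-4o` (a placeholder name, not something the proxy assumes), the `/api/tags` response built by the proxy looks roughly like this. The `size`, `digest`, `format`, and other `details` values are fixed stand-ins the proxy fills in rather than real metadata, and the timestamp shown here is just an example derived from the model's `created` field:

```json
{
  "models": [
    {
      "name": "gpt-4o:proxy",
      "model": "gpt-4o:proxy",
      "modified_at": "2023-11-14T22:13:20Z",
      "size": 1337,
      "digest": "",
      "details": {
        "parent_model": "",
        "format": "unknown",
        "family": "",
        "families": [],
        "parameter_size": "",
        "quantization_level": ""
      }
    }
  ]
}
```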
## Prerequisites

- Go 1.18+ installed.
- An OpenAI-compatible server endpoint (e.g., running on `http://127.0.0.1:4000`).
- (Optional) A local Ollama instance running on `127.0.0.1:11505` for endpoints not handled by the downstream server.
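If you want to sanity-check the downstream endpoint before starting the proxy, a plain request to its `/v1/models` route should return a JSON model list — this is the same call the proxy itself makes when serving `/api/tags`. The URL and key below are placeholders:

```bash
curl -H "Authorization: Bearer YOUR_API_KEY" http://127.0.0.1:4000/v1/models
```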
## Installation

Clone this repository:

    git clone https://github.com/yourusername/ollama-openai-proxy.git
    cd ollama-openai-proxy

Build the project (the source lives in `ollama.go`):

    go build -o proxy-server .
## Usage

Run the proxy server with the desired flags:

    ./proxy-server --listen=":11434" --target="http://127.0.0.1:4000" --api-key="YOUR_API_KEY" --debug

### Command-Line Flags

- `--listen`: The address and port the proxy server listens on (default `:11434`).
- `--target`: The base URL of the OpenAI-compatible downstream server (e.g., `http://127.0.0.1:4000`).
- `--api-key`: (Optional) The API key for the downstream server.
- `--debug`: Enable detailed debug logging for every request and response.
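Once the proxy is running, any Ollama-style client call can be pointed at it. As a rough sketch (the model name `gpt-4o:proxy` is only an example of what `/api/tags` might report for your downstream server):

```bash
# List models as an Ollama client would see them.
curl http://localhost:11434/api/tags

# Streaming chat request; the proxy rewrites it to /v1/chat/completions downstream.
curl http://localhost:11434/api/chat \
  -d '{"model": "gpt-4o:proxy", "messages": [{"role": "user", "content": "Hello!"}], "stream": true}'
```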
## How It Works

1. **Request Routing:**
   The proxy intercepts requests and routes them based on the endpoint:
   - Requests to `/v1/models` and `/v1/completions` are forwarded directly.
   - Requests to `/api/tags` are handled locally by querying `/v1/models` on the downstream server, transforming the JSON response, and appending `:proxy` where needed.
   - Requests to `/api/chat` are rewritten to `/v1/chat/completions`, with the payload and response processed to strip or add the `:proxy` suffix as appropriate.
   - All other endpoints are forwarded to the local Ollama instance.
2. **Response Transformation:**
   Streaming responses from the downstream `/v1/chat/completions` endpoint (in NDJSON format) are read line by line. Each chunk is parsed, transformed into the Ollama format, and streamed back to the client (see the example after this list).
3. **Logging:**
   With debug mode enabled, detailed logs of incoming requests, outgoing payloads, and both raw and transformed response chunks are printed.
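As a sketch of the transformation in step 2 (the id, model name, and content are made up for illustration), a downstream streaming line such as the first one below is re-emitted to the client as the Ollama-style NDJSON line that follows. When the downstream sends a non-empty `finish_reason`, the proxy marks that chunk with `"done": true`:

```
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1700000000,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}

{"model":"gpt-4o","created_at":"2023-11-14T22:13:20Z","message":{"role":"assistant","content":"Hello"},"done":false}
```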
## Contributing

Contributions are welcome! As this is a pet project, there may be rough edges and issues. Please feel free to open issues or submit pull requests for improvements and bug fixes.

## License

This project is licensed under the MIT License. See the LICENSE file for details.
ollama.go (new file, 395 lines)
@@ -0,0 +1,395 @@
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"flag"
	"io"
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
	"strings"
	"time"
)

// --------------------
// Data Structures
// --------------------

// Structures used for /api/tags transformation.
type DownstreamModel struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

type DownstreamModelsResponse struct {
	Data   []DownstreamModel `json:"data"`
	Object string            `json:"object"`
}

type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

type Model struct {
	Name       string       `json:"name"`
	Model      string       `json:"model"`
	ModifiedAt string       `json:"modified_at"`
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    ModelDetails `json:"details"`
}

type TagsResponse struct {
	Models []Model `json:"models"`
}

// Structures used for transforming the /api/chat response.

// OpenAIChunk represents one NDJSON chunk from the OpenAI-compatible streaming endpoint.
type OpenAIChunk struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Delta struct {
			Role    string `json:"role,omitempty"`
			Content string `json:"content,omitempty"`
		} `json:"delta"`
		Index        int     `json:"index"`
		FinishReason *string `json:"finish_reason,omitempty"`
	} `json:"choices"`
}

// OllamaChunk is the expected response format for Ollama.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type OllamaChunk struct {
	Model     string  `json:"model"`
	CreatedAt string  `json:"created_at"`
	Message   Message `json:"message"`
	Done      bool    `json:"done"`
}

// --------------------
// Helpers & Middleware
// --------------------

// logMiddleware logs every HTTP request when debug mode is enabled.
func logMiddleware(debug bool, next http.Handler) http.Handler {
	if !debug {
		return next
	}
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		log.Printf("DEBUG: Received %s request for %s", r.Method, r.URL.String())
		next.ServeHTTP(w, r)
	})
}

// forwardToOllama logs the request and response before proxying the request to the local Ollama instance.
func forwardToOllama(w http.ResponseWriter, r *http.Request) {
	log.Println("=== Unknown Request Received ===")
	log.Printf("Method: %s", r.Method)
	log.Printf("URL: %s", r.URL.String())
	log.Printf("Headers: %v", r.Header)
	bodyBytes, err := io.ReadAll(r.Body)
	if err != nil {
		log.Printf("Error reading request body: %v", err)
	} else if len(bodyBytes) > 0 {
		log.Printf("Body: %s", string(bodyBytes))
	} else {
		log.Printf("Body: <empty>")
	}
	r.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))

	targetOllama, err := url.Parse("http://127.0.0.1:11505")
	if err != nil {
		log.Printf("Error parsing target Ollama URL: %v", err)
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
		return
	}

	ollamaProxy := httputil.NewSingleHostReverseProxy(targetOllama)
	ollamaProxy.ModifyResponse = func(resp *http.Response) error {
		respBody, err := io.ReadAll(resp.Body)
		if err != nil {
			log.Printf("Error reading response body: %v", err)
			return err
		}
		log.Println("=== Response from 127.0.0.1:11505 ===")
		log.Printf("Status: %s", resp.Status)
		log.Printf("Headers: %v", resp.Header)
		log.Printf("Body: %s", string(respBody))
		resp.Body = io.NopCloser(bytes.NewBuffer(respBody))
		return nil
	}
	ollamaProxy.ServeHTTP(w, r)
}

// --------------------
// Main
// --------------------

func main() {
	// Command-line flags.
	listenAddr := flag.String("listen", ":11434", "Address to listen on (e.g. :11434)")
	targetUrlStr := flag.String("target", "http://127.0.0.1:4000", "Target OpenAI-compatible server URL")
	openaiApiKey := flag.String("api-key", "", "OpenAI API key (optional)")
	debug := flag.Bool("debug", false, "Print debug logs for every call")
	flag.Parse()

	// Parse the target URL.
	targetUrl, err := url.Parse(*targetUrlStr)
	if err != nil {
		log.Fatalf("Error parsing target URL: %v", err)
	}

	// Create a reverse proxy for /v1/models and /v1/completions.
	proxy := httputil.NewSingleHostReverseProxy(targetUrl)
	originalDirector := proxy.Director
	proxy.Director = func(req *http.Request) {
		originalDirector(req)
		// For downstream endpoints, also strip any ":proxy" from the model field in the URL query or headers if needed.
		if *openaiApiKey != "" {
			req.Header.Set("Authorization", "Bearer "+*openaiApiKey)
		}
	}

	// Handler for /v1/models.
	http.HandleFunc("/v1/models", func(w http.ResponseWriter, r *http.Request) {
		log.Printf("Proxying /v1/models request to %s", targetUrl.String())
		proxy.ServeHTTP(w, r)
	})

	// Handler for /v1/completions.
	http.HandleFunc("/v1/completions", func(w http.ResponseWriter, r *http.Request) {
		log.Printf("Proxying /v1/completions request to %s", targetUrl.String())
		proxy.ServeHTTP(w, r)
	})

	// Handler for /api/tags.
	// When building the list, if a model's ID does not contain a colon,
	// append ":proxy" to it.
	http.HandleFunc("/api/tags", func(w http.ResponseWriter, r *http.Request) {
		log.Printf("Handling /api/tags request by querying downstream /v1/models")
		modelsURL := targetUrl.ResolveReference(&url.URL{Path: "/v1/models"})
		reqDown, err := http.NewRequest("GET", modelsURL.String(), nil)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		if *openaiApiKey != "" {
			reqDown.Header.Set("Authorization", "Bearer "+*openaiApiKey)
		}
		client := &http.Client{}
		respDown, err := client.Do(reqDown)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadGateway)
			return
		}
		defer respDown.Body.Close()

		if respDown.StatusCode != http.StatusOK {
			body, _ := io.ReadAll(respDown.Body)
			http.Error(w, string(body), respDown.StatusCode)
			return
		}

		var dsResp DownstreamModelsResponse
		if err := json.NewDecoder(respDown.Body).Decode(&dsResp); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		var tagsResp TagsResponse
		for _, dm := range dsResp.Data {
			modelName := dm.ID
			// Append ":proxy" if there is no colon in the model name.
			if !strings.Contains(modelName, ":") {
				modelName += ":proxy"
			}
			modelEntry := Model{
				Name:       modelName,
				Model:      modelName,
				ModifiedAt: time.Unix(dm.Created, 0).UTC().Format(time.RFC3339Nano),
				Size:       1337,
				Digest:     "",
				Details: ModelDetails{
					ParentModel:       "",
					Format:            "unknown",
					Family:            "",
					Families:          []string{},
					ParameterSize:     "",
					QuantizationLevel: "",
				},
			}
			tagsResp.Models = append(tagsResp.Models, modelEntry)
		}

		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(tagsResp); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	})

	// Explicit handler for /api/pull: forward to the local Ollama instance.
	http.HandleFunc("/api/pull", func(w http.ResponseWriter, r *http.Request) {
		log.Println("Handling /api/pull")
		forwardToOllama(w, r)
	})

	// Explicit handler for /api/chat.
	// This handler rewrites the URL to /v1/chat/completions, logs the outgoing payload,
	// strips any trailing ":proxy" from the model name in the request payload,
	// intercepts the downstream streaming response, transforms each chunk from OpenAI format
	// to Ollama format (stripping any ":proxy" from the model field), logs both the raw and transformed
	// chunks when debug is enabled, and streams the transformed chunks to the client.
	http.HandleFunc("/api/chat", func(w http.ResponseWriter, r *http.Request) {
		log.Println("Handling /api/chat transformation")
		// Read the original request body.
		bodyBytes, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		r.Body.Close()
		if *debug {
			log.Printf("Outgoing /api/chat payload: %s", string(bodyBytes))
		}

		// Unmarshal and modify the request payload: strip ":proxy" from the model field.
		var payload map[string]interface{}
		if err := json.Unmarshal(bodyBytes, &payload); err == nil {
			if modelVal, ok := payload["model"].(string); ok {
				payload["model"] = strings.TrimSuffix(modelVal, ":proxy")
			}
			// Re-marshal payload.
			bodyBytes, err = json.Marshal(payload)
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
		} else {
			// If unmarshalling fails, continue with the original bytes.
			log.Printf("Warning: could not unmarshal payload for transformation: %v", err)
		}

		// Create a new request to the downstream /v1/chat/completions endpoint.
		newURL := targetUrl.ResolveReference(&url.URL{Path: "/v1/chat/completions"})
		newReq, err := http.NewRequest("POST", newURL.String(), bytes.NewReader(bodyBytes))
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		newReq.Header = r.Header.Clone()
		if *openaiApiKey != "" {
			newReq.Header.Set("Authorization", "Bearer "+*openaiApiKey)
		}

		client := &http.Client{}
		resp, err := client.Do(newReq)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadGateway)
			return
		}
		defer resp.Body.Close()

		// Copy response headers.
		for key, values := range resp.Header {
			for _, value := range values {
				w.Header().Add(key, value)
			}
		}
		w.WriteHeader(resp.StatusCode)

		// Process streaming NDJSON response.
		scanner := bufio.NewScanner(resp.Body)
		for scanner.Scan() {
			line := scanner.Text()
			if *debug {
				log.Printf("Raw downstream chunk: %s", line)
			}
			// Strip off the SSE "data:" prefix if present.
			if strings.HasPrefix(line, "data:") {
				line = strings.TrimSpace(strings.TrimPrefix(line, "data:"))
			}
			// Skip if the line is empty or indicates completion.
			if line == "" || line == "[DONE]" {
				continue
			}
			// Parse the JSON chunk.
			var chunk OpenAIChunk
			if err := json.Unmarshal([]byte(line), &chunk); err != nil {
				log.Printf("Error unmarshalling chunk: %v", err)
				// In case of error, send the raw line.
				w.Write([]byte(line + "\n"))
				continue
			}
			// Transform the chunk into Ollama format.
			var content string
			role := "assistant" // default role
			done := false
			if len(chunk.Choices) > 0 {
				choice := chunk.Choices[0]
				content = choice.Delta.Content
				if choice.Delta.Role != "" {
					role = choice.Delta.Role
				}
				if choice.FinishReason != nil && *choice.FinishReason != "" {
					done = true
				}
			}
			// Strip any ":proxy" from the model name.
			modelName := strings.TrimSuffix(chunk.Model, ":proxy")
			transformed := OllamaChunk{
				Model:     modelName,
				CreatedAt: time.Unix(chunk.Created, 0).UTC().Format(time.RFC3339Nano),
				Message: Message{
					Role:    role,
					Content: content,
				},
				Done: done,
			}
			transformedLine, err := json.Marshal(transformed)
			if err != nil {
				log.Printf("Error marshalling transformed chunk: %v", err)
				w.Write([]byte(line + "\n"))
				continue
			}
			if *debug {
				log.Printf("Transformed chunk: %s", string(transformedLine))
			}
			w.Write(transformedLine)
			w.Write([]byte("\n"))
			if f, ok := w.(http.Flusher); ok {
				f.Flush()
			}
		}
		if err := scanner.Err(); err != nil {
			log.Printf("Scanner error: %v", err)
		}
	})

	// Catch-all handler for any other unknown endpoints.
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		forwardToOllama(w, r)
	})

	log.Printf("Proxy server listening on %s\n- /v1/models & /v1/completions forwarded to %s\n- /api/tags dynamically transformed\n- /api/pull explicitly forwarded to 127.0.0.1:11505\n- /api/chat rewritten and transformed before forwarding to downstream (/v1/chat/completions)\n- Other unknown endpoints are also forwarded", *listenAddr, targetUrl.String())
	if err := http.ListenAndServe(*listenAddr, logMiddleware(*debug, http.DefaultServeMux)); err != nil {
		log.Fatalf("Server failed: %v", err)
	}
}