A lightweight Go SDK that unifies multiple AI providers behind a consistent interface for text and image generation.
- One client for text & image generation across providers
- Provider-agnostic by default, extensible when needed
- Multimodal-first: ordered text + image inputs
- Flexible Configuration: Client, provider and per-request options
- Type-Safe Errors: Typed error codes for predictable error handling
go get github.com/montanaflynn/grail
// Create a provider (automatically uses OPENAI_API_KEY if not provided)
provider, _ := openai.New()
// Create a client
client := grail.NewClient(provider)
// Generate text
res, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{grail.InputText("Create a haiku")},
Output: grail.OutputText(),
})
text, _ := res.Text()
fmt.Println(text)
// Generate image
imgRes, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{grail.InputText("A beautiful sunset")},
Output: grail.OutputImage(grail.ImageSpec{Count: 1}),
})
imgs, _ := imgRes.Images()
os.WriteFile("sunset.png", imgs[0], 0644)
// Generate image with provider-specific options
import "github.com/montanaflynn/grail/providers/gemini"
imgRes2, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{grail.InputText("A landscape photo")},
Output: grail.OutputImage(grail.ImageSpec{Count: 1}),
ProviderOptions: []grail.ProviderOption{
gemini.WithImageAspectRatio(gemini.ImageAspectRatio16_9),
gemini.WithImageSize(gemini.ImageSize2K),
},
})
// Image understanding (text from image)
imgData, _ := os.ReadFile("photo.jpg")
imgInput := grail.InputImage(imgData)
textRes, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{
grail.InputText("Describe this image"),
imgInput,
},
Output: grail.OutputText(),
})
text, _ := textRes.Text()
fmt.Println(text)
// Multimodal image generation (image from text + image)
imgRes3, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{
grail.InputText("Create a variation of this image"),
imgInput,
grail.InputText("but make it more colorful"),
},
Output: grail.OutputImage(grail.ImageSpec{Count: 1}),
})
imgs, _ := imgRes3.Images()
os.WriteFile("variation.png", imgs[0], 0644)
// PDF understanding (text from PDF)
pdfData, _ := os.ReadFile("document.pdf")
pdfRes, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{
grail.InputText("Summarize this document"),
grail.InputPDF(pdfData),
},
Output: grail.OutputText(),
})
text, _ := pdfRes.Text()
fmt.Println(text)
// Model selection: explicit model name
res, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{grail.InputText("Hello")},
Output: grail.OutputText(),
Model: "gpt-4o", // Use this specific model
})
// Model selection: tier-based (provider picks the right model)
res, _ := client.Generate(ctx, grail.Request{
Inputs: []grail.Input{grail.InputText("Hello")},
Output: grail.OutputText(),
Tier: grail.ModelTierFast, // Let provider pick the fast text model
})
// Query available models
models, _ := client.ListModels(ctx)
for _, m := range models {
fmt.Printf("%s: role=%s tier=%s\n", m.Name, m.Role, m.Tier)
}
// Get specific model by role and tier
model, _ := client.GetModel(ctx, grail.ModelRoleText, grail.ModelTierBest)
fmt.Printf("Best text model: %s\n", model.Name)
See the examples/ directory for complete, runnable examples:
- Simple Text: Minimal text generation
- Text Generation: Text generation with provider selection
- Text to Image: Image generation from text prompts
- Image Understanding: Text generation from images
- PDF Understanding: Text generation from PDF documents
- PDF to Image: Image generation from PDF documents (e.g., infographics)
- OpenAI Image Options: Provider-specific image options (format, background, size, moderation, compression)
- Gemini Image Options: Provider-specific image options (aspect ratio, size)
import "github.com/montanaflynn/grail/providers/openai"
// Basic usage (uses OPENAI_API_KEY env var)
provider, err := openai.New()
// With options
provider, err := openai.New(
openai.WithAPIKey("sk-..."),
openai.WithTextModel("gpt-4"),
openai.WithImageModel("gpt-image-1"),
openai.WithLogger(logger),
)
Options:
- WithAPIKey(key string) - Set API key explicitly
- WithAPIKeyFromEnv(env string) - Read API key from environment variable
- WithTextModel(model string) - Override default text model (default: gpt-5.2)
- WithImageModel(model string) - Override default image model (default: gpt-image-1)
- WithLogger(logger *slog.Logger) - Set custom logger
Image Options:
- WithImageFormat(format ImageFormat) - Set output format (png, jpeg, webp)
- WithImageBackground(bg ImageBackground) - Set background (auto, transparent, opaque)
- WithImageSize(size ImageSize) - Set image size (auto, 1024x1024, 1536x1024, 1024x1536, 256x256, 512x512, 1792x1024, 1024x1792)
- WithImageModeration(moderation ImageModeration) - Set moderation level (auto, low)
- WithImageOutputCompression(compression int) - Set output compression quality (0-100)
Text Options:
- TextOptions{Model, MaxTokens, Temperature, TopP, SystemPrompt} - Provider-specific text generation options
import "github.com/montanaflynn/grail/providers/gemini"
// Basic usage (uses GEMINI_API_KEY env var)
provider, err := gemini.New(ctx)
// With options
provider, err := gemini.New(ctx,
gemini.WithAPIKey("..."),
gemini.WithTextModel("gemini-3-flash-preview"),
gemini.WithImageModel("gemini-2.5-flash-image"),
gemini.WithLogger(logger),
)
Options:
- WithAPIKey(key string) - Set API key explicitly
- WithAPIKeyFromEnv(env string) - Read API key from environment variable
- WithTextModel(model string) - Override default text model (default: gemini-3-flash-preview)
- WithImageModel(model string) - Override default image model (default: gemini-2.5-flash-image)
- WithLogger(logger *slog.Logger) - Set custom logger
Image Options:
- WithImageAspectRatio(ratio ImageAspectRatio) - Set aspect ratio (1:1, 16:9, etc.)
- WithImageSize(size ImageSize) - Set image size (1K, 2K, 4K)
Text Options:
- TextOptions{Model, MaxTokens, Temperature, TopP, SystemPrompt} - Provider-specific text generation options
# Run tests
go test ./...
# Format code
go fmt ./...
# Run linter
go vet ./...
# Or use make
make format
make lint
make test
make # runs all
See CONTRIBUTING.md for contribution guidelines.
See CHANGELOG.md for a detailed list of changes.
MIT License - see LICENSE for details.