
This guide shows how to use Perplexity’s Agent API as a unified gateway to models from OpenAI, Anthropic, Google, xAI, and Perplexity — all through a single API key with zero markup. You will learn how to route to specific providers, build fallback chains for high availability, compare responses across models, and dynamically discover available models via the /v1/models endpoint.
Perplexity passes through third-party model usage at cost with no markup. You pay only what the provider charges, consolidated on a single bill. See Models for the full list.

Prerequisites

Install the Perplexity SDK:
pip install perplexityai
If you don’t have an API key yet, navigate to the API Keys tab in the API Portal and generate a new key.
Then export your API key as an environment variable:
export PERPLEXITY_API_KEY="your-api-key"
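
The SDK reads PERPLEXITY_API_KEY from the environment by default. If you need to supply the key programmatically instead, the client constructor should accept it directly; this is an assumption based on the SDK’s standard client pattern, so verify against the SDK reference:

import os
from perplexity import Perplexity

# Assumption: the client accepts an explicit api_key argument and falls
# back to the PERPLEXITY_API_KEY environment variable when it is omitted.
client = Perplexity(api_key=os.environ["PERPLEXITY_API_KEY"])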

Why Multi-Provider?

| Benefit | Details |
| --- | --- |
| Single API key | Access OpenAI, Anthropic, Google, xAI, and Perplexity models without separate accounts |
| Zero markup | Third-party model costs are passed through at provider pricing |
| Unified format | Same request/response format across all providers |
| Built-in fallback | The models parameter tries providers in order until one succeeds |
| Tool compatibility | web_search, fetch_url, and custom functions work with all models |

Available Models

Use the /v1/models endpoint to discover all available models dynamically.
import requests
import os

resp = requests.get(
    "https://api.perplexity.ai/v1/models",
    headers={"Authorization": f"Bearer {os.environ['PERPLEXITY_API_KEY']}"}
)
models = resp.json()["data"]

# Group by provider
providers = {}
for model in models:
    provider = model["id"].split("/")[0] if "/" in model["id"] else "perplexity"
    providers.setdefault(provider, []).append(model["id"])

for provider, model_ids in sorted(providers.items()):
    print(f"\n{provider}:")
    for mid in model_ids:
        print(f"  {mid}")
Key models across providers:
| Provider | Models | Best For |
| --- | --- | --- |
| OpenAI | openai/gpt-5.4, openai/gpt-5.1, openai/gpt-5-mini | General reasoning, code, analysis |
| Anthropic | anthropic/claude-opus-4-6, anthropic/claude-sonnet-4-6, anthropic/claude-haiku-4-5 | Long context, instruction following |
| Google | google/gemini-3.1-flash-lite, google/gemini-3.1-pro-preview | Multimodal, fast inference |
| xAI | xai/grok-4-1-fast-non-reasoning | Fast responses, conversational |
| Perplexity | perplexity/sonar | Search-grounded answers |

Routing to a Specific Provider

Use the model parameter to target a specific provider’s model.
from perplexity import Perplexity

client = Perplexity()

# Route to OpenAI
openai_response = client.responses.create(
    model="openai/gpt-5.4",
    input="Explain the difference between TCP and UDP.",
    max_output_tokens=500,
)
print(f"OpenAI: {openai_response.output_text[:200]}...")

# Route to Anthropic
anthropic_response = client.responses.create(
    model="anthropic/claude-sonnet-4-6",
    input="Explain the difference between TCP and UDP.",
    max_output_tokens=500,
)
print(f"Anthropic: {anthropic_response.output_text[:200]}...")

# Route to Google
google_response = client.responses.create(
    model="google/gemini-3.1-flash-lite",
    input="Explain the difference between TCP and UDP.",
    max_output_tokens=500,
)
print(f"Google: {google_response.output_text[:200]}...")

Model Fallback Chains

The models parameter accepts an array of up to 5 models. The API tries each in order and returns the first successful response. This is ideal for production systems where availability matters.
from perplexity import Perplexity

client = Perplexity()

# Primary: OpenAI, fallback: Anthropic, then Google
response = client.responses.create(
    models=[
        "openai/gpt-5.4",
        "anthropic/claude-sonnet-4-6",
        "google/gemini-3.1-flash-lite",
    ],
    input="What are the key principles of zero-trust security?",
    tools=[{"type": "web_search"}],
)

print(f"Model used: {response.model}")
print(f"Response: {response.output_text[:300]}...")
Order your fallback chain deliberately: put your primary model first, followed by alternatives in decreasing order of preference. The API returns the response from the first model that succeeds.
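Different workloads justify different orderings of the same pool. As a minimal sketch, assuming a latency-sensitive path and a quality-sensitive path (the groupings below are illustrative, not recommendations):

# Illustrative orderings -- tune to your own latency and quality priorities.
FAST_FIRST = [
    "xai/grok-4-1-fast-non-reasoning",  # lowest latency first
    "google/gemini-3.1-flash-lite",
    "openai/gpt-5-mini",
]
QUALITY_FIRST = [
    "openai/gpt-5.4",  # strongest general model first
    "anthropic/claude-sonnet-4-6",
    "google/gemini-3.1-pro-preview",
]

# Latency-sensitive request: try the fast chain.
response = client.responses.create(
    models=FAST_FIRST,
    input="Give me a one-sentence summary of HTTP/3.",
    max_output_tokens=200,
)
print(f"Model used: {response.model}")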

Comparing Responses Across Providers

Send the same prompt to multiple models and compare quality, latency, and cost.
import time
from perplexity import Perplexity

client = Perplexity()

MODELS = [
    "openai/gpt-5.4",
    "anthropic/claude-sonnet-4-6",
    "google/gemini-3.1-flash-lite",
    "xai/grok-4-1-fast-non-reasoning",
    "perplexity/sonar",
]

prompt = "What are the three most important design patterns in microservices architecture?"

results = []
for model in MODELS:
    print(f"Querying {model}...")
    start = time.time()
    try:
        response = client.responses.create(
            model=model,
            input=prompt,
            max_output_tokens=800,
        )
        elapsed = time.time() - start
        results.append({
            "model": model,
            "latency": round(elapsed, 2),
            "tokens": response.usage.output_tokens,
            "cost": response.usage.cost.total_cost,
            "preview": response.output_text[:150].replace("\n", " "),
        })
    except Exception as e:
        results.append({"model": model, "error": str(e)})

# Display comparison
print(f"\n{'Model':<42} {'Latency':>8} {'Tokens':>7} {'Cost':>10}")
print("-" * 70)
for r in results:
    if "error" in r:
        print(f"{r['model']:<42} {'ERROR':>8}")
    else:
        print(f"{r['model']:<42} {r['latency']:>7.2f}s {r['tokens']:>7} ${r['cost']:.5f}")

Task-Based Model Routing

Different tasks suit different models. Build a router that picks the best model for each task type.
from perplexity import Perplexity

client = Perplexity()

# Route based on task characteristics
MODEL_ROUTING = {
    "code": "anthropic/claude-sonnet-4-6",      # Strong at code generation
    "analysis": "openai/gpt-5.4",               # Strong at structured analysis
    "fast_chat": "xai/grok-4-1-fast-non-reasoning",  # Lowest latency
    "research": "perplexity/sonar",              # Built-in search grounding
    "multimodal": "google/gemini-3.1-flash-lite",  # Vision + speed
}


def route_request(task_type: str, prompt: str, **kwargs) -> dict:
    """Route a request to the optimal model based on task type."""
    model = MODEL_ROUTING.get(task_type)
    if not model:
        raise ValueError(f"Unknown task type: {task_type}. Options: {list(MODEL_ROUTING.keys())}")

    # Add web_search for research tasks
    tools = kwargs.pop("tools", None)
    if task_type == "research" and tools is None:
        tools = [{"type": "web_search"}]

    response = client.responses.create(
        model=model,
        input=prompt,
        tools=tools,
        **kwargs,
    )

    return {
        "model": response.model,
        "task_type": task_type,
        "output": response.output_text,
        "cost": response.usage.cost.total_cost,
    }


# Code task → Anthropic
code_result = route_request(
    "code",
    "Write a Python function that implements binary search on a sorted list.",
    max_output_tokens=500,
)
print(f"[{code_result['task_type']}] via {code_result['model']} (${code_result['cost']:.5f})")
print(code_result["output"][:200])

# Research task → Perplexity Sonar
research_result = route_request(
    "research",
    "What were the key announcements at the latest WWDC?",
)
print(f"\n[{research_result['task_type']}] via {research_result['model']} (${research_result['cost']:.5f})")
print(research_result["output"][:200])
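
Task routing composes with fallback chains: map each task type to a list of models rather than a single ID and pass it through the models parameter. A sketch under the assumption that every task type tolerates a fallback (the chain contents are illustrative):

# Each task type maps to a fallback chain instead of a single model.
ROUTING_CHAINS = {
    "code": ["anthropic/claude-sonnet-4-6", "openai/gpt-5.4"],
    "fast_chat": ["xai/grok-4-1-fast-non-reasoning", "google/gemini-3.1-flash-lite"],
}

response = client.responses.create(
    models=ROUTING_CHAINS["code"],
    input="Write a Python function that reverses a singly linked list.",
    max_output_tokens=500,
)
print(f"Handled by: {response.model}")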

Combining Multi-Provider with Tools

All models accessed through the Agent API support the same tool interface — web_search, fetch_url, and custom functions work identically regardless of provider.
from perplexity import Perplexity
import json

client = Perplexity()

tools = [
    {"type": "web_search"},
    {
        "type": "function",
        "name": "calculate_roi",
        "description": "Calculate return on investment given initial cost and revenue.",
        "parameters": {
            "type": "object",
            "properties": {
                "initial_cost": {"type": "number", "description": "Initial investment in USD"},
                "annual_revenue": {"type": "number", "description": "Expected annual revenue in USD"},
                "years": {"type": "integer", "description": "Number of years"},
            },
            "required": ["initial_cost", "annual_revenue", "years"],
        },
    },
]


def calculate_roi(initial_cost: float, annual_revenue: float, years: int) -> dict:
    total_revenue = annual_revenue * years
    roi = ((total_revenue - initial_cost) / initial_cost) * 100
    return {"roi_percent": round(roi, 2), "total_revenue": total_revenue, "net_profit": total_revenue - initial_cost}


# Use Anthropic Claude with web search + custom function
response = client.responses.create(
    model="anthropic/claude-sonnet-4-6",
    tools=tools,
    input=(
        "Research the average cost to deploy a 100kW commercial solar installation in 2026, "
        "then calculate the 10-year ROI assuming $18,000 annual energy savings."
    ),
)

# Handle function calls: loop until the model stops requesting tools
while any(item.type == "function_call" for item in response.output):
    # Echo the model's output items back, then append our function results
    next_input = [item.model_dump() for item in response.output]
    for item in response.output:
        if item.type == "function_call":
            # Run the requested function with the model-supplied arguments
            args = json.loads(item.arguments)
            result = calculate_roi(**args)
            next_input.append({
                "type": "function_call_output",
                "call_id": item.call_id,
                "output": json.dumps(result),
            })
    # Send the function results back so the model can finish its answer
    response = client.responses.create(
        model="anthropic/claude-sonnet-4-6",
        tools=tools,
        input=next_input,
    )

print(response.output_text)

Dynamic Model Discovery

Build applications that automatically adapt to newly available models by querying the /v1/models endpoint at startup.
import requests
import os
from perplexity import Perplexity

client = Perplexity()


def discover_models() -> dict[str, list[str]]:
    """Fetch available models and group by provider."""
    resp = requests.get(
        "https://api.perplexity.ai/v1/models",
        headers={"Authorization": f"Bearer {os.environ['PERPLEXITY_API_KEY']}"},
    )
    resp.raise_for_status()
    models = resp.json()["data"]

    providers = {}
    for model in models:
        provider = model["id"].split("/")[0] if "/" in model["id"] else "perplexity"
        providers.setdefault(provider, []).append(model["id"])
    return providers


def build_fallback_chain(providers: dict[str, list[str]], preferred_order: list[str]) -> list[str]:
    """Build a fallback chain from available models, picking one per provider."""
    chain = []
    for provider in preferred_order:
        if provider in providers and providers[provider]:
            chain.append(providers[provider][0])  # Pick first available model
    return chain[:5]  # Max 5 models in fallback chain


# Discover and build chain
available = discover_models()
print(f"Available providers: {list(available.keys())}")

chain = build_fallback_chain(available, ["openai", "anthropic", "google", "xai", "perplexity"])
print(f"Fallback chain: {chain}")

# Use the dynamic chain
response = client.responses.create(
    models=chain,
    input="Summarize the latest developments in AI regulation worldwide.",
    tools=[{"type": "web_search"}],
)
print(f"\nModel used: {response.model}")
print(response.output_text[:300])
The /v1/models endpoint returns the current list of supported models. Query it at application startup or cache it with a TTL to stay current as new models are added.
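
One way to implement that caching, as a minimal sketch wrapping the discover_models helper defined above (the 15-minute TTL is an arbitrary choice):

import time

_MODEL_CACHE: dict = {"providers": None, "fetched_at": 0.0}
CACHE_TTL_SECONDS = 900  # 15 minutes -- an arbitrary choice


def cached_providers() -> dict[str, list[str]]:
    """Return the provider-to-models mapping, refetching at most once per TTL."""
    now = time.time()
    if _MODEL_CACHE["providers"] is None or now - _MODEL_CACHE["fetched_at"] > CACHE_TTL_SECONDS:
        _MODEL_CACHE["providers"] = discover_models()
        _MODEL_CACHE["fetched_at"] = now
    return _MODEL_CACHE["providers"]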

Next Steps

- Models: full list of available models, capabilities, and pricing.
- Model Fallback: deep dive into fallback chain configuration and behavior.
- Model Comparison Example: CLI tool for benchmarking models side-by-side.
- Presets: use presets like pro-search for optimized defaults.