Overview

Generate AI responses with web-grounded knowledge using either the Python or TypeScript SDKs. Both SDKs provide full support for chat completions, streaming responses, async operations, and comprehensive error handling.
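
Install the SDK and construct a client before running the examples below. A minimal setup sketch (the pip package name and the explicit api_key argument are assumptions to verify against the SDK README; by default the client reads the PERPLEXITY_API_KEY environment variable):
# Assumed install command; confirm the exact package name in the SDK README:
#   pip install perplexityai
import os

from perplexity import Perplexity

# Assumption: an explicit api_key overrides the PERPLEXITY_API_KEY
# environment variable the client otherwise reads by default.
client = Perplexity(api_key=os.environ["PERPLEXITY_API_KEY"])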

Quick Start

from perplexity import Perplexity

client = Perplexity()

completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Tell me about the latest developments in AI",
        }
    ],
    model="sonar",
)

print(f"Response: {completion.choices[0].message.content}")

Features

Model Selection

Choose from different Sonar models based on your needs:
# Standard Sonar model for general queries
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is quantum computing?"}],
    model="sonar"
)

# Sonar Pro for more complex queries
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Analyze the economic implications of renewable energy adoption"}],
    model="sonar-pro"
)

# Sonar Reasoning for complex analytical tasks
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Solve this complex mathematical problem step by step"}],
    model="sonar-reasoning"
)

Conversation Context

Build multi-turn conversations with context:
messages = [
    {"role": "system", "content": "You are a helpful research assistant."},
    {"role": "user", "content": "What are the main causes of climate change?"},
    {"role": "assistant", "content": "The main causes of climate change include..."},
    {"role": "user", "content": "What are some potential solutions?"}
]

completion = client.chat.completions.create(
    messages=messages,
    model="sonar"
)
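
To continue the thread, append the assistant's reply before adding the next user turn; a minimal sketch reusing the call above (the follow-up question is illustrative):
# Append the assistant's reply so the next turn carries full context.
messages.append({
    "role": "assistant",
    "content": completion.choices[0].message.content,
})
messages.append({"role": "user", "content": "Which of those solutions scales fastest?"})

follow_up = client.chat.completions.create(
    messages=messages,
    model="sonar"
)
print(follow_up.choices[0].message.content)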

Web Search Options

Control how the model searches and uses web information:
completion = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "What are the latest developments in renewable energy?"}
    ],
    model="sonar",
    web_search_options={
        "search_recency_filter": "week",  # Focus on recent results
        "search_domain_filter": ["energy.gov", "iea.org", "irena.org"],  # Trusted sources
        "max_search_results": 10
    }
)

Response Customization

Customize response format and behavior:
completion = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "Explain machine learning in simple terms"}
    ],
    model="sonar",
    max_tokens=500,  # Limit response length
    temperature=0.7,  # Control creativity
    top_p=0.9,       # Control diversity
    presence_penalty=0.1,  # Reduce repetition
    frequency_penalty=0.1
)

Streaming Responses

Get real-time response streaming for better user experience:
stream = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "Write a summary of recent AI breakthroughs"}
    ],
    model="sonar",
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
For comprehensive streaming documentation, including metadata collection, error handling, advanced patterns, and raw HTTP examples, see the Streaming Guide.
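
Streaming also works with the async client; a sketch assuming AsyncPerplexity mirrors the sync streaming interface (the async client itself appears again under Concurrent Operations below):
import asyncio

from perplexity import AsyncPerplexity

async def stream_summary(query: str) -> None:
    # Assumption: the async client mirrors the sync interface,
    # yielding chunks via `async for` when stream=True.
    client = AsyncPerplexity()
    stream = await client.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="sonar",
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)

asyncio.run(stream_summary("Write a summary of recent AI breakthroughs"))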

Async Chat Completions

For long-running or batch processing tasks, use the async endpoints:

Creating Async Requests

# Start an async completion request
async_request = client.async_.chat.completions.create(
    messages=[
        {"role": "user", "content": "Write a comprehensive analysis of renewable energy trends"}
    ],
    model="sonar-pro",
    max_tokens=2000
)

print(f"Request submitted with ID: {async_request.request_id}")
print(f"Status: {async_request.status}")

Checking Request Status

# Check the status of an async request
request_id = "req_123abc456def789"
status = client.async_.chat.completions.get(request_id)

print(f"Status: {status.status}")
if status.status == "completed":
    print(f"Response: {status.result.choices[0].message.content}")
elif status.status == "failed":
    print(f"Error: {status.error}")

Listing Async Requests

# List recent async requests
requests = client.async_.chat.completions.list(
    limit=10,
    status="completed"
)

for request in requests.data:
    print(f"ID: {request.id}, Status: {request.status}")

Advanced Usage

Error Handling

Handle chat-specific errors:
import perplexity

try:
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": "What is AI?"}],
        model="sonar",
        max_tokens=50000  # Exceeds limit
    )
except perplexity.BadRequestError as e:
    print(f"Invalid request parameters: {e}")
except perplexity.RateLimitError:
    print("Rate limit exceeded, please retry later")
except perplexity.APIStatusError as e:
    print(f"API error: {e.status_code}")

Custom Instructions

Use system messages for consistent behavior:
system_prompt = """You are an expert research assistant specializing in technology and science. 
Always provide well-sourced, accurate information and cite your sources. 
Format your responses with clear headings and bullet points when appropriate."""

completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Explain quantum computing applications"}
    ],
    model="sonar-pro"
)

Concurrent Operations

Handle multiple conversations efficiently:
import asyncio

from perplexity import AsyncPerplexity

async def handle_multiple_chats(user_messages):
    client = AsyncPerplexity()

    tasks = [
        client.chat.completions.create(
            messages=[{"role": "user", "content": msg}],
            model="sonar"
        )
        for msg in user_messages
    ]

    return await asyncio.gather(*tasks, return_exceptions=True)
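
Because return_exceptions=True, failed requests come back as exception objects instead of raising; a usage sketch:
results = asyncio.run(handle_multiple_chats([
    "What is quantum computing?",
    "Summarize this week's renewable energy news",
]))

for result in results:
    if isinstance(result, Exception):
        print(f"Request failed: {result}")
    else:
        print(result.choices[0].message.content)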

Best Practices

1. Use appropriate models

Choose the right model for your use case: sonar for general queries, sonar-pro for complex analysis, sonar-reasoning for analytical tasks.
# For quick factual queries
simple_query = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="sonar"
)

# For complex analysis
complex_query = client.chat.completions.create(
    messages=[{"role": "user", "content": "Analyze the economic impact of AI on employment"}],
    model="sonar-pro"
)

2. Implement streaming for long responses

Use streaming for better user experience with lengthy responses.
def stream_response(query):
    stream = client.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="sonar",
        stream=True
    )
    
    response = ""
    for chunk in stream:
        if chunk.choices[0].delta.content:
            content = chunk.choices[0].delta.content
            print(content, end="", flush=True)
            response += content
    
    return response

3. Handle rate limits gracefully

Implement exponential backoff for production applications.
import time
import random

def chat_with_retry(messages, max_retries=3):
    for attempt in range(max_retries):
        try:
            return client.chat.completions.create(
                messages=messages,
                model="sonar"
            )
        except perplexity.RateLimitError:
            if attempt == max_retries - 1:
                raise
            delay = (2 ** attempt) + random.uniform(0, 1)
            time.sleep(delay)

4. Optimize for specific use cases

Configure parameters based on your application’s needs.
# For factual Q&A
factual_config = {
    "temperature": 0.1,  # Low creativity for accuracy
    "top_p": 0.9,
    "search_recency_filter": "month"
}

# For creative writing
creative_config = {
    "temperature": 0.8,  # Higher creativity
    "top_p": 0.95,
    "presence_penalty": 0.1,
    "frequency_penalty": 0.1
}

# Usage
factual_response = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is the current inflation rate?"}],
    model="sonar",
    **factual_config
)
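
# The creative profile is applied the same way (the prompt is illustrative only):
creative_response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Write a short story about a city powered by tides"}],
    model="sonar",
    **creative_config
)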

Resources