Streaming allows you to receive partial responses from the Perplexity API as they are generated, rather than waiting for the complete response. This is particularly useful for:
Real-time user experiences - Display responses as they’re generated
Long responses - Start showing content immediately for lengthy analyses
Interactive applications - Provide immediate feedback to users
Streaming is supported across all Perplexity models including Sonar, Sonar Pro, and reasoning models.
# Stream a chat completion from the Perplexity API via the OpenAI SDK.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.perplexity.ai",
)

# stream=True makes the SDK yield incremental chunks instead of one
# final response object.
stream = client.chat.completions.create(
    model="sonar",
    messages=[{"role": "user", "content": "What is the latest in AI research?"}],
    stream=True,
)

# Each chunk carries a delta with the newly generated tokens; print them
# as they arrive (delta.content is None for bookkeeping chunks).
for chunk in stream:
    delta_text = chunk.choices[0].delta.content
    if delta_text is not None:
        print(delta_text, end="")
The following code snippet streams responses from the Perplexity API using the requests library. Note that you will need to parse the response manually to extract the content, citations, and metadata.
Copy
Ask AI
import requests

# Set up the API endpoint and headers
url = "https://api.perplexity.ai/chat/completions"
headers = {
    "Authorization": "Bearer YOUR_API_KEY",
    "Content-Type": "application/json",
}
payload = {
    "model": "sonar-pro",
    "messages": [
        {"role": "user", "content": "Who are the top 5 tech influencers on X?"}
    ],
    "stream": True,  # Enable streaming for real-time responses
}

# Use a context manager so the connection is always released, and set a
# timeout so an unresponsive server cannot hang the client forever
# (requests blocks indefinitely when no timeout is given).
with requests.post(url, headers=headers, json=payload, stream=True, timeout=60) as response:
    # Fail fast on HTTP errors instead of streaming an error body as if
    # it were model output.
    response.raise_for_status()
    # Process the streaming response (simplified example)
    for line in response.iter_lines():
        if line:
            print(line.decode('utf-8'))
For production use, you should properly parse Server-Sent Events (SSE) format:
Copy
Ask AI
import requests
import json


def stream_with_proper_parsing():
    """Stream a chat completion and parse the Server-Sent Events payload.

    Each SSE line has the form ``data: <json>``; the terminal sentinel is
    ``data: [DONE]``. Content tokens are printed as they arrive.
    """
    url = "https://api.perplexity.ai/chat/completions"
    headers = {
        "Authorization": "Bearer YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "sonar",
        "messages": [{"role": "user", "content": "Explain quantum computing"}],
        "stream": True,
    }

    # timeout keeps a stalled server from blocking forever; the context
    # manager guarantees the underlying connection is released.
    with requests.post(url, headers=headers, json=payload, stream=True, timeout=60) as response:
        # Surface HTTP errors instead of trying to SSE-parse an error body.
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue  # skip SSE keep-alive blank lines
            line = line.decode('utf-8')
            if not line.startswith('data: '):
                continue  # ignore non-data SSE fields (event, id, comments)
            data_str = line[6:]  # Remove 'data: ' prefix
            if data_str == '[DONE]':
                break  # server signals end of stream
            try:
                chunk_data = json.loads(data_str)
            except json.JSONDecodeError:
                continue  # tolerate malformed/partial lines
            content = chunk_data['choices'][0]['delta'].get('content', '')
            if content:
                print(content, end='')


stream_with_proper_parsing()
import requests
import json


def stream_with_requests_metadata():
    """Stream a chat completion, printing content and collecting metadata.

    Parses the Server-Sent Events stream line by line, printing content
    deltas as they arrive and harvesting ``citations``, ``search_results``
    and ``usage`` fields, which typically appear in later chunks.

    Returns:
        tuple: ``(content, metadata)`` — the accumulated response text and
        a dict of any metadata fields seen in the stream.
    """
    url = "https://api.perplexity.ai/chat/completions"
    headers = {
        "Authorization": "Bearer YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "sonar",
        "messages": [{"role": "user", "content": "Explain quantum computing"}],
        "stream": True,
    }

    content = ""
    metadata = {}

    # timeout prevents an unresponsive server from hanging the client;
    # the context manager releases the connection even on error.
    with requests.post(url, headers=headers, json=payload, stream=True, timeout=60) as response:
        # Fail fast on HTTP errors rather than parsing an error body as SSE.
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue  # skip SSE keep-alive blank lines
            line = line.decode('utf-8')
            if not line.startswith('data: '):
                continue
            data_str = line[6:]  # strip 'data: ' prefix
            if data_str == '[DONE]':
                break
            try:
                chunk = json.loads(data_str)
            except json.JSONDecodeError:
                continue  # tolerate malformed/partial lines

            # Process content
            if 'choices' in chunk and chunk['choices'][0]['delta'].get('content'):
                content_piece = chunk['choices'][0]['delta']['content']
                content += content_piece
                print(content_piece, end='', flush=True)

            # Collect metadata (usually present only in later chunks)
            for key in ('citations', 'search_results', 'usage'):
                if key in chunk:
                    metadata[key] = chunk[key]

            # Check if streaming is complete
            if chunk['choices'][0].get('finish_reason'):
                print(f"\n\nMetadata: {metadata}")

    return content, metadata


stream_with_requests_metadata()
Important: Citation metadata may not arrive until later chunks of a stream. If displaying citations immediately is critical to your user experience, consider using non-streaming requests instead.