Consume streaming responses from the Agent API and extract, validate, and display citations in real time as chunks arrive.
This guide shows how to consume streaming responses from the Agent API, extract citations as they arrive, validate source URLs, and build a fully cited output. Streaming is essential for responsive UIs and long-running searches — you can display text and sources progressively instead of waiting for the full response.
The fast-search preset is optimized for quick, citation-rich answers. The model inserts numbered references like [1], [2] in the text, and the corresponding source URLs arrive in the search_results output item. See the Agent API Presets docs for all available presets.
The model inserts numbered references like [1], [2] into the generated text. To build a rich output with clickable links, parse these references and map them to source URLs using the search results.
import re

from perplexity import Perplexity

client = Perplexity()

# Matches inline citation markers such as [1] or [12]; the digits are captured.
_CITATION_RE = re.compile(r"\[(\d+)\]")


def extract_citation_refs(text: str) -> list[int]:
    """Extract all citation reference numbers from text, e.g. [1], [2].

    Returns:
        The distinct reference numbers found in *text*, sorted ascending.
    """
    return sorted({int(num) for num in _CITATION_RE.findall(text)})


def build_cited_output(content: str, search_results: list) -> str:
    """Replace [N] references with markdown links and append a references section.

    Args:
        content: Generated text containing ``[N]`` citation markers.
        search_results: Result objects exposing ``id``, ``url`` and ``title``.

    Returns:
        ``content`` with every marker that has a matching search result turned
        into a markdown link, followed by a "References" list of the cited
        sources. Markers with no matching result are left as plain ``[N]``
        text, and the references section is omitted when nothing resolves.
    """
    # Key by str(id) so the digits captured from the text can be looked up
    # directly, whether the API returns ids as ints or as strings.
    sources = {str(result.id): result for result in search_results}

    def _link(match: re.Match) -> str:
        source = sources.get(match.group(1))
        if source is None:
            # Unknown reference: keep the marker untouched rather than fail.
            return match.group(0)
        return f"[[{match.group(1)}]]({source.url})"

    # A single regex pass only ever looks at the original text, so a URL that
    # happens to contain "[2]" is never rewritten by a later replacement.
    cited_content = _CITATION_RE.sub(_link, content)

    # Append a references section with all cited sources that resolved.
    reference_lines = []
    for ref in extract_citation_refs(content):
        source = sources.get(str(ref))
        if source is not None:
            reference_lines.append(f"- [{ref}] {source.title} — {source.url}\n")
    if reference_lines:
        cited_content += "\n\n---\n**References:**\n" + "".join(reference_lines)

    return cited_content


# Non-streaming request to get content + search results
response = client.responses.create(
    preset="fast-search",
    input="What is CRISPR gene editing and how does it work?",
)

# Extract search results from the response output
content = response.output_text
search_results = []
for item in response.output:
    if item.type == "search_results":
        search_results = item.results
        break

# Build the final output with linked citations
output = build_cited_output(content, search_results)
print(output)
In production systems, you should validate that citation URLs are well-formed and reachable before presenting them to users. This avoids broken links and improves trust in the output.
import asyncio
from typing import Optional
from urllib.parse import urlparse

import aiohttp


def is_valid_url(url: str) -> bool:
    """Check that a URL has a valid structure.

    Returns:
        True when *url* is a string with an ``http``/``https`` scheme and a
        non-empty network location; False otherwise.
    """
    if not isinstance(url, str):
        return False
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for malformed input such as a broken
        # IPv6 host ("http://[oops").
        return False
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)


async def _head_status(session: aiohttp.ClientSession, url: str, timeout: float) -> dict:
    """Send a HEAD request on *session* and summarize the response status."""
    request_timeout = aiohttp.ClientTimeout(total=timeout)
    async with session.head(url, timeout=request_timeout, allow_redirects=True) as resp:
        return {
            "url": url,
            "valid": resp.status < 400,
            "status": resp.status,
        }


async def check_url_reachable(
    url: str,
    timeout: float = 5.0,
    session: Optional[aiohttp.ClientSession] = None,
) -> dict:
    """HEAD-request a URL to check if it's reachable.

    Args:
        url: The URL to probe.
        timeout: Total time budget for the request, in seconds.
        session: Optional shared ``aiohttp.ClientSession``. When omitted, a
            temporary session is created for this single request.

    Returns:
        A dict with ``url`` and ``valid``, plus ``status`` when a response was
        received or ``reason`` when the URL was malformed or the request failed.
    """
    if not is_valid_url(url):
        return {"url": url, "valid": False, "reason": "malformed URL"}
    try:
        if session is None:
            async with aiohttp.ClientSession() as own_session:
                return await _head_status(own_session, url, timeout)
        return await _head_status(session, url, timeout)
    except asyncio.TimeoutError:
        return {"url": url, "valid": False, "reason": "timeout"}
    except Exception as e:
        # Deliberately broad: any per-URL failure (DNS, TLS, connection reset)
        # is reported in the result instead of aborting the whole batch.
        return {"url": url, "valid": False, "reason": str(e)}


async def validate_citations(search_results: list, max_concurrency: int = 10) -> list[dict]:
    """Validate all citation URLs from search results concurrently.

    Args:
        search_results: Result objects exposing a ``url`` attribute.
        max_concurrency: Maximum number of HEAD requests in flight at once.

    Returns:
        One result dict per search result, in the same order as the input.

    Raises:
        ValueError: If ``max_concurrency`` is less than 1.
    """
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")
    if not search_results:
        return []

    limiter = asyncio.Semaphore(max_concurrency)

    # Share one session so connections are pooled instead of paying for a new
    # session per URL.
    async with aiohttp.ClientSession() as session:

        async def _check(url: str) -> dict:
            async with limiter:
                return await check_url_reachable(url, session=session)

        return await asyncio.gather(*(_check(r.url) for r in search_results))


# Usage after getting a response:
# results = asyncio.run(validate_citations(search_results))
# for r in results:
#     status = "OK" if r["valid"] else f"FAILED ({r.get('reason', r.get('status'))})"
#     print(f" {r['url']}: {status}")
Never ask the model to generate source URLs. Always use the search_results output from the API response. Model-generated URLs can be hallucinated. The search results contain verified URLs from real web searches.
For chat UIs, it’s useful to show a live citation counter as text streams in, then render the full reference list once the stream completes.
import os
import re
import sys

from openai import OpenAI

client = OpenAI(
    api_key=os.environ["PERPLEXITY_API_KEY"],
    base_url="https://api.perplexity.ai/v1",
)

# Matches inline citation markers such as [1] or [12]; the digits are captured.
_CITATION_RE = re.compile(r"\[(\d+)\]")

# Characters of already-seen text kept for the next scan. A marker split
# across deltas can leave at most "[" plus its digits at the end of a chunk,
# so a short tail is enough to reassemble it.
_CARRY_CHARS = 16


def stream_with_progress(query: str):
    """Stream a response with a live citation counter.

    Text deltas are echoed to stdout as they arrive. Whenever a new citation
    number appears, a running source count is written inline. After the stream
    ends, a summary of the cited sources is printed.

    Args:
        query: The question to send to the API.

    Returns:
        A ``(full_content, search_results)`` tuple: the concatenated text
        deltas and the most recent search results received from the stream.
    """
    stream = client.responses.create(
        input=query,
        stream=True,
        extra_body={"preset": "fast-search"},
    )

    chunks = []
    search_results = []
    seen_refs = set()
    carry = ""

    for event in stream:
        if event.type == "response.reasoning.search_results":
            search_results = event.results
        elif event.type == "response.output_text.delta":
            chunks.append(event.delta)
            sys.stdout.write(event.delta)
            sys.stdout.flush()

            # Scan only the new delta plus a short tail of earlier text.
            # This still catches a [N] split across chunks without
            # re-scanning the whole response on every delta.
            window = carry + event.delta
            carry = window[-_CARRY_CHARS:]
            new_refs = {int(num) for num in _CITATION_RE.findall(window)} - seen_refs
            if new_refs:
                seen_refs |= new_refs
                sys.stdout.write(f" [📚 {len(seen_refs)} sources]")
                sys.stdout.flush()

    full_content = "".join(chunks)

    # Final summary
    print(f"\n\n{'='*60}")
    print(f"Response complete: {len(search_results)} sources found, {len(seen_refs)} cited")
    print(f"{'='*60}")

    # Build URL map from search results
    url_map = {r["id"]: r for r in search_results}
    for ref_id in sorted(seen_refs):
        if ref_id in url_map:
            r = url_map[ref_id]
            print(f" ✓ [{ref_id}] {r['title']} — {r['url']}")
        else:
            # Cited in the text but absent from the search results: report it
            # without a link rather than silently dropping it.
            print(f" ✗ [{ref_id}] (no matching search result)")

    return full_content, search_results


content, results = stream_with_progress(
    "What are the environmental impacts of lithium mining?"
)
The Agent API returns a search_results output item with rich metadata (id, title, snippet, URL, date) for each source. This is richer than a flat URL list — use it to build source cards, sidebars, or detailed reference sections.
from perplexity import Perplexity

client = Perplexity()

# Issue a plain (non-streaming) call so the complete response structure is visible
response = client.responses.create(
    input="What is the current state of fusion energy research?",
    preset="fast-search",
)
content = response.output_text

# Take the results list from the first "search_results" output item;
# fall back to an empty list when the response carries none.
search_results = next(
    (entry.results for entry in response.output if entry.type == "search_results"),
    [],
)

print("--- Answer ---")
print(content)

print("\n--- Search Results (rich metadata) ---")
for source in search_results:
    # One card per source: id and title first, then the remaining metadata.
    print(f" [{source.id}] {source.title}")
    print(f" URL: {source.url}")
    print(f" Date: {source.date}")
    print(f" Snippet: {source.snippet[:100]}...")
    print()
Each search result includes id, title, url, snippet, and date. The id maps directly to the [N] references in the text. Use this to build rich source cards for your UI.
Use a search-enabled preset like fast-search or pro-search for citation-rich responses. Different presets use different citation formats — fast-search uses [1], while pro-search uses [web:1] — so adjust your citation-parsing pattern to match the preset you use.
Collect search results before processing text. During streaming, response.reasoning.search_results events arrive before text deltas. Buffer them so you have the URL map ready when citations appear.
Use the id field to map citations. Each search result has a numeric id that corresponds to the [N] reference in the text.
Validate URLs before displaying them. Use HEAD requests with timeouts to filter out any unreachable sources.
Never generate your own URLs. Use only the search_results from the API response. Model-generated URLs can be hallucinated.
Handle missing references gracefully. If a [N] reference in the text has no search result with a matching id, display the reference number without a link rather than crashing.
Consider rate limiting for URL validation. If the response includes many sources, validate them with concurrency limits to avoid overwhelming target servers.