Analytics Multi-Agente com AutoGen e DataSpoc
Um agente que pode consultar dados é útil. Três agentes que colaboram — um buscando dados, um analisando padrões, um escrevendo relatórios — isso é um time. O AutoGen torna a orquestração multi-agente simples. O DataSpoc Lens dá a esses agentes um data lake para trabalhar.
A Arquitetura
User Request: "Analyze Q1 sales performance and write a report" ↓┌─────────────────────────────────────────┐│ AutoGen Group Chat ││ ││ DataQuerier → runs SQL via LensClient ││ ↓ ││ DataAnalyst → interprets + visualizes ││ ↓ ││ ReportWriter → generates markdown │└─────────────────────────────────────────┘ ↓Final Report (markdown + charts)Instalar Dependências
pip install dataspoc-lens pyautogen matplotlib pandasDefinir os Agentes
Cada agente tem um papel distinto e acesso a ferramentas específicas:
import jsonimport autogenfrom dataspoc_lens import LensClient
lens = LensClient()
# LLM configurationllm_config = { "config_list": [{"model": "gpt-4o", "api_key": "sk-..."}], "temperature": 0,}Agente 1: DataQuerier
O trabalho deste agente é buscar dados do lake. Ele tem acesso aos métodos do LensClient:
data_querier = autogen.AssistantAgent( name="DataQuerier", system_message="""You are a data retrieval specialist. Your job is to:1. List available tables when asked2. Check table schemas before writing queries3. Write and execute SQL queries to fetch the data other agents need4. Return clean, well-formatted results
Always check the schema before writing SQL. Never guess column names.When returning data, include column headers and format numbers clearly.""", llm_config=llm_config,)
@data_querier.register_for_execution()@data_querier.register_for_llm(description="List all tables in the data lake")def list_tables() -> str: tables = lens.tables() return json.dumps({"tables": tables})
@data_querier.register_for_execution()@data_querier.register_for_llm(description="Get schema for a table")def get_schema(table: str) -> str: schema = lens.schema(table) return json.dumps({"table": table, "schema": schema})
@data_querier.register_for_execution()@data_querier.register_for_llm(description="Execute a SQL query against the data lake")def run_query(sql: str) -> str: df = lens.query(sql) return json.dumps({ "columns": list(df.columns), "rows": df.head(100).to_dict(orient="records"), "total_rows": len(df), })Agente 2: DataAnalyst
Este agente interpreta dados e gera visualizações:
data_analyst = autogen.AssistantAgent( name="DataAnalyst", system_message="""You are a data analyst. Your job is to:1. Interpret query results from the DataQuerier2. Identify trends, anomalies, and key insights3. Generate Python code for charts when visualization helps4. Provide statistical context (growth rates, comparisons, averages)
When you receive data, analyze it thoroughly before passing to the ReportWriter.Use specific numbers. Calculate percentages. Highlight what matters.""", llm_config=llm_config,)
@data_analyst.register_for_execution()@data_analyst.register_for_llm(description="Ask a natural language question about the data")def ask_data(question: str) -> str: answer = lens.ask(question) return json.dumps({"answer": str(answer)})
@data_analyst.register_for_execution()@data_analyst.register_for_llm(description="Generate and save a chart from data")def create_chart( chart_type: str, title: str, x_data: list, y_data: list, x_label: str, y_label: str, filename: str,) -> str: import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 6))
if chart_type == "bar": ax.bar(x_data, y_data) elif chart_type == "line": ax.plot(x_data, y_data, marker="o") elif chart_type == "pie": ax.pie(y_data, labels=x_data, autopct="%1.1f%%")
ax.set_title(title) if chart_type != "pie": ax.set_xlabel(x_label) ax.set_ylabel(y_label) plt.xticks(rotation=45, ha="right")
plt.tight_layout() plt.savefig(filename, dpi=150) plt.close() return json.dumps({"chart_saved": filename})Agente 3: ReportWriter
Este agente produz o relatório final em markdown:
report_writer = autogen.AssistantAgent( name="ReportWriter", system_message="""You are a business report writer. Your job is to:1. Take insights from the DataAnalyst and write a clear markdown report2. Structure the report with executive summary, key findings, and recommendations3. Include specific numbers and reference any charts generated4. Write for a business audience — no jargon, clear takeaways
Output the report in markdown format. Start with an executive summary.Use tables for comparative data. End with actionable recommendations.""", llm_config=llm_config,)Configurar o Group Chat
O GroupChat do AutoGen permite que os agentes conversem entre si em sequência:
# User proxy to initiate the conversationuser_proxy = autogen.UserProxyAgent( name="UserProxy", human_input_mode="NEVER", # fully autonomous max_consecutive_auto_reply=0, code_execution_config=False,)
# Create the group chatgroupchat = autogen.GroupChat( agents=[user_proxy, data_querier, data_analyst, report_writer], messages=[], max_round=15, speaker_selection_method="auto",)
manager = autogen.GroupChatManager( groupchat=groupchat, llm_config=llm_config,)Executar o Time
user_proxy.initiate_chat( manager, message="""Analyze Q1 2026 sales performance. I need:1. Total revenue by month (Jan, Feb, Mar)2. Top 5 regions by revenue3. Month-over-month growth rates4. A bar chart of revenue by region5. A final report with executive summary and recommendations""",)O Que Acontece Durante a Execução
Os agentes colaboram através do group chat. Aqui está um trace simplificado:
UserProxy → "Analyze Q1 2026 sales performance..."
DataQuerier → list_tables() Result: ["curated_sales", "curated_customers", "gold_revenue", ...]
DataQuerier → get_schema("curated_sales") Result: {sale_date: DATE, amount: DOUBLE, region: VARCHAR, ...}
DataQuerier → run_query(""" SELECT DATE_TRUNC('month', sale_date) AS month, SUM(amount) AS revenue, COUNT(*) AS orders FROM curated_sales WHERE sale_date >= '2026-01-01' AND sale_date < '2026-04-01' GROUP BY month ORDER BY month""") Result: [{month: "2026-01", revenue: 3200000, orders: 12400}, ...]
DataQuerier → run_query(""" SELECT region, SUM(amount) AS revenue FROM curated_sales WHERE sale_date >= '2026-01-01' AND sale_date < '2026-04-01' GROUP BY region ORDER BY revenue DESC LIMIT 5""") Result: [{region: "North America", revenue: 4100000}, ...]
DataAnalyst → "Monthly revenue shows consistent growth: Jan: $3.2M, Feb: $3.5M (+9.4%), Mar: $3.8M (+8.6%). Q1 total: $10.5M. North America leads at $4.1M (39%)."
DataAnalyst → create_chart( chart_type="bar", title="Q1 2026 Revenue by Region", x_data=["North America", "Europe", "APAC", "LATAM", "MEA"], y_data=[4100000, 2900000, 1800000, 1200000, 500000], x_label="Region", y_label="Revenue ($)", filename="q1_revenue_by_region.png") Result: {"chart_saved": "q1_revenue_by_region.png"}
ReportWriter → [generates full markdown report]O Relatório Gerado
O ReportWriter produz algo assim:
# Q1 2026 Sales Performance Report
## Executive Summary
Q1 2026 delivered $10.5M in total revenue across 38,200 orders,representing 18.2% growth over Q4 2025. All three months showedpositive month-over-month growth, with a consistent 8-9% monthlyincrease.
## Monthly Performance
| Month | Revenue | Orders | MoM Growth ||-------|---------|--------|------------|| January | $3,200,000 | 12,400 | — || February | $3,500,000 | 13,100 | +9.4% || March | $3,800,000 | 12,700 | +8.6% |
## Regional Breakdown

| Region | Revenue | Share ||--------|---------|-------|| North America | $4,100,000 | 39.0% || Europe | $2,900,000 | 27.6% || Asia Pacific | $1,800,000 | 17.1% || Latin America | $1,200,000 | 11.4% || Middle East & Africa | $500,000 | 4.8% |
## Recommendations
1. **Double down on APAC** — 17% share with highest growth rate suggests untapped potential2. **Investigate March order count** — revenue grew but orders dipped, indicating higher average order value3. **Set Q2 target at $12M** — maintaining 8% MoM growth trajectoryScript Completo Funcional
#!/usr/bin/env python3"""Multi-agent analytics team using AutoGen + DataSpoc Lens."""
import jsonimport autogenfrom dataspoc_lens import LensClient
lens = LensClient()
llm_config = { "config_list": [{"model": "gpt-4o", "api_key": "sk-..."}], "temperature": 0,}
# --- Agent Definitions ---
data_querier = autogen.AssistantAgent( name="DataQuerier", system_message="You fetch data from the lake. Always check schemas before querying. Return clean results.", llm_config=llm_config,)
data_analyst = autogen.AssistantAgent( name="DataAnalyst", system_message="You analyze data: identify trends, calculate growth rates, create charts. Use specific numbers.", llm_config=llm_config,)
report_writer = autogen.AssistantAgent( name="ReportWriter", system_message="You write markdown reports. Executive summary, key findings, tables, recommendations.", llm_config=llm_config,)
user_proxy = autogen.UserProxyAgent( name="UserProxy", human_input_mode="NEVER", max_consecutive_auto_reply=0, code_execution_config=False,)
# --- Tool Registration ---
@data_querier.register_for_execution()@data_querier.register_for_llm(description="List all tables")def list_tables() -> str: return json.dumps({"tables": lens.tables()})
@data_querier.register_for_execution()@data_querier.register_for_llm(description="Get table schema")def get_schema(table: str) -> str: return json.dumps({"schema": lens.schema(table)})
@data_querier.register_for_execution()@data_querier.register_for_llm(description="Run SQL query")def run_query(sql: str) -> str: df = lens.query(sql) return json.dumps({ "columns": list(df.columns), "rows": df.head(100).to_dict(orient="records"), "total_rows": len(df), })
@data_analyst.register_for_execution()@data_analyst.register_for_llm(description="Ask natural language question about data")def ask_data(question: str) -> str: return json.dumps({"answer": str(lens.ask(question))})
@data_analyst.register_for_execution()@data_analyst.register_for_llm(description="Create a chart")def create_chart(chart_type: str, title: str, x_data: list, y_data: list, x_label: str, y_label: str, filename: str) -> str: import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 6)) if chart_type == "bar": ax.bar(x_data, y_data) elif chart_type == "line": ax.plot(x_data, y_data, marker="o") ax.set_title(title) ax.set_xlabel(x_label) ax.set_ylabel(y_label) plt.xticks(rotation=45, ha="right") plt.tight_layout() plt.savefig(filename, dpi=150) plt.close() return json.dumps({"chart_saved": filename})
# --- Group Chat ---
groupchat = autogen.GroupChat( agents=[user_proxy, data_querier, data_analyst, report_writer], messages=[], max_round=15, speaker_selection_method="auto",)manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)
# --- Run ---
if __name__ == "__main__": user_proxy.initiate_chat( manager, message="Analyze Q1 2026 sales. Show monthly revenue, top regions, growth rates. Create charts. Write a report.", )Execute:
export OPENAI_API_KEY="sk-..."python multi_agent_analytics.pyOs agentes irão colaborar de forma autônoma, e você verá a conversa completa no seu terminal. A saída final é um relatório em markdown com gráficos — construído por três agentes de IA trabalhando juntos, alimentados por SQL real contra seu data lake.