autogen · multi-agent · analytics · python · ai-agents

Multi-Agent Analytics with AutoGen and DataSpoc

Michael San Martim · 2026-04-19

One agent that can query data is useful. Three agents that collaborate — one fetching data, one analyzing patterns, one writing reports — that is a team. AutoGen makes multi-agent orchestration simple. DataSpoc Lens gives those agents a data lake to work with.

The Architecture

User Request: "Analyze Q1 sales performance and write a report"
┌─────────────────────────────────────────┐
│ AutoGen Group Chat │
│ │
│ DataQuerier → runs SQL via LensClient │
│ ↓ │
│ DataAnalyst → interprets + visualizes │
│ ↓ │
│ ReportWriter → generates markdown │
└─────────────────────────────────────────┘
Final Report (markdown + charts)

Install Dependencies

pip install dataspoc-lens pyautogen matplotlib pandas

Define the Agents

Each agent has a distinct role and access to specific tools:

import json
import autogen
from dataspoc_lens import LensClient
lens = LensClient()
# LLM configuration
llm_config = {
    "config_list": [{"model": "gpt-4o", "api_key": "sk-..."}],
    "temperature": 0,
}

Agent 1: DataQuerier

This agent’s job is to fetch data from the lake. It has access to LensClient methods:

data_querier = autogen.AssistantAgent(
    name="DataQuerier",
    system_message="""You are a data retrieval specialist. Your job is to:
1. List available tables when asked
2. Check table schemas before writing queries
3. Write and execute SQL queries to fetch the data other agents need
4. Return clean, well-formatted results
Always check the schema before writing SQL. Never guess column names.
When returning data, include column headers and format numbers clearly.""",
    llm_config=llm_config,
)

@data_querier.register_for_execution()
@data_querier.register_for_llm(description="List all tables in the data lake")
def list_tables() -> str:
    tables = lens.tables()
    return json.dumps({"tables": tables})

@data_querier.register_for_execution()
@data_querier.register_for_llm(description="Get schema for a table")
def get_schema(table: str) -> str:
    schema = lens.schema(table)
    return json.dumps({"table": table, "schema": schema})

@data_querier.register_for_execution()
@data_querier.register_for_llm(description="Execute a SQL query against the data lake")
def run_query(sql: str) -> str:
    df = lens.query(sql)
    return json.dumps({
        "columns": list(df.columns),
        "rows": df.head(100).to_dict(orient="records"),
        "total_rows": len(df),
    })
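One detail worth noting: run_query caps the payload at 100 rows but always reports the true total_rows, so a downstream agent can detect truncation and fall back to aggregating in SQL. A minimal sketch of the check, using a hand-built payload in place of a real query result:

```python
import json

# Stand-in for a run_query() result whose table exceeded the 100-row cap
# (values here are illustrative, not from a real lake).
payload = json.loads(
    '{"columns": ["region"], "rows": [{"region": "EU"}], "total_rows": 250}'
)

# If total_rows exceeds the rows actually shipped, the result was truncated
# and the agent should aggregate in SQL rather than trust the sample.
truncated = payload["total_rows"] > len(payload["rows"])
print(truncated)  # True
```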

Agent 2: DataAnalyst

This agent interprets data and generates visualizations:

data_analyst = autogen.AssistantAgent(
    name="DataAnalyst",
    system_message="""You are a data analyst. Your job is to:
1. Interpret query results from the DataQuerier
2. Identify trends, anomalies, and key insights
3. Generate Python code for charts when visualization helps
4. Provide statistical context (growth rates, comparisons, averages)
When you receive data, analyze it thoroughly before passing to the ReportWriter.
Use specific numbers. Calculate percentages. Highlight what matters.""",
    llm_config=llm_config,
)

@data_analyst.register_for_execution()
@data_analyst.register_for_llm(description="Ask a natural language question about the data")
def ask_data(question: str) -> str:
    answer = lens.ask(question)
    return json.dumps({"answer": str(answer)})

@data_analyst.register_for_execution()
@data_analyst.register_for_llm(description="Generate and save a chart from data")
def create_chart(
    chart_type: str,
    title: str,
    x_data: list,
    y_data: list,
    x_label: str,
    y_label: str,
    filename: str,
) -> str:
    import matplotlib
    matplotlib.use("Agg")  # headless backend, no display needed
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 6))
    if chart_type == "bar":
        ax.bar(x_data, y_data)
    elif chart_type == "line":
        ax.plot(x_data, y_data, marker="o")
    elif chart_type == "pie":
        ax.pie(y_data, labels=x_data, autopct="%1.1f%%")
    else:
        # Fail loudly instead of saving an empty figure
        return json.dumps({"error": f"unsupported chart_type: {chart_type}"})
    ax.set_title(title)
    if chart_type != "pie":
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.savefig(filename, dpi=150)
    plt.close(fig)
    return json.dumps({"chart_saved": filename})

Agent 3: ReportWriter

This agent produces the final markdown report:

report_writer = autogen.AssistantAgent(
    name="ReportWriter",
    system_message="""You are a business report writer. Your job is to:
1. Take insights from the DataAnalyst and write a clear markdown report
2. Structure the report with executive summary, key findings, and recommendations
3. Include specific numbers and reference any charts generated
4. Write for a business audience — no jargon, clear takeaways
Output the report in markdown format. Start with an executive summary.
Use tables for comparative data. End with actionable recommendations.""",
    llm_config=llm_config,
)

Set Up the Group Chat

AutoGen’s GroupChat lets the agents hand off to one another, with a GroupChatManager choosing the next speaker each round:

# User proxy to initiate the conversation
user_proxy = autogen.UserProxyAgent(
    name="UserProxy",
    human_input_mode="NEVER",  # fully autonomous
    max_consecutive_auto_reply=0,
    code_execution_config=False,
)

# Create the group chat
groupchat = autogen.GroupChat(
    agents=[user_proxy, data_querier, data_analyst, report_writer],
    messages=[],
    max_round=15,
    speaker_selection_method="auto",
)

manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config,
)
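With speaker_selection_method="auto", an LLM picks the next speaker each round. pyautogen also accepts "round_robin" or a custom callable, and since this pipeline is a fixed fetch, analyze, write sequence, the selection logic amounts to the following (sketched over plain agent names rather than the real autogen objects):

```python
# Hypothetical stand-in for a custom speaker selector over a fixed pipeline.
PIPELINE = ["UserProxy", "DataQuerier", "DataAnalyst", "ReportWriter"]

def next_speaker(last_speaker: str) -> str:
    """Advance through the pipeline, wrapping around at the end."""
    i = PIPELINE.index(last_speaker)
    return PIPELINE[(i + 1) % len(PIPELINE)]

print(next_speaker("DataQuerier"))  # DataAnalyst
```

A deterministic order is cheaper and more predictable; "auto" earns its keep when the DataQuerier needs several rounds of queries before the analyst can start.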

Run the Team

user_proxy.initiate_chat(
    manager,
    message="""Analyze Q1 2026 sales performance. I need:
1. Total revenue by month (Jan, Feb, Mar)
2. Top 5 regions by revenue
3. Month-over-month growth rates
4. A bar chart of revenue by region
5. A final report with executive summary and recommendations""",
)
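initiate_chat prints the conversation but does not persist the report. The transcript lives in groupchat.messages, a list of dicts with "name" and "content" keys, so the final markdown can be pulled out and saved. A sketch of that step, shown against stand-in messages so it reads without a live run:

```python
def extract_report(messages: list) -> str:
    """Return the content of the last ReportWriter message, or '' if none."""
    reports = [m["content"] for m in messages if m.get("name") == "ReportWriter"]
    return reports[-1] if reports else ""

# After initiate_chat() returns, something like:
#   open("q1_report.md", "w").write(extract_report(groupchat.messages))
messages = [
    {"name": "DataQuerier", "content": "..."},
    {"name": "ReportWriter", "content": "# Q1 2026 Sales Performance Report"},
]
print(extract_report(messages))  # # Q1 2026 Sales Performance Report
```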

What Happens During Execution

The agents collaborate through the group chat. Here is a simplified trace:

UserProxy → "Analyze Q1 2026 sales performance..."
DataQuerier → list_tables()
Result: ["curated_sales", "curated_customers", "gold_revenue", ...]
DataQuerier → get_schema("curated_sales")
Result: {sale_date: DATE, amount: DOUBLE, region: VARCHAR, ...}
DataQuerier → run_query("""
SELECT DATE_TRUNC('month', sale_date) AS month,
SUM(amount) AS revenue,
COUNT(*) AS orders
FROM curated_sales
WHERE sale_date >= '2026-01-01' AND sale_date < '2026-04-01'
GROUP BY month ORDER BY month
""")
Result: [{month: "2026-01", revenue: 3200000, orders: 12400}, ...]
DataQuerier → run_query("""
SELECT region, SUM(amount) AS revenue
FROM curated_sales
WHERE sale_date >= '2026-01-01' AND sale_date < '2026-04-01'
GROUP BY region ORDER BY revenue DESC LIMIT 5
""")
Result: [{region: "North America", revenue: 4100000}, ...]
DataAnalyst → "Monthly revenue shows consistent growth:
Jan: $3.2M, Feb: $3.5M (+9.4%), Mar: $3.8M (+8.6%).
Q1 total: $10.5M. North America leads at $4.1M (39%)."
DataAnalyst → create_chart(
chart_type="bar",
title="Q1 2026 Revenue by Region",
x_data=["North America", "Europe", "APAC", "LATAM", "MEA"],
y_data=[4100000, 2900000, 1800000, 1200000, 500000],
x_label="Region", y_label="Revenue ($)",
filename="q1_revenue_by_region.png"
)
Result: {"chart_saved": "q1_revenue_by_region.png"}
ReportWriter → [generates full markdown report]
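The growth figures the DataAnalyst quotes are easy to verify by hand:

```python
# Monthly revenue from the trace above.
monthly = {"Jan": 3_200_000, "Feb": 3_500_000, "Mar": 3_800_000}

def mom_growth(prev: float, cur: float) -> float:
    """Month-over-month growth, as a percentage."""
    return (cur - prev) / prev * 100

print(round(mom_growth(monthly["Jan"], monthly["Feb"]), 1))  # 9.4
print(round(mom_growth(monthly["Feb"], monthly["Mar"]), 1))  # 8.6
print(sum(monthly.values()))  # 10500000, the $10.5M Q1 total
```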

The Generated Report

The ReportWriter produces something like this:

# Q1 2026 Sales Performance Report
## Executive Summary
Q1 2026 delivered $10.5M in total revenue across 38,200 orders,
representing 18.2% growth over Q4 2025. All three months showed
positive month-over-month growth, with a consistent 8-9% monthly
increase.
## Monthly Performance
| Month | Revenue | Orders | MoM Growth |
|-------|---------|--------|------------|
| January | $3,200,000 | 12,400 | — |
| February | $3,500,000 | 13,100 | +9.4% |
| March | $3,800,000 | 12,700 | +8.6% |
## Regional Breakdown
![Revenue by Region](q1_revenue_by_region.png)
| Region | Revenue | Share |
|--------|---------|-------|
| North America | $4,100,000 | 39.0% |
| Europe | $2,900,000 | 27.6% |
| Asia Pacific | $1,800,000 | 17.1% |
| Latin America | $1,200,000 | 11.4% |
| Middle East & Africa | $500,000 | 4.8% |
## Recommendations
1. **Double down on APAC** — 17% share with highest growth rate
suggests untapped potential
2. **Investigate March order count** — revenue grew but orders
dipped, indicating higher average order value
3. **Set Q2 target at $12M** — maintaining 8% MoM growth trajectory

Complete Working Script

#!/usr/bin/env python3
"""Multi-agent analytics team using AutoGen + DataSpoc Lens."""
import json

import autogen
from dataspoc_lens import LensClient

lens = LensClient()

llm_config = {
    "config_list": [{"model": "gpt-4o", "api_key": "sk-..."}],
    "temperature": 0,
}

# --- Agent Definitions ---
data_querier = autogen.AssistantAgent(
    name="DataQuerier",
    system_message="You fetch data from the lake. Always check schemas before querying. Return clean results.",
    llm_config=llm_config,
)

data_analyst = autogen.AssistantAgent(
    name="DataAnalyst",
    system_message="You analyze data: identify trends, calculate growth rates, create charts. Use specific numbers.",
    llm_config=llm_config,
)

report_writer = autogen.AssistantAgent(
    name="ReportWriter",
    system_message="You write markdown reports. Executive summary, key findings, tables, recommendations.",
    llm_config=llm_config,
)

user_proxy = autogen.UserProxyAgent(
    name="UserProxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
    code_execution_config=False,
)

# --- Tool Registration ---
@data_querier.register_for_execution()
@data_querier.register_for_llm(description="List all tables")
def list_tables() -> str:
    return json.dumps({"tables": lens.tables()})

@data_querier.register_for_execution()
@data_querier.register_for_llm(description="Get table schema")
def get_schema(table: str) -> str:
    return json.dumps({"schema": lens.schema(table)})

@data_querier.register_for_execution()
@data_querier.register_for_llm(description="Run SQL query")
def run_query(sql: str) -> str:
    df = lens.query(sql)
    return json.dumps({
        "columns": list(df.columns),
        "rows": df.head(100).to_dict(orient="records"),
        "total_rows": len(df),
    })

@data_analyst.register_for_execution()
@data_analyst.register_for_llm(description="Ask natural language question about data")
def ask_data(question: str) -> str:
    return json.dumps({"answer": str(lens.ask(question))})

@data_analyst.register_for_execution()
@data_analyst.register_for_llm(description="Create a chart")
def create_chart(chart_type: str, title: str, x_data: list, y_data: list,
                 x_label: str, y_label: str, filename: str) -> str:
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 6))
    if chart_type == "bar":
        ax.bar(x_data, y_data)
    elif chart_type == "line":
        ax.plot(x_data, y_data, marker="o")
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.savefig(filename, dpi=150)
    plt.close(fig)
    return json.dumps({"chart_saved": filename})

# --- Group Chat ---
groupchat = autogen.GroupChat(
    agents=[user_proxy, data_querier, data_analyst, report_writer],
    messages=[],
    max_round=15,
    speaker_selection_method="auto",
)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

# --- Run ---
if __name__ == "__main__":
    user_proxy.initiate_chat(
        manager,
        message="Analyze Q1 2026 sales. Show monthly revenue, top regions, growth rates. Create charts. Write a report.",
    )

Run it:

export OPENAI_API_KEY="sk-..."
python multi_agent_analytics.py

The agents will collaborate autonomously, and you will see the full conversation in your terminal. The final output is a markdown report with charts — built by three AI agents working together, powered by real SQL against your data lake.
