Skip to content

JSON Output

All DataSpoc CLI commands support the --output json flag for machine-readable output. Use it in shell scripts, CI/CD pipelines, or any other automation tool that can parse JSON.

Terminal window
dataspoc-lens catalog --output json
{
"tables": [
{
"name": "raw.my_source.orders",
"row_count": 125000,
"columns": 12,
"last_updated": "2025-01-17T14:30:00Z",
"size_bytes": 4521984
},
{
"name": "raw.my_source.customers",
"row_count": 8500,
"columns": 8,
"last_updated": "2025-01-17T14:30:00Z",
"size_bytes": 312576
}
]
}
Terminal window
dataspoc-lens query "SELECT customer, SUM(revenue) as total FROM raw.my_source.orders GROUP BY customer ORDER BY total DESC LIMIT 3" --output json
{
"columns": ["customer", "total"],
"rows": [
["Globex Inc", 28000],
["Acme Corp", 19200],
["Initech", 12000]
],
"row_count": 3,
"elapsed_ms": 42
}
Terminal window
dataspoc-lens ask "top customers by revenue" --output json
{
"question": "top customers by revenue",
"sql": "SELECT customer, SUM(revenue) as total_revenue FROM raw.my_source.orders GROUP BY customer ORDER BY total_revenue DESC LIMIT 10",
"columns": ["customer", "total_revenue"],
"rows": [
["Globex Inc", 28000],
["Acme Corp", 19200],
["Initech", 12000]
],
"row_count": 3,
"elapsed_ms": 187
}
Terminal window
dataspoc-lens cache --list --output json
{
"tables": [
{
"name": "raw.my_source.orders",
"cached": true,
"stale": false,
"cache_size_bytes": 4521984,
"cached_at": "2025-01-17T14:30:00Z",
"source_updated_at": "2025-01-17T14:30:00Z"
},
{
"name": "raw.my_source.customers",
"cached": true,
"stale": true,
"cache_size_bytes": 312576,
"cached_at": "2025-01-16T10:00:00Z",
"source_updated_at": "2025-01-17T14:30:00Z"
}
]
}
Terminal window
dataspoc-pipe status --output json
{
"pipelines": [
{
"name": "my-source",
"status": "success",
"last_run": "2025-01-17T14:30:00Z",
"rows_synced": 125000,
"tables": 5,
"duration_seconds": 45
}
]
}
Terminal window
dataspoc-pipe logs my-source --output json
{
"pipeline": "my-source",
"entries": [
{
"timestamp": "2025-01-17T14:30:00Z",
"level": "info",
"message": "Starting pipeline my-source"
},
{
"timestamp": "2025-01-17T14:30:15Z",
"level": "info",
"message": "Extracted 125000 rows from orders"
},
{
"timestamp": "2025-01-17T14:30:45Z",
"level": "info",
"message": "Pipeline completed successfully"
}
]
}
Terminal window
dataspoc-pipe manifest --output json
{
"version": "1.0",
"bucket": "s3://my-data",
"tables": [
{
"path": "raw/my-source/orders",
"format": "parquet",
"row_count": 125000,
"partitions": ["dt"],
"schema": {
"columns": [
{"name": "order_id", "type": "int64"},
{"name": "customer", "type": "string"},
{"name": "revenue", "type": "float64"},
{"name": "dt", "type": "date"}
]
}
}
]
}
Terminal window
dataspoc-pipe validate my-source --output json
{
"pipeline": "my-source",
"valid": true,
"errors": [],
"warnings": [
"Table 'legacy_orders' has no primary key configured"
]
}
Terminal window
# Get the row count for a specific table
dataspoc-lens catalog --output json | jq '.tables[] | select(.name == "raw.my_source.orders") | .row_count'
# Print the names of any failed pipelines (no output means all succeeded)
dataspoc-pipe status --output json | jq '.pipelines[] | select(.status == "failed") | .name'
# Run the pipeline only if validation passes (jq -e exits non-zero when .valid is false)
if dataspoc-pipe validate my-source --output json | jq -e '.valid' > /dev/null; then
dataspoc-pipe run my-source
fi
import json
import subprocess
result = subprocess.run(
["dataspoc-lens", "catalog", "--output", "json"],
capture_output=True,
text=True,
)
catalog = json.loads(result.stdout)
for table in catalog["tables"]:
print(f"{table['name']}: {table['row_count']} rows")