FinOps Automation: Kubecost, OpenCost, and Rightsizing
Cloud costs can easily grow faster than revenue. FinOps brings financial accountability to engineering teams. This guide covers automated cost tracking, cost allocation, and rightsizing for Kubernetes workloads.
TL;DR
- OpenCost/Kubecost = cost allocation per namespace/team
- Automatic rightsizing recommendations
- Showback/chargeback by team
- Slack alerts for cost anomalies
- Terraform/GitOps integration
Install OpenCost
# Install OpenCost against an existing Prometheus.
# The chart defaults to its "internal" Prometheus lookup, so the external
# endpoint has to be enabled explicitly (and the internal one disabled);
# setting only the URL leaves the external endpoint unused.
helm repo add opencost https://opencost.github.io/opencost-helm-chart
helm repo update
helm upgrade --install opencost opencost/opencost \
  --namespace opencost --create-namespace \
  --set opencost.prometheus.internal.enabled=false \
  --set opencost.prometheus.external.enabled=true \
  --set opencost.prometheus.external.url=http://prometheus.monitoring:9090
Install Kubecost
# Install Kubecost and point it at an existing Prometheus.
# global.prometheus.enabled=false tells the chart NOT to deploy its bundled
# Prometheus; global.prometheus.fqdn is only honoured in that mode.
# The external Prometheus must also be configured to scrape Kubecost.
helm repo add kubecost https://kubecost.github.io/cost-analyzer/
helm repo update
helm upgrade --install kubecost kubecost/cost-analyzer \
  --namespace kubecost --create-namespace \
  --set prometheus.server.enabled=false \
  --set prometheus.kube-state-metrics.enabled=false \
  --set prometheus.nodeExporter.enabled=false \
  --set global.prometheus.enabled=false \
  --set global.prometheus.fqdn=http://prometheus.monitoring:9090
Cost Allocation Labels
# Require cost allocation labels.
# Cost tools attribute spend by *pod* labels, and labels on a Deployment or
# StatefulSet are not copied to its pods, so the labels are required on the
# pod template as well as on the workload object itself.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: require-cost-labels
spec:
  validationFailureAction: Enforce
  rules:
    - name: require-team-and-env
      match:
        any:
          - resources:
              kinds:
                - Deployment
                - StatefulSet
      validate:
        message: "Labels 'team' and 'environment' are required for cost allocation"
        pattern:
          metadata:
            labels:
              team: "?*"
              environment: "?*"
          spec:
            template:
              metadata:
                labels:
                  team: "?*"
                  environment: "?*"
API Usage
# Kubecost API base URL. The default is the in-cluster Service created by the
# Helm release "kubecost" in namespace "kubecost"; override KUBECOST_URL when
# going through an ingress or `kubectl port-forward`.
KUBECOST_URL="${KUBECOST_URL:-http://kubecost-cost-analyzer.kubecost:9090}"

# -f makes curl fail on HTTP errors instead of piping an error page into jq;
# -S still prints the error while -s hides the progress meter.

# Namespace costs (last 7 days)
curl -fsS "${KUBECOST_URL}/model/allocation?window=7d&aggregate=namespace" | jq .
# Team costs
curl -fsS "${KUBECOST_URL}/model/allocation?window=30d&aggregate=label:team" | jq .
# Idle costs
curl -fsS "${KUBECOST_URL}/model/allocation?window=7d&aggregate=namespace&idle=true" | jq .
# Savings recommendations
curl -fsS "${KUBECOST_URL}/model/savings" | jq .
Slack Alerts
# Kubecost alert definitions, stored as a YAML document under `alerts.yaml`.
# NOTE(review): the Slack webhook URLs are placeholders; a real webhook URL is
# a credential and is better sourced from a Secret than committed in a ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubecost-alerts
  namespace: kubecost
data:
  alerts.yaml: |
    alerts:
      - name: daily-spend-anomaly
        type: budget
        threshold: 500 # $500/day
        window: 1d
        aggregation: namespace
        filter: namespace!~"kube-system|monitoring"
        slackWebhookUrl: https://hooks.slack.com/services/xxx
      - name: efficiency-alert
        type: efficiency
        threshold: 0.5 # Alert if <50% efficient
        window: 7d
        aggregation: namespace
        slackWebhookUrl: https://hooks.slack.com/services/xxx
      - name: cluster-spend
        type: budget
        threshold: 10000 # $10k/month
        window: 30d
        aggregation: cluster
        slackWebhookUrl: https://hooks.slack.com/services/xxx
Rightsizing Automation
# VerticalPodAutoscaler for the api-server Deployment, used purely as a
# source of rightsizing recommendations.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: api-server-vpa
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-server
  updatePolicy:
    # Recommendations only
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      # Wildcard entry: bounds for the recommendations of all containers.
      - containerName: "*"
        minAllowed:
          cpu: 50m
          memory: 64Mi
        maxAllowed:
          cpu: "4"
          memory: 8Gi
Apply Rightsizing
#!/bin/bash
# rightsizing-report.sh
# Print the VPA target recommendation for every container of every
# VerticalPodAutoscaler in the cluster.
set -euo pipefail

# One "namespace/name" per line; splitting on "/" is safe because neither
# part can contain a slash.
kubectl get vpa -A \
  -o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}{"\n"}{end}' |
while IFS=/ read -r ns name; do
  [ -n "$ns" ] || continue

  # All containers, not just the first one. stdin is redirected so kubectl
  # cannot consume the list the loop is reading from.
  recs=$(kubectl get vpa -n "$ns" "$name" \
    -o jsonpath='{range .status.recommendation.containerRecommendations[*]}{.containerName}{": "}{.target}{"\n"}{end}' \
    < /dev/null)

  echo "VPA: $ns/$name"
  if [ -z "$recs" ]; then
    # A VPA has no recommendation until the recommender has seen enough data.
    echo "Recommended: (no recommendation yet)"
  else
    while IFS= read -r rec; do
      echo "Recommended: $rec"
    done <<< "$recs"
  fi
  echo "---"
done
Grafana Dashboards
{
  "panels": [
    {
      "title": "Cost by Namespace (30d)",
      "type": "piechart",
      "targets": [
        { "expr": "sum(kubecost_allocation_cost{window=\"30d\"}) by (namespace)", "legendFormat": "{{ namespace }}" }
      ]
    },
    {
      "title": "Cost by Team (30d)",
      "type": "piechart",
      "targets": [
        { "expr": "sum(kubecost_allocation_cost{window=\"30d\"}) by (team)", "legendFormat": "{{ team }}" }
      ]
    },
    {
      "title": "Daily Spend Trend",
      "type": "timeseries",
      "targets": [
        { "expr": "sum(kubecost_allocation_cost{window=\"1d\"})", "legendFormat": "Daily Cost" }
      ]
    },
    {
      "title": "Idle Resources (%)",
      "type": "gauge",
      "targets": [
        { "expr": "sum(kubecost_allocation_cpu_idle_cost) / sum(kubecost_allocation_cpu_cost) * 100", "legendFormat": "CPU Idle %" }
      ]
    }
  ]
}
Terraform Integration
# Kyverno ClusterPolicy, applied through Terraform, that rejects Pods whose
# containers omit CPU/memory requests or a memory limit.
resource "kubectl_manifest" "cost_policy" {
  # Indented heredoc (<<-): the common leading whitespace is stripped, so the
  # rendered manifest starts at column 0.
  yaml_body = <<-EOF
    apiVersion: kyverno.io/v1
    kind: ClusterPolicy
    metadata:
      name: require-requests-limits
    spec:
      validationFailureAction: Enforce
      rules:
        - name: require-resources
          match:
            resources:
              kinds:
                - Pod
          validate:
            message: "Resource requests and limits are required"
            pattern:
              spec:
                containers:
                  - resources:
                      requests:
                        cpu: "?*"
                        memory: "?*"
                      limits:
                        memory: "?*"
  EOF
}
# One AWS Resource Group per team, selecting every supported resource type
# that carries a matching `team` tag. This groups already-tagged resources;
# it does not apply tags itself.
resource "aws_resourcegroups_group" "team_resources" {
  # for_each over a set: each.key and each.value are the same team name.
  for_each = toset(var.teams)

  name = "team-${each.value}"

  resource_query {
    query = jsonencode({
      ResourceTypeFilters = ["AWS::AllSupported"]
      TagFilters = [{
        Key    = "team"
        Values = [each.value]
      }]
    })
  }
}
Weekly Cost Report
#!/usr/bin/env python3
import json
import os
from datetime import datetime

import requests
# Base URL of the Kubecost API. The default is the in-cluster Service created
# by the Helm release "kubecost" in namespace "kubecost"; set the KUBECOST_URL
# environment variable to override it.
KUBECOST_URL = os.environ.get("KUBECOST_URL", "http://kubecost-cost-analyzer.kubecost:9090")
# Slack incoming-webhook URL. It is a credential, so it is read from the
# environment; the placeholder default only keeps the example importable.
SLACK_WEBHOOK = os.environ.get("SLACK_WEBHOOK", "https://hooks.slack.com/services/xxx")
def get_costs(window="7d", aggregate="namespace"):
    """Fetch allocation costs from the Kubecost Allocation API.

    Args:
        window: Time window to query (default "7d").
        aggregate: Field to aggregate by (default "namespace").

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    resp = requests.get(
        f"{KUBECOST_URL}/model/allocation",
        # accumulate=true asks for the whole window as a single allocation
        # set rather than one set per time step.
        params={"window": window, "aggregate": aggregate, "accumulate": "true"},
        # Without a timeout a hung endpoint would block the report forever.
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of parsing an error body as cost data.
    resp.raise_for_status()
    return resp.json()
def get_recommendations():
    """Fetch the savings summary from Kubecost's /model/savings endpoint.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    # Without a timeout a hung endpoint would block the report forever.
    resp = requests.get(f"{KUBECOST_URL}/model/savings", timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error body as data.
    resp.raise_for_status()
    return resp.json()
def _namespace_totals(costs):
    """Collapse an allocation response into a {name: total cost} dict."""
    totals = {}
    for entry in (costs or {}).get('data') or []:
        if not isinstance(entry, dict):
            continue  # skip null or malformed entries
        if 'totalCost' in entry and not isinstance(entry['totalCost'], dict):
            # Flat record: {"namespace": ..., "totalCost": ...}
            label = entry.get('namespace') or entry.get('name') or 'unknown'
            records = [(label, entry)]
        else:
            # Allocation set: {"<name>": {"totalCost": ...}, ...}
            records = entry.items()
        for name, alloc in records:
            if isinstance(alloc, dict):
                # Sum across sets so multi-step responses are handled too.
                totals[name] = totals.get(name, 0) + (alloc.get('totalCost') or 0)
    return totals


def send_slack_report(costs, recommendations):
    """Post a weekly cost summary to Slack as Block Kit blocks.

    Args:
        costs: Decoded allocation response. `data` is a list whose items are
            either allocation sets keyed by name or flat records carrying
            `namespace`/`totalCost`.
        recommendations: Decoded savings response; `totalSavings` is read from
            its top level (NOTE(review): confirm against the actual
            /model/savings payload).

    Returns:
        None.

    Raises:
        requests.RequestException: if the webhook call fails, times out, or
            returns a non-2xx status.
    """
    totals = _namespace_totals(costs)
    total = sum(totals.values())
    potential_savings = (recommendations or {}).get('totalSavings', 0) or 0

    blocks = [
        {
            "type": "header",
            "text": {"type": "plain_text", "text": f"📊 Weekly Cost Report - {datetime.now().strftime('%Y-%m-%d')}"}
        },
        {
            "type": "section",
            "fields": [
                {"type": "mrkdwn", "text": f"*Total Spend (7d):* ${total:.2f}"},
                {"type": "mrkdwn", "text": f"*Potential Savings:* ${potential_savings:.2f}"}
            ]
        }
    ]

    # Top 5 namespaces by total cost, most expensive first
    top_ns = sorted(totals.items(), key=lambda item: item[1], reverse=True)[:5]
    ns_text = "\n".join(f"• {name}: ${cost:.2f}" for name, cost in top_ns) or "(no data)"
    blocks.append({
        "type": "section",
        "text": {"type": "mrkdwn", "text": f"*Top 5 Namespaces:*\n{ns_text}"}
    })

    resp = requests.post(SLACK_WEBHOOK, json={"blocks": blocks}, timeout=30)
    # Surface a rejected webhook call instead of silently dropping the report.
    resp.raise_for_status()
if __name__ == "__main__":
    # Script entry point: fetch costs, then savings recommendations, and post
    # one combined report (arguments are evaluated left to right).
    send_slack_report(get_costs(), get_recommendations())
References
- OpenCost: https://www.opencost.io
- Kubecost: https://docs.kubecost.com
- FinOps Foundation: https://www.finops.org