Skip to content
Back to blog FinOps Automation: Kubecost, OpenCost, and Automated Rightsizing

FinOps Automation: Kubecost, OpenCost, and Automated Rightsizing

DevOpsK8s

FinOps Automation: Kubecost, OpenCost, and Rightsizing

Cloud costs often grow faster than revenue. FinOps brings financial accountability to engineering teams. This guide covers automated cost tracking, cost allocation, and rightsizing on Kubernetes.

TL;DR

  • OpenCost/Kubecost = cost allocation per namespace/team
  • Automatic rightsizing recommendations
  • Showback/chargeback by team
  • Slack alerts for cost anomalies
  • Terraform/GitOps integration

Install OpenCost

# Install OpenCost against an existing Prometheus.
# The chart defaults to its "internal" Prometheus settings; the external URL is
# only used when external mode is switched on explicitly, so both toggles are set.
helm repo add opencost https://opencost.github.io/opencost-helm-chart
helm repo update
helm upgrade --install opencost opencost/opencost \
  --namespace opencost --create-namespace \
  --set opencost.prometheus.internal.enabled=false \
  --set opencost.prometheus.external.enabled=true \
  --set opencost.prometheus.external.url=http://prometheus.monitoring:9090

Install Kubecost

# Install Kubecost against an existing Prometheus.
# global.prometheus.enabled must be false: when it is true the chart deploys its
# bundled Prometheus and ignores global.prometheus.fqdn.
# The prometheus.* toggles below keep the bundled components switched off as well.
helm repo add kubecost https://kubecost.github.io/cost-analyzer/
helm repo update
helm upgrade --install kubecost kubecost/cost-analyzer \
  --namespace kubecost --create-namespace \
  --set prometheus.server.enabled=false \
  --set prometheus.kube-state-metrics.enabled=false \
  --set prometheus.nodeExporter.enabled=false \
  --set global.prometheus.enabled=false \
  --set global.prometheus.fqdn=http://prometheus.monitoring:9090

Cost Allocation Labels

# Require cost allocation labels on workloads AND on the Pods they create.
# Labels set only on a Deployment/StatefulSet are not copied to its Pods, and
# label-based cost allocation is computed from Pod labels, so the Pod template
# must carry the same labels.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: require-cost-labels
spec:
  validationFailureAction: Enforce
  rules:
    - name: require-team-and-env
      match:
        any:
          - resources:
              kinds:
                - Deployment
                - StatefulSet
      validate:
        message: >-
          Labels 'team' and 'environment' are required for cost allocation
          on both the workload and its Pod template
        pattern:
          metadata:
            labels:
              # "?*" = any non-empty value
              team: "?*"
              environment: "?*"
          spec:
            template:
              metadata:
                labels:
                  team: "?*"
                  environment: "?*"

API Usage

# Base URL of the Kubecost API. The default matches the Helm install above
# (release "kubecost" in namespace "kubecost", service port 9090); override it
# when using a port-forward or an ingress.
KUBECOST_URL="${KUBECOST_URL:-http://kubecost-cost-analyzer.kubecost:9090}"

# curl -f makes HTTP errors fail the command instead of piping an error page to jq.

# Namespace costs (last 7 days)
curl -fsS "${KUBECOST_URL}/model/allocation?window=7d&aggregate=namespace" | jq .

# Team costs
curl -fsS "${KUBECOST_URL}/model/allocation?window=30d&aggregate=label:team" | jq .

# Idle costs
curl -fsS "${KUBECOST_URL}/model/allocation?window=7d&aggregate=namespace&idle=true" | jq .

# Savings recommendations
curl -fsS "${KUBECOST_URL}/model/savings" | jq .

Slack Alerts

# Kubecost alert configuration
# NOTE(review): Kubecost normally receives alert definitions through the Helm
# value global.notifications.alertConfigs; confirm that your deployment actually
# mounts/reads this ConfigMap before relying on it.
# NOTE(review): Kubecost's documented `filter` takes values of the chosen
# aggregation (e.g. a namespace name, or '*'); the PromQL-style exclusion below
# is kept as written and should be verified.
# NOTE(review): webhook URLs are credentials; prefer a Secret for real values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubecost-alerts
  namespace: kubecost
data:
  alerts.yaml: |
    alerts:
      - name: daily-spend-anomaly
        type: budget
        threshold: 500  # $500/day
        window: 1d
        aggregation: namespace
        filter: 'namespace!~"kube-system|monitoring"'
        slackWebhookUrl: https://hooks.slack.com/services/xxx

      - name: efficiency-alert
        type: efficiency
        efficiencyThreshold: 0.5  # Alert if <50% efficient
        window: 7d
        aggregation: namespace
        slackWebhookUrl: https://hooks.slack.com/services/xxx

      - name: cluster-spend
        type: budget
        threshold: 10000  # $10k/month
        window: 30d
        aggregation: cluster
        slackWebhookUrl: https://hooks.slack.com/services/xxx

Rightsizing Automation

# Recommendation-only VerticalPodAutoscaler for the api-server Deployment.
# With updateMode "Off" the VPA publishes recommendations in its status and
# never changes running Pods.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: api-server-vpa
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-server
  resourcePolicy:
    containerPolicies:
      # Same bounds for every container in the Pod
      - containerName: "*"
        minAllowed: {cpu: 50m, memory: 64Mi}
        maxAllowed: {cpu: 4, memory: 8Gi}
  updatePolicy:
    updateMode: "Off"

Review Rightsizing Recommendations

#!/bin/bash
# rightsizing-report.sh
#
# Prints the current VPA target recommendation for every container of every
# VerticalPodAutoscaler in the cluster. Read-only: nothing is applied.
set -euo pipefail

# One "namespace/name" per line; read line-by-line so values are never word-split.
kubectl get vpa -A \
  -o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}{"\n"}{end}' |
while IFS=/ read -r ns name; do
  [ -n "$name" ] || continue

  # One "container: {target}" line per container, not just the first container.
  targets=$(kubectl get vpa -n "$ns" "$name" \
    -o jsonpath='{range .status.recommendation.containerRecommendations[*]}{.containerName}{": "}{.target}{"\n"}{end}')

  echo "VPA: $ns/$name"
  if [ -z "$targets" ]; then
    # A freshly created VPA has no recommendation in its status yet.
    echo "Recommended: (no recommendation yet)"
  else
    while IFS= read -r line; do
      echo "Recommended: $line"
    done <<< "$targets"
  fi
  echo "---"
done

Grafana Dashboards

{
  "panels": [
    {
      "type": "piechart",
      "title": "Cost by Namespace (30d)",
      "targets": [
        { "expr": "sum(kubecost_allocation_cost{window=\"30d\"}) by (namespace)", "legendFormat": "{{ namespace }}" }
      ]
    },
    {
      "type": "piechart",
      "title": "Cost by Team (30d)",
      "targets": [
        { "expr": "sum(kubecost_allocation_cost{window=\"30d\"}) by (team)", "legendFormat": "{{ team }}" }
      ]
    },
    {
      "type": "timeseries",
      "title": "Daily Spend Trend",
      "targets": [
        { "expr": "sum(kubecost_allocation_cost{window=\"1d\"})", "legendFormat": "Daily Cost" }
      ]
    },
    {
      "type": "gauge",
      "title": "Idle Resources (%)",
      "targets": [
        { "expr": "sum(kubecost_allocation_cpu_idle_cost) / sum(kubecost_allocation_cpu_cost) * 100", "legendFormat": "CPU Idle %" }
      ]
    }
  ]
}

Terraform Integration

# Kyverno policy, managed from Terraform, that rejects Pods whose containers do
# not declare CPU/memory requests and a memory limit.
resource "kubectl_manifest" "cost_policy" {
  # Indented heredoc (<<-): Terraform strips the common leading whitespace, so
  # the rendered manifest is identical to an unindented one.
  yaml_body = <<-EOF
    apiVersion: kyverno.io/v1
    kind: ClusterPolicy
    metadata:
      name: require-requests-limits
    spec:
      validationFailureAction: Enforce
      rules:
        - name: require-resources
          match:
            resources:
              kinds:
                - Pod
          validate:
            message: "Resource requests and limits are required"
            pattern:
              spec:
                containers:
                  - resources:
                      requests:
                        cpu: "?*"
                        memory: "?*"
                      limits:
                        memory: "?*"
  EOF
}

# One AWS Resource Group per entry in var.teams, selecting every supported
# resource type that carries the tag team=<team name>.
resource "aws_resourcegroups_group" "team_resources" {
  for_each = toset(var.teams)

  # For a set, each.value is the same string as each.key.
  name = "team-${each.value}"

  resource_query {
    query = jsonencode({
      ResourceTypeFilters = ["AWS::AllSupported"]
      TagFilters = [
        { Key = "team", Values = [each.value] },
      ]
    })
  }
}

Weekly Cost Report

#!/usr/bin/env python3
import json
import os
from datetime import datetime

import requests

# Base URL of the Kubecost API, without a trailing slash. Override with the
# KUBECOST_URL environment variable; the default matches a Helm release named
# "kubecost" installed in namespace "kubecost" (service port 9090).
KUBECOST_URL = os.environ.get(
    "KUBECOST_URL", "http://kubecost-cost-analyzer.kubecost:9090"
).rstrip("/")

# Slack incoming-webhook URL. This is a credential: supply it through the
# SLACK_WEBHOOK environment variable instead of committing the real value.
SLACK_WEBHOOK = os.environ.get("SLACK_WEBHOOK", "https://hooks.slack.com/services/xxx")

def get_costs():
    """Fetch 7 days of Kubecost allocation data aggregated by namespace.

    Returns:
        The decoded JSON body of ``GET {KUBECOST_URL}/model/allocation``.

    Raises:
        requests.HTTPError: if Kubecost responds with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    resp = requests.get(
        f"{KUBECOST_URL}/model/allocation",
        params={"window": "7d", "aggregate": "namespace"},
        timeout=30,  # requests waits forever by default
    )
    # Fail loudly instead of reporting numbers parsed from an error payload.
    resp.raise_for_status()
    return resp.json()

def get_recommendations():
    """Fetch the savings summary from Kubecost.

    Returns:
        The decoded JSON body of ``GET {KUBECOST_URL}/model/savings``.

    Raises:
        requests.HTTPError: if Kubecost responds with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    resp = requests.get(f"{KUBECOST_URL}/model/savings", timeout=30)
    # Fail loudly instead of reporting numbers parsed from an error payload.
    resp.raise_for_status()
    return resp.json()

def _namespace_totals(costs):
    """Return {namespace: summed totalCost} across every entry of costs['data'].

    Kubecost's documented Allocation API response carries ``data`` as a list of
    allocation sets, each a mapping of allocation name -> allocation object.
    Flat records ({'namespace': ..., 'totalCost': ...}) are also accepted so
    callers that already pass that shape keep working.
    """
    totals = {}
    for entry in costs.get('data') or []:
        if not entry:
            continue  # empty or null allocation set
        if isinstance(entry.get('totalCost'), (int, float)):
            # Flat record form
            name = entry.get('namespace') or entry.get('name') or 'unknown'
            totals[name] = totals.get(name, 0) + entry['totalCost']
            continue
        for name, allocation in entry.items():
            cost = (allocation or {}).get('totalCost') or 0
            totals[name] = totals.get(name, 0) + cost
    return totals


def send_slack_report(costs, recommendations):
    """Post a weekly cost summary to Slack.

    Args:
        costs: decoded allocation response, as returned by get_costs().
        recommendations: decoded savings response, as returned by
            get_recommendations(). NOTE(review): reads a top-level
            'totalSavings' key - confirm against the actual /model/savings
            payload.

    Raises:
        requests.HTTPError: if Slack rejects the webhook call.
        requests.RequestException: on connection failure or timeout.
    """
    totals = _namespace_totals(costs)
    total = sum(totals.values())
    potential_savings = recommendations.get('totalSavings', 0)

    blocks = [
        {
            "type": "header",
            "text": {"type": "plain_text", "text": f"📊 Weekly Cost Report - {datetime.now().strftime('%Y-%m-%d')}"}
        },
        {
            "type": "section",
            "fields": [
                {"type": "mrkdwn", "text": f"*Total Spend (7d):* ${total:.2f}"},
                {"type": "mrkdwn", "text": f"*Potential Savings:* ${potential_savings:.2f}"}
            ]
        }
    ]

    # Top 5 namespaces by spend
    top_ns = sorted(totals.items(), key=lambda item: item[1], reverse=True)[:5]
    ns_text = "\n".join(f"• {name}: ${cost:.2f}" for name, cost in top_ns)

    blocks.append({
        "type": "section",
        "text": {"type": "mrkdwn", "text": f"*Top 5 Namespaces:*\n{ns_text}"}
    })

    resp = requests.post(SLACK_WEBHOOK, json={"blocks": blocks}, timeout=30)
    # Surface a rejected webhook instead of silently dropping the report.
    resp.raise_for_status()

if __name__ == "__main__":
    # Arguments are evaluated left to right: allocation data is fetched first,
    # then the savings summary, and both go into a single Slack message.
    send_slack_report(get_costs(), get_recommendations())

References

======================================== FinOps + Kubecost + Automation

Track costs. Optimize automatically.

Found this helpful?

Comments