Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ifeq ($(GIT_DIFF), 1)
GIT_TREESTATE = "dirty"
endif
BUILDDATE = $(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
BUILDMODE?="MANUAL"

LDFLAGS = "-X github.com/gocrane/crane/pkg/version.gitTag=$(GIT_VERSION) \
-X github.com/gocrane/crane/pkg/version.gitCommit=$(GIT_COMMIT_HASH) \
Expand Down Expand Up @@ -119,19 +120,19 @@ images: image-craned image-crane-agent image-metric-adapter image-dashboard

.PHONY: image-craned
image-craned: ## Build docker image with the crane manager.
docker build --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=craned -t ${MANAGER_IMG} .
docker build --build-arg BUILD=$(BUILDMODE) --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=craned -t ${MANAGER_IMG} .

.PHONY: image-dashboard
image-dashboard: ## Build docker image with the crane dashboard.
docker build --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=web -t ${DASHBOARD_IMG} ./pkg/web
docker build --build-arg BUILD=$(BUILDMODE) --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=web -t ${DASHBOARD_IMG} ./pkg/web

.PHONY: image-crane-agent
image-crane-agent: ## Build docker image with the crane agent.
docker build --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=crane-agent -t ${AGENT_IMG} .
docker build --build-arg BUILD=$(BUILDMODE) --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=crane-agent -t ${AGENT_IMG} .

.PHONY: image-metric-adapter
image-metric-adapter: ## Build docker image with the metric adapter.
docker build --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=metric-adapter -t ${ADAPTER_IMG} .
docker build --build-arg BUILD=$(BUILDMODE) --build-arg LDFLAGS=$(LDFLAGS) --build-arg PKGNAME=metric-adapter -t ${ADAPTER_IMG} .

.PHONY: push-images
push-images: push-image-craned push-image-crane-agent push-image-metric-adapter push-image-dashboard
Expand Down
3 changes: 3 additions & 0 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func NewAgent(ctx context.Context,
if err != nil {
return agent, err
}
legacyregistry.CustomMustRegister(metrics.NewNodeResourceCollector(nodeName, nodeInformer.Lister(), nodeResourceManager.GetResource))
managers = appendManagerIfNotNil(managers, nodeResourceManager)
}

Expand All @@ -134,6 +135,8 @@ func NewAgent(ctx context.Context,

agent.managers = managers

legacyregistry.CustomMustRegister(metrics.NewPodResourceCollector(podInformer.Lister()))

return agent, nil
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/ensurance/collector/cadvisor/cadvisor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func NewCadvisorManager(cgroupDriver string) Manager {
var includedMetrics = cadvisorcontainer.MetricSet{
cadvisorcontainer.CpuUsageMetrics: struct{}{},
cadvisorcontainer.ProcessSchedulerMetrics: struct{}{},
cadvisorcontainer.MemoryUsageMetrics: struct{}{},
}

allowDynamic := true
Expand Down Expand Up @@ -162,11 +163,9 @@ func (c *CadvisorCollector) Collect() (map[string][]common.TimeSeries, error) {
klog.Errorf("ContainerInfoRequest failed: %v", err)
continue
}

if hasExtMemRes && v.Stats[0].Memory != nil {
extResMemUse += float64(v.Stats[0].Memory.WorkingSet)
}

hasExtRes := hasExtCpuRes || hasExtMemRes
var containerLabels = GetContainerLabels(pod, containerId, containerName, hasExtRes)
if v.Stats[0].Memory != nil {
Expand Down
15 changes: 15 additions & 0 deletions pkg/known/types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package known

import "k8s.io/apimachinery/pkg/api/resource"

type Module string

const (
Expand All @@ -9,3 +11,16 @@ const (
ModuleNodeResourceManager Module = "ModuleNodeResourceManager"
ModulePodResourceManager Module = "ModulePodResourceManager"
)

// ResourceStatus groups CPU and memory quantities describing the resource
// state of a node. Every field is a pointer and may therefore be nil; readers
// should check before dereferencing.
// NOTE(review): the per-field meanings below are inferred from the field
// names only — confirm against the code that populates this struct.
type ResourceStatus struct {
// CPUReserved is presumably the amount of CPU kept in reserve.
CPUReserved *resource.Quantity
// CPUUsage is presumably the observed CPU usage — TODO confirm its scope.
CPUUsage *resource.Quantity
// CPUUsageOffline is presumably the CPU used by offline workloads.
CPUUsageOffline *resource.Quantity
// CPUSetIdle is presumably the idle CPU within cpusets — TODO confirm.
CPUSetIdle *resource.Quantity
// MemoryReserved is presumably the amount of memory kept in reserve.
MemoryReserved *resource.Quantity
// MemoryUsage is presumably the observed memory usage — TODO confirm its scope.
MemoryUsage *resource.Quantity
// MemoryUsageOffline is presumably the memory used by offline workloads.
MemoryUsageOffline *resource.Quantity

// CPUReservedTSP and MemoryReservedTSP look like reserved amounts derived
// from a time-series prediction — TODO confirm what "TSP" stands for.
CPUReservedTSP *resource.Quantity
MemoryReservedTSP *resource.Quantity
}
16 changes: 15 additions & 1 deletion pkg/known/vars.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package known

import "os"
import (
"os"

corev1 "k8s.io/api/core/v1"
)

var (
CraneSystemNamespace = "crane-system"
Expand All @@ -11,3 +15,13 @@ func init() {
CraneSystemNamespace = namespace
}
}

const (
// ElasticResourcePrefix is the prefix shared by the names of crane's
// elastic resources.
ElasticResourcePrefix = "gocrane.io/"
)

var (
// ElasticCPU is the elastic CPU resource name: ElasticResourcePrefix
// followed by the core CPU resource name.
ElasticCPU = ElasticResourcePrefix + corev1.ResourceCPU
// ElasticMemory is the elastic memory resource name: ElasticResourcePrefix
// followed by the core memory resource name.
ElasticMemory = ElasticResourcePrefix + corev1.ResourceMemory
)
221 changes: 221 additions & 0 deletions pkg/metrics/node.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
package metrics

import (
"github.com/gocrane/crane/pkg/known"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
v1 "k8s.io/client-go/listers/core/v1"
k8smetrics "k8s.io/component-base/metrics"
"k8s.io/klog/v2"
)

// Subsystem names for crane metrics.
// NOTE(review): neither constant is referenced by the descriptors declared in
// this file, which spell out their full metric names — confirm where these
// are used.
const (
// CraneNodeSubsystem is the subsystem name "node".
CraneNodeSubsystem = "node"
// CranePodSubsystem is the subsystem name "pod".
CranePodSubsystem = "pod"
)

// Descriptors for the per-pod elastic resource metrics. Both carry the
// variable labels "pod" and "namespace", have no constant labels, are marked
// ALPHA stability and have no deprecation version.
var (
	// podElasticCPUDesc describes the elastic CPU requested by a pod.
	podElasticCPUDesc = k8smetrics.NewDesc("crane_pod_elastic_cpu_request",
		"The elastic cpu requested by pod",
		[]string{"pod", "namespace"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	// podElasticMemoryDesc describes the elastic memory requested by a pod.
	// The help text previously said "cpu", a copy-paste slip from the
	// descriptor above.
	podElasticMemoryDesc = k8smetrics.NewDesc("crane_pod_elastic_memory_request",
		"The elastic memory requested by pod",
		[]string{"pod", "namespace"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
)

// NewPodResourceCollector returns a PodResourceCollector that reads pods
// through podLister.
func NewPodResourceCollector(podLister v1.PodLister) *PodResourceCollector {
	collector := new(PodResourceCollector)
	collector.podLister = podLister
	return collector
}

// PodResourceCollector is a metrics collector that reports the elastic CPU
// and memory requested by the pods returned from podLister.
type PodResourceCollector struct {
// BaseStableCollector is embedded from k8s.io/component-base/metrics.
k8smetrics.BaseStableCollector
// podLister is queried for all pods each time metrics are collected.
podLister v1.PodLister
}

// DescribeWithStability sends the descriptors of every metric this collector
// can emit to descs: elastic CPU first, then elastic memory.
func (n *PodResourceCollector) DescribeWithStability(descs chan<- *k8smetrics.Desc) {
	for _, desc := range []*k8smetrics.Desc{
		podElasticCPUDesc,
		podElasticMemoryDesc,
	} {
		descs <- desc
	}
}

// CollectWithStability lists all pods and, for each running pod, emits two
// gauges labelled with the pod name and namespace: the sum of elastic CPU
// requests and the sum of elastic memory requests across the pod's regular
// containers (pod.Spec.Containers).
//
// Pods that are not in the Running phase, and pods whose elastic CPU and
// memory requests are both zero, produce no samples. If listing pods fails
// the error is logged and nothing is emitted.
func (n *PodResourceCollector) CollectWithStability(metrics chan<- k8smetrics.Metric) {
	pods, err := n.podLister.List(labels.Everything())
	if err != nil {
		klog.ErrorS(err, "list pods failed")
		return
	}

	for _, pod := range pods {
		if pod.Status.Phase != corev1.PodRunning {
			continue
		}

		eCPU := resource.NewQuantity(0, resource.DecimalSI)
		eMemory := resource.NewQuantity(0, resource.BinarySI)
		for _, container := range pod.Spec.Containers {
			eCPU.Add(*container.Resources.Requests.Name(known.ElasticCPU, resource.DecimalSI))
			// Use BinarySI as the default format for memory so it agrees with
			// the eMemory accumulator above; the format only affects how an
			// absent request is represented, not the summed value.
			eMemory.Add(*container.Resources.Requests.Name(known.ElasticMemory, resource.BinarySI))
		}

		// Skip pods that request no elastic resources at all.
		if eCPU.IsZero() && eMemory.IsZero() {
			continue
		}

		metrics <- k8smetrics.NewLazyConstMetric(podElasticCPUDesc, k8smetrics.GaugeValue, eCPU.AsApproximateFloat64(), pod.Name, pod.Namespace)
		metrics <- k8smetrics.NewLazyConstMetric(podElasticMemoryDesc, k8smetrics.GaugeValue, eMemory.AsApproximateFloat64(), pod.Name, pod.Namespace)
	}
}

// Descriptors for the per-node resource metrics. Every descriptor carries the
// single variable label "node", has no constant labels, is marked ALPHA
// stability and has no deprecation version.
//
// The help texts of the elastic-memory, memory-allocatable and
// memory-capacity descriptors previously read "... requested by pod", which
// was copied from the pod descriptors and did not describe node metrics.
var (
	// Metrics read from the node object's status.
	nodeElasticCPUDesc = k8smetrics.NewDesc("crane_node_elastic_cpu_allocatable",
		"The elastic cpu of the node.",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeElasticMemoryDesc = k8smetrics.NewDesc("crane_node_elastic_memory_allocatable",
		"The elastic memory of the node.",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeCPUAllocatableDesc = k8smetrics.NewDesc("crane_node_cpu_allocatable",
		"The cpu allocatable of the node.",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeCPUCapacityDesc = k8smetrics.NewDesc("crane_node_cpu_capacity",
		"The cpu capacity of the node.",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeMemoryAllocatableDesc = k8smetrics.NewDesc("crane_node_memory_allocatable",
		"The memory allocatable of the node.",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeMemoryCapacityDesc = k8smetrics.NewDesc("crane_node_memory_capacity",
		"The memory capacity of the node.",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)

	// Metrics read from the node resource status getter.
	nodeCPUReservedDesc = k8smetrics.NewDesc("crane_node_cpu_reserved",
		"The reserved cpu of node",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeCPUUsageOnlineDesc = k8smetrics.NewDesc("crane_node_cpu_usage_online",
		"The online cpu usage of node",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeCPUUsageOfflineDesc = k8smetrics.NewDesc("crane_node_cpu_usage_offline",
		"The offline cpu usage of node",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeMemoryReservedDesc = k8smetrics.NewDesc("crane_node_memory_reserved",
		"The reserved memory of node",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeMemoryUsageOnlineDesc = k8smetrics.NewDesc("crane_node_memory_usage_online",
		"The online memory usage of node",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
	nodeMemoryUsageOfflineDesc = k8smetrics.NewDesc("crane_node_memory_usage_offline",
		"The offline memory usage of node",
		[]string{"node"},
		nil,
		k8smetrics.ALPHA,
		"",
	)
)

// NodeResourceCollector is a metrics collector that reports resource metrics
// for a single node: values read from the node object's status, plus values
// returned by nodeResourceGetter.
type NodeResourceCollector struct {
// BaseStableCollector is embedded from k8s.io/component-base/metrics.
k8smetrics.BaseStableCollector
// nodeName is the name used to look the node up in nodeLister.
nodeName string
// nodeLister is queried for the node each time metrics are collected.
nodeLister v1.NodeLister
// nodeResourceGetter returns the current resource status; a nil result
// means only the node-status metrics are emitted.
nodeResourceGetter func() *known.ResourceStatus
}

// NewNodeResourceCollector returns a NodeResourceCollector for the node named
// nodeName. The node object is read through nodeLister and the usage and
// reservation figures through nodeResourceGetter.
func NewNodeResourceCollector(nodeName string, nodeLister v1.NodeLister, nodeResourceGetter func() *known.ResourceStatus) *NodeResourceCollector {
	collector := new(NodeResourceCollector)
	collector.nodeName = nodeName
	collector.nodeLister = nodeLister
	collector.nodeResourceGetter = nodeResourceGetter
	return collector
}

// DescribeWithStability sends the descriptors of every metric this collector
// can emit to descs, in a fixed order: the node-status metrics first, then
// the reservation and usage metrics.
func (n *NodeResourceCollector) DescribeWithStability(descs chan<- *k8smetrics.Desc) {
	all := []*k8smetrics.Desc{
		// resource metrics from status of node
		nodeElasticCPUDesc,
		nodeElasticMemoryDesc,
		nodeCPUAllocatableDesc,
		nodeCPUCapacityDesc,
		nodeMemoryAllocatableDesc,
		nodeMemoryCapacityDesc,

		// usage metrics
		nodeCPUReservedDesc,
		nodeCPUUsageOnlineDesc,
		nodeCPUUsageOfflineDesc,
		nodeMemoryReservedDesc,
		nodeMemoryUsageOnlineDesc,
		nodeMemoryUsageOfflineDesc,
	}
	for _, desc := range all {
		descs <- desc
	}
}

// CollectWithStability emits gauges for the node named n.nodeName, each
// labelled with the node's name.
//
// It first looks the node up through the lister and emits the elastic
// CPU/memory allocatable, CPU/memory allocatable and CPU/memory capacity
// values from the node's status. It then calls the resource getter and emits
// the reserved, "online" usage and offline usage values for CPU and memory.
//
// If the node lookup fails the error is logged and nothing is emitted. If the
// getter is nil or returns nil, only the node-status metrics are emitted. Any
// individual quantity that is nil is skipped rather than dereferenced, so an
// incompletely populated ResourceStatus cannot panic the scrape.
func (n *NodeResourceCollector) CollectWithStability(metrics chan<- k8smetrics.Metric) {
	node, err := n.nodeLister.Get(n.nodeName)
	if err != nil {
		klog.ErrorS(err, "get node failed", "node", n.nodeName)
		return
	}

	// emit publishes one gauge sample for this node; nil quantities are
	// skipped because AsApproximateFloat64 would dereference them.
	emit := func(desc *k8smetrics.Desc, quantity *resource.Quantity) {
		if quantity == nil {
			return
		}
		metrics <- k8smetrics.NewLazyConstMetric(desc, k8smetrics.GaugeValue, quantity.AsApproximateFloat64(), node.Name)
	}

	// Resource metrics from the status of the node.
	emit(nodeElasticCPUDesc, node.Status.Allocatable.Name(known.ElasticCPU, resource.DecimalSI))
	emit(nodeElasticMemoryDesc, node.Status.Allocatable.Name(known.ElasticMemory, resource.BinarySI))
	emit(nodeCPUAllocatableDesc, node.Status.Allocatable.Cpu())
	emit(nodeMemoryAllocatableDesc, node.Status.Allocatable.Memory())
	emit(nodeCPUCapacityDesc, node.Status.Capacity.Cpu())
	emit(nodeMemoryCapacityDesc, node.Status.Capacity.Memory())

	if n.nodeResourceGetter == nil {
		return
	}
	resourceStatus := n.nodeResourceGetter()
	if resourceStatus == nil {
		return
	}

	emit(nodeCPUReservedDesc, resourceStatus.CPUReserved)
	// TODO: the definition of "online" CPU usage used here is incorrect.
	emit(nodeCPUUsageOnlineDesc, resourceStatus.CPUUsage)
	emit(nodeCPUUsageOfflineDesc, resourceStatus.CPUUsageOffline)

	emit(nodeMemoryReservedDesc, resourceStatus.MemoryReserved)
	// TODO: the definition of "online" memory usage used here is incorrect.
	emit(nodeMemoryUsageOnlineDesc, resourceStatus.MemoryUsage)
	emit(nodeMemoryUsageOfflineDesc, resourceStatus.MemoryUsageOffline)
}
Loading
Loading