diff --git a/README.md b/README.md
index ae07bae..4b1c518 100644
--- a/README.md
+++ b/README.md
@@ -188,9 +188,21 @@ go build -o kagent-tools .
 
 The server runs using sse transport for MCP communication.
 
+#### CLI Flags
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--port`, `-p` | `8084` | Port to run the MCP server on |
+| `--metrics-port`, `-m` | `0` (same as `--port`) | Port to run the Prometheus metrics server on |
+| `--stdio` | `false` | Use stdio for communication instead of HTTP |
+| `--tools` | `[]` (all) | Comma-separated list of tool providers to register |
+| `--read-only` | `false` | Disable tools that perform write operations |
+| `--kubeconfig` | `""` | Path to kubeconfig file (defaults to in-cluster config) |
+| `--version`, `-v` | `false` | Show version information and exit |
+
 ### Testing
 ```bash
-go test -v
+go test -v ./...
 ```
 
 ## Tool Implementation Details
@@ -243,6 +255,25 @@ Tools can be configured through environment variables:
 - `GRAFANA_URL`: Default Grafana server URL
 - `GRAFANA_API_KEY`: Default Grafana API key
 
+## Observability
+
+The MCP server exposes Prometheus metrics over HTTP at the `/metrics` path. By default, the metrics endpoint is served on the same port as the MCP server; the path is fixed and only the port is configurable. To serve metrics on a separate port, pass `--metrics-port`:
+
+```bash
+./kagent-tools --port 8084 --metrics-port 9090
+```
+
+### Exposed Metrics
+
+| Metric | Type | Labels | Description |
+|--------|------|--------|-------------|
+| `kagent_tools_mcp_server_info` | Gauge | `server_name`, `version`, `git_commit`, `build_date`, `server_mode` | Server metadata (always set to 1) |
+| `kagent_tools_mcp_registered_tools` | Gauge | `tool_name`, `tool_provider` | Set to 1 for each registered tool |
+| `kagent_tools_mcp_invocations_total` | Counter | `tool_name`, `tool_provider` | Total number of tool invocations |
+| `kagent_tools_mcp_invocations_failure_total` | Counter | `tool_name`, `tool_provider` | Total number of failed tool invocations |
+
+Standard Go runtime and process metrics are also included (goroutines, memory, CPU, file descriptors, etc.).
+
 ## Error Handling and Debugging
 
 The tools provide detailed error messages and support verbose output. When debugging issues:
@@ -258,9 +289,8 @@ Potential areas for future improvement:
 1. **Native Client Libraries**: Replace CLI calls with native Go client libraries where possible
 2. **Advanced Documentation Search**: Implement full vector search for documentation queries
 3. **Caching**: Add caching for frequently accessed data
-4. **Metrics and Observability**: Add metrics and tracing for tool usage
-5. **Configuration Management**: Enhanced configuration management and validation
-6. **Parallel Execution**: Support for parallel execution of related operations
+4. **Configuration Management**: Enhanced configuration management and validation
+5. **Parallel Execution**: Support for parallel execution of related operations
 
 ## Contributing
 
diff --git a/cmd/main.go b/cmd/main.go
index 374d7ee..943b7db 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"os/signal"
 	"runtime"
+	"strconv"
 	"strings"
 	"sync"
 	"syscall"
@@ -15,6 +16,7 @@ import (
 
 	"github.com/joho/godotenv"
 	"github.com/kagent-dev/tools/internal/logger"
+	"github.com/kagent-dev/tools/internal/metrics"
 	"github.com/kagent-dev/tools/internal/telemetry"
 	"github.com/kagent-dev/tools/internal/version"
 	"github.com/kagent-dev/tools/pkg/argo"
@@ -25,16 +27,19 @@ import (
 	"github.com/kagent-dev/tools/pkg/kubescape"
 	"github.com/kagent-dev/tools/pkg/prometheus"
 	"github.com/kagent-dev/tools/pkg/utils"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/spf13/cobra"
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/codes"
 
+	"github.com/mark3labs/mcp-go/mcp"
 	"github.com/mark3labs/mcp-go/server"
 )
 
 var (
 	port        int
+	metricsPort int
 	stdio       bool
 	tools       []string
 	kubeconfig  *string
@@ -56,6 +61,7 @@ var rootCmd = &cobra.Command{
 
 func init() {
 	rootCmd.Flags().IntVarP(&port, "port", "p", 8084, "Port to run the server on")
+	rootCmd.Flags().IntVarP(&metricsPort, "metrics-port", "m", 0, "Port to run the metrics server on (default 0: same as --port)")
 	rootCmd.Flags().BoolVar(&stdio, "stdio", false, "Use stdio for communication instead of HTTP")
 	rootCmd.Flags().StringSliceVar(&tools, "tools", []string{}, "List of tools to register. If empty, all tools are registered.")
 	rootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "Show version information and exit")
@@ -92,6 +98,11 @@ func run(cmd *cobra.Command, args []string) {
 		return
 	}
 
+	// 0 means "same as --port" - resolve it before any server logic uses it
+	if metricsPort == 0 {
+		metricsPort = port
+	}
+
 	logger.Init(stdio)
 	defer logger.Sync()
 
@@ -134,8 +145,11 @@ func run(cmd *cobra.Command, args []string) {
 		Version,
 	)
 
-	// Register tools
-	registerMCP(mcp, tools, *kubeconfig, readOnly)
+	// Register tools and wrap handlers with metrics instrumentation.
+	// registerMCP returns a map of tool_name -> tool_provider so that
+	// wrapToolHandlersWithMetrics knows which provider each tool belongs to.
+	toolProviders := registerMCP(mcp, tools, *kubeconfig, readOnly)
+	wrapToolHandlersWithMetrics(mcp, toolProviders)
 
 	// Create wait group for server goroutines
 	var wg sync.WaitGroup
@@ -146,6 +160,7 @@ func run(cmd *cobra.Command, args []string) {
 
 	// HTTP server reference (only used when not in stdio mode)
 	var httpServer *http.Server
+	var metricsServer *http.Server // Separate server for metrics if metricsPort is different from main port
 
 	// Start server based on chosen mode
 	wg.Add(1)
@@ -170,17 +185,40 @@ func run(cmd *cobra.Command, args []string) {
 			}
 		})
 
-		// Add metrics endpoint (basic implementation for e2e tests)
-		mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
-			w.Header().Set("Content-Type", "text/plain")
-			w.WriteHeader(http.StatusOK)
-
-			// Generate real runtime metrics instead of hardcoded values
-			metrics := generateRuntimeMetrics()
-			if err := writeResponse(w, []byte(metrics)); err != nil {
-				logger.Get().Error("Failed to write metrics response", "error", err)
+		// Add metrics endpoint
+		registry := metrics.InitServer() // Initialize Prometheus metrics before starting the server
+
+		if metricsPort != port { // Only start a separate metrics server if the metrics port is different from the main server port
+			// Create the metrics server outside the goroutine to avoid a race condition
+			// between the goroutine assigning metricsServer and the shutdown handler reading it
+			metricsMux := http.NewServeMux()
+			metricsMux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
+			metricsServer = &http.Server{
+				Addr:    fmt.Sprintf(":%d", metricsPort),
+				Handler: metricsMux,
 			}
-		})
+
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				logger.Get().Info("Starting Prometheus metrics endpoint on /metrics", "port", strconv.Itoa(metricsPort))
+				if err := metricsServer.ListenAndServe(); err != nil {
+					if !errors.Is(err, http.ErrServerClosed) {
+						logger.Get().Error("Metrics endpoint failed", "error", err)
+					} else {
+						logger.Get().Info("Metrics server closed gracefully.")
+					}
+				}
+			}()
+		} else {
+			logger.Get().Info("Starting Prometheus metrics endpoint on /metrics", "port", strconv.Itoa(port))
+			mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
+		}
+		serverMode := "read-write"
+		if readOnly {
+			serverMode = "read-only"
+		}
+		metrics.KagentToolsMCPServerInfo.WithLabelValues(Name, Version, GitCommit, BuildDate, serverMode).Set(1)
 
 		// Handle all other routes with the MCP server wrapped in telemetry middleware
 		mux.Handle("/", telemetry.HTTPMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -229,6 +267,19 @@ func run(cmd *cobra.Command, args []string) {
 				rootSpan.AddEvent("server.shutdown.completed")
 			}
 		}
+
+		// Gracefully shutdown metrics server if running separately
+		if !stdio && metricsServer != nil {
+			shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer shutdownCancel()
+
+			if err := metricsServer.Shutdown(shutdownCtx); err != nil {
+				logger.Get().Error("Failed to shutdown metrics server gracefully", "error", err)
+				rootSpan.RecordError(err)
+			} else {
+				logger.Get().Info("Metrics server shutdown completed")
+			}
+		}
 	}()
 
 	// Wait for all server operations to complete
@@ -242,47 +293,6 @@ func writeResponse(w http.ResponseWriter, data []byte) error {
 	return err
 }
 
-// generateRuntimeMetrics generates real runtime metrics for the /metrics endpoint
-func generateRuntimeMetrics() string {
-	var m runtime.MemStats
-	runtime.ReadMemStats(&m)
-
-	now := time.Now().Unix()
-
-	// Build metrics in Prometheus format
-	metrics := strings.Builder{}
-
-	// Go runtime info
-	metrics.WriteString("# HELP go_info Information about the Go environment.\n")
-	metrics.WriteString("# TYPE go_info gauge\n")
-	metrics.WriteString(fmt.Sprintf("go_info{version=\"%s\"} 1\n", runtime.Version()))
-
-	// Process start time
-	metrics.WriteString("# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.\n")
-	metrics.WriteString("# TYPE process_start_time_seconds gauge\n")
-	metrics.WriteString(fmt.Sprintf("process_start_time_seconds %d\n", now))
-
-	// Memory metrics
-	metrics.WriteString("# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.\n")
-	metrics.WriteString("# TYPE go_memstats_alloc_bytes gauge\n")
-	metrics.WriteString(fmt.Sprintf("go_memstats_alloc_bytes %d\n", m.Alloc))
-
-	metrics.WriteString("# HELP go_memstats_total_alloc_bytes Total number of bytes allocated, even if freed.\n")
-	metrics.WriteString("# TYPE go_memstats_total_alloc_bytes counter\n")
-	metrics.WriteString(fmt.Sprintf("go_memstats_total_alloc_bytes %d\n", m.TotalAlloc))
-
-	metrics.WriteString("# HELP go_memstats_sys_bytes Number of bytes obtained from system.\n")
-	metrics.WriteString("# TYPE go_memstats_sys_bytes gauge\n")
-	metrics.WriteString(fmt.Sprintf("go_memstats_sys_bytes %d\n", m.Sys))
-
-	// Goroutine count
-	metrics.WriteString("# HELP go_goroutines Number of goroutines that currently exist.\n")
-	metrics.WriteString("# TYPE go_goroutines gauge\n")
-	metrics.WriteString(fmt.Sprintf("go_goroutines %d\n", runtime.NumGoroutine()))
-
-	return metrics.String()
-}
-
 func runStdioServer(ctx context.Context, mcp *server.MCPServer) {
 	logger.Get().Info("Running KAgent Tools Server STDIO:", "tools", strings.Join(tools, ","))
 	stdioServer := server.NewStdioServer(mcp)
@@ -291,7 +301,11 @@ func runStdioServer(ctx context.Context, mcp *server.MCPServer) {
 	}
 }
 
-func registerMCP(mcp *server.MCPServer, enabledToolProviders []string, kubeconfig string, readOnly bool) {
+// registerMCP registers tool providers with the MCP server and returns a mapping
+// of tool_name -> tool_provider. This mapping is built using the ListTools() diff
+// technique: we snapshot the tool list before and after each provider registers,
+// so we know exactly which tools belong to which provider.
+func registerMCP(mcp *server.MCPServer, enabledToolProviders []string, kubeconfig string, readOnly bool) map[string]string {
 	// A map to hold tool providers and their registration functions
 	toolProviderMap := map[string]func(*server.MCPServer){
 		"argo":       func(s *server.MCPServer) { argo.RegisterTools(s, readOnly) },
@@ -310,11 +324,83 @@ func registerMCP(mcp *server.MCPServer, enabledToolProviders []string, kubeconfi
 			enabledToolProviders = append(enabledToolProviders, name)
 		}
 	}
+
+	// toolToProvider maps each tool name to its provider (e.g., "kubectl_get" -> "k8s").
+	// This is used later by wrapToolHandlersWithMetrics to set the correct tool_provider label.
+	toolToProvider := make(map[string]string)
+
 	for _, toolProviderName := range enabledToolProviders {
 		if registerFunc, ok := toolProviderMap[toolProviderName]; ok {
+			// Snapshot the tool list before this provider registers its tools.
+			// We need this because ListTools() returns ALL tools from ALL providers,
+			// so the only way to know which tools belong to THIS provider is to compare
+			// the list before and after registration.
+			toolsBefore := mcp.ListTools()
+
 			registerFunc(mcp)
+
+			// Determine which tools were just registered by this provider
+			// by finding tools that exist now but didn't exist before.
+			// Record each one in Prometheus so we can observe the full tool inventory.
+			for toolName := range mcp.ListTools() {
+				if _, existed := toolsBefore[toolName]; !existed {
+					metrics.KagentToolsMCPRegisteredTools.WithLabelValues(toolName, toolProviderName).Set(1)
+					toolToProvider[toolName] = toolProviderName
+				}
+			}
 		} else {
 			logger.Get().Error("Unknown tool specified", "provider", toolProviderName)
 		}
 	}
+
+	return toolToProvider
+}
+
+// wrapToolHandlersWithMetrics applies the wrapper/middleware pattern to instrument
+// all registered MCP tool handlers with Prometheus invocation counters.
+//
+// How it works:
+//  1. Grab all registered tools from the MCP server using ListTools()
+//  2. For each tool, wrap its handler with a function that increments metrics
+//  3. Replace all tools in the MCP server using SetTools()
+//
+// The wrapper function:
+//   - Increments kagent_tools_mcp_invocations_total on every call
+//   - Increments kagent_tools_mcp_invocations_failure_total when the handler returns a
+//     non-nil Go error OR when result.IsError is true (the MCP convention for tool-level
+//     failures - handlers return NewToolResultError(...), nil, not a Go error)
+//   - Calls the original handler unchanged - the tool's behaviour is not affected
+//
+// This uses the standard middleware/decorator pattern: the original handler and the
+// wrapped handler have the same function signature, so they are interchangeable.
+// No changes are required in any pkg/ file - all instrumentation happens centrally here.
+func wrapToolHandlersWithMetrics(mcpServer *server.MCPServer, toolToProvider map[string]string) {
+	allTools := mcpServer.ListTools()
+	wrapped := make([]server.ServerTool, 0, len(allTools))
+
+	for name, st := range allTools {
+		originalHandler := st.Handler
+		toolName := name // capture for closure
+		provider := toolToProvider[toolName]
+
+		wrapped = append(wrapped, server.ServerTool{
+			Tool: st.Tool,
+			Handler: func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+				metrics.KagentToolsMCPInvocationsTotal.WithLabelValues(toolName, provider).Inc()
+
+				result, err := originalHandler(ctx, req)
+
+				// Count as failure if the Go error is non-nil OR if the tool returned
+				// a result with IsError=true (the MCP convention for tool-level failures,
+				// which always return nil for the Go error).
+				if err != nil || (result != nil && result.IsError) {
+					metrics.KagentToolsMCPInvocationsFailureTotal.WithLabelValues(toolName, provider).Inc()
+				}
+
+				return result, err
+			},
+		})
+	}
+
+	mcpServer.SetTools(wrapped...)
 }
diff --git a/cmd/metrics_wrap_test.go b/cmd/metrics_wrap_test.go
new file mode 100644
index 0000000..0b8ca73
--- /dev/null
+++ b/cmd/metrics_wrap_test.go
@@ -0,0 +1,127 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	"github.com/kagent-dev/tools/internal/metrics"
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+	promtest "github.com/prometheus/client_golang/prometheus/testutil"
+)
+
+// newTestServer creates a fresh MCP server and resets the metric counters so
+// tests do not interfere with each other.
+func newTestServer() *server.MCPServer {
+	metrics.KagentToolsMCPInvocationsTotal.Reset()
+	metrics.KagentToolsMCPInvocationsFailureTotal.Reset()
+	return server.NewMCPServer("test-server", "test")
+}
+
+// invokeWrapped registers handler on s, wraps all handlers with metrics, then
+// calls the wrapped handler for toolName and returns its result.
+func invokeWrapped(t *testing.T, s *server.MCPServer, toolName string, provider string, handler server.ToolHandlerFunc) (*mcp.CallToolResult, error) {
+	t.Helper()
+	s.AddTool(mcp.Tool{Name: toolName}, handler)
+	wrapToolHandlersWithMetrics(s, map[string]string{toolName: provider})
+	st, ok := s.ListTools()[toolName]
+	if !ok {
+		t.Fatalf("tool %q not found after wrapping", toolName)
+	}
+	return st.Handler(context.Background(), mcp.CallToolRequest{})
+}
+
+// TestWrapToolHandlersWithMetrics_IsErrorIncrementsFailureCounter is the
+// critical regression test for the bug identified in PR review:
+//
+//	Handlers signal tool-level failures via NewToolResultError(...), nil
+//	(result.IsError=true, Go error=nil), so checking only `err != nil` would
+//	never count these as failures.
+//
+// To replicate manually:
+//
+//	go test -v -run TestWrapToolHandlersWithMetrics_IsErrorIncrementsFailureCounter ./cmd/
+func TestWrapToolHandlersWithMetrics_IsErrorIncrementsFailureCounter(t *testing.T) {
+	s := newTestServer()
+
+	result, err := invokeWrapped(t, s, "failing_tool", "test",
+		func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+			// This is the pattern used 214 times across pkg/ - returns a tool-level
+			// error with IsError=true but a nil Go error.
+			return mcp.NewToolResultError("kubectl: resource not found"), nil
+		},
+	)
+
+	if err != nil {
+		t.Fatalf("expected nil Go error from handler, got: %v", err)
+	}
+	if !result.IsError {
+		t.Fatal("expected result.IsError=true")
+	}
+
+	total := promtest.ToFloat64(metrics.KagentToolsMCPInvocationsTotal.WithLabelValues("failing_tool", "test"))
+	if total != 1 {
+		t.Errorf("invocations_total: expected 1, got %v", total)
+	}
+
+	failures := promtest.ToFloat64(metrics.KagentToolsMCPInvocationsFailureTotal.WithLabelValues("failing_tool", "test"))
+	if failures != 1 {
+		t.Errorf("invocations_failure_total: expected 1, got %v (IsError=true was not counted as failure)", failures)
+	}
+}
+
+// TestWrapToolHandlersWithMetrics_SuccessDoesNotIncrementFailureCounter verifies
+// that a successful tool call does not touch the failure counter.
+//
+// To replicate manually:
+//
+//	go test -v -run TestWrapToolHandlersWithMetrics_SuccessDoesNotIncrementFailureCounter ./cmd/
+func TestWrapToolHandlersWithMetrics_SuccessDoesNotIncrementFailureCounter(t *testing.T) {
+	s := newTestServer()
+
+	_, err := invokeWrapped(t, s, "success_tool", "test",
+		func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+			return mcp.NewToolResultText("all good"), nil
+		},
+	)
+
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	total := promtest.ToFloat64(metrics.KagentToolsMCPInvocationsTotal.WithLabelValues("success_tool", "test"))
+	if total != 1 {
+		t.Errorf("invocations_total: expected 1, got %v", total)
+	}
+
+	failures := promtest.ToFloat64(metrics.KagentToolsMCPInvocationsFailureTotal.WithLabelValues("success_tool", "test"))
+	if failures != 0 {
+		t.Errorf("invocations_failure_total: expected 0 for a successful call, got %v", failures)
+	}
+}
+
+// TestWrapToolHandlersWithMetrics_GoErrorIncrementsFailureCounter verifies
+// that a real Go error (e.g. infrastructure failure) is also counted.
+//
+// To replicate manually:
+//
+//	go test -v -run TestWrapToolHandlersWithMetrics_GoErrorIncrementsFailureCounter ./cmd/
+func TestWrapToolHandlersWithMetrics_GoErrorIncrementsFailureCounter(t *testing.T) {
+	s := newTestServer()
+
+	_, err := invokeWrapped(t, s, "broken_tool", "test",
+		func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+			return nil, fmt.Errorf("connection refused")
+		},
+	)
+
+	if err == nil {
+		t.Fatal("expected a Go error, got nil")
+	}
+
+	failures := promtest.ToFloat64(metrics.KagentToolsMCPInvocationsFailureTotal.WithLabelValues("broken_tool", "test"))
+	if failures != 1 {
+		t.Errorf("invocations_failure_total: expected 1 for Go error, got %v", failures)
+	}
+}
diff --git a/dashboard/grafana-dash-example.png b/dashboard/grafana-dash-example.png
new file mode 100644
index 0000000..6ffe311
Binary files /dev/null and b/dashboard/grafana-dash-example.png differ
diff --git a/dashboard/grafana-dashboard.json b/dashboard/grafana-dashboard.json
new file mode 100644
index 0000000..801a052
--- /dev/null
+++ b/dashboard/grafana-dashboard.json
@@ -0,0 +1,819 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": 29,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "none",
+        "justifyMode": "center",
+        "orientation": "auto",
+        "percentChangeColorMode": "standard",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "/^version$/",
+          "values": false
+        },
+        "showPercentChange": false,
+        "text": {},
+        "textMode": "value",
+        "wideLayout": true
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "kagent_tools_mcp_server_info",
+          "format": "table",
+          "instant": true,
+          "legendFormat": "__auto",
+          "range": false,
+          "refId": "A"
+        }
+      ],
+      "title": "Server Version",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "blue",
+                "value": 0
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 6,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "percentChangeColorMode": "standard",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showPercentChange": false,
+        "textMode": "auto",
+        "wideLayout": true
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "count(kagent_tools_mcp_registered_tools)",
+          "instant": true,
+          "legendFormat": "Registered Tools",
+          "range": false,
+          "refId": "A"
+        }
+      ],
+      "title": "Total Registered Tools",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "yellow",
+                "value": 10
+              },
+              {
+                "color": "red",
+                "value": 100
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 12,
+        "y": 0
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "percentChangeColorMode": "standard",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showPercentChange": false,
+        "textMode": "auto",
+        "wideLayout": true
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "sum(increase(kagent_tools_mcp_invocations_total[5m]))",
+          "instant": true,
+          "legendFormat": "Total Invocations (5m)",
+          "range": false,
+          "refId": "A"
+        }
+      ],
+      "title": "Invocations (Last 5m)",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": 0
+              },
+              {
+                "color": "yellow",
+                "value": 95
+              },
+              {
+                "color": "green",
+                "value": 99
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 18,
+        "y": 0
+      },
+      "id": 4,
+      "options": {
+        "minVizHeight": 75,
+        "minVizWidth": 75,
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "sizing": "auto"
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "100 - (sum(rate(kagent_tools_mcp_invocations_failure_total[5m])) / sum(rate(kagent_tools_mcp_invocations_total[5m])) * 100)",
+          "instant": true,
+          "legendFormat": "Success Rate",
+          "range": false,
+          "refId": "A"
+        }
+      ],
+      "title": "Success Rate",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "opacity",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "showValues": false,
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 4
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(kagent_tools_mcp_invocations_total[$__rate_interval])) by (tool_provider)",
+          "legendFormat": "{{tool_provider}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Invocation Rate by Provider",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "opacity",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "showValues": false,
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "normal"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Failures"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 4
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(kagent_tools_mcp_invocations_total[$__rate_interval]))",
+          "legendFormat": "Total",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(kagent_tools_mcp_invocations_failure_total[$__rate_interval]))",
+          "hide": false,
+          "legendFormat": "Failures",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Total Invocations vs Failures",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            }
+          },
+          "mappings": []
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 12
+      },
+      "id": 7,
+      "options": {
+        "displayLabels": [
+          "percent"
+        ],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": [
+            "value"
+          ]
+        },
+        "pieType": "donut",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "sort": "desc",
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "sum by(tool_provider) (kagent_tools_mcp_registered_tools)",
+          "legendFormat": "{{tool_provider}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Tools by Provider",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "custom": {
+            "align": "auto",
+            "cellOptions": {
+              "type": "auto"
+            },
+            "footer": {
+              "reducers": []
+            },
+            "inspect": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Invocations"
+            },
+            "properties": [
+              {
+                "id": "custom.cellOptions",
+                "value": {
+                  "type": "color-background"
+                }
+              },
+              {
+                "id": "color",
+                "value": {
+                  "mode": "continuous-GrYlRd"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Failures"
+            },
+            "properties": [
+              {
+                "id": "custom.cellOptions",
+                "value": {
+                  "type": "color-background"
+                }
+              },
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red",
+                  "mode": "thresholds"
+                }
+              },
+              {
+                "id": "thresholds",
+                "value": {
+                  "mode": "absolute",
+                  "steps": [
+                    {
+                      "color": "green",
+                      "value": 0
+                    },
+                    {
+                      "color": "yellow",
+                      "value": 1
+                    },
+                    {
+                      "color": "red",
+                      "value": 10
+                    }
+                  ]
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 16,
+        "x": 8,
+        "y": 12
+      },
+      "id": 8,
+      "options": {
+        "cellHeight": "sm",
+        "showHeader": true,
+        "sortBy": [
+          {
+            "desc": true,
+            "displayName": "Invocations"
+          }
+        ]
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "sum by(tool_name, tool_provider) (kagent_tools_mcp_invocations_total)",
+          "format": "table",
+          "instant": true,
+          "legendFormat": "__auto",
+          "range": false,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "sum by(tool_name, tool_provider) (kagent_tools_mcp_invocations_failure_total)",
+          "format": "table",
+          "hide": false,
+          "instant": true,
+          "legendFormat": "__auto",
+          "range": false,
+          "refId": "B"
+        }
+      ],
+      "title": "Top Invoked Tools",
+      "transformations": [
+        {
+          "id": "seriesToColumns",
+          "options": {
+            "byField": "tool_name"
+          }
+        },
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": {
+              "Time": true,
+              "Time 1": true,
+              "Time 2": true,
+              "tool_provider 2": true
+            },
+            "includeByName": {},
+            "indexByName": {
+              "Time 1": 4,
+              "Time 2": 5,
+              "Value #A": 2,
+              "Value #B": 3,
+              "tool_name": 0,
+              "tool_provider 1": 1,
+              "tool_provider 2": 6
+            },
+            "renameByName": {
+              "Value #A": "Invocations",
+              "Value #B": "Failures",
+              "tool_name": "Tool Name",
+              "tool_provider 1": "Provider"
+            }
+          }
+        }
+      ],
+      "type": "table"
+    }
+  ],
+  "preload": false,
+  "refresh": "30s",
+  "schemaVersion": 42,
+  "tags": [
+    "kagent",
+    "mcp",
+    "tools"
+  ],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "text": "Prometheus",
+          "value": "prometheus"
+        },
+        "includeAll": false,
+        "label": "Datasource",
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "",
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "kAgent Tools - MCP Observability",
+  "uid": "kagent-tools-mcp",
+  "version": 1
+}
\ No newline at end of file
diff --git a/go.mod b/go.mod
index a2f27fc..e796d12 100644
--- a/go.mod
+++ b/go.mod
@@ -9,6 +9,8 @@ require (
 	github.com/mark3labs/mcp-go v0.43.2
 	github.com/onsi/ginkgo/v2 v2.27.2
 	github.com/onsi/gomega v1.38.2
+	github.com/prometheus/client_golang v1.23.2
+	github.com/prometheus/client_model v0.6.2
 	github.com/spf13/cobra v1.10.2
 	github.com/stretchr/testify v1.11.1
 	github.com/tmc/langchaingo v0.1.14
@@ -109,6 +111,7 @@ require (
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/klauspost/compress v1.18.4 // indirect
 	github.com/kubescape/go-logger v0.0.26 // indirect
+	github.com/kylelemons/godebug v1.1.0 // indirect
 	github.com/mackerelio/go-osstat v0.2.6 // indirect
 	github.com/mailru/easyjson v0.9.1 // indirect
 	github.com/mattn/go-colorable v0.1.14 // indirect
@@ -129,8 +132,6 @@ require (
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pkoukk/tiktoken-go v0.1.8 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
-	github.com/prometheus/client_golang v1.23.2 // indirect
-	github.com/prometheus/client_model v0.6.2 // indirect
 	github.com/prometheus/common v0.67.5 // indirect
 	github.com/prometheus/procfs v0.19.2 // indirect
 	github.com/sagikazarmark/locafero v0.12.0 // indirect
diff --git a/helm/kagent-tools/templates/deployment.yaml b/helm/kagent-tools/templates/deployment.yaml
index 001caef..9787694 100644
--- a/helm/kagent-tools/templates/deployment.yaml
+++ b/helm/kagent-tools/templates/deployment.yaml
@@ -59,6 +59,8 @@ spec:
           args:
           - "--port"
           - "{{ .Values.service.ports.tools.targetPort }}"
+          - "--metrics-port"
+          - "{{ .Values.tools.metrics.port | default .Values.service.ports.tools.targetPort }}"
           {{- if .Values.tools.enabledTools }}
           - "--tools={{ join "," .Values.tools.enabledTools }}"
           {{- end }}
@@ -98,6 +100,9 @@ spec:
             - name: http-tools
               containerPort: {{ .Values.service.ports.tools.targetPort }}
               protocol: TCP
+            {{- if and .Values.tools.metrics.port (ne (toString .Values.tools.metrics.port) (toString .Values.service.ports.tools.targetPort)) }}
+            - {name: http-metrics, containerPort: {{ .Values.tools.metrics.port }}, protocol: TCP}  # only for a dedicated metrics port; a shared port is already declared as http-tools
+            {{- end }}
           readinessProbe:
             tcpSocket:
               port: http-tools
diff --git a/helm/kagent-tools/templates/service.yaml b/helm/kagent-tools/templates/service.yaml
index 55c7fd2..f578670 100644
--- a/helm/kagent-tools/templates/service.yaml
+++ b/helm/kagent-tools/templates/service.yaml
@@ -19,3 +19,22 @@ spec:
       name: tools
   selector:
     {{- include "kagent.selectorLabels" . | nindent 4 }}
+
+---
+apiVersion: v1
+kind: Service  # separate Service that exposes only the metrics port
+metadata:
+  name: {{ include "kagent.fullname" . }}-metrics
+  namespace: {{ include "kagent.namespace" . }}
+  labels:
+    {{- include "kagent.labels" . | nindent 4 }}
+    app.kubernetes.io/component: metrics  # the ServiceMonitor selector matches on this label
+spec:
+  selector:
+    {{- include "kagent.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: prometheus-metrics  # port name referenced by the ServiceMonitor endpoint
+      protocol: TCP
+      port: {{ .Values.tools.metrics.port | default .Values.service.ports.tools.targetPort }}  # falls back to the tools targetPort when tools.metrics.port is empty
+      targetPort: {{ .Values.tools.metrics.port | default .Values.service.ports.tools.targetPort }}
+ 
\ No newline at end of file
diff --git a/helm/kagent-tools/templates/servicemonitor.yaml b/helm/kagent-tools/templates/servicemonitor.yaml
new file mode 100644
index 0000000..ded05cd
--- /dev/null
+++ b/helm/kagent-tools/templates/servicemonitor.yaml
@@ -0,0 +1,23 @@
+
+{{- if .Values.tools.metrics.servicemonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor  # rendered only when tools.metrics.servicemonitor.enabled is true
+metadata:
+  name: {{ include "kagent.fullname" . }}
+  namespace: {{ include "kagent.namespace" . }}
+  labels:
+    {{- toYaml .Values.tools.metrics.servicemonitor.labels | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "kagent.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: metrics  # label carried by the "-metrics" Service
+  namespaceSelector:
+    matchNames:
+      - {{ include "kagent.namespace" . }}
+  endpoints:
+    - port: prometheus-metrics  # port name defined on the "-metrics" Service
+      interval: {{ .Values.tools.metrics.servicemonitor.interval | default "30s" }}
+      scrapeTimeout: {{ .Values.tools.metrics.servicemonitor.scrapeTimeout | default "10s" }}
+      path: {{ .Values.tools.metrics.servicemonitor.path | default "/metrics" }}
+{{- end }}
diff --git a/helm/kagent-tools/values.yaml b/helm/kagent-tools/values.yaml
index 556f56e..dd9ef09 100644
--- a/helm/kagent-tools/values.yaml
+++ b/helm/kagent-tools/values.yaml
@@ -5,6 +5,15 @@ global:
   tag: ""
 
 tools:
+  metrics:
+    # port defaults to the main --port value (same server). Set explicitly for a dedicated metrics port.
+    port: ""
+    servicemonitor:
+      enabled: false
+      interval: 30s
+      scrapeTimeout: 10s
+      labels:
+        release: prometheus
   loglevel: "debug"
   # List of tool providers to enable. Empty list means all tools are enabled.
   # Available: k8s, helm, istio, cilium, argo, prometheus, kubescape, utils
diff --git a/internal/metrics/monitoring_server.go b/internal/metrics/monitoring_server.go
new file mode 100644
index 0000000..275a01f
--- /dev/null
+++ b/internal/metrics/monitoring_server.go
@@ -0,0 +1,69 @@
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/collectors"
+)
+
+// kAgent Tools MCP server metric definitions; InitServer registers all of them.
+var (
+	// KagentToolsMCPServerInfo exposes server build and mode details as labels.
+	KagentToolsMCPServerInfo = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "kagent_tools_mcp_server_info",
+			Help: "Information about the MCP server including version and build details",
+		},
+		[]string{
+			"server_name", "version", "git_commit", "build_date",
+			"server_mode", // e.g., "read-only" or "read-write"
+		},
+	)
+
+	// KagentToolsMCPRegisteredTools is meant to hold one series per registered
+	// tool, labelled with the tool name and the provider it belongs to.
+	KagentToolsMCPRegisteredTools = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "kagent_tools_mcp_registered_tools",
+			Help: "Set to 1 for each registered MCP tool",
+		},
+		[]string{"tool_name", "tool_provider"},
+	)
+
+	// KagentToolsMCPInvocationsTotal counts tool invocations per tool and provider.
+	KagentToolsMCPInvocationsTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "kagent_tools_mcp_invocations_total",
+			Help: "Total number of MCP tool invocations",
+		},
+		[]string{"tool_name", "tool_provider"},
+	)
+
+	// KagentToolsMCPInvocationsFailureTotal counts failed tool invocations per
+	// tool and provider.
+	KagentToolsMCPInvocationsFailureTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "kagent_tools_mcp_invocations_failure_total",
+			Help: "Total number of failed MCP tool invocations",
+		},
+		[]string{"tool_name", "tool_provider"},
+	)
+)
+
+// InitServer builds a dedicated Prometheus registry (separate from the default
+// global one) holding the Go runtime collector, the process collector, and the
+// kAgent Tools MCP server metrics, and returns it.
+func InitServer() *prometheus.Registry {
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(
+		// Go runtime metrics (goroutines, GC stats, etc.).
+		collectors.NewGoCollector(),
+		// Process metrics (CPU, memory, file descriptors, etc.).
+		collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
+		// kAgent Tools MCP server metrics.
+		KagentToolsMCPServerInfo,
+		KagentToolsMCPRegisteredTools,
+		KagentToolsMCPInvocationsTotal,
+		KagentToolsMCPInvocationsFailureTotal,
+	)
+	return reg
+}
diff --git a/internal/metrics/monitoring_server_test.go b/internal/metrics/monitoring_server_test.go
new file mode 100644
index 0000000..495c3e1
--- /dev/null
+++ b/internal/metrics/monitoring_server_test.go
@@ -0,0 +1,268 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+)
+
+// TestInitServer_ReturnsRegistry checks that InitServer hands back a registry.
+func TestInitServer_ReturnsRegistry(t *testing.T) {
+	if InitServer() == nil {
+		t.Fatal("InitServer() returned nil registry")
+	}
+}
+
+// TestInitServer_GathersMetrics checks that a fresh registry gathers without
+// error and yields at least one metric family.
+func TestInitServer_GathersMetrics(t *testing.T) {
+	gathered, err := InitServer().Gather()
+	if err != nil {
+		t.Fatalf("Failed to gather metrics: %v", err)
+	}
+
+	if len(gathered) < 1 {
+		t.Fatal("Expected at least one metric family from Go/process collectors, got none")
+	}
+}
+
+func TestInitServer_RegistersCustomMetrics(t *testing.T) {
+	// Vectors only show up in Gather once they have a child series, so start
+	// from fresh vectors, touch each one, and restore clean state afterwards.
+	resetMetrics()
+	t.Cleanup(resetMetrics)
+	registry := InitServer()
+	KagentToolsMCPServerInfo.WithLabelValues("srv", "v0", "sha", "date", "read-only").Set(1)
+	KagentToolsMCPRegisteredTools.WithLabelValues("probe_tool", "probe").Set(1)
+	KagentToolsMCPInvocationsTotal.WithLabelValues("probe_tool", "probe").Inc()
+	KagentToolsMCPInvocationsFailureTotal.WithLabelValues("probe_tool", "probe").Inc()
+
+	families, err := registry.Gather()
+	if err != nil {
+		t.Fatalf("Failed to gather metrics: %v", err)
+	}
+	for _, name := range []string{
+		"go_goroutines", "go_memstats_alloc_bytes",
+		"kagent_tools_mcp_server_info", "kagent_tools_mcp_registered_tools",
+		"kagent_tools_mcp_invocations_total", "kagent_tools_mcp_invocations_failure_total",
+	} {
+		if findMetricFamily(families, name) == nil {
+			t.Errorf("Expected metric %q to be registered", name)
+		}
+	}
+}
+
+// TestKagentToolsMCPServerInfo_SetAndGather sets one info series and checks
+// that it is gathered with the expected labels and a gauge value of 1.
+func TestKagentToolsMCPServerInfo_SetAndGather(t *testing.T) {
+	// Start from fresh vectors so series left behind by another test cannot
+	// change the series count asserted below.
+	resetMetrics()
+	registry := InitServer()
+
+	// Set the server info metric (values follow the vector's label order).
+	KagentToolsMCPServerInfo.WithLabelValues(
+		"test-server", "v0.0.1", "abc123", "2026-02-12", "read-write",
+	).Set(1)
+
+	families, err := registry.Gather()
+	if err != nil {
+		t.Fatalf("Failed to gather metrics: %v", err)
+	}
+
+	found := findMetricFamily(families, "kagent_tools_mcp_server_info")
+	if found == nil {
+		t.Fatal("Expected kagent_tools_mcp_server_info metric to be present")
+	}
+
+	metrics := found.GetMetric()
+	if len(metrics) != 1 {
+		t.Fatalf("Expected 1 time series, got %d", len(metrics))
+	}
+
+	// Verify label values; the count check makes a missing label fail as well.
+	expectedLabels := map[string]string{
+		"server_name": "test-server",
+		"version":     "v0.0.1",
+		"git_commit":  "abc123",
+		"build_date":  "2026-02-12",
+		"server_mode": "read-write",
+	}
+	if got := len(metrics[0].GetLabel()); got != len(expectedLabels) {
+		t.Errorf("Expected %d labels, got %d", len(expectedLabels), got)
+	}
+	for _, label := range metrics[0].GetLabel() {
+		if expected, ok := expectedLabels[label.GetName()]; !ok {
+			t.Errorf("Unexpected label %q", label.GetName())
+		} else if label.GetValue() != expected {
+			t.Errorf("Label %q: expected %q, got %q", label.GetName(), expected, label.GetValue())
+		}
+	}
+
+	// Verify gauge value is 1
+	if metrics[0].GetGauge().GetValue() != 1 {
+		t.Errorf("Expected gauge value 1, got %f", metrics[0].GetGauge().GetValue())
+	}
+}
+
+func TestKagentToolsMCPRegisteredTools_SetAndGather(t *testing.T) {
+	// Fresh vectors: tools registered by other tests must not change the count.
+	resetMetrics()
+	registry := InitServer()
+
+	// Register two tools that belong to different providers
+	KagentToolsMCPRegisteredTools.WithLabelValues("kubectl_get", "k8s").Set(1)
+	KagentToolsMCPRegisteredTools.WithLabelValues("helm_list", "helm").Set(1)
+
+	families, err := registry.Gather()
+	if err != nil {
+		t.Fatalf("Failed to gather metrics: %v", err)
+	}
+
+	found := findMetricFamily(families, "kagent_tools_mcp_registered_tools")
+	if found == nil {
+		t.Fatal("Expected kagent_tools_mcp_registered_tools metric to be present")
+	}
+	if got := len(found.GetMetric()); got != 2 {
+		t.Fatalf("Expected 2 time series (one per tool), got %d", got)
+	}
+}
+
+func TestKagentToolsMCPInvocationsTotal_IncAndGather(t *testing.T) {
+	// Counters only go up, so swap in fresh vectors first: a repeated run in the
+	// same process (go test -count=2) would otherwise see kubectl_get above 2.
+	resetMetrics()
+	registry := InitServer()
+
+	// Simulate a few tool invocations
+	KagentToolsMCPInvocationsTotal.WithLabelValues("kubectl_get", "k8s").Inc()
+	KagentToolsMCPInvocationsTotal.WithLabelValues("kubectl_get", "k8s").Inc()
+	KagentToolsMCPInvocationsTotal.WithLabelValues("helm_list", "helm").Inc()
+
+	families, err := registry.Gather()
+	if err != nil {
+		t.Fatalf("Failed to gather metrics: %v", err)
+	}
+
+	found := findMetricFamily(families, "kagent_tools_mcp_invocations_total")
+	if found == nil {
+		t.Fatal("Expected kagent_tools_mcp_invocations_total metric to be present")
+	}
+
+	// Collect every series' counter by tool name so a missing series fails too.
+	got := map[string]float64{}
+	for _, m := range found.GetMetric() {
+		for _, label := range m.GetLabel() {
+			if label.GetName() == "tool_name" {
+				got[label.GetValue()] = m.GetCounter().GetValue()
+			}
+		}
+	}
+	if len(got) != 2 || got["kubectl_get"] != 2 || got["helm_list"] != 1 {
+		t.Errorf("Expected kubectl_get=2 and helm_list=1, got %v", got)
+	}
+}
+
+func TestKagentToolsMCPInvocationsFailureTotal_IncAndGather(t *testing.T) {
+	// Counters only go up, so swap in fresh vectors first: a repeated run in the
+	// same process (go test -count=2) would otherwise read a value above 1.
+	resetMetrics()
+	registry := InitServer()
+
+	// Simulate a tool failure
+	KagentToolsMCPInvocationsFailureTotal.WithLabelValues("helm_install", "helm").Inc()
+
+	families, err := registry.Gather()
+	if err != nil {
+		t.Fatalf("Failed to gather metrics: %v", err)
+	}
+
+	found := findMetricFamily(families, "kagent_tools_mcp_invocations_failure_total")
+	if found == nil {
+		t.Fatal("Expected kagent_tools_mcp_invocations_failure_total metric to be present")
+	}
+
+	metrics := found.GetMetric()
+	if len(metrics) != 1 {
+		t.Fatalf("Expected 1 time series, got %d", len(metrics))
+	}
+
+	if metrics[0].GetCounter().GetValue() != 1 {
+		t.Errorf("Expected failure counter to be 1, got %f", metrics[0].GetCounter().GetValue())
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"tool_name":     "helm_install",
+		"tool_provider": "helm",
+	}
+	for _, label := range metrics[0].GetLabel() {
+		if expected, ok := expectedLabels[label.GetName()]; !ok {
+			t.Errorf("Unexpected label %q", label.GetName())
+		} else if label.GetValue() != expected {
+			t.Errorf("Label %q: expected %q, got %q", label.GetName(), expected, label.GetValue())
+		}
+	}
+}
+
+// findMetricFamily returns the first family called name, or nil if none matches.
+func findMetricFamily(families []*dto.MetricFamily, name string) *dto.MetricFamily {
+	for i := range families {
+		if mf := families[i]; mf.GetName() == name {
+			return mf
+		}
+	}
+	return nil
+}
+
+// resetMetrics replaces every package-level metric vector with a newly built,
+// empty one, mirroring the definitions in monitoring_server.go. Registries
+// returned by earlier InitServer calls keep the vectors they registered.
+func resetMetrics() {
+	KagentToolsMCPServerInfo = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "kagent_tools_mcp_server_info",
+			Help: "Information about the MCP server including version and build details",
+		},
+		[]string{
+			"server_name",
+			"version",
+			"git_commit",
+			"build_date",
+			"server_mode",
+		},
+	)
+
+	// One series per tool, labelled with the provider it belongs to.
+	KagentToolsMCPRegisteredTools = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "kagent_tools_mcp_registered_tools",
+			Help: "Set to 1 for each registered MCP tool",
+		},
+		[]string{"tool_name", "tool_provider"},
+	)
+
+	KagentToolsMCPInvocationsTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "kagent_tools_mcp_invocations_total",
+			Help: "Total number of MCP tool invocations",
+		},
+		[]string{"tool_name", "tool_provider"},
+	)
+
+	KagentToolsMCPInvocationsFailureTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "kagent_tools_mcp_invocations_failure_total",
+			Help: "Total number of failed MCP tool invocations",
+		},
+		[]string{"tool_name", "tool_provider"},
+	)
+}
+
+// TestMain runs once for the whole test binary, not once per test: it installs
+// fresh metric vectors via resetMetrics before any test executes.
+func TestMain(m *testing.M) {
+	resetMetrics()
+	m.Run()
+}