diff --git a/internal/llm/client.go b/internal/llm/client.go index 081da2c..82ea7f1 100644 --- a/internal/llm/client.go +++ b/internal/llm/client.go @@ -17,6 +17,7 @@ import ( "strings" "sync" "time" + "unicode" "unicode/utf8" "github.com/BlackVectorOps/semantic_firewall/v4/pkg/models" @@ -184,8 +185,11 @@ func executeOpenAIRaw(ctx context.Context, sysPrompt, userMsg, apiKey, model, ap userItem.Content = json.RawMessage(userJSON) reqBody := models.OpenAIResponsesRequest{ - Model: model, - Store: true, + Model: model, + // Do not retain payloads server-side. Audited commit messages and diffs + // can originate from private/proprietary repositories; persisting them on + // the provider is an unnecessary data-exposure surface for a security tool. + Store: false, Items: []models.OpenAIItem{sysItem, userItem}, ResponseFormat: &models.OpenAIRespFmt{Type: "json_object"}, } @@ -411,8 +415,45 @@ func validateOutput(res models.LLMResult) error { return fmt.Errorf("invalid verdict type '%s'", res.Verdict) } - forbiddenPhrases := []string{"ignore previous", "system prompt"} - lowerEv := strings.ToLower(res.Evidence) + // Structural bounds: the evidence field is a plain-string summary. An + // over-long or control-char-laden value is itself a signal that the model + // was steered off-protocol, independent of any specific phrase. + if utf8.RuneCountInString(res.Evidence) > models.MaxEvidenceRunes { + return fmt.Errorf("evidence exceeds maximum length (%d runes)", models.MaxEvidenceRunes) + } + for _, r := range res.Evidence { + // Allow common whitespace; reject other control characters that have no + // place in a plain-text summary and are frequently used to smuggle + // delimiters or terminal escapes past naive substring checks. + if r == '\n' || r == '\r' || r == '\t' { + continue + } + if r < 0x20 || r == 0x7f { + return fmt.Errorf("evidence contains disallowed control character U+%04X", r) + } + } + + // Normalize before matching so spacing/case tricks do not defeat the check. + // This is a defense-in-depth backstop, not the primary control: collapse + // runs of whitespace and lowercase, then scan for known injection markers. + lowerEv := collapseWhitespace(strings.ToLower(res.Evidence)) + forbiddenPhrases := []string{ + "ignore previous", + "ignore prior", + "ignore the above", + "ignore all previous", + "disregard previous", + "disregard prior", + "disregard the above", + "system prompt", + "system message", + "developer message", + "you are now", + "new instructions", + "override instructions", + "begin data", + "end data", + } for _, phrase := range forbiddenPhrases { if strings.Contains(lowerEv, phrase) { return fmt.Errorf("unsafe content: '%s'", phrase) @@ -421,6 +462,28 @@ func validateOutput(res models.LLMResult) error { return nil } +// collapseWhitespace lowercases nothing (caller does that) but reduces every +// run of Unicode whitespace to a single ASCII space, defeating the common +// "i g n o r e" / "ignore\u00a0previous" spacing evasions against the phrase +// denylist above. +func collapseWhitespace(s string) string { + var b strings.Builder + b.Grow(len(s)) + prevSpace := false + for _, r := range s { + if unicode.IsSpace(r) { + if !prevSpace { + b.WriteByte(' ') + prevSpace = true + } + continue + } + b.WriteRune(r) + prevSpace = false + } + return b.String() +} + func parseLLMJSON(content string) (models.LLMResult, error) { cleanContent := cleanJSONMarkdown(content) var result models.LLMResult @@ -476,3 +539,5 @@ func (t *testProxyTransport) RoundTrip(req *http.Request) (*http.Response, error } return rt.RoundTrip(req) } + + diff --git a/pkg/analysis/loop/scev.go b/pkg/analysis/loop/scev.go index 8ef1076..4b39b59 100644 --- a/pkg/analysis/loop/scev.go +++ b/pkg/analysis/loop/scev.go @@ -457,27 +457,22 @@ func deriveTripCount(loop *Loop) { return } - var isUpCounting, ivOnLeft bool + var isUpCounting bool var isInclusive, isNEQ bool switch binOp.Op { case token.LSS: isUpCounting = true - ivOnLeft = true case token.LEQ: isUpCounting = true - ivOnLeft = true isInclusive = true case token.GTR: isUpCounting = false - ivOnLeft = true case token.GEQ: isUpCounting = false - ivOnLeft = true isInclusive = true case token.NEQ: isNEQ = true - ivOnLeft = true default: loop.TripCount = &SCEVUnknown{Value: nil} return @@ -497,7 +492,11 @@ func deriveTripCount(loop *Loop) { } else if found := findIV(binOp.Y); found != nil { iv = found limit = binOp.X - ivOnLeft = !ivOnLeft + // IV is on the right-hand side (e.g. "limit < i"). Normalize to the + // canonical "iv OP limit" form by reversing the comparison direction. + // For symmetric NEQ this is a no-op. Strictness/inclusivity is preserved + // by the swap (e.g. "limit < i" == "i > limit": both strict), so only the + // counting direction needs to flip. if !isNEQ { isUpCounting = !isUpCounting } @@ -523,21 +522,26 @@ func deriveTripCount(loop *Loop) { // Determine if Dead (TripCount 0) or Divergent (Unknown) isDead := false if isUpCounting { - // Condition: i < limit. Loop runs if Start < Limit. - if startC.Cmp(limitC) >= 0 { - // Condition is false immediately. + // Condition: i < limit (or i <= limit when inclusive). + // Loop runs if Start < Limit, or Start == Limit when inclusive. + cmp := startC.Cmp(limitC) + if cmp > 0 || (cmp == 0 && !isInclusive) { + // Condition is false on entry. For "<" equality is dead; + // for "<=" equality runs exactly once, so not dead. isDead = true } else if stepC.Sign() <= 0 { - // Start < Limit, but step is negative (or zero). Diverges. + // Start <= Limit, but step is negative (or zero). Diverges. loop.TripCount = &SCEVUnknown{Value: nil} return } } else { - // Condition: i > limit. Loop runs if Start > Limit. - if startC.Cmp(limitC) <= 0 { + // Condition: i > limit (or i >= limit when inclusive). + // Loop runs if Start > Limit, or Start == Limit when inclusive. + cmp := startC.Cmp(limitC) + if cmp < 0 || (cmp == 0 && !isInclusive) { isDead = true } else if stepC.Sign() >= 0 { - // Start > Limit, but step is positive. Diverges. + // Start >= Limit, but step is positive. Diverges. loop.TripCount = &SCEVUnknown{Value: nil} return } @@ -812,3 +816,5 @@ func negateSCEV(s SCEV) SCEV { return &SCEVGenericExpr{Op: token.MUL, X: s, Y: &SCEVConstant{Value: big.NewInt(-1)}} } } + + diff --git a/pkg/analysis/loop/scev_test.go b/pkg/analysis/loop/scev_test.go index 5aae0fa..ec5eef2 100644 --- a/pkg/analysis/loop/scev_test.go +++ b/pkg/analysis/loop/scev_test.go @@ -137,6 +137,36 @@ func TestTripCountLogic(t *testing.T) { funcName: "count", expected: -1, // Unknown }, + { + name: "Inclusive equality up (runs once)", + src: `package main + func count() { + // Start=5, Limit=5, "i <= 5": runs exactly once, not dead. + for i := 5; i <= 5; i++ { } + }`, + funcName: "count", + expected: 1, + }, + { + name: "Inclusive equality down (runs once)", + src: `package main + func count() { + // Start=5, Limit=5, "i >= 5": runs exactly once, not dead. + for i := 5; i >= 5; i-- { } + }`, + funcName: "count", + expected: 1, + }, + { + name: "Exclusive equality up (dead)", + src: `package main + func count() { + // Start=5, Limit=5, "i < 5": false on entry, dead. + for i := 5; i < 5; i++ { } + }`, + funcName: "count", + expected: 0, + }, } for _, tc := range tests { @@ -247,3 +277,5 @@ func TestFloatExclusion(t *testing.T) { t.Errorf("Expected 0 IVs for float loop, got %d", len(l.Inductions)) } } + + diff --git a/pkg/analysis/topology/obfuscation.go b/pkg/analysis/topology/obfuscation.go index 2f80dde..cabad9e 100644 --- a/pkg/analysis/topology/obfuscation.go +++ b/pkg/analysis/topology/obfuscation.go @@ -185,7 +185,10 @@ func AnalyzeObfuscation(t *FunctionTopology) ObfuscationProfile { } // 4. Indirect / reflective call ratio from the call signatures the topology - // already collected. extractCallSignature tags these "dynamic:" / "reflect:". + // already collected. extractCallSignature tags these "dynamic:" / "invoke:"; + // reflection invocation emits the dot-form "reflect.Call" (see + // isIndirectCallSig for the verified form set and why "reflect:" alone was + // insufficient). totalCalls, indirectCalls := 0, 0 for sig, n := range t.CallSignatures { totalCalls += n @@ -398,12 +401,57 @@ func structuralConstOperands(instr ssa.Instruction) map[*ssa.Value]bool { // "go:invoke:T.M", "defer:invoke:T.M") are NOT matched — only the bare // "invoke:" prefix and "go:dynamic:"/"defer:dynamic:" are. Pinned by // TestIsIndirectCallSig; closing the gap should update that test deliberately. +// isIndirectCallSig reports whether a call signature represents indirect or +// reflective dispatch -- the calls whose target is not statically a named +// function and which therefore evade call-target detection. +// +// SIGNATURE FORMS ARE EMPIRICALLY VERIFIED against topology.ExtractTopology on +// real SSA built with ssa.InstantiateGenerics (the mode both ir/builder.go and +// scan.go use). Do not infer forms from the emitter source alone -- the +// optimizing SSA build resolves some calls differently than NaiveForm would. +// +// - "dynamic:" raw function-pointer call. VERIFIED. +// - "invoke:" interface method dispatch. VERIFIED. +// - "go:dynamic:" dynamic call spawned in a goroutine. +// - "defer:dynamic:" dynamic call in a defer. +// - reflect invocation: reflect.Value.Call and friends. VERIFIED to emit the +// DOT-form "reflect.Call" / "reflect.MethodByName" via extractFunctionSig +// (.), NOT the colon-form "reflect:Call". The colon-form branch +// in extractCallSignature is UNREACHABLE under InstantiateGenerics (verified +// by exhausting method-value / method-expression / stored-value shapes), so +// matching only "reflect:" silently MISSED all reflection dispatch and +// under-counted IndirectCallRatio to zero for reflection-hidden capabilities +// -- precisely the evasion technique the signal exists to catch. Both forms +// are now matched; the colon-form is retained for loader-independence. +// +// Only the reflect *invocation* entry points count as indirect dispatch. +// reflect.TypeOf / reflect.ValueOf and similar introspection helpers also emit +// "reflect." but are ordinary direct calls, not dynamic dispatch, so they +// are matched by name, not by a blanket "reflect." prefix (which would also +// capture a user package named reflect). func isIndirectCallSig(sig string) bool { return hasPrefix(sig, "dynamic:") || - hasPrefix(sig, "reflect:") || + hasPrefix(sig, "reflect:") || // colon-form: unreachable under this loader, kept defensively hasPrefix(sig, "invoke:") || hasPrefix(sig, "go:dynamic:") || - hasPrefix(sig, "defer:dynamic:") + hasPrefix(sig, "defer:dynamic:") || + isReflectInvokeSig(sig) +} + +// reflectInvokeMethods are the reflect.Value methods that perform DYNAMIC +// INVOCATION of an underlying callable -- the reflection equivalent of an +// indirect call. Introspection methods (Kind, Type, Field, Len, ...) are +// deliberately excluded: they do not dispatch a call. Emitted dot-form is +// "reflect.". +var reflectInvokeMethods = map[string]bool{ + "reflect.Call": true, + "reflect.CallSlice": true, + "reflect.MethodByName": true, // resolves a method dynamically; its result is invoked + "reflect.Method": true, +} + +func isReflectInvokeSig(sig string) bool { + return reflectInvokeMethods[sig] } // flatteningScore estimates control-flow-flattening from the dispatcher's @@ -548,3 +596,5 @@ func totalStringLen(ss []string) int { func hasPrefix(s, prefix string) bool { return len(s) >= len(prefix) && s[:len(prefix)] == prefix } + + diff --git a/pkg/analysis/topology/obfuscation_internal_test.go b/pkg/analysis/topology/obfuscation_internal_test.go index 3aaccdb..bcaf9ee 100644 --- a/pkg/analysis/topology/obfuscation_internal_test.go +++ b/pkg/analysis/topology/obfuscation_internal_test.go @@ -80,13 +80,38 @@ func TestHasPrefix(t *testing.T) { func TestIsIndirectCallSig(t *testing.T) { t.Parallel() - indirect := []string{"dynamic:func()", "reflect:Call", "invoke:T.M", "go:dynamic:x", "defer:dynamic:x"} + indirect := []string{ + "dynamic:func()", + "invoke:T.M", + "go:dynamic:x", + "defer:dynamic:x", + // Reflection invocation: the DOT-form is what ExtractTopology actually + // emits under InstantiateGenerics (verified against real SSA). Matching + // only the colon-form previously let all reflection dispatch escape the + // IndirectCallRatio, under-counting obfuscation for reflection-hidden + // capabilities. Both forms must match. + "reflect.Call", + "reflect.CallSlice", + "reflect.MethodByName", + "reflect:Call", // colon-form: unreachable under this loader, matched defensively + } for _, s := range indirect { if !isIndirectCallSig(s) { t.Errorf("isIndirectCallSig(%q)=false, want true", s) } } - direct := []string{"net.Dial", "builtin:println", "closure:func()", "", "call:unknown"} + direct := []string{ + "net.Dial", + "builtin:println", + "closure:func()", + "", + "call:unknown", + // reflect introspection helpers emit "reflect." too but are + // ordinary direct calls, NOT dynamic dispatch -- they must NOT count. + "reflect.TypeOf", + "reflect.ValueOf", + "reflect.DeepEqual", + } for _, s := range direct { if isIndirectCallSig(s) { t.Errorf("isIndirectCallSig(%q)=true, want false", s) @@ -272,3 +297,5 @@ func FuzzMaxWindowEntropy(f *testing.F) { } }) } + + diff --git a/pkg/analysis/topology/obfuscation_test.go b/pkg/analysis/topology/obfuscation_test.go index ab446bc..ae3ac11 100644 --- a/pkg/analysis/topology/obfuscation_test.go +++ b/pkg/analysis/topology/obfuscation_test.go @@ -330,3 +330,46 @@ func BenchmarkAnalyzeObfuscation(b *testing.B) { _ = topology.AnalyzeObfuscation(topo) } } + + +// TestIndirectCallRatio_CountsReflectionDispatch is the behavioral regression +// guard for the reflection-undercount bug: reflect.Value.Call emits the dot-form +// "reflect.Call" (verified against real SSA), which the original isIndirectCallSig +// missed because it matched only the never-emitted colon-form "reflect:Call". +// Result was IndirectCallRatio == 0 for reflection-hidden dispatch -- exactly the +// evasion the signal exists to catch. This test compiles real reflection dispatch +// and asserts the ratio now counts it. +func TestIndirectCallRatio_CountsReflectionDispatch(t *testing.T) { + t.Parallel() + + // A function whose ONLY call is reflect.Value.Call. Pre-fix this scored + // IndirectCallRatio 0.0; it must now be 1.0 (1 of 1 calls is indirect). + src := "package main\n" + + "import \"reflect\"\n" + + "func f(v reflect.Value, a []reflect.Value) { v.Call(a) }" + p := analyze(t, src, "f") + + if p.IndirectCallRatio != 1.0 { + t.Errorf("IndirectCallRatio=%.2f want 1.0; reflection dispatch (reflect.Call) must count as indirect", p.IndirectCallRatio) + } + if !hasIndicator(p, "indirect-dispatch") { + t.Errorf("missing indirect-dispatch indicator for reflection dispatch: %v", p.Indicators) + } +} + +// TestIndirectCallRatio_ExcludesReflectIntrospection pins the scope boundary: +// reflect.TypeOf / reflect.ValueOf emit "reflect." too but are ordinary +// direct calls, not dynamic dispatch. They must NOT inflate IndirectCallRatio, +// or every use of the reflect package would read as obfuscated. +func TestIndirectCallRatio_ExcludesReflectIntrospection(t *testing.T) { + t.Parallel() + + src := "package main\n" + + "import \"reflect\"\n" + + "func f(x int) { _ = reflect.TypeOf(x); _ = reflect.ValueOf(x) }" + p := analyze(t, src, "f") + + if p.IndirectCallRatio != 0.0 { + t.Errorf("IndirectCallRatio=%.2f want 0.0; reflect introspection (TypeOf/ValueOf) is direct, not dispatch", p.IndirectCallRatio) + } +} diff --git a/pkg/models/constants.go b/pkg/models/constants.go index 7b7a156..79e744b 100644 --- a/pkg/models/constants.go +++ b/pkg/models/constants.go @@ -15,6 +15,10 @@ const ( MaxAPIResponseSize = 5 * 1024 * 1024 // 5 MB // controls the verbosity of the diff output to keep CLI reports readable. MaxDiffOpsDisplay = 10 + // caps the length of the LLM "evidence" summary. A response longer than a + // short plain-text rationale indicates the model strayed from the output + // protocol (e.g. echoing the payload or injected instructions back). + MaxEvidenceRunes = 4000 // limits the number of attempts to reach an API before conceding failure. MaxHTTPRetries = 3 @@ -97,3 +101,5 @@ const ( // high performance, LSM tree based storage for large scale analysis history. BackendPebbleDB = "pebbledb" ) + + diff --git a/pkg/storage/pebbledb/store.go b/pkg/storage/pebbledb/store.go index 6271bc6..1bcb2e5 100644 --- a/pkg/storage/pebbledb/store.go +++ b/pkg/storage/pebbledb/store.go @@ -575,6 +575,19 @@ func (s *PebbleScanner) ScanCandidates(topo *topology.FunctionTopology) ([]*dete var sig detection.Signature if err := decodeSignature(sigData, &sig); err == nil { + // Legacy (unpacked) index entries carry no entropy score, so the + // pre-filter above was skipped. Apply the same hard entropy gate + // here using the signature's stored score, so the JSON and PebbleDB + // backends admit an identical candidate set for the same database. + if !isPacked { + effectiveTol := sig.EntropyTolerance + if effectiveTol == 0 { + effectiveTol = entropyTolerance + } + if math.Abs(sig.EntropyScore-topo.EntropyScore) > effectiveTol { + return + } + } cp := sig candidates = append(candidates, &cp) } @@ -698,6 +711,18 @@ func (s *PebbleScanner) ScanTopologyExact(topo *topology.FunctionTopology, funcN } closer.Close() + // Legacy (unpacked) entries skipped the pre-filter; apply the same hard + // entropy gate using the decoded signature's stored score for parity. + if !isPacked { + effectiveTol := sig.EntropyTolerance + if effectiveTol == 0 { + effectiveTol = tolerance + } + if math.Abs(sig.EntropyScore-topo.EntropyScore) > effectiveTol { + continue + } + } + res := detection.MatchSignature(topo, funcName, sig, tolerance) if res.Confidence >= threshold { if bestResult == nil || res.Confidence > bestResult.Confidence { @@ -1235,6 +1260,19 @@ func (s *PebbleScanner) ScanTopologyWithSnapshot(snap *pebble.Snapshot, topo *to return } + // Legacy (unpacked) entries skipped the pre-filter above; apply the same + // hard entropy gate using the decoded signature's stored score so this + // backend stays consistent with the JSON store and with packed entries. + if !isPacked { + effectiveTol := sig.EntropyTolerance + if effectiveTol == 0 { + effectiveTol = tolerance + } + if math.Abs(sig.EntropyScore-topo.EntropyScore) > effectiveTol { + return + } + } + res := detection.MatchSignature(topo, funcName, sig, tolerance) if res.Confidence >= threshold { results = append(results, res) @@ -1466,3 +1504,5 @@ func (s *PebbleScanner) InitializeMetadata(version, description string) error { func (s *PebbleScanner) TouchLastUpdated() error { return s.SetMetadata("last_updated_at", time.Now().Format(time.RFC3339Nano)) } + +