Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 69 additions & 4 deletions internal/llm/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"strings"
"sync"
"time"
"unicode"
"unicode/utf8"

"github.com/BlackVectorOps/semantic_firewall/v4/pkg/models"
Expand Down Expand Up @@ -184,8 +185,11 @@ func executeOpenAIRaw(ctx context.Context, sysPrompt, userMsg, apiKey, model, ap
userItem.Content = json.RawMessage(userJSON)

reqBody := models.OpenAIResponsesRequest{
Model: model,
Store: true,
Model: model,
// Do not retain payloads server-side. Audited commit messages and diffs
// can originate from private/proprietary repositories; persisting them on
// the provider is an unnecessary data-exposure surface for a security tool.
Store: false,
Items: []models.OpenAIItem{sysItem, userItem},
ResponseFormat: &models.OpenAIRespFmt{Type: "json_object"},
}
Expand Down Expand Up @@ -411,8 +415,45 @@ func validateOutput(res models.LLMResult) error {
return fmt.Errorf("invalid verdict type '%s'", res.Verdict)
}

forbiddenPhrases := []string{"ignore previous", "system prompt"}
lowerEv := strings.ToLower(res.Evidence)
// Structural bounds: the evidence field is a plain-string summary. An
// over-long or control-char-laden value is itself a signal that the model
// was steered off-protocol, independent of any specific phrase.
if utf8.RuneCountInString(res.Evidence) > models.MaxEvidenceRunes {
return fmt.Errorf("evidence exceeds maximum length (%d runes)", models.MaxEvidenceRunes)
}
for _, r := range res.Evidence {
// Allow common whitespace; reject other control characters that have no
// place in a plain-text summary and are frequently used to smuggle
// delimiters or terminal escapes past naive substring checks.
if r == '\n' || r == '\r' || r == '\t' {
continue
}
if r < 0x20 || r == 0x7f {
return fmt.Errorf("evidence contains disallowed control character U+%04X", r)
}
}

// Normalize before matching so spacing/case tricks do not defeat the check.
// This is a defense-in-depth backstop, not the primary control: collapse
// runs of whitespace and lowercase, then scan for known injection markers.
lowerEv := collapseWhitespace(strings.ToLower(res.Evidence))
forbiddenPhrases := []string{
"ignore previous",
"ignore prior",
"ignore the above",
"ignore all previous",
"disregard previous",
"disregard prior",
"disregard the above",
"system prompt",
"system message",
"developer message",
"you are now",
"new instructions",
"override instructions",
"begin data",
"end data",
}
for _, phrase := range forbiddenPhrases {
if strings.Contains(lowerEv, phrase) {
return fmt.Errorf("unsafe content: '%s'", phrase)
Expand All @@ -421,6 +462,28 @@ func validateOutput(res models.LLMResult) error {
return nil
}

// collapseWhitespace returns s with every maximal run of Unicode whitespace
// (as classified by unicode.IsSpace) replaced by exactly one ASCII space.
// All other runes are copied through unchanged; case is left to the caller.
//
// It normalizes separator tricks such as "ignore\u00a0previous" or
// "ignore \t\n previous" to "ignore previous" before denylist matching.
// It does NOT remove single separators, so letter-spaced text such as
// "i g n o r e" is returned unchanged, and runes that unicode.IsSpace does
// not report as whitespace are preserved as-is.
func collapseWhitespace(s string) string {
	var out strings.Builder
	out.Grow(len(s))
	inRun := false // true while the most recently consumed rune was whitespace
	for _, ch := range s {
		switch {
		case !unicode.IsSpace(ch):
			out.WriteRune(ch)
			inRun = false
		case !inRun:
			// First whitespace rune of a run: emit the single replacement.
			out.WriteByte(' ')
			inRun = true
		}
		// Subsequent whitespace runes in the same run are dropped.
	}
	return out.String()
}

func parseLLMJSON(content string) (models.LLMResult, error) {
cleanContent := cleanJSONMarkdown(content)
var result models.LLMResult
Expand Down Expand Up @@ -476,3 +539,5 @@ func (t *testProxyTransport) RoundTrip(req *http.Request) (*http.Response, error
}
return rt.RoundTrip(req)
}


34 changes: 20 additions & 14 deletions pkg/analysis/loop/scev.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,27 +457,22 @@ func deriveTripCount(loop *Loop) {
return
}

var isUpCounting, ivOnLeft bool
var isUpCounting bool
var isInclusive, isNEQ bool

switch binOp.Op {
case token.LSS:
isUpCounting = true
ivOnLeft = true
case token.LEQ:
isUpCounting = true
ivOnLeft = true
isInclusive = true
case token.GTR:
isUpCounting = false
ivOnLeft = true
case token.GEQ:
isUpCounting = false
ivOnLeft = true
isInclusive = true
case token.NEQ:
isNEQ = true
ivOnLeft = true
default:
loop.TripCount = &SCEVUnknown{Value: nil}
return
Expand All @@ -497,7 +492,11 @@ func deriveTripCount(loop *Loop) {
} else if found := findIV(binOp.Y); found != nil {
iv = found
limit = binOp.X
ivOnLeft = !ivOnLeft
// IV is on the right-hand side (e.g. "limit < i"). Normalize to the
// canonical "iv OP limit" form by reversing the comparison direction.
// For symmetric NEQ this is a no-op. Strictness/inclusivity is preserved
// by the swap (e.g. "limit < i" == "i > limit": both strict), so only the
// counting direction needs to flip.
if !isNEQ {
isUpCounting = !isUpCounting
}
Expand All @@ -523,21 +522,26 @@ func deriveTripCount(loop *Loop) {
// Determine if Dead (TripCount 0) or Divergent (Unknown)
isDead := false
if isUpCounting {
// Condition: i < limit. Loop runs if Start < Limit.
if startC.Cmp(limitC) >= 0 {
// Condition is false immediately.
// Condition: i < limit (or i <= limit when inclusive).
// Loop runs if Start < Limit, or Start == Limit when inclusive.
cmp := startC.Cmp(limitC)
if cmp > 0 || (cmp == 0 && !isInclusive) {
// Condition is false on entry. For "<" equality is dead;
// for "<=" equality runs exactly once, so not dead.
isDead = true
} else if stepC.Sign() <= 0 {
// Start < Limit, but step is negative (or zero). Diverges.
// Start <= Limit, but step is negative (or zero). Diverges.
loop.TripCount = &SCEVUnknown{Value: nil}
return
}
} else {
// Condition: i > limit. Loop runs if Start > Limit.
if startC.Cmp(limitC) <= 0 {
// Condition: i > limit (or i >= limit when inclusive).
// Loop runs if Start > Limit, or Start == Limit when inclusive.
cmp := startC.Cmp(limitC)
if cmp < 0 || (cmp == 0 && !isInclusive) {
isDead = true
} else if stepC.Sign() >= 0 {
// Start > Limit, but step is positive. Diverges.
// Start >= Limit, but step is positive. Diverges.
loop.TripCount = &SCEVUnknown{Value: nil}
return
}
Expand Down Expand Up @@ -812,3 +816,5 @@ func negateSCEV(s SCEV) SCEV {
return &SCEVGenericExpr{Op: token.MUL, X: s, Y: &SCEVConstant{Value: big.NewInt(-1)}}
}
}


32 changes: 32 additions & 0 deletions pkg/analysis/loop/scev_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,36 @@ func TestTripCountLogic(t *testing.T) {
funcName: "count",
expected: -1, // Unknown
},
{
name: "Inclusive equality up (runs once)",
src: `package main
func count() {
// Start=5, Limit=5, "i <= 5": runs exactly once, not dead.
for i := 5; i <= 5; i++ { }
}`,
funcName: "count",
expected: 1,
},
{
name: "Inclusive equality down (runs once)",
src: `package main
func count() {
// Start=5, Limit=5, "i >= 5": runs exactly once, not dead.
for i := 5; i >= 5; i-- { }
}`,
funcName: "count",
expected: 1,
},
{
name: "Exclusive equality up (dead)",
src: `package main
func count() {
// Start=5, Limit=5, "i < 5": false on entry, dead.
for i := 5; i < 5; i++ { }
}`,
funcName: "count",
expected: 0,
},
}

for _, tc := range tests {
Expand Down Expand Up @@ -247,3 +277,5 @@ func TestFloatExclusion(t *testing.T) {
t.Errorf("Expected 0 IVs for float loop, got %d", len(l.Inductions))
}
}


56 changes: 53 additions & 3 deletions pkg/analysis/topology/obfuscation.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,10 @@ func AnalyzeObfuscation(t *FunctionTopology) ObfuscationProfile {
}

// 4. Indirect / reflective call ratio from the call signatures the topology
// already collected. extractCallSignature tags these "dynamic:" / "reflect:".
// already collected. extractCallSignature tags these "dynamic:" / "invoke:";
// reflection invocation emits the dot-form "reflect.Call" (see
// isIndirectCallSig for the verified form set and why "reflect:" alone was
// insufficient).
totalCalls, indirectCalls := 0, 0
for sig, n := range t.CallSignatures {
totalCalls += n
Expand Down Expand Up @@ -398,12 +401,57 @@ func structuralConstOperands(instr ssa.Instruction) map[*ssa.Value]bool {
// "go:invoke:T.M", "defer:invoke:T.M") are NOT matched — only the bare
// "invoke:" prefix and "go:dynamic:"/"defer:dynamic:" are. Pinned by
// TestIsIndirectCallSig; closing the gap should update that test deliberately.
// isIndirectCallSig reports whether a call signature represents indirect or
// reflective dispatch -- the calls whose target is not statically a named
// function and which therefore evade call-target detection.
//
// SIGNATURE FORMS ARE EMPIRICALLY VERIFIED against topology.ExtractTopology on
// real SSA built with ssa.InstantiateGenerics (the mode both ir/builder.go and
// scan.go use). Do not infer forms from the emitter source alone -- the
// optimizing SSA build resolves some calls differently than NaiveForm would.
//
// - "dynamic:" raw function-pointer call. VERIFIED.
// - "invoke:" interface method dispatch. VERIFIED.
// - "go:dynamic:" dynamic call spawned in a goroutine.
// - "defer:dynamic:" dynamic call in a defer.
// - reflect invocation: reflect.Value.Call and friends. VERIFIED to emit the
// DOT-form "reflect.Call" / "reflect.MethodByName" via extractFunctionSig
// (<pkg>.<func>), NOT the colon-form "reflect:Call". The colon-form branch
// in extractCallSignature is UNREACHABLE under InstantiateGenerics (verified
// by exhausting method-value / method-expression / stored-value shapes), so
// matching only "reflect:" silently MISSED all reflection dispatch and
// under-counted IndirectCallRatio to zero for reflection-hidden capabilities
// -- precisely the evasion technique the signal exists to catch. Both forms
// are now matched; the colon-form is retained for loader-independence.
//
// Only the reflect *invocation* entry points count as indirect dispatch.
// reflect.TypeOf / reflect.ValueOf and similar introspection helpers also emit
// "reflect.<Func>" but are ordinary direct calls, not dynamic dispatch, so they
// are matched by name, not by a blanket "reflect." prefix (which would also
// capture a user package named reflect).
func isIndirectCallSig(sig string) bool {
return hasPrefix(sig, "dynamic:") ||
hasPrefix(sig, "reflect:") ||
hasPrefix(sig, "reflect:") || // colon-form: unreachable under this loader, kept defensively
hasPrefix(sig, "invoke:") ||
hasPrefix(sig, "go:dynamic:") ||
hasPrefix(sig, "defer:dynamic:")
hasPrefix(sig, "defer:dynamic:") ||
isReflectInvokeSig(sig)
}

// reflectInvokeMethods is the set of dot-form call signatures
// ("reflect.<Method>") that are treated as reflective dynamic invocation,
// i.e. the reflection counterpart of an indirect call. Membership is by
// exact signature string. Introspection-only signatures (Kind, Type, Field,
// Len, ...) are intentionally absent because they do not dispatch a call.
var reflectInvokeMethods = map[string]bool{
	"reflect.Method":       true,
	"reflect.MethodByName": true, // looks a method up dynamically; the result is then invoked
	"reflect.CallSlice":    true,
	"reflect.Call":         true,
}

// isReflectInvokeSig reports whether sig is exactly one of the signatures
// registered as true in reflectInvokeMethods. Unknown signatures, including
// the empty string, yield false.
func isReflectInvokeSig(sig string) bool {
	isInvoke, known := reflectInvokeMethods[sig]
	return known && isInvoke
}

// flatteningScore estimates control-flow-flattening from the dispatcher's
Expand Down Expand Up @@ -548,3 +596,5 @@ func totalStringLen(ss []string) int {
// hasPrefix reports whether s begins with prefix. An empty prefix matches
// every string, including the empty string.
func hasPrefix(s, prefix string) bool {
	n := len(prefix)
	if len(s) < n {
		// s is too short to contain prefix; slicing below would be out of range.
		return false
	}
	return s[:n] == prefix
}


31 changes: 29 additions & 2 deletions pkg/analysis/topology/obfuscation_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,38 @@ func TestHasPrefix(t *testing.T) {

func TestIsIndirectCallSig(t *testing.T) {
t.Parallel()
indirect := []string{"dynamic:func()", "reflect:Call", "invoke:T.M", "go:dynamic:x", "defer:dynamic:x"}
indirect := []string{
"dynamic:func()",
"invoke:T.M",
"go:dynamic:x",
"defer:dynamic:x",
// Reflection invocation: the DOT-form is what ExtractTopology actually
// emits under InstantiateGenerics (verified against real SSA). Matching
// only the colon-form previously let all reflection dispatch escape the
// IndirectCallRatio, under-counting obfuscation for reflection-hidden
// capabilities. Both forms must match.
"reflect.Call",
"reflect.CallSlice",
"reflect.MethodByName",
"reflect:Call", // colon-form: unreachable under this loader, matched defensively
}
for _, s := range indirect {
if !isIndirectCallSig(s) {
t.Errorf("isIndirectCallSig(%q)=false, want true", s)
}
}
direct := []string{"net.Dial", "builtin:println", "closure:func()", "", "call:unknown"}
direct := []string{
"net.Dial",
"builtin:println",
"closure:func()",
"",
"call:unknown",
// reflect introspection helpers emit "reflect.<Func>" too but are
// ordinary direct calls, NOT dynamic dispatch -- they must NOT count.
"reflect.TypeOf",
"reflect.ValueOf",
"reflect.DeepEqual",
}
for _, s := range direct {
if isIndirectCallSig(s) {
t.Errorf("isIndirectCallSig(%q)=true, want false", s)
Expand Down Expand Up @@ -272,3 +297,5 @@ func FuzzMaxWindowEntropy(f *testing.F) {
}
})
}


Loading
Loading