From fe808f73956bc4a9b3c1a3c843d73e1ac3d70481 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 18 Oct 2024 17:36:17 +0100 Subject: [PATCH] Switch to implementation using a universal read-only ContentSet --- .../go/dataflow/internal/DataFlowPrivate.qll | 113 +++++------------- .../go/dataflow/internal/DataFlowUtil.qll | 54 ++++++++- .../go/dataflow/internal/FlowSummaryImpl.qll | 42 +++---- .../dataflow/internal/TaintTrackingUtil.qll | 31 ++++- .../semmle/go/frameworks/stdlib/NetHttp.qll | 2 +- .../frameworks/serialization/texttemplate.go | 5 +- 6 files changed, 135 insertions(+), 112 deletions(-) diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index 6fe005385b24b..f9e569089befb 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -143,73 +143,28 @@ predicate jumpStep(Node n1, Node n2) { * Thus, `node2` references an object with a content `x` that contains the * value of `node1`. */ -predicate storeStep(Node node1, ContentSet c, Node node2) { - // a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`, - // which in turn flows into the pointer content of `p` - exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | - node1 = rhs and - node2.(PostUpdateNode).getPreUpdateNode() = base and - c = any(DataFlow::FieldContent fc | fc.getField() = f) +predicate storeStep(Node node1, ContentSet cs, Node node2) { + exists(Content c | cs.asOneContent() = c | + // a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`, + // which in turn flows into the pointer content of `p` + exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | + node1 = rhs and + node2.(PostUpdateNode).getPreUpdateNode() = base and + c = any(DataFlow::FieldContent fc | fc.getField() = f) + or + node1 = base and + node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) + ) or - node1 = base and - node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + node1 = node2.(AddressOperationNode).getOperand() and c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) + or + containerStoreStep(node1, node2, c) ) or - node1 = node2.(AddressOperationNode).getOperand() and - c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) - or - FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, + FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), cs, node2.(FlowSummaryNode).getSummaryNode()) - or - containerStoreStep(node1, node2, c) -} - -/** - * Gets a `DataFlow::ContentSet` containing a single `Content` appropriate - * for reading a field, element, map value or channel message of type `containerType`. - */ -DataFlow::ContentSet getContentForType(Type containerType) { - containerType instanceof ArrayType and - result instanceof DataFlow::ArrayContent - or - containerType instanceof SliceType and - result instanceof DataFlow::ArrayContent - or - containerType instanceof ChanType and - result instanceof DataFlow::CollectionContent - or - containerType instanceof MapType and - result instanceof DataFlow::MapValueContent - or - result.(DataFlow::PointerContent).getPointerType() = containerType - or - exists(Field f | f = containerType.(StructType).getField(_) | - result.(DataFlow::FieldContent).getField() = f - ) -} - -/** - * Gets the type of an array/slice element, channel value, map value, - * pointer base type or named-type underlying type relating to `containerType`. - */ -Type getElementType(Type containerType) { - result = containerType.(ArrayType).getElementType() or - result = containerType.(SliceType).getElementType() or - result = containerType.(ChanType).getElementType() or - result = containerType.(MapType).getValueType() or - result = containerType.(PointerType).getPointerType() or - result = containerType.(NamedType).getUnderlyingType() -} - -/** - * Gets the type of an array/slice element, channel value, map value, - * pointer base type, named-type underlying type or struct field type - * relating to `containerType`. - */ -Type getAnElementOrFieldType(Type containerType) { - result = getElementType(containerType) or - result = containerType.(StructType).getField(_).getType() } /** @@ -217,28 +172,26 @@ Type getAnElementOrFieldType(Type containerType) { * Thus, `node1` references an object with a content `c` whose value ends up in * `node2`. */ -predicate readStep(Node node1, ContentSet c, Node node2) { - node1 = node2.(PointerDereferenceNode).getOperand() and - c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType()) - or - exists(FieldReadNode read | - node2 = read and - node1 = read.getBase() and - c = any(DataFlow::FieldContent fc | fc.getField() = read.getField()) +predicate readStep(Node node1, ContentSet cs, Node node2) { + exists(Content c | cs.asOneContent() = c | + node1 = node2.(PointerDereferenceNode).getOperand() and + c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType()) + or + exists(FieldReadNode read | + node2 = read and + node1 = read.getBase() and + c = any(DataFlow::FieldContent fc | fc.getField() = read.getField()) + ) + or + containerReadStep(node1, node2, c) ) or - FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, + FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), cs, node2.(FlowSummaryNode).getSummaryNode()) or - containerReadStep(node1, node2, c) - or - exists(Type containerType | - any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and - getAnElementOrFieldType*(node1.getType()) = containerType - | - c = getContentForType(containerType) and - node1 = node2 - ) + any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and + cs.isUniversalContent() and + node1 = node2 } /** diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index 3d66f4b96e073..c748a1c138aac 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -182,6 +182,13 @@ class Content extends TContent { ) { filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0 } + + /** + * Gets the `ContentSet` contaning only this content. + */ + ContentSet asContentSet() { + result.asOneContent() = this + } } /** A reference through a field. */ @@ -249,21 +256,35 @@ class SyntheticFieldContent extends Content, TSyntheticFieldContent { override string toString() { result = s.toString() } } +private newtype TContentSet = + TOneContent(Content c) or + TAllContent() + /** * An entity that represents a set of `Content`s. * * The set may be interpreted differently depending on whether it is * stored into (`getAStoreContent`) or read from (`getAReadContent`). */ -class ContentSet instanceof Content { +class ContentSet instanceof TContentSet { /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { result = this } + Content getAStoreContent() { + this = TOneContent(result) + } /** Gets a content that may be read from when reading from this set. */ - Content getAReadContent() { result = this } + Content getAReadContent() { + this = TOneContent(result) + or + this = TAllContent() and result = any(Content c) + } /** Gets a textual representation of this content set. */ - string toString() { result = super.toString() } + string toString() { + exists(Content c | this = TOneContent(c) | result = c.toString()) + or + this = TAllContent() and result = "all content" + } /** * Holds if this element is at the specified location. @@ -275,7 +296,30 @@ class ContentSet instanceof Content { predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { - super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + exists(Content c | this = TOneContent(c) | + c.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + or + this = TAllContent() and + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } + + /** + * If this is a singleton content set, returns the content. + */ + Content asOneContent() { + this = TOneContent(result) + } + + /** + * Holds if this is a universal content set. + */ + predicate isUniversalContent() { + this = TAllContent() } } diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index 211974f75a56c..d6c25ac679eb3 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -55,26 +55,28 @@ module Input implements InputSig { } string encodeContent(ContentSet cs, string arg) { - exists(Field f, string package, string className, string fieldName | - f = cs.(FieldContent).getField() and - f.hasQualifiedName(package, className, fieldName) and - result = "Field" and - arg = package + "." + className + "." + fieldName - ) - or - exists(SyntheticField f | - f = cs.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + exists(Content c | cs.asOneContent() = c | + exists(Field f, string package, string className, string fieldName | + f = c.(FieldContent).getField() and + f.hasQualifiedName(package, className, fieldName) and + result = "Field" and + arg = package + "." + className + "." + fieldName + ) + or + exists(SyntheticField f | + f = c.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + ) + or + c instanceof ArrayContent and result = "ArrayElement" and arg = "" + or + c instanceof CollectionContent and result = "Element" and arg = "" + or + c instanceof MapKeyContent and result = "MapKey" and arg = "" + or + c instanceof MapValueContent and result = "MapValue" and arg = "" + or + c instanceof PointerContent and result = "Dereference" and arg = "" ) - or - cs instanceof ArrayContent and result = "ArrayElement" and arg = "" - or - cs instanceof CollectionContent and result = "Element" and arg = "" - or - cs instanceof MapKeyContent and result = "MapKey" and arg = "" - or - cs instanceof MapValueContent and result = "MapValue" and arg = "" - or - cs instanceof PointerContent and result = "Dereference" and arg = "" } bindingset[token] @@ -339,7 +341,7 @@ module Private { SummaryComponent qualifier() { result = argument(-1) } /** Gets a summary component for field `f`. */ - SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } + SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f).asContentSet()) } /** Gets a summary component that represents the return value of a call. */ SummaryComponent return() { result = SC::return(_) } diff --git a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll index 50a632af3352f..d02329d6aa231 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -34,17 +34,38 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) { FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(src, sink, _) } +private Type getElementType(Type containerType) { + result = containerType.(ArrayType).getElementType() or + result = containerType.(SliceType).getElementType() or + result = containerType.(ChanType).getElementType() or + result = containerType.(MapType).getValueType() or + result = containerType.(PointerType).getPointerType() +} + /** * Holds if default `TaintTracking::Configuration`s should allow implicit reads * of `c` at sinks and inputs to additional taint steps. */ bindingset[node] -predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { - exists(Type containerType | +predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet cs) { + exists(Type containerType, DataFlow::Content c | node instanceof DataFlow::ArgumentNode and - DataFlowPrivate::getElementType*(node.getType()) = containerType + getElementType*(node.getType()) = containerType and + cs.asOneContent() = c | - c = DataFlowPrivate::getContentForType(containerType) + containerType instanceof ArrayType and + c instanceof DataFlow::ArrayContent + or + containerType instanceof SliceType and + c instanceof DataFlow::ArrayContent + or + containerType instanceof ChanType and + c instanceof DataFlow::CollectionContent + or + containerType instanceof MapType and + c instanceof DataFlow::MapValueContent + or + c.(DataFlow::PointerContent).getPointerType() = containerType ) } @@ -122,7 +143,7 @@ predicate elementWriteStep(DataFlow::Node pred, DataFlow::Node succ) { any(DataFlow::Write w).writesElement(succ.(DataFlow::PostUpdateNode).getPreUpdateNode(), _, pred) or FlowSummaryImpl::Private::Steps::summaryStoreStep(pred.(DataFlowPrivate::FlowSummaryNode) - .getSummaryNode(), any(DataFlow::Content c | c instanceof DataFlow::ArrayContent), + .getSummaryNode(), any(DataFlow::ArrayContent ac).asContentSet(), succ.(DataFlowPrivate::FlowSummaryNode).getSummaryNode()) } diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll index deb366b4cdaa6..dded5092bcc3b 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll @@ -122,7 +122,7 @@ module NetHttp { | lastParamIndex = call.getCall().getCalleeType().getNumParameter() - 1 and varArgsSliceArgument = SummaryComponentStack::argument(lastParamIndex) and - arrayContentSC = SummaryComponent::content(arrayContent) and + arrayContentSC = SummaryComponent::content(arrayContent.asContentSet()) and stack = SummaryComponentStack::push(arrayContentSC, varArgsSliceArgument) ) else stack = SummaryComponentStack::argument(n) diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go index 85a1372752ff8..3c94020c60d46 100644 --- a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go @@ -43,7 +43,10 @@ func test() { buff1.Read(bytes1) sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 - tmpl.Execute(buff2, toSerialize) + // Read `buff2` via an `any`-typed variable, to ensure the static type of the argument to tmpl.Execute makes no difference to the result + var toSerializeAsAny any + toSerializeAsAny = toSerialize + tmpl.Execute(buff2, toSerializeAsAny) buff2.Read(bytes2) sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3