Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Go: template/text.Template execution methods: support reading arbitrary content #17701

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions go/ql/lib/change-notes/2024-12-16-any-content-readers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* By implementing `ImplicitFieldReadNode` it is now possible to declare a dataflow node that reads any content (fields, array members, map keys and values). For example, this is appropriate for modelling a serialization method that flattens a potentially deep data structure into a string or byte array.
* The `Template.Execute[Template]` methods of the `text/template` package now correctly convey taint from any nested fields to their result. This may produce more results from any taint-tracking query when the `text/template` package is in use.
4 changes: 2 additions & 2 deletions go/ql/lib/ext/text.template.model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ extensions:
- ["text/template", "", False, "HTMLEscapeString", "", "", "Argument[0]", "ReturnValue", "taint", "manual"]
- ["text/template", "", False, "JSEscape", "", "", "Argument[1]", "Argument[0]", "taint", "manual"]
- ["text/template", "", False, "JSEscapeString", "", "", "Argument[0]", "ReturnValue", "taint", "manual"]
- ["text/template", "Template", True, "Execute", "", "", "Argument[1]", "Argument[0]", "taint", "manual"]
- ["text/template", "Template", True, "ExecuteTemplate", "", "", "Argument[2]", "Argument[0]", "taint", "manual"]
# - ["text/template", "Template", True, "Execute", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] # Implemented in QL to provide an arbitrary content read from the input.
# - ["text/template", "Template", True, "ExecuteTemplate", "", "", "Argument[2]", "Argument[0]", "taint", "manual"] # Implemented in QL to provide an arbitrary content read from the input.
58 changes: 33 additions & 25 deletions go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -143,47 +143,55 @@ predicate jumpStep(Node n1, Node n2) {
* Thus, `node2` references an object with a content `x` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, ContentSet c, Node node2) {
// a write `(*p).f = rhs` is modeled as two store steps: `rhs` flows into field `f` of `(*p)`,
// which in turn flows into the pointer content of `p`
exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) |
node1 = rhs and
node2.(PostUpdateNode).getPreUpdateNode() = base and
c = any(DataFlow::FieldContent fc | fc.getField() = f)
predicate storeStep(Node node1, ContentSet cs, Node node2) {
exists(Content c | cs.asOneContent() = c |
// a write `(*p).f = rhs` is modeled as two store steps: `rhs` flows into field `f` of `(*p)`,
// which in turn flows into the pointer content of `p`
exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) |
node1 = rhs and
node2.(PostUpdateNode).getPreUpdateNode() = base and
c = any(DataFlow::FieldContent fc | fc.getField() = f)
or
node1 = base and
node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
)
or
node1 = base and
node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and
node1 = node2.(AddressOperationNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
or
containerStoreStep(node1, node2, c)
)
or
node1 = node2.(AddressOperationNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), cs,
node2.(FlowSummaryNode).getSummaryNode())
or
containerStoreStep(node1, node2, c)
}

/**
* Holds if data can flow from `node1` to `node2` via a read of `c`.
* Thus, `node1` references an object with a content `c` whose value ends up in
* `node2`.
*/
predicate readStep(Node node1, ContentSet c, Node node2) {
node1 = node2.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType())
or
exists(FieldReadNode read |
node2 = read and
node1 = read.getBase() and
c = any(DataFlow::FieldContent fc | fc.getField() = read.getField())
predicate readStep(Node node1, ContentSet cs, Node node2) {
exists(Content c | cs.asOneContent() = c |
node1 = node2.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType())
or
exists(FieldReadNode read |
node2 = read and
node1 = read.getBase() and
c = any(DataFlow::FieldContent fc | fc.getField() = read.getField())
)
or
containerReadStep(node1, node2, c)
)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), cs,
node2.(FlowSummaryNode).getSummaryNode())
or
containerReadStep(node1, node2, c)
// a node selected by some `ImplicitFieldReadNode` reads the universal content set in place:
// the step starts and ends at the same node (`node1 = node2`)
any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and
cs.isUniversalContent() and
node1 = node2
}

/**
Expand Down
59 changes: 54 additions & 5 deletions go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ private import semmle.go.dataflow.FunctionInputsAndOutputs
private import semmle.go.dataflow.ExternalFlow
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import codeql.util.Unit
import DataFlowNodes::Public

/**
Expand Down Expand Up @@ -50,6 +51,18 @@ abstract class FunctionModel extends Function {
}
}

/**
* A unit class for adding nodes that should implicitly read from all nested content.
*
* For example, this might be appropriate for the argument to a method that serializes a struct.
*
* Extend this class and override `shouldImplicitlyReadAllFields` to select the nodes.
* `readStep` in `DataFlowPrivate.qll` gives every selected node a read step over the
* universal content set from the node to itself.
*/
class ImplicitFieldReadNode extends Unit {
/**
* Holds if the node `n` should implicitly read from all nested content in a taint-tracking context.
*/
abstract predicate shouldImplicitlyReadAllFields(DataFlow::Node n);
}

/**
* Gets the `Node` corresponding to `insn`.
*/
Expand Down Expand Up @@ -169,6 +182,11 @@ class Content extends TContent {
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}

/**
* Gets the `ContentSet` containing only this content.
*/
ContentSet asContentSet() { result.asOneContent() = this }
}

/** A reference through a field. */
Expand Down Expand Up @@ -236,21 +254,33 @@ class SyntheticFieldContent extends Content, TSyntheticFieldContent {
override string toString() { result = s.toString() }
}

/**
* The underlying representation of `ContentSet`: either a set holding exactly
* one `Content`, or the single value standing for all content.
*/
private newtype TContentSet =
TOneContent(Content c) or
TAllContent()

/**
* An entity that represents a set of `Content`s.
*
* The set may be interpreted differently depending on whether it is
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet instanceof Content {
class ContentSet instanceof TContentSet {
/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { result = this }
Content getAStoreContent() { this = TOneContent(result) }

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() { result = this }
Content getAReadContent() {
this = TOneContent(result)
or
this = TAllContent() and exists(result)
}

/** Gets a textual representation of this content set. */
string toString() { result = super.toString() }
string toString() {
exists(Content c | this = TOneContent(c) | result = c.toString())
or
this = TAllContent() and result = "all content"
}

/**
* Holds if this element is at the specified location.
Expand All @@ -262,8 +292,27 @@ class ContentSet instanceof Content {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
exists(Content c | this = TOneContent(c) |
c.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
or
this = TAllContent() and
filepath = "" and
startline = 0 and
startcolumn = 0 and
endline = 0 and
endcolumn = 0
}

/**
* Gets the content in this set, if this is a singleton content set.
*
* Has no result for the universal content set.
*/
Content asOneContent() { this = TOneContent(result) }

/**
* Holds if this is the universal content set.
*
* When read from, the universal content set yields every `Content` (see
* `getAReadContent`); it has no store content (see `getAStoreContent`).
*/
predicate isUniversalContent() { this = TAllContent() }
}

/**
Expand Down
44 changes: 24 additions & 20 deletions go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -61,26 +61,28 @@ module Input implements InputSig<Location, DataFlowImplSpecific::GoDataFlow> {
}

string encodeContent(ContentSet cs, string arg) {
exists(Field f, string package, string className, string fieldName |
f = cs.(FieldContent).getField() and
f.hasQualifiedName(package, className, fieldName) and
result = "Field" and
arg = package + "." + className + "." + fieldName
)
or
exists(SyntheticField f |
f = cs.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f
exists(Content c | cs.asOneContent() = c |
exists(Field f, string package, string className, string fieldName |
f = c.(FieldContent).getField() and
f.hasQualifiedName(package, className, fieldName) and
result = "Field" and
arg = package + "." + className + "." + fieldName
)
or
exists(SyntheticField f |
f = c.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f
)
or
c instanceof ArrayContent and result = "ArrayElement" and arg = ""
or
c instanceof CollectionContent and result = "Element" and arg = ""
or
c instanceof MapKeyContent and result = "MapKey" and arg = ""
or
c instanceof MapValueContent and result = "MapValue" and arg = ""
or
c instanceof PointerContent and result = "Dereference" and arg = ""
)
or
cs instanceof ArrayContent and result = "ArrayElement" and arg = ""
or
cs instanceof CollectionContent and result = "Element" and arg = ""
or
cs instanceof MapKeyContent and result = "MapKey" and arg = ""
or
cs instanceof MapValueContent and result = "MapValue" and arg = ""
or
cs instanceof PointerContent and result = "Dereference" and arg = ""
}

bindingset[token]
Expand Down Expand Up @@ -523,7 +525,9 @@ module Private {
SummaryComponent qualifier() { result = argument(-1) }

/** Gets a summary component for field `f`. */
SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) }
SummaryComponent field(Field f) {
result = content(any(FieldContent c | c.getField() = f).asContentSet())
}

/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(_) }
Expand Down
9 changes: 5 additions & 4 deletions go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,11 @@ private Type getElementType(Type containerType) {
* of `c` at sinks and inputs to additional taint steps.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
exists(Type containerType |
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet cs) {
exists(Type containerType, DataFlow::Content c |
node instanceof DataFlow::ArgumentNode and
getElementType*(node.getType()) = containerType
getElementType*(node.getType()) = containerType and
cs.asOneContent() = c
|
containerType instanceof ArrayType and
c instanceof DataFlow::ArrayContent
Expand Down Expand Up @@ -142,7 +143,7 @@ predicate elementWriteStep(DataFlow::Node pred, DataFlow::Node succ) {
any(DataFlow::Write w).writesElement(succ.(DataFlow::PostUpdateNode).getPreUpdateNode(), _, pred)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(pred.(DataFlowPrivate::FlowSummaryNode)
.getSummaryNode(), any(DataFlow::Content c | c instanceof DataFlow::ArrayContent),
.getSummaryNode(), any(DataFlow::ArrayContent ac).asContentSet(),
succ.(DataFlowPrivate::FlowSummaryNode).getSummaryNode())
}

Expand Down
2 changes: 1 addition & 1 deletion go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ module NetHttp {
|
lastParamIndex = call.getCall().getCalleeType().getNumParameter() - 1 and
varArgsSliceArgument = SummaryComponentStack::argument(lastParamIndex) and
arrayContentSC = SummaryComponent::content(arrayContent) and
arrayContentSC = SummaryComponent::content(arrayContent.asContentSet()) and
stack = SummaryComponentStack::push(arrayContentSC, varArgsSliceArgument)
)
else stack = SummaryComponentStack::argument(n)
Expand Down
41 changes: 41 additions & 0 deletions go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,45 @@ module TextTemplate {
input = inp and output = outp
}
}

/**
 * The `Execute` or `ExecuteTemplate` method of `text/template.Template`,
 * paired with the index of the argument used as the taint input.
 */
private class ExecuteTemplateMethod extends Method {
  int inputArg;

  ExecuteTemplateMethod() {
    this.hasQualifiedName("text/template", "Template", "Execute") and
    inputArg = 1
    or
    this.hasQualifiedName("text/template", "Template", "ExecuteTemplate") and
    inputArg = 2
  }

  /** Gets the index of the argument used as input: 1 for `Execute`, 2 for `ExecuteTemplate`. */
  int getInputArgIdx() { result = inputArg }
}

/**
 * Marks the input argument of calls to `Execute` and `ExecuteTemplate` as a
 * node that implicitly reads all nested content.
 */
private class ExecuteTemplateFieldReader extends DataFlow::ImplicitFieldReadNode {
  override predicate shouldImplicitlyReadAllFields(DataFlow::Node n) {
    exists(DataFlow::MethodCallNode call, ExecuteTemplateMethod target |
      target = call.getTarget() and
      call.getArgument(target.getInputArgIdx()) = n
    )
  }
}

/**
* A taint model for `Execute` and `ExecuteTemplate`: taint flows from the
* parameter at `getInputArgIdx()` to parameter 0.
*
* This replaces the two model rows commented out in `text.template.model.yml`,
* which are noted there as implemented in QL to provide an arbitrary content
* read from the input.
*/
private class ExecuteTemplateFunctionModels extends TaintTracking::FunctionModel,
ExecuteTemplateMethod
{
FunctionInput inp;
FunctionOutput outp;

// `inp` and `outp` are bound here, so `hasTaintFlow` only has to equate them.
ExecuteTemplateFunctionModels() {
inp.isParameter(this.getInputArgIdx()) and outp.isParameter(0)
}

override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
input = inp and output = outp
}
}
owen-mc marked this conversation as resolved.
Show resolved Hide resolved
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import go
import utils.test.InlineFlowTest

/**
 * Gets the exact value (as reported by `getExactValue`) of argument 0 of `src`,
 * where `src` is a call node. `sink` is not constrained beyond existing.
 */
string getArgString(DataFlow::Node src, DataFlow::Node sink) {
  exists(DataFlow::CallNode call | call = src |
    result = call.getArgument(0).getExactValue()
  ) and
  exists(sink)
}

import TaintFlowTestArgString<DefaultFlowConfig, getArgString/2>
Loading