diff --git a/.gitignore b/.gitignore index 53362a6..1ae93e9 100644 --- a/.gitignore +++ b/.gitignore @@ -62,4 +62,7 @@ target/ # Automatically-built grammar tokens *.tokens +# PyCharm +.idea/ + MANIFEST diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..73ca350 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,7 @@ +[settings] +check=1 +diff=1 +known_third_party=antlr4,enum,dateutil,six,stix2patterns,typing +known_first_party=stix2matcher +not_skip=__init__.py +force_sort_within_sections=1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..201a248 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +- repo: https://github.com/pre-commit/pre-commit-hooks + sha: ea227f024bd89d638aea319c92806737e3375979 + hooks: + - id: trailing-whitespace + - id: flake8 + args: + - --max-line-length=160 + - id: check-merge-conflict +- repo: https://github.com/FalconSocial/pre-commit-python-sorter + sha: b57843b0b874df1d16eb0bef00b868792cb245c2 + hooks: + - id: python-import-sorter diff --git a/.travis.yml b/.travis.yml index 896f07d..f2d2ae9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,18 @@ sudo: false language: python +cache: pip python: - "2.7" - "3.3" - "3.4" - "3.5" + - "3.6" install: - pip install -U pip setuptools - - pip install tox-travis -script: tox + - pip install tox-travis pre-commit + - pip install codecov +script: + - tox + - if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; then pre-commit run --all-files; fi +after_success: + - codecov diff --git a/Notes.md b/Notes.md index 56a92a9..d91f0fd 100644 --- a/Notes.md +++ b/Notes.md @@ -1,15 +1,15 @@ # Implementation Notes -This document gives some additional information regarding how the matcher is +This document gives some additional information regarding how the matcher is implemented, and some usage caveats. The emphasis in this implementation is on -simplicity, clarity, and correctness, in order to help people understand the +simplicity, clarity, and correctness, in order to help people understand the STIX pattern language. It is not designed for "real" high-intensity usage. ## Background Matching a pattern is equivalent to finding a set of *bindings*. A binding -is a mapping from observation expressions in the pattern to observations. The -matcher does its job in a single traversal of the pattern parse tree. It +is a mapping from observation expressions in the pattern to observations. The +matcher does its job in a single traversal of the pattern parse tree. It maintains a list of candidate bindings and other state, pruning away those which don't work as it goes. At the end, the result is a list of many bindings, not just the first one found. @@ -17,7 +17,7 @@ just the first one found. ## Caveats - **The number of bindings found can be large.** If each observation -expression matches a lot of observations, the number of bindings can grow +expression matches a lot of observations, the number of bindings can grow exponentially. In the worst case, if N observations match N observation expressions (i.e. every observation expression matches every observation), and only the `AND` observation operator is used, that's N! different possible @@ -42,9 +42,9 @@ first, then temporal filtering occurs second. They don't occur at the same time. So you could still get large growth in the number of bindings in the first step. A more clever implementation could do both at the same time. -- **"Creative" use of references can cause large memory usage.** This is a -corner case and unlikely to occur in a real pattern, but it's interesting. It's -especially easy to demonstrate with circular references. Consider the +- **"Creative" use of references can cause large memory usage.** This is a +corner case and unlikely to occur in a real pattern, but it's interesting. It's +especially easy to demonstrate with circular references. Consider the pattern: `[foo:some_refs[*].some_refs[*].some_refs[*].some_refs[*].size > 100]` and the observation data: ```json @@ -67,4 +67,4 @@ and the observation data: replaced with the same objects, which have two references apiece, etc. Also, because of the `[*]` index steps, nothing gets pruned away; all paths must be considered. This is exponential growth. - + diff --git a/README.md b/README.md index c153e2a..f378f1d 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ source files. (The .jar file is not needed for normal use of the validator). ```bash $ java -jar "/path/to/antlr-4.7-complete.jar" -Dlanguage=Python2 STIXPattern.g4 -o /path/to/cti-pattern-matcher/stix2matcher/grammars ``` - + 5. Commit the resulting files to git. ## Governance diff --git a/requirements.txt b/requirements.txt index b1c8676..605bc54 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,9 @@ -pytest==3.2.0 -tox==2.7.0 +bumpversion +pre-commit +pytest +pytest-cov +sphinx +sphinx-prompt +tox -e . diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..0d77e8b --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.1.0 +commit = True +tag = True + +[bumpversion:file:setup.py] + +[bdist_wheel] +universal = 1 + diff --git a/setup.py b/setup.py index 15f906f..f5de9c8 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,21 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name='stix2-matcher', - version="0.1.0", + version='0.1.0', packages=find_packages(), description='Match STIX content against STIX patterns', install_requires=[ - "antlr4-python2-runtime==4.7 ; python_version < '3'", - "antlr4-python3-runtime==4.7 ; python_version >= '3'", - 'typing ; python_version<"3.5" and python_version>="3"', - "enum34 ; python_version ~= '3.3.0'", - "python-dateutil", - "six", - "stix2-patterns>=0.5.0", + 'antlr4-python2-runtime==4.7 ; python_version < "3"', + 'antlr4-python3-runtime==4.7 ; python_version >= "3"', + 'enum34 ; python_version ~= "3.3.0"', + 'python-dateutil', + 'six', + 'stix2-patterns>=0.5.0', + 'typing ; python_version < "3.5" and python_version >= "3"', ], tests_require=[ - "pytest>=2.9.2" + 'pytest>=2.9.2' ], entry_points={ 'console_scripts': [ @@ -31,5 +31,6 @@ 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ], ) diff --git a/stix2matcher/matcher.py b/stix2matcher/matcher.py index 859534d..868792a 100644 --- a/stix2matcher/matcher.py +++ b/stix2matcher/matcher.py @@ -4,28 +4,26 @@ import base64 import binascii import datetime -import dateutil.relativedelta -import dateutil.tz import io import itertools import json import operator import pprint import re -import six import socket import struct import sys import unicodedata import antlr4 -import antlr4.error.Errors import antlr4.error.ErrorListener - -import stix2patterns.pattern +import antlr4.error.Errors +import dateutil.relativedelta +import dateutil.tz +import six from stix2patterns.grammars.STIXPatternListener import STIXPatternListener from stix2patterns.grammars.STIXPatternParser import STIXPatternParser - +import stix2patterns.pattern # Example observed-data SDO. This represents N observations, where N is # the value of the "number_observed" property (in this case, 5). diff --git a/stix2matcher/test/test_basic_ops.py b/stix2matcher/test/test_basic_ops.py index 7021295..11e34ba 100644 --- a/stix2matcher/test/test_basic_ops.py +++ b/stix2matcher/test/test_basic_ops.py @@ -1,7 +1,5 @@ import pytest - -from stix2matcher.matcher import match, MatcherException - +from stix2matcher.matcher import MatcherException, match _observations = [ { diff --git a/stix2matcher/test/test_binary.py b/stix2matcher/test/test_binary.py index df8b192..fb1d79e 100644 --- a/stix2matcher/test/test_binary.py +++ b/stix2matcher/test/test_binary.py @@ -1,8 +1,6 @@ import pytest - from stix2matcher.matcher import match - _observations = [ { "type": "observed-data", diff --git a/stix2matcher/test/test_comparison_exprs.py b/stix2matcher/test/test_comparison_exprs.py index 22722d3..068465c 100644 --- a/stix2matcher/test/test_comparison_exprs.py +++ b/stix2matcher/test/test_comparison_exprs.py @@ -1,8 +1,6 @@ import pytest - from stix2matcher.matcher import match - _observations = [ { "type": "observed-data", diff --git a/stix2matcher/test/test_complex.py b/stix2matcher/test/test_complex.py index 07f46b3..efeaec9 100644 --- a/stix2matcher/test/test_complex.py +++ b/stix2matcher/test/test_complex.py @@ -1,5 +1,4 @@ import pytest - from stix2matcher.matcher import match _observations = [ diff --git a/stix2matcher/test/test_matching_sdos.py b/stix2matcher/test/test_matching_sdos.py index d002354..7613442 100644 --- a/stix2matcher/test/test_matching_sdos.py +++ b/stix2matcher/test/test_matching_sdos.py @@ -1,9 +1,8 @@ import six -import pytest +import pytest from stix2matcher.matcher import match - _observations = [ { "id": "observed-data--a49751b8-b041-4c00-96c2-76af472bfbbe", diff --git a/stix2matcher/test/test_null.py b/stix2matcher/test/test_null.py index d61c1ab..8946005 100644 --- a/stix2matcher/test/test_null.py +++ b/stix2matcher/test/test_null.py @@ -1,8 +1,8 @@ -import pytest - -from stix2matcher.matcher import match, MatcherException from stix2patterns.pattern import ParseException +import pytest +from stix2matcher.matcher import match + _observations = [ { "type": "observed-data", diff --git a/stix2matcher/test/test_object_path_quoting.py b/stix2matcher/test/test_object_path_quoting.py index 900af65..cfaba28 100644 --- a/stix2matcher/test/test_object_path_quoting.py +++ b/stix2matcher/test/test_object_path_quoting.py @@ -1,7 +1,7 @@ -import pytest +from stix2patterns.pattern import ParseException +import pytest from stix2matcher.matcher import match -from stix2patterns.pattern import ParseException _observations = [ { diff --git a/stix2matcher/test/test_observation_exprs.py b/stix2matcher/test/test_observation_exprs.py index d6503ee..6b31a4f 100644 --- a/stix2matcher/test/test_observation_exprs.py +++ b/stix2matcher/test/test_observation_exprs.py @@ -1,5 +1,4 @@ import pytest - from stix2matcher.matcher import match _observations = [ diff --git a/stix2matcher/test/test_references.py b/stix2matcher/test/test_references.py index f3b7ee5..7de4498 100644 --- a/stix2matcher/test/test_references.py +++ b/stix2matcher/test/test_references.py @@ -1,5 +1,4 @@ import pytest - from stix2matcher.matcher import match _observations = [ diff --git a/stix2matcher/test/test_temporal_qualifiers.py b/stix2matcher/test/test_temporal_qualifiers.py index c840625..bb124ad 100644 --- a/stix2matcher/test/test_temporal_qualifiers.py +++ b/stix2matcher/test/test_temporal_qualifiers.py @@ -1,16 +1,13 @@ -import pytest - -from stix2matcher.matcher import match, MatcherException from stix2patterns.pattern import ParseException # I'll specially test some critical internal time-interval related code, # since it's easier to test it separately than create lots of SDOs and # patterns. -from stix2matcher.matcher import (_overlap, _OVERLAP_NONE, _OVERLAP, +import pytest +from stix2matcher.matcher import (_OVERLAP, _OVERLAP_NONE, _OVERLAP_TOUCH_INNER, _OVERLAP_TOUCH_OUTER, - _OVERLAP_TOUCH_POINT) -from stix2matcher.matcher import _timestamp_intervals_within - + _OVERLAP_TOUCH_POINT, MatcherException, + _overlap, _timestamp_intervals_within, match) _observations = [ { diff --git a/stix2matcher/test/test_timestamps.py b/stix2matcher/test/test_timestamps.py index 5d4decd..4a03b0f 100644 --- a/stix2matcher/test/test_timestamps.py +++ b/stix2matcher/test/test_timestamps.py @@ -1,8 +1,8 @@ -import pytest - -from stix2matcher.matcher import match, MatcherException from stix2patterns.pattern import ParseException +import pytest +from stix2matcher.matcher import MatcherException, match + _observations = [ { "type": "observed-data", diff --git a/stix2matcher/test/test_unicode_normalization.py b/stix2matcher/test/test_unicode_normalization.py index 410f688..dbf23ce 100644 --- a/stix2matcher/test/test_unicode_normalization.py +++ b/stix2matcher/test/test_unicode_normalization.py @@ -1,5 +1,4 @@ import pytest - from stix2matcher.matcher import match _observations = [ diff --git a/tox.ini b/tox.ini index b815ca0..3e311b8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,22 +1,43 @@ [tox] -envlist = py27,py33,py34,py35,pycodestyle +envlist = py27,py33,py34,py35,py36,pycodestyle,isort-check [testenv] -deps = pytest -commands = py.test +deps = + -U + tox + pytest + pytest-cov + coverage +commands = + py.test --cov=stix2matcher stix2matcher/test/ --cov-report term-missing + +passenv = CI TRAVIS TRAVIS_* [testenv:pycodestyle] -deps = pycodestyle -commands = pycodestyle ./stix2matcher +deps = + pycodestyle + flake8 +commands = + pycodestyle ./stix2matcher + flake8 + +[testenv:isort-check] +deps = isort +commands = + isort -rc stix2matcher -df + isort -rc stix2matcher -c [pycodestyle] -ignore= max-line-length=160 exclude=grammars +[flake8] +max-line-length=160 + [travis] python = 2.7: py27, pycodestyle - 3.3: py33 - 3.4: py34 - 3.5: py35 + 3.3: py33, pycodestyle + 3.4: py34, pycodestyle + 3.5: py35, pycodestyle + 3.6: py36, pycodestyle