diff --git a/reproman/distributions/base.py b/reproman/distributions/base.py index 6f56083bb..c4bfa6577 100644 --- a/reproman/distributions/base.py +++ b/reproman/distributions/base.py @@ -46,8 +46,6 @@ class SpecObject(object): _diff_cmp_fields = tuple() # Fields of the primary interest when showing diff _diff_fields = tuple() - # Fields used in determination of comparison (satisfied_by and identical_to) - _comparison_fields = tuple() @property def _diff_cmp_id(self): @@ -60,12 +58,19 @@ def _diff_cmp_id(self): @property def _cmp_id(self): - if not self._comparison_fields: + if not self._diff_cmp_fields: # Might need to be gone or some custom exception raise RuntimeError( - "Cannot establish identity of %r since _comaprison_fields " + "Cannot establish identity of %r since _diff_cmp_fields " "are not defined" % self) - return tuple(getattr(self, a) for a in self._comparison_fields) + if not self._diff_fields: + # Might need to be gone or some custom exception + raise RuntimeError( + "Cannot establish identity of %r since _diff_fields " + "are not defined" % self) + vals = [ getattr(self, a) for a in self._diff_cmp_fields ] + vals.extend([ getattr(self, a) for a in self._diff_fields ]) + return tuple(vals) @property def _diff_vals(self): @@ -94,8 +99,8 @@ def diff_subidentity_string(self): @property def identity_string(self): - """like diff_identity_string, but for _comparison_fields (used in - satisfied_by comparisons) + """like diff_identity_string, but for both _diff_cmp_fields and + _diff_fields (used in satisfied_by comparisons) """ return " ".join(str(el) for el in self._cmp_id if el is not None) @@ -137,15 +142,15 @@ def _satisfied_by(self, other): spec object. We require that the values of the attributes given by - _comparison_fields are the same. A specobject with a value of None - for one of these attributes is less specific than one with - a specific value; the former cannot satisfy the latter, - but the latter can satisfy the former. + _diff_cmp_fields and _diff_fields are the same. A specobject + with a value of None for one of these attributes is less specific + than one with a specific value; the former cannot satisfy the + latter, but the latter can satisfy the former. TODO: Ensure we don't encounter the case where self is completely unspecified (all values are None), in which case satisfied_by() returns True by default. Perhaps this is done by making - sure that at least one of the _comparison_fields cannot be None. + sure that at least one of the _diff_cmp_fields cannot be None. TODO: derive _collection_type directly from _collection. This isn't possible at the moment because DebianDistribution.packages is @@ -162,9 +167,7 @@ def _satisfied_by(self, other): raise TypeError('don''t know how to determine if a %s is satisfied by a %s' % (self.__class__, other_collection_type)) if not isinstance(other, self.__class__): raise TypeError('incompatible specobject types') - for attr_name in self._comparison_fields: - self_value = getattr(self, attr_name) - other_value = getattr(other, attr_name) + for (self_value, other_value) in zip(self._cmp_id, other._cmp_id): if self_value is None: continue if self_value != other_value: @@ -176,14 +179,12 @@ def _identical_to(self, other): """Determine if the other object is identical to the spec object. We require that the objects are of the same type and that the - values of the attributes given by _comparison_fields are the same. + values of the attributes given by _diff_cmp_fields and _diff_fields + (dereferenced in _cmp_id) are the same. """ if not isinstance(other, self.__class__): return False - for attr_name in self._comparison_fields: - if getattr(self, attr_name) != getattr(other, attr_name): - return False - return True + return all(sv==ov for sv, ov in zip(self._cmp_id, other._cmp_id)) def _register_with_representer(cls): @@ -423,3 +424,101 @@ def _create_package(self, **package_fields): provided by _get_packagefields_for_files """ return + + +class SpecDiff: + + """Difference object for SpecObjects. + + Instantiate with SpecDiff(a, b). a and b must be of the same type + or TypeError is raised. Either (but not both) may be None. + + Attributes: + + a, b: The two objects being compared. + + If _diff_cmp_fields is defined for the SpecObjects: + + diff_cmp_id: The _diff_cmp_id of the two objects. If + _diff_cmp_id are different for the two objects, + they cannot be compared and ValueError is raised. + + diff_vals_a, diff_vals_b: _diff_vals for a and b, respectively, + or None if a or b is None. + + For collection SpecObjects (e.g. DebianDistribution, containing + DEBPackages; these have _collection_attribute defined), we also + have: + + collection: a list of SpecDiff objects for the contained + SpecObjects. + + a_only: SpecDiff objects from collection that only appear in the + first passed SpecObject + + b_only: SpecDiff objects from collection that only appear in the + second passed SpecObject + + a_and_b: SpecDiff objects in collection that appear in both + passed SpecObjects + + If a and b are lists, they are treated as files specifications, and + self.collection is a list of (fname_a, fname_b) tuples. + TODO: give files and file collections their own specobjects + """ + + def __init__(self, a, b): + if not isinstance(a, (SpecObject, list, type(None))) \ + or not isinstance(b, (SpecObject, list, type(None))): + raise TypeError('objects must be SpecObjects or None') + if a is None and b is None: + raise TypeError('objects cannot both be None') + if a and b and type(a) != type(b): + raise TypeError('objects must be of the same type') + self.cls = type(a) if a is not None else type(b) + self.a = a + self.b = b + if self.cls == list: + a_collection = set(a) + b_collection = set(b) + self.collection = [] + for fname in set(a_collection).union(b_collection): + if fname not in a_collection: + self.collection.append((None, fname)) + elif fname not in b_collection: + self.collection.append((fname, None)) + else: + self.collection.append((fname, fname)) + else: + if self.cls._diff_cmp_fields: + if a and b and a._diff_cmp_id != b._diff_cmp_id: + raise ValueError('objects\' _diff_cmp_id differ') + self.diff_vals_a = a._diff_vals if a else None + self.diff_vals_b = b._diff_vals if b else None + self.diff_cmp_id = a._diff_cmp_id if a else b._diff_cmp_id + if hasattr(self.cls, '_collection_attribute'): + self.collection = [] + a_collection = { c._diff_cmp_id: c for c in a.collection } if a else {} + b_collection = { c._diff_cmp_id: c for c in b.collection } if b else {} + all_cmp_ids = set(a_collection).union(b_collection) + for cmp_id in all_cmp_ids: + ac = a_collection[cmp_id] if cmp_id in a_collection else None + bc = b_collection[cmp_id] if cmp_id in b_collection else None + self.collection.append(SpecDiff(ac, bc)) + if hasattr(self, 'collection'): + self.a_only = [] + self.b_only = [] + self.a_and_b = [] + for pkg_diff in self.collection: + if isinstance(pkg_diff, tuple): + (a, b) = pkg_diff + else: + a = pkg_diff.a + b = pkg_diff.b + if not a: + self.b_only.append(pkg_diff) + elif not b: + self.a_only.append(pkg_diff) + else: + self.a_and_b.append(pkg_diff) + return diff --git a/reproman/distributions/conda.py b/reproman/distributions/conda.py index ef13eb093..8a881ae75 100644 --- a/reproman/distributions/conda.py +++ b/reproman/distributions/conda.py @@ -136,6 +136,7 @@ class CondaDistribution(Distribution): environments = TypedList(CondaEnvironment) _cmp_field = ('path',) + _collection_attribute = 'packages' def initiate(self, environment): """ diff --git a/reproman/distributions/debian.py b/reproman/distributions/debian.py index 6c9ad9b35..3d33d4a5b 100644 --- a/reproman/distributions/debian.py +++ b/reproman/distributions/debian.py @@ -89,7 +89,6 @@ class DEBPackage(Package): _diff_cmp_fields = ('name', 'architecture') _diff_fields = ('version',) - _comparison_fields = ('name', 'architecture', 'version') _register_with_representer(DEBPackage) diff --git a/reproman/distributions/redhat.py b/reproman/distributions/redhat.py index 8f7baf0c4..87a48fefe 100644 --- a/reproman/distributions/redhat.py +++ b/reproman/distributions/redhat.py @@ -65,7 +65,8 @@ class RPMPackage(Package): vendor = attrib() url = attrib() files = attrib(default=attr.Factory(list), hash=False) - _comparison_fields = ('name', 'version', 'architecture') + _diff_cmp_fields = ('name', 'architecture') + _diff_fields = ('version',) _register_with_representer(RPMPackage) diff --git a/reproman/distributions/vcs.py b/reproman/distributions/vcs.py index b118fd48c..200f8845e 100644 --- a/reproman/distributions/vcs.py +++ b/reproman/distributions/vcs.py @@ -125,6 +125,8 @@ class GitDistribution(VCSDistribution): _cmd = "git" packages = TypedList(GitRepo) + _collection_attribute = 'packages' + def initiate(self, session=None): pass @@ -296,6 +298,8 @@ class SVNDistribution(VCSDistribution): _cmd = "svn" packages = TypedList(SVNRepo) + _collection_attribute = 'packages' + def install_packages(self, session, use_version=True): raise NotImplementedError SVNRepo._distribution = SVNDistribution diff --git a/reproman/distributions/venv.py b/reproman/distributions/venv.py index f9059ce51..e017bdfcd 100644 --- a/reproman/distributions/venv.py +++ b/reproman/distributions/venv.py @@ -38,6 +38,8 @@ class VenvPackage(Package): location = attrib() editable = attrib(default=False) files = attrib(default=attr.Factory(list)) + _diff_cmp_fields = ('name', ) + _diff_fields = ('version', ) @attr.s @@ -45,6 +47,8 @@ class VenvEnvironment(SpecObject): path = attrib() python_version = attrib() packages = TypedList(VenvPackage) + _diff_cmp_fields = ('path', 'python_version') + _collection_attribute = 'packages' @attr.s @@ -54,6 +58,7 @@ class VenvDistribution(Distribution): path = attrib() venv_version = attrib() environments = TypedList(VenvEnvironment) + _collection_attribute = 'environments' def initiate(self, _): return diff --git a/reproman/interface/diff.py b/reproman/interface/diff.py index 5ab4e2704..57a493cdd 100644 --- a/reproman/interface/diff.py +++ b/reproman/interface/diff.py @@ -20,6 +20,8 @@ from ..distributions.debian import DebianDistribution from ..distributions.conda import CondaDistribution from ..distributions.vcs import GitDistribution, SVNDistribution +from ..distributions.venv import VenvDistribution, VenvEnvironment +from ..distributions.base import SpecDiff __docformat__ = 'restructuredtext' @@ -81,55 +83,40 @@ def __call__(prov1, prov2, satisfies): @staticmethod def diff(env_1, env_2): - result = {'method': 'diff', 'distributions': []} - # distribution type -> package type string supported_distributions = { DebianDistribution: 'Debian package', CondaDistribution: 'Conda package', GitDistribution: 'Git repository', - SVNDistribution: 'SVN repository' + SVNDistribution: 'SVN repository', + VenvDistribution: 'Venv environment', } env_1_dist_types = { d.__class__ for d in env_1.distributions } env_2_dist_types = { d.__class__ for d in env_2.distributions } all_dist_types = env_1_dist_types.union(env_2_dist_types) + diffs = [] + for dist_type in all_dist_types: if dist_type not in supported_distributions: msg = 'diff doesn\'t know how to handle %s' % str(dist_type) raise ValueError(msg) - dist_res = {'pkg_type': supported_distributions[dist_type], - 'pkg_diffs': []} dist_1 = env_1.get_distribution(dist_type) - if dist_1: - pkgs_1 = {p._diff_cmp_id: p for p in dist_1.packages} - else: - pkgs_1 = {} dist_2 = env_2.get_distribution(dist_type) - if dist_2: - pkgs_2 = {p._diff_cmp_id: p for p in dist_2.packages} - else: - pkgs_2 = {} - dist_res['pkgs_1'] = pkgs_1 - dist_res['pkgs_2'] = pkgs_2 - pkgs_1_s = set(pkgs_1) - pkgs_2_s = set(pkgs_2) - dist_res['pkgs_only_1'] = pkgs_1_s - pkgs_2_s - dist_res['pkgs_only_2'] = pkgs_2_s - pkgs_1_s - for cmp_key in pkgs_1_s.intersection(pkgs_2_s): - package_1 = pkgs_1[cmp_key] - package_2 = pkgs_2[cmp_key] - if package_1._diff_vals != package_2._diff_vals: - dist_res['pkg_diffs'].append((package_1, package_2)) - result['distributions'].append(dist_res) + diffs.append({'diff': SpecDiff(dist_1, dist_2), + 'pkg_type': supported_distributions[dist_type]}) + + diffs.append({'diff': SpecDiff(env_1.files, env_2.files), + 'pkg_type': 'files'}) files1 = set(env_1.files) files2 = set(env_2.files) - result['files_1_only'] = files1 - files2 - result['files_2_only'] = files2 - files1 - return result + return {'method': 'diff', + 'diffs': diffs, + 'files_1_only': files1 - files2, + 'files_2_only': files2 - files1} @staticmethod def satisfies(env_1, env_2): @@ -180,39 +167,60 @@ def render_cmdline_diff(result): status = 0 - for dist_res in result['distributions']: + for diff_d in result['diffs']: - if dist_res['pkgs_only_1'] or dist_res['pkgs_only_2']: - print(_make_plural(dist_res['pkg_type']) + ':') + diff = diff_d['diff'] + pkg_type = diff_d['pkg_type'] - if dist_res['pkgs_only_1']: - for cmp_key in sorted(dist_res['pkgs_only_1']): - package = dist_res['pkgs_1'][cmp_key] - print('< %s' % package.diff_identity_string) - status = 3 - if dist_res['pkgs_only_1'] and dist_res['pkgs_only_2']: - print('---') - if dist_res['pkgs_only_2']: - for cmp_key in sorted(dist_res['pkgs_only_2']): - package = dist_res['pkgs_2'][cmp_key] - print('> %s' % package.diff_identity_string) - status = 3 + if pkg_type == 'files': + files_diff = diff_d['diff'] + continue - for (package_1, package_2) in dist_res['pkg_diffs']: - print('%s %s:' % (dist_res['pkg_type'], - " ".join(package_1._diff_cmp_id))) - print('< %s' % package_1.diff_subidentity_string) - print('---') - print('> %s' % package_2.diff_subidentity_string) + if diff.a_only or diff.b_only: + print(_make_plural(pkg_type) + ':') status = 3 - - if result['files_1_only'] or result['files_2_only']: + for pkg_diff in diff.a_only: + print('< %s' % pkg_diff.a.diff_identity_string) + if diff.a_only and diff.b_only: + print('---') + for pkg_diff in diff.b_only: + print('> %s' % pkg_diff.b.diff_identity_string) + + for pkg_diff in diff.a_and_b: + if not hasattr(pkg_diff, 'collection'): + if pkg_diff.diff_vals_a != pkg_diff.diff_vals_b: + print('%s %s:' % (pkg_type, ' '.join(pkg_diff.diff_cmp_id))) + print('< %s' % pkg_diff.a.diff_subidentity_string) + print('---') + print('> %s' % pkg_diff.b.diff_subidentity_string) + status = 3 + else: + a_only = [ pd.a for pd in pkg_diff.a_only ] + b_only = [ pd.b for pd in pkg_diff.b_only ] + ab = [ pd for pd in pkg_diff.a_and_b + if pd.diff_vals_a != pd.diff_vals_b ] + if a_only or b_only or ab: + print('%s %s:' % (pkg_type, ' '.join(pkg_diff.diff_cmp_id))) + for pkg in a_only: + print('< %s' % pkg.diff_identity_string) + for pd in ab: + print('< %s %s' % (pd.a.diff_identity_string, pd.a.diff_subidentity_string)) + if (a_only and b_only) or ab: + print('---') + for pkg in b_only: + print('> %s' % pkg.diff_identity_string) + for pd in ab: + print('> %s %s' % (pd.b.diff_identity_string, pd.b.diff_subidentity_string)) + + files_1_only = [ t[0] for t in files_diff.collection if t[1] is None ] + files_2_only = [ t[1] for t in files_diff.collection if t[0] is None ] + if files_1_only or files_2_only: print('Files:') - for fname in result['files_1_only']: + for fname in files_1_only: print('< %s' % fname) - if result['files_1_only'] and result['files_2_only']: + if files_1_only and files_2_only: print('---') - for fname in result['files_2_only']: + for fname in files_2_only: print('> %s' % fname) status = 3 diff --git a/reproman/interface/tests/files/diff_1.yaml b/reproman/interface/tests/files/diff_1.yaml index 08a098312..40c498f02 100644 --- a/reproman/interface/tests/files/diff_1.yaml +++ b/reproman/interface/tests/files/diff_1.yaml @@ -58,4 +58,53 @@ distributions: - path: /path/1/to/different/svn/commit revision: 12 uuid: 95e4b738-84c7-154c-f082-34d40e21fdd4 +- name: venv + path: /usr/bin/virtualenv + venv_version: 15.1.0 + environments: + - path: /home/user/venvs/test/both + python_version: 2.7.15+ + packages: + - name: six + version: 1.12.0 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/six-1.12.0.dist-info/METADATA + - lib/python2.7/site-packages/six.pyc + - lib/python2.7/site-packages/six.py + - path: /home/user/venvs/test/bth_2 + python_version: 2.7.15+ + packages: + - name: one_only + version: 1.12.0 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/six-1.12.0.dist-info/METADATA + - lib/python2.7/site-packages/six.pyc + - lib/python2.7/site-packages/six.py + - name: version_mismatch + version: 1.1.0 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/six-1.12.0.dist-info/METADATA + - lib/python2.7/site-packages/six.pyc + - lib/python2.7/site-packages/six.py + - path: /home/user/venvs/test/1_only + python_version: 2.7.15+ + packages: + - name: funcsigs + version: 1.0.2 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/funcsigs-1.0.2.dist-info/METADATA + - name: rpaths + version: '0.13' + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/rpaths-0.13.dist-info/METADATA files: [/etc/a, /etc/b] diff --git a/reproman/interface/tests/files/diff_2.yaml b/reproman/interface/tests/files/diff_2.yaml index bed05c111..ff9939fc3 100644 --- a/reproman/interface/tests/files/diff_2.yaml +++ b/reproman/interface/tests/files/diff_2.yaml @@ -58,4 +58,53 @@ distributions: - path: /path/2/to/different/svn/commit revision: 14 uuid: 95e4b738-84c7-154c-f082-34d40e21fdd4 +- name: venv + path: /usr/bin/virtualenv + venv_version: 15.1.0 + environments: + - path: /home/user/venvs/test/both + python_version: 2.7.15+ + packages: + - name: six + version: 1.12.0 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/six-1.12.0.dist-info/METADATA + - lib/python2.7/site-packages/six.pyc + - lib/python2.7/site-packages/six.py + - path: /home/user/venvs/test/bth_2 + python_version: 2.7.15+ + packages: + - name: two_only + version: 1.12.0 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/six-1.12.0.dist-info/METADATA + - lib/python2.7/site-packages/six.pyc + - lib/python2.7/site-packages/six.py + - name: version_mismatch + version: 1.2.0 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/six-1.12.0.dist-info/METADATA + - lib/python2.7/site-packages/six.pyc + - lib/python2.7/site-packages/six.py + - path: /home/user/venvs/test/2_only + python_version: 2.7.15+ + packages: + - name: funcsigs + version: 1.0.2 + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/funcsigs-1.0.2.dist-info/METADATA + - name: rpaths + version: '0.13' + local: true + location: /home/user/venvs/test/lib/python2.7/site-packages + files: + - lib/python2.7/site-packages/rpaths-0.13.dist-info/METADATA files: [/etc/c, /etc/b] diff --git a/reproman/interface/tests/test_diff.py b/reproman/interface/tests/test_diff.py index 6248031aa..1668f6d05 100644 --- a/reproman/interface/tests/test_diff.py +++ b/reproman/interface/tests/test_diff.py @@ -152,6 +152,18 @@ def test_diff_svn(): assert_not_in('(/path/2/to/common/svn/repo)', outputs.out) +def test_diff_venv(): + with swallow_outputs() as outputs: + args = ['diff', diff_1_yaml, diff_2_yaml] + rv = main(args) + assert_equal(rv, 3) + assert_equal(outputs.err, '') + assert_in('Venv environments:', outputs.out) + assert_in('< /home/user/venvs/test/1_only 2.7.15+', outputs.out) + assert_in('> /home/user/venvs/test/2_only 2.7.15+', outputs.out) + assert_not_in('/home/user/venvs/test/both', outputs.out) + + def test_diff_satisfies_unsupported_distribution(): # using subprocess.call() here because we're looking for a condition # that raises an exception in main(), so it doesn't return and we