From 2201745270fc6e4cd7c47c27855447bb7a3e72b4 Mon Sep 17 00:00:00 2001 From: Matthew Wardrop Date: Thu, 4 Oct 2018 09:56:42 -0700 Subject: [PATCH] Improve tooling and backend for creating and working with knowledge posts (#308) --- knowledge_repo/config.py | 3 +- knowledge_repo/converters/pkp.py | 3 +- knowledge_repo/repositories/folder.py | 226 +++++++++++++++++++ knowledge_repo/repositories/gitrepository.py | 2 +- knowledge_repo/repository.py | 6 +- run_tests.bat | 8 +- run_tests.sh | 16 +- scripts/kp | 141 ++++++++++++ setup.py | 2 +- 9 files changed, 390 insertions(+), 17 deletions(-) create mode 100644 knowledge_repo/repositories/folder.py create mode 100755 scripts/kp diff --git a/knowledge_repo/config.py b/knowledge_repo/config.py index d2946d458..0b4896b61 100644 --- a/knowledge_repo/config.py +++ b/knowledge_repo/config.py @@ -4,6 +4,7 @@ import os import time import types +import yaml logger = logging.getLogger(__name__) @@ -89,7 +90,7 @@ def __set_from_file(self, d, filename, force=False): self.__set_from_module(d, config, force) elif filename.endswith('.yml'): with open(filename) as f: - config = yaml.load(f) + config = yaml.safe_load(f) self.update(config) def __set_from_module(self, d, module, force=False): diff --git a/knowledge_repo/converters/pkp.py b/knowledge_repo/converters/pkp.py index 84b37e85b..41f614872 100644 --- a/knowledge_repo/converters/pkp.py +++ b/knowledge_repo/converters/pkp.py @@ -1,10 +1,11 @@ import zipfile import io +import os from ..converter import KnowledgePostConverter -class IpynbFormat(KnowledgePostConverter): +class PkpConverter(KnowledgePostConverter): _registry_keys = ['kp', 'zip'] def to_file(self, filename): diff --git a/knowledge_repo/repositories/folder.py b/knowledge_repo/repositories/folder.py new file mode 100644 index 000000000..7dfdf6e9b --- /dev/null +++ b/knowledge_repo/repositories/folder.py @@ -0,0 +1,226 @@ +from __future__ import print_function +from builtins import input + +import os +import shutil +import logging +import re +import git +import socket +import time +from io import open + +from knowledge_repo._version import __git_uri__ +from ..post import KnowledgePost +from ..repository import KnowledgeRepository +from ..utils.exec_code import get_module_for_source +from ..utils.types import str_types +from ..utils.encoding import encode + +logger = logging.getLogger(__name__) + + +class FolderKnowledgeRepository(KnowledgeRepository): + _registry_keys = ['', 'file'] + + TEMPLATES = { + 'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')), + '.knowledge_repo_config.yml': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_config.yml')) + } + + @classmethod + def create(cls, uri, embed_tooling=False): + if uri.startswith('file://'): + uri = uri[len('file://'):] + path = os.path.abspath(uri) + if not os.path.exists(path): + os.makedirs(path) + + # Add README and configuration templates + for filename, template in cls.TEMPLATES.items(): + target = os.path.join(path, filename) + if not os.path.exists(target): + shutil.copy(template, target) + else: + logger.warning("Not overriding existing file '{}'.".format(filename)) + return FolderKnowledgeRepository(path) + + @classmethod + def from_uri(cls, uri, *args, **kwargs): + """ + If this folder is actually a git repository, a `GitKnowledgeRepository` + is returned instead, unless the folder knowledge repository is explicitly + requested via the 'file://' protocol. + """ + check_for_git = True + if uri.startswith('file://'): + check_for_git = False + uri = uri[len('file://'):] + if check_for_git and os.path.exists(os.path.join(uri, '.git')): + from .gitrepository import GitKnowledgeRepository + return GitKnowledgeRepository(uri, *args, **kwargs) + return cls(uri, *args, **kwargs) + + def init(self, config='.knowledge_repo_config.yml', auto_create=False): + self.auto_create = auto_create + self.path = self.uri + self.config.update(os.path.join(self.path, config)) + + @property + def path(self): + return self._path + + @path.setter + def path(self, path): + assert isinstance(path, str), "The path specified must be a string." + path = os.path.abspath(os.path.expanduser(path)) + if not os.path.exists(path): + path = os.path.abspath(path) + if self.auto_create: + self.create(path) + else: + raise ValueError("Provided path '{}' does not exist.".format(path)) + self._path = path + + # ----------- Repository actions / state ------------------------------------ + @property + def revision(self): + return time.time() + + @property + def status(self): + return 'OK' + + @property + def status_message(self): + return 'OK' + + # ---------------- Post retrieval methods -------------------------------- + + def _dir(self, prefix, statuses): + posts = set() + + if self.PostStatus.PUBLISHED in statuses: + + for path, folders, files in os.walk(os.path.join(self.path, prefix or '')): + + # Do not visit hidden folders + for folder in folders: + if folder.startswith('.'): + folders.remove(folder) + + posts.update( + os.path.join(os.path.relpath(path, start=self.path), folder) + for folder in folders if folder.endswith('.kp') + ) + posts.update( + os.path.join(os.path.relpath(path, start=self.path), file) + for file in files if file.endswith('.kp') + ) + + for post in sorted([post[2:] if post.startswith('./') else post for post in posts]): + yield post + + # ------------- Post submission / addition user flow ---------------------- + def _add_prepare(self, kp, path, update=False, **kwargs): + pass + + def _add_cleanup(self, kp, path, update=False, **kwargs): + pass + + def _submit(self, path=None, branch=None, force=False): + pass # Added posts are already submitted + + def _publish(self, path): # Publish a post for general perusal + pass # Added posts are already published + + def _unpublish(self, path): # unpublish a post for general perusal + raise NotImplementedError + + def _accept(self, path): # Approve to publish a post for general perusal + pass + + def _remove(self, path, all=False): + shutil.rmtree(os.path.join(self.path, path)) + + # ------------ Knowledge Post Data Retrieval Methods ------------------------- + + def _kp_uuid(self, path): + try: + return self._kp_read_ref(path, 'UUID') + except: + return None + + def _kp_path(self, path, rel=None): + return KnowledgeRepository._kp_path(self, os.path.expanduser(path), rel=rel or self.path) + + def _kp_exists(self, path, revision=None): + return os.path.exists(os.path.join(self.path, path)) + + def _kp_status(self, path, revision=None, detailed=False, branch=None): + return self.PostStatus.PUBLISHED + + def _kp_get_revision(self, path): + # We use a 'REVISION' file in the knowledge post folder rather than using git + # revisions because using git rev-parse is slow. + try: + return int(self._kp_read_ref(path, 'REVISION')) + except: + return 0 + + def _kp_get_revisions(self, path): + raise NotImplementedError + + def _kp_write_ref(self, path, reference, data, uuid=None, revision=None): + path = os.path.join(self.path, path) + if os.path.isfile(path): + kp = KnowledgePost.from_file(path, format='kp') + kp._write_ref(reference, data) + kp.to_file(path, format='kp') + else: + ref_path = os.path.join(path, reference) + ref_dir = os.path.dirname(ref_path) + if not os.path.exists(ref_dir): + os.makedirs(ref_dir) + with open(ref_path, 'wb') as f: + return f.write(data) + + def _kp_dir(self, path, parent=None, revision=None): # TODO: Account for revision + path = os.path.join(self.path, path) + if os.path.isdir(path): + if parent: + path = os.path.join(path, parent) + for dirpath, dirnames, filenames in os.walk(os.path.join(self.path, path)): + for filename in filenames: + if dirpath == "" and filename == "REVISION": + continue + yield os.path.relpath(os.path.join(dirpath, filename), os.path.join(self.path, path)) + else: + kp = KnowledgePost.from_file(path, format='kp') + for reference in kp._dir(parent=parent): + yield reference + + def _kp_has_ref(self, path, reference, revision=None): # TODO: Account for revision + path = os.path.join(self.path, path) + if os.path.isdir(path): + return os.path.isfile(os.path.join(path, reference)) + else: + kp = KnowledgePost.from_file(path, format='kp') + return kp._has_ref(reference) + + def _kp_diff(self, path, head, base): + raise NotImplementedError + + def _kp_new_revision(self, path, uuid=None): + self._kp_write_ref(path, "REVISION", encode(self._kp_get_revision(path) + 1)) + if uuid: + self._kp_write_ref(path, "UUID", encode(uuid)) + + def _kp_read_ref(self, path, reference, revision=None): + path = os.path.join(self.path, path) + if os.path.isdir(path): + with open(os.path.join(self.path, path, reference), 'rb') as f: + return f.read() + else: + kp = KnowledgePost.from_file(path, format='kp') + return kp._read_ref(reference) diff --git a/knowledge_repo/repositories/gitrepository.py b/knowledge_repo/repositories/gitrepository.py index ec9acf09d..ea6e61516 100644 --- a/knowledge_repo/repositories/gitrepository.py +++ b/knowledge_repo/repositories/gitrepository.py @@ -21,7 +21,7 @@ class GitKnowledgeRepository(KnowledgeRepository): - _registry_keys = ['', 'git'] + _registry_keys = ['git'] TEMPLATES = { 'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')), diff --git a/knowledge_repo/repository.py b/knowledge_repo/repository.py index c8c30ddd2..ece4ddc13 100644 --- a/knowledge_repo/repository.py +++ b/knowledge_repo/repository.py @@ -43,7 +43,7 @@ def for_uri(cls, uri, *args, **kwargs): if isinstance(uri, dict): return cls.for_uris(uri) scheme = urlparse(uri).scheme - return cls._get_subclass_for(scheme)(uri, *args, **kwargs) + return cls._get_subclass_for(scheme).from_uri(uri, *args, **kwargs) @classmethod def for_uris(cls, uri): @@ -57,6 +57,10 @@ def for_uris(cls, uri): krs = {name: cls.for_uri(uri) for name, uri in list(uris.items())} return MetaKnowledgeRepository(krs) + @classmethod + def from_uri(cls, url, *args, **kwargs): + return cls(url, *args, **kwargs) + @classmethod def create_for_uri(cls, uri, **kwargs): if isinstance(uri, dict): diff --git a/run_tests.bat b/run_tests.bat index 455a4a17b..a8a0caf27 100644 --- a/run_tests.bat +++ b/run_tests.bat @@ -24,7 +24,7 @@ IF EXIST "%test_repo_path%" ( RMDIR /Q /S %test_repo_path% ) -%PYTHON%\\python.exe scripts/knowledge_repo --repo="${test_repo_path}" init +%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://${test_repo_path}" init COPY tests\config_repo.yml %test_repo_path%\.knowledge_repo_config.yml PUSHD %test_repo_path% @@ -35,9 +35,9 @@ PUSHD %test_repo_path% POPD # Add some knowledge_posts -%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master -%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master -%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master +%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master +%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master +%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master REM "Running regression test suite" %PYTHON%\\python.exe -m nose --with-coverage --cover-package=knowledge_repo --verbosity=1 diff --git a/run_tests.sh b/run_tests.sh index 785fc76fe..4c01b0872 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -24,7 +24,7 @@ echo "Creating a test repository in ${test_repo_path}..." # Remove the repository if it exists rm -rf ${test_repo_path} &> /dev/null -`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" init +`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" init mkdir -p ${test_repo_path} &> /dev/null cp `dirname $0`/tests/config_repo.yml ${test_repo_path}/.knowledge_repo_config.yml &> /dev/null @@ -36,19 +36,19 @@ git commit -m "Update repository config." &> /dev/null popd &> /dev/null # Add some knowledge_posts -`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master -`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master -`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master +`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master +`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master +`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master for post in $(ls `dirname $0`/tests/test_posts); do if [[ "${post}" == *.ipynb ]]; then - `dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master; + `dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master; fi; if [[ "${post}" == *.Rmd ]]; then - `dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master; + `dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master; fi; if [[ "${post}" == *.md ]]; then - `dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master; + `dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master; fi; done @@ -56,7 +56,7 @@ echo echo "Synchronising database index" echo "-----------------------------" echo -`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py +`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py echo echo "Running regression test suite" diff --git a/scripts/kp b/scripts/kp new file mode 100755 index 000000000..3f718deb5 --- /dev/null +++ b/scripts/kp @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import os +import socket +import sys +import threading +import webbrowser + +# If this script is being run out of a checked out `knowledge-repo` repository, +# we need to make sure the appropriate knowledge_repo package is being used. To +# do this, we add the parent directory of the folder containing this script if +# it contains a python package named "knowledge_repo". +script_dir = os.path.dirname(__file__) +if os.path.exists(os.path.join(os.path.dirname(script_dir), 'knowledge_repo', '__init__.py')): + sys.path.insert(0, os.path.join(script_dir, '..')) + +import knowledge_repo # nopep8 +from knowledge_repo import KnowledgePost # nopep8 + +# Build argparser + +parser = argparse.ArgumentParser(add_help=False, description='Tooling to aid with the authoring and submission of knowledge posts.') +parser.add_argument('--version', dest='version', action='store_true', help='Show version and exit.') +parser.add_argument('--non-interactive', dest='interactive', action='store_false', help='Run scripts in non-interactive mode.') +parser.add_argument('-h', '--help', action='store_true', help='Show help and exit.') + +args, remaining_args = parser.parse_known_args() + +if args.version: + print('{}'.format(knowledge_repo.__version__)) + sys.exit(0) + +# ------------------------------------------------------------------------------ +# Everything below this line pertains to actions to be performed on a specific +# knowledge post. + +parser.add_argument('post_path', help='The path of an existing knowledge post, or the target for a new knowledge post.') + +# Add the action parsers +subparsers = parser.add_subparsers(help='actions') + +from_ = subparsers.add_parser('from', help='Create a knowledge post from an existing document.') +from_.set_defaults(action='from') +from_.add_argument('source', help='The path or url of the source file.') +from_.add_argument('--format', help='The format to assume for the source file (overriding default detection algorithms).') +from_.add_argument('--src', nargs='+', help='Specify additional files to be added as source files.') + +to = subparsers.add_parser('to', help='Export a knowledge post as another format.') +to.set_defaults(action='to') +to.add_argument('target', help='The path or url of the target location.') +to.add_argument('--format', help='The target format (overrides default detection algorithms).') + +preview = subparsers.add_parser('preview', help='Preview a knowledge post in a local web server.') +preview.set_defaults(action='preview') + +submit = subparsers.add_parser('submit', help='Submit a knowledge post to a nominated repository.') +submit.set_defaults(action='submit') +submit.add_argument( + 'repo', nargs='?', default=os.environ.get('KNOWLEDGE_REPO'), + help=( + "The repository into which the post should be submitted. (Defaults to " + "$KNOWLEDGE_REPO, which is currently {})".format( + os.environ['KNOWLEDGE_REPO'].__repr__() if 'KNOWLEDGE_REPO' in os.environ else 'unset' + ) + ) +) +submit.add_argument( + 'path', help="The path of the post within the repository." +) +submit.add_argument('--update', action='store_true', help='Whether this post should replace existing posts at the same path.') +submit.add_argument('--message', help="A commit message describing this post and/or its changes.") + +args = parser.parse_args() + +if args.help: + parser.print_help() + sys.exit(0) + +if not args.post_path.endswith('.kp'): + args.post_path += '.kp' + +if args.action == 'from': + kp = KnowledgePost.from_file(args.source, format=args.format, src_paths=args.src, interactive=args.interactive) + kp.to_file(args.post_path, format='kp') + sys.exit(0) + +# ------------------------------------------------------------------------------ +# Everything below this line requires the knowledge post to already exist to +# actions to be performed on a specific knowledge post. +if not os.path.exists(args.post_path): + raise IOError("Knowledge post does not exist at '{}'.".format(os.path.abspath(args.post_path))) + +kp = KnowledgePost.from_file(args.post_path, format='kp', interactive=args.interactive) + +if args.action == 'to': + kp.to_file(args.target, format=args.format) + sys.exit(0) + +if args.action == 'preview': + from knowledge_repo.app.deploy import KnowledgeDeployer, get_app_builder + + def get_available_port(): + s = socket.socket() + s.bind(("", 0)) + free_port = s.getsockname()[1] + s.close() + return free_port + port = get_available_port() + + post_path = os.path.abspath(args.post_path) + repo_dir = os.path.dirname(post_path) + post_path = os.path.basename(post_path) + + app_builder = get_app_builder('file://' + repo_dir, + debug=False, + db_uri='sqlite:///:memory:', + config=None, + REPOSITORY_INDEXING_ENABLED=False) + + url = 'http://127.0.0.1:{}/post/{}'.format(port, post_path) + threading.Timer(1.25, lambda: webbrowser.open(url)).start() + + KnowledgeDeployer.using('flask')( + app_builder, + host='0.0.0.0', + port=port + ).run() + sys.exit(0) + +if args.action == 'submit': + if not args.repo: + raise RuntimeError("Repository not specified.") + repo = knowledge_repo.KnowledgeRepository.for_uri(args.repo) + + repo.add(kp, path=args.path, update=args.update, message=args.message) + repo.submit(args.path) + sys.exit(0) diff --git a/setup.py b/setup.py index 4c227a524..90486cc0a 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ def run(self): packages=find_packages(), zip_safe=False, include_package_data=True, # See included paths in MANIFEST.in - scripts=['scripts/knowledge_repo'], + scripts=['scripts/knowledge_repo', 'scripts/kp'], install_requires=version_info['__dependencies__'], extras_require=version_info['__optional_dependencies__'], classifiers=[