Skip to content

Commit

Permalink
Improve tooling and backend for creating and working with knowledge p…
Browse files Browse the repository at this point in the history
…osts (#308)
  • Loading branch information
matthewwardrop authored Oct 4, 2018
1 parent 72e91ec commit 2201745
Show file tree
Hide file tree
Showing 9 changed files with 390 additions and 17 deletions.
3 changes: 2 additions & 1 deletion knowledge_repo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import time
import types
import yaml

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -89,7 +90,7 @@ def __set_from_file(self, d, filename, force=False):
self.__set_from_module(d, config, force)
elif filename.endswith('.yml'):
with open(filename) as f:
config = yaml.load(f)
config = yaml.safe_load(f)
self.update(config)

def __set_from_module(self, d, module, force=False):
Expand Down
3 changes: 2 additions & 1 deletion knowledge_repo/converters/pkp.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import zipfile
import io
import os

from ..converter import KnowledgePostConverter


class IpynbFormat(KnowledgePostConverter):
class PkpConverter(KnowledgePostConverter):
_registry_keys = ['kp', 'zip']

def to_file(self, filename):
Expand Down
226 changes: 226 additions & 0 deletions knowledge_repo/repositories/folder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
from __future__ import print_function
from builtins import input

import os
import shutil
import logging
import re
import git
import socket
import time
from io import open

from knowledge_repo._version import __git_uri__
from ..post import KnowledgePost
from ..repository import KnowledgeRepository
from ..utils.exec_code import get_module_for_source
from ..utils.types import str_types
from ..utils.encoding import encode

logger = logging.getLogger(__name__)


class FolderKnowledgeRepository(KnowledgeRepository):
    """
    Knowledge repository backed by a plain folder on the local filesystem.

    Posts live under the repository root as entries whose names end in
    '.kp'. An entry is either a directory holding one file per reference, or
    a single file that is read and written through `KnowledgePost` using the
    'kp' format. Every post is reported as published; submit/publish/accept
    are no-ops.
    """
    _registry_keys = ['', 'file']

    # Files copied into a freshly created repository: target name -> template.
    TEMPLATES = {
        'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')),
        '.knowledge_repo_config.yml': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_config.yml'))
    }

    @classmethod
    def create(cls, uri, embed_tooling=False):
        """
        Create the repository folder at `uri` (if needed) and return a
        `FolderKnowledgeRepository` for it.

        A leading 'file://' is stripped from `uri`. Template files that
        already exist in the target folder are left untouched and a warning
        is logged. `embed_tooling` is accepted but not used here.
        """
        if uri.startswith('file://'):
            uri = uri[len('file://'):]
        path = os.path.abspath(uri)
        if not os.path.exists(path):
            os.makedirs(path)

        # Add README and configuration templates
        for filename, template in cls.TEMPLATES.items():
            target = os.path.join(path, filename)
            if not os.path.exists(target):
                shutil.copy(template, target)
            else:
                logger.warning("Not overriding existing file '{}'.".format(filename))
        return FolderKnowledgeRepository(path)

    @classmethod
    def from_uri(cls, uri, *args, **kwargs):
        """
        If this folder is actually a git repository, a `GitKnowledgeRepository`
        is returned instead, unless the folder knowledge repository is explicitly
        requested via the 'file://' protocol.
        """
        check_for_git = True
        if uri.startswith('file://'):
            check_for_git = False
            uri = uri[len('file://'):]
        if check_for_git and os.path.exists(os.path.join(uri, '.git')):
            # Imported lazily, only when a git checkout is actually detected.
            from .gitrepository import GitKnowledgeRepository
            return GitKnowledgeRepository(uri, *args, **kwargs)
        return cls(uri, *args, **kwargs)

    def init(self, config='.knowledge_repo_config.yml', auto_create=False):
        """
        Point the repository at `self.uri` and load its configuration file.

        `auto_create` must be stored before `path` is assigned, because the
        `path` setter consults it when the folder does not exist.
        """
        self.auto_create = auto_create
        self.path = self.uri
        self.config.update(os.path.join(self.path, config))

    @property
    def path(self):
        """Absolute path of the repository root folder."""
        return self._path

    @path.setter
    def path(self, path):
        """
        Normalise `path` (expanding '~' and making it absolute) and store it.

        If the folder does not exist it is created when `self.auto_create`
        is set; otherwise a `ValueError` is raised.
        """
        assert isinstance(path, str), "The path specified must be a string."
        path = os.path.abspath(os.path.expanduser(path))
        if not os.path.exists(path):
            if self.auto_create:
                self.create(path)
            else:
                raise ValueError("Provided path '{}' does not exist.".format(path))
        self._path = path

    # ----------- Repository actions / state ------------------------------------
    @property
    def revision(self):
        """Current time; a folder has no native revision identifier."""
        return time.time()

    @property
    def status(self):
        return 'OK'

    @property
    def status_message(self):
        return 'OK'

    # ---------------- Post retrieval methods --------------------------------

    def _dir(self, prefix, statuses):
        """
        Yield, in sorted order, the repository-relative paths of all '.kp'
        folders and files found under `prefix`.

        Nothing is yielded unless `PostStatus.PUBLISHED` is in `statuses`.
        Hidden folders (names starting with '.') are neither reported nor
        descended into.
        """
        posts = set()

        if self.PostStatus.PUBLISHED in statuses:

            for path, folders, files in os.walk(os.path.join(self.path, prefix or '')):

                # Do not visit hidden folders. The list is pruned with a slice
                # assignment: os.walk honours in-place changes, and removing
                # items while iterating the same list would skip entries.
                folders[:] = [folder for folder in folders if not folder.startswith('.')]

                # For the repository root itself relpath gives '.', which must
                # not leak into the post path (on any platform).
                relative = os.path.relpath(path, start=self.path)
                base = '' if relative == os.curdir else relative

                posts.update(
                    os.path.join(base, name)
                    for name in folders + files if name.endswith('.kp')
                )

        for post in sorted(posts):
            yield post

    # ------------- Post submission / addition user flow ----------------------
    def _add_prepare(self, kp, path, update=False, **kwargs):
        pass

    def _add_cleanup(self, kp, path, update=False, **kwargs):
        pass

    def _submit(self, path=None, branch=None, force=False):
        pass  # Added posts are already submitted

    def _publish(self, path):  # Publish a post for general perusal
        pass  # Added posts are already published

    def _unpublish(self, path):  # unpublish a post for general perusal
        raise NotImplementedError

    def _accept(self, path):  # Approve to publish a post for general perusal
        pass

    def _remove(self, path, all=False):
        """
        Delete the post stored at `path` (relative to the repository root).

        Handles both storage layouts: a single-file post is unlinked, a
        folder post is removed recursively. `all` is accepted but not used.
        """
        target = os.path.join(self.path, path)
        if os.path.isfile(target):
            # shutil.rmtree refuses to operate on regular files.
            os.remove(target)
        else:
            shutil.rmtree(target)

    # ------------ Knowledge Post Data Retrieval Methods -------------------------

    def _kp_uuid(self, path):
        """Return the contents of the post's 'UUID' reference, or None if it cannot be read."""
        try:
            return self._kp_read_ref(path, 'UUID')
        except Exception:  # best effort; do not swallow KeyboardInterrupt/SystemExit
            return None

    def _kp_path(self, path, rel=None):
        return KnowledgeRepository._kp_path(self, os.path.expanduser(path), rel=rel or self.path)

    def _kp_exists(self, path, revision=None):
        return os.path.exists(os.path.join(self.path, path))

    def _kp_status(self, path, revision=None, detailed=False, branch=None):
        return self.PostStatus.PUBLISHED

    def _kp_get_revision(self, path):
        """Return the integer stored in the post's 'REVISION' reference, or 0 if unavailable."""
        # We use a 'REVISION' file in the knowledge post folder rather than using git
        # revisions because using git rev-parse is slow.
        try:
            return int(self._kp_read_ref(path, 'REVISION'))
        except Exception:  # missing or malformed REVISION means "no revision yet"
            return 0

    def _kp_get_revisions(self, path):
        raise NotImplementedError

    def _kp_write_ref(self, path, reference, data, uuid=None, revision=None):
        """
        Store `data` under `reference` in the post at `path`.

        For a single-file post the file is loaded, updated and written back;
        nothing is returned. Otherwise the reference is written as a file
        inside the post folder (creating parent directories as needed) and
        the result of the file's `write` call is returned. `uuid` and
        `revision` are accepted but not used.
        """
        path = os.path.join(self.path, path)
        if os.path.isfile(path):
            kp = KnowledgePost.from_file(path, format='kp')
            kp._write_ref(reference, data)
            kp.to_file(path, format='kp')
        else:
            ref_path = os.path.join(path, reference)
            ref_dir = os.path.dirname(ref_path)
            if not os.path.exists(ref_dir):
                os.makedirs(ref_dir)
            with open(ref_path, 'wb') as f:
                return f.write(data)

    def _kp_dir(self, path, parent=None, revision=None):  # TODO: Account for revision
        """
        Yield the references contained in the post at `path`, optionally
        restricted to those below `parent`.

        For folder posts the yielded names are file paths relative to the
        walked folder; for single-file posts they come from
        `KnowledgePost._dir`.
        """
        path = os.path.join(self.path, path)
        if os.path.isdir(path):
            if parent:
                path = os.path.join(path, parent)
            for dirpath, dirnames, filenames in os.walk(path):
                for filename in filenames:
                    # NOTE(review): `dirpath` is rooted at the absolute `path`,
                    # so this comparison with "" does not look like it can
                    # ever match and REVISION files are still yielded. Kept
                    # as-is because callers may rely on that; confirm intent.
                    if dirpath == "" and filename == "REVISION":
                        continue
                    yield os.path.relpath(os.path.join(dirpath, filename), path)
        else:
            kp = KnowledgePost.from_file(path, format='kp')
            for reference in kp._dir(parent=parent):
                yield reference

    def _kp_has_ref(self, path, reference, revision=None):  # TODO: Account for revision
        """Return whether the post at `path` contains `reference`."""
        path = os.path.join(self.path, path)
        if os.path.isdir(path):
            return os.path.isfile(os.path.join(path, reference))
        else:
            kp = KnowledgePost.from_file(path, format='kp')
            return kp._has_ref(reference)

    def _kp_diff(self, path, head, base):
        raise NotImplementedError

    def _kp_new_revision(self, path, uuid=None):
        """Write the post's revision number incremented by one and, if given, its `uuid`."""
        self._kp_write_ref(path, "REVISION", encode(self._kp_get_revision(path) + 1))
        if uuid:
            self._kp_write_ref(path, "UUID", encode(uuid))

    def _kp_read_ref(self, path, reference, revision=None):
        """
        Return the data stored under `reference` in the post at `path`.

        Folder posts are read straight from disk in binary mode; single-file
        posts are read through `KnowledgePost`. `revision` is accepted but
        not used.
        """
        path = os.path.join(self.path, path)
        if os.path.isdir(path):
            with open(os.path.join(path, reference), 'rb') as f:
                return f.read()
        else:
            kp = KnowledgePost.from_file(path, format='kp')
            return kp._read_ref(reference)
2 changes: 1 addition & 1 deletion knowledge_repo/repositories/gitrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


class GitKnowledgeRepository(KnowledgeRepository):
_registry_keys = ['', 'git']
_registry_keys = ['git']

TEMPLATES = {
'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')),
Expand Down
6 changes: 5 additions & 1 deletion knowledge_repo/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def for_uri(cls, uri, *args, **kwargs):
if isinstance(uri, dict):
return cls.for_uris(uri)
scheme = urlparse(uri).scheme
return cls._get_subclass_for(scheme)(uri, *args, **kwargs)
return cls._get_subclass_for(scheme).from_uri(uri, *args, **kwargs)

@classmethod
def for_uris(cls, uri):
Expand All @@ -57,6 +57,10 @@ def for_uris(cls, uri):
krs = {name: cls.for_uri(uri) for name, uri in list(uris.items())}
return MetaKnowledgeRepository(krs)

@classmethod
def from_uri(cls, url, *args, **kwargs):
    """
    Build an instance of this class for `url`.

    Any extra positional and keyword arguments are forwarded unchanged to
    the class constructor.
    """
    instance = cls(url, *args, **kwargs)
    return instance

@classmethod
def create_for_uri(cls, uri, **kwargs):
if isinstance(uri, dict):
Expand Down
8 changes: 4 additions & 4 deletions run_tests.bat
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ IF EXIST "%test_repo_path%" (
RMDIR /Q /S %test_repo_path%
)

%PYTHON%\\python.exe scripts/knowledge_repo --repo="${test_repo_path}" init
REM Initialise the test repository. cmd.exe expands %%var%% references only;
REM the bash-style ${var} form would be passed through literally.
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" init
COPY tests\config_repo.yml %test_repo_path%\.knowledge_repo_config.yml

PUSHD %test_repo_path%
Expand All @@ -35,9 +35,9 @@ PUSHD %test_repo_path%
POPD

# Add some knowledge_posts
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master

REM "Running regression test suite"
%PYTHON%\\python.exe -m nose --with-coverage --cover-package=knowledge_repo --verbosity=1
16 changes: 8 additions & 8 deletions run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ echo "Creating a test repository in ${test_repo_path}..."
# Remove the repository if it exists
rm -rf ${test_repo_path} &> /dev/null

`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" init
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" init
mkdir -p ${test_repo_path} &> /dev/null
cp `dirname $0`/tests/config_repo.yml ${test_repo_path}/.knowledge_repo_config.yml &> /dev/null

Expand All @@ -36,27 +36,27 @@ git commit -m "Update repository config." &> /dev/null
popd &> /dev/null

# Add some knowledge_posts
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master

for post in $(ls `dirname $0`/tests/test_posts); do
if [[ "${post}" == *.ipynb ]]; then
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
fi;
if [[ "${post}" == *.Rmd ]]; then
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
fi;
if [[ "${post}" == *.md ]]; then
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
fi;
done

echo
echo "Synchronising database index"
echo "-----------------------------"
echo
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py

echo
echo "Running regression test suite"
Expand Down
Loading

0 comments on commit 2201745

Please sign in to comment.