From c613319324b34f0af3e2bcd0bcef3eb6c804f19e Mon Sep 17 00:00:00 2001 From: Hadrien Chauvin Date: Tue, 24 Sep 2019 23:06:02 +0200 Subject: [PATCH] Initial version --- .bazelrc | 1 + .circleci/config.yml | 186 ++++++++++++++++ .gitignore | 2 + BUILD | 12 + LICENSE | 21 ++ README.md | 116 ++++++++++ WORKSPACE | 51 +++++ cli.py | 74 +++++++ common/BUILD | 7 + common/pathutils.py | 25 +++ import_into/BUILD | 49 +++++ import_into/__init__.py | 8 + import_into/e2e_test.py | 130 +++++++++++ import_into/import_into.py | 258 ++++++++++++++++++++++ import_into/import_into_test.py | 378 ++++++++++++++++++++++++++++++++ import_into/individual_repos.py | 29 +++ import_into/single_commit.py | 126 +++++++++++ import_into/testutils.py | 112 ++++++++++ internal/BUILD | 5 + internal/format.bzl | 83 +++++++ lint.sh | 261 ++++++++++++++++++++++ renovate.json | 5 + requirements.txt | 4 + 23 files changed, 1943 insertions(+) create mode 100644 .bazelrc create mode 100644 .circleci/config.yml create mode 100644 BUILD create mode 100644 LICENSE create mode 100644 README.md create mode 100644 WORKSPACE create mode 100644 cli.py create mode 100644 common/BUILD create mode 100644 common/pathutils.py create mode 100644 import_into/BUILD create mode 100644 import_into/__init__.py create mode 100644 import_into/e2e_test.py create mode 100644 import_into/import_into.py create mode 100644 import_into/import_into_test.py create mode 100644 import_into/individual_repos.py create mode 100644 import_into/single_commit.py create mode 100644 import_into/testutils.py create mode 100644 internal/BUILD create mode 100644 internal/format.bzl create mode 100755 lint.sh create mode 100644 renovate.json create mode 100644 requirements.txt diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..0e96ea3 --- /dev/null +++ b/.bazelrc @@ -0,0 +1 @@ +build --build_python_zip diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..629478e --- /dev/null +++ 
b/.circleci/config.yml @@ -0,0 +1,186 @@ +version: 2.1 + +executors: + vs2019: + description: > + An executor preloaded with visual studios 2019 plus a number of other + development tools. + parameters: + version: + type: string + description: The image version to use when executing. Defaults to "201908-06" + default: "201908-06" + shell: + type: string + description: > + The shell to use. + Defaults to `powershell.exe -ExecutionPolicy Bypass` + default: powershell.exe -ExecutionPolicy Bypass + machine: + image: "windows-server-2019-vs2019:<< parameters.version >>" + resource_class: windows.medium + shell: << parameters.shell >> + +jobs: + test-linux: + docker: + - image: hchauvin/dsk-build:latest + steps: + - checkout + - run: bazel build //... + - run: bazel test //... + - run: mv bazel-bin/monorepo_tools.zip monorepo_tools_linux_amd64.zip + - persist_to_workspace: + root: '.' + paths: + - monorepo_tools_linux_amd64.zip + test-linux-standalone: + docker: + - image: hchauvin/dsk-build:latest + steps: + - attach_workspace: + at: ./artifacts + - run: python ./artifacts/monorepo_tools_linux_amd64.zip --help + - run: python2 ./artifacts/monorepo_tools_linux_amd64.zip --help + test-windows-py3: + executor: + name: vs2019 + steps: + - checkout + - run: choco install --no-progress bazel + - run: choco install --no-progress python + - run: pip install wheel + - run: bazel build //... + - run: bazel test //... + - run: mv bazel-bin/monorepo_tools.zip monorepo_tools_py3_windows_amd64.zip + - persist_to_workspace: + root: '.' + paths: + - monorepo_tools_py3_windows_amd64.zip + test-windows-standalone-py3: + executor: + name: vs2019 + steps: + - attach_workspace: + at: ./artifacts + - run: choco install --no-progress python + - run: python ./artifacts/monorepo_tools_py3_windows_amd64.zip --help + test-windows-py2: + executor: + name: vs2019 + steps: + - checkout + - run: choco install --no-progress bazel + - run: pip install wheel + - run: bazel build //... 
+ - run: bazel test //... + - run: mv bazel-bin/monorepo_tools.zip monorepo_tools_py2_windows_amd64.zip + - persist_to_workspace: + root: '.' + paths: + - monorepo_tools_py2_windows_amd64.zip + test-windows-standalone-py2: + executor: + name: vs2019 + steps: + - attach_workspace: + at: ./artifacts + - run: choco install --no-progress python2 + - run: python ./artifacts/monorepo_tools_py2_windows_amd64.zip --help + test-darwin: + macos: + xcode: 9.3.0 + steps: + - checkout + - run: brew install bazel + - run: bazel build //... + - run: bazel test //... + - run: mv bazel-bin/monorepo_tools.zip monorepo_tools_darwin_amd64.zip + - persist_to_workspace: + root: '.' + paths: + - monorepo_tools_darwin_amd64.zip + test-darwin-standalone: + macos: + xcode: 9.3.0 + steps: + - attach_workspace: + at: ./artifacts + - run: python ./artifacts/monorepo_tools_darwin_amd64.zip --help + - run: python2 ./artifacts/monorepo_tools_darwin_amd64.zip --help + publish-github-release: + docker: + - image: cibuilds/github:0.10 + environment: + # https://stackoverflow.com/questions/57828037/cant-attach-circleci-workspace-from-windows-to-linux-due-to-cannot-change-owne + TAR_OPTIONS: --no-same-owner + steps: + - attach_workspace: + at: ./artifacts + - run: + name: "Publish Release on GitHub" + command: | + VERSION=${CIRCLE_TAG:-${CIRCLE_SHA1}} + ( + cd ./artifacts && + mv monorepo_tools_linux_amd64.zip monorepo_tools_${VERSION}_linux_amd64.zip && + mv monorepo_tools_py2_windows_amd64.zip monorepo_tools_py2_${VERSION}_windows_amd64.zip && + mv monorepo_tools_py3_windows_amd64.zip monorepo_tools_py3_${VERSION}_windows_amd64.zip && + mv monorepo_tools_darwin_amd64.zip monorepo_tools_${VERSION}_darwin_amd64.zip + ) + ghr -draft -t ${GITHUB_TOKEN} -u ${CIRCLE_PROJECT_USERNAME} -r ${CIRCLE_PROJECT_REPONAME} -c ${CIRCLE_SHA1} -delete ${VERSION} ./artifacts/ + +workflows: + main: + jobs: + - test-linux: + filters: + tags: + only: /^v.*/ + - test-linux-standalone: + requires: + - test-linux + filters: 
+ tags: + only: /^v.*/ + - test-windows-py3: + filters: + tags: + only: /^v.*/ + - test-windows-standalone-py3: + requires: + - test-windows-py3 + filters: + tags: + only: /^v.*/ + - test-windows-py2: + filters: + tags: + only: /^v.*/ + - test-windows-standalone-py2: + requires: + - test-windows-py2 + filters: + tags: + only: /^v.*/ + - test-darwin: + filters: + tags: + only: /^v.*/ + - test-darwin-standalone: + requires: + - test-darwin + filters: + tags: + only: /^v.*/ + - publish-github-release: + requires: + - test-linux-standalone + - test-windows-standalone-py2 + - test-windows-standalone-py3 + - test-darwin-standalone + filters: + tags: + only: /^v.*/ + branches: + ignore: /.*/ diff --git a/.gitignore b/.gitignore index a6ef824..a3c1fb7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /bazel-* +__pycache__ +*.pyc \ No newline at end of file diff --git a/BUILD b/BUILD new file mode 100644 index 0000000..0984c7c --- /dev/null +++ b/BUILD @@ -0,0 +1,12 @@ +load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") +load("@py_deps//:requirements.bzl", "requirement") + +py_binary( + name = "monorepo_tools", + srcs = ["cli.py"], + main = "cli.py", + visibility = ["//visibility:public"], + deps = [ + "//import_into", + ], +) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d69e69a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Hadrien Chauvin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of 
the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..053383c --- /dev/null +++ b/README.md @@ -0,0 +1,116 @@ +# `monorepo-tools`: Monorepo administration + +[![CircleCI](https://circleci.com/gh/hchauvin/monorepo-tools/tree/master.svg?style=svg)](https://circleci.com/gh/hchauvin/monorepo-tools/tree/master) + +`monorepo-tools` aims at offering a collection of tools to administrate a +monorepo. Monorepos have +[many advantages](https://en.wikipedia.org/wiki/Monorepo) for closed-source systems +as compared to separate repos, and are a sound evolution or starting point for projects +in need of large-scale code refactoring, collaboration, and ease of code +reuse. A monorepo correctly set up can diminish friction both for +fledgling startups, and for companies maintaining, evolving or migrating +legacy projects: a monorepo can be introduced at any stage of a product's lifecycle. + +The tools can be either consumed using a Command-Line Interface (CLI), +or programmatically. They are written in Python, packaged in a runnable ZIP +file, and compatible with Python 2.7 and Python 3.7. Prepackaged runnable +ZIP files are available +[on the release page](https://github.com/hchauvin/monorepo-tools/releases). +Tests are continuously run on Windows, Linux, and Mac OSX. + +Right now, `monorepo-tools` only offers one subcommand, `import`, but other +commands will follow. The scope will be vendoring, open sourcing part of +a monorepo with an OSS-monorepo sync, and related topics. 
We plan on +open-sourcing separately some work on continuous integration and +deployment pipelines for monorepos, as CI/CD is out-of-scope for this project. +Currently only Git is supported as a Version Control System and no plan +is made to extend support to other VCS such as Mercurial. + +## Installation + +For CLI use, please go to [the release page](https://github.com/hchauvin/monorepo-tools/releases) +and download the appropriate ZIP bundle for your platform. For Windows, +please make sure that you have Python 2 or Python 3 installed. On Windows, we +recommend installing Python using [Chocolatey](https://chocolatey.org) (respectively, +with `choco install python2` and `choco install python`). Usage can be queried with: + +```bash +python monorepo_tools.zip --help +``` + +For programmatic access, use [bazel](https://bazel.build/) and import +this project in your workspace. + +## `monorepo_tools-import` + +``` +usage: monorepo_tools import [-h] --individual_repos INDIVIDUAL_REPOS + --dest_branch DEST_BRANCH --monorepo_path + MONOREPO_PATH + +Import individual repos into a monorepo + +optional arguments: + -h, --help show this help message and exit + --individual_repos INDIVIDUAL_REPOS + Path to python module that exports one function, + individual_repos, that takes the destination branch + name as an argument + --dest_branch DEST_BRANCH + The destination branch to import into + --monorepo_path MONOREPO_PATH + The local path to the monorepo (it is created if it + does not exist) +``` + +Note that incremental update of an existing monorepo is supported, just +set `--monorepo_path` to a clone. + +See [./import_into/individual_repos.py](./import_into/individual_repos.py) for an example for `--individual_repos`. + +The strategy for `import` is "merge unrelated history then move": for each +individual repo, we create in the monorepo a branch that is the result +of pulling the unrelated history from the requested branch in the individual +repo. 
This history is directly taken from the individual repo, without +any transformation, meaning that the commit SHA1s are the same, which helps +for traceability and auditing. Additionally, because there is no +transformation, the import is faster than other strategies (see below). +The files in this branch are moved to the appropriate subdirectory of the +monorepo (and these moves are committed), then this branch is merged into +the destination monorepo branch. This way, `import` introduces two +additional commits per individual repo and destination branch: a move, +and a merge. Additionally, `import` provides the first commit in the +monorepo branch (with the message "Initial monorepo commit"), onto which +the individual repos are grafted. With this strategy, commit history is best +viewed in date order, not ancestor order. + +### Alternatives + +While researching `import`, other strategies and tools were looked at. We +specifically wanted a tool that would allow the complete import of histories, +and autonomy of the monorepo from the separate repos. Therefore, Git +[submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) +and [`git-subrepo`](https://github.com/ingydotnet/git-subrepo) were taken +out of the picture, as they work by maintaining references to the separate repos. + +Next, [Copybara](https://github.com/google/copybara) was +considered. However, its iterative filtering strategy is a huge +performance issue for large separate repos, and it was quickly abandoned, +as a full migration of the repos we were considering would take Copybara +many days to perform. + +[`git-stitch-repo`](https://metacpan.org/pod/git-stitch-repo) +was also considered. It nicely uses `git-fast-import` and `git-fast-export` +to combine linear histories into one linear history, which could be cleaner +than our "merge unrelated history then move" strategy (which comes with merge "nonlinearities"). 
+However, we found out that `git-stitch-repo` gave wrong results for nonlinear histories, as +the commits were sometimes not correctly stitched. The project, written in Perl, had not +been maintained for years. We also decided that Git history +rewriting was too difficult to get right for the mixed benefits +of enforcing a linear Git history. That's why we went back to the +very simple strategy that ended up being `import` and didn't try to patch +`git-stitch-repo` instead. + +## License + +`monorepo-tools` is licensed under [The MIT License](./LICENSE). diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..1c9f8a9 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,51 @@ +workspace(name = "monorepo_tools") + +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") + +# Sanity checks + +git_repository( + name = "bazel_skylib", + remote = "https://github.com/bazelbuild/bazel-skylib", + tag = "0.9.0", +) + +load("@bazel_skylib//lib:versions.bzl", "versions") + +versions.check("0.29.0") + +git_repository( + name = "rules_python", + commit = "54d1cb35cd54318d59bf38e52df3e628c07d4bbc", + remote = "https://github.com/bazelbuild/rules_python.git", +) + +# This call should always be present. +load("@rules_python//python:repositories.bzl", "py_repositories") + +py_repositories() + +# This one is only needed if you're using the packaging rules. +load("@rules_python//python:pip.bzl", "pip_repositories") + +pip_repositories() + +load("@rules_python//python:pip.bzl", "pip_import") + +# This rule translates the specified requirements.txt into +# @py_deps//:requirements.bzl, which itself exposes a pip_install method. +pip_import( + name = "py_deps", + requirements = "//:requirements.txt", +) + +# Load the pip_install symbol for py_deps, and create the dependencies' +# repositories. 
+load("@py_deps//:requirements.bzl", "pip_install") + +pip_install() + +# Linting +load("//internal:format.bzl", "format_repositories") + +format_repositories() diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..91ee1ff --- /dev/null +++ b/cli.py @@ -0,0 +1,74 @@ +"""Command-Line Interface""" +import re +import os +from git import Repo, SymbolicReference +import gitdb +import shutil +import logging +import argparse +import sys +from monorepo_tools.import_into import import_into_monorepo, IndividualRepo + +VERSION = '0.0.1' + + +def local_monorepo(monorepo_local_path): + if os.path.exists(monorepo_local_path): + return Repo(monorepo_local_path) + + print("Monorepo does not exist. Create it from scratch...") + return Repo.init(monorepo_local_path, bare = False) + + +def load_source(name, path): + """Loads a python module from its name (e.g., `foo.bar`) and its + path (e.g., `/foo/bar.py`). + + Provides a compatibility layer between Python 3 and Python 2.7 + """ + if sys.version_info >= (3, 0): + import importlib.machinery + mod_loader = importlib.machinery.SourceFileLoader(name, path) + return mod_loader.load_module() + + import imp + return imp.load_source(name, path) + + +def main(): + """Parses the command-line arguments.""" + parser = argparse.ArgumentParser( + "monorepo_tools", description = 'Monorepo tools {}'.format(VERSION)) + subparsers = parser.add_subparsers(dest = 'subcommand') + + import_parser = subparsers.add_parser( + 'import', description = 'Import individual repos into a monorepo') + import_parser.add_argument( + '--individual_repos', + required = True, + help = ( + 'Path to python module that exports one function, individual_repos, ' + + 'that takes the destination branch name as an argument')) + import_parser.add_argument( + '--dest_branch', + required = True, + help = 'The destination branch to import into') + import_parser.add_argument( + '--monorepo_path', + required = True, + help = + 'The local path to the monorepo (it is 
created if it does not exist)') + + options = parser.parse_args() + + if options.subcommand == 'import': + mod = load_source('individual_repos', options.individual_repos) + repos = mod.individual_repos(options.dest_branch) + monorepo = local_monorepo(options.monorepo_path) + import_into_monorepo(monorepo, repos, options.dest_branch) + else: + raise Exception("unexpected subcommand {}".format(options.subcommand)) + + +if __name__ == "__main__": + main() diff --git a/common/BUILD b/common/BUILD new file mode 100644 index 0000000..bcffe1d --- /dev/null +++ b/common/BUILD @@ -0,0 +1,7 @@ +load("@rules_python//python:defs.bzl", "py_library") + +py_library( + name = "common", + srcs = ["pathutils.py"], + visibility = ["//:__subpackages__"], +) diff --git a/common/pathutils.py b/common/pathutils.py new file mode 100644 index 0000000..1574bab --- /dev/null +++ b/common/pathutils.py @@ -0,0 +1,25 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import stat + + +def onerror(func, path, exc_info): + """Error handler for ``shutil.rmtree``. + + If the error is due to an access error (read only file) + it attempts to add write permission and then retries. + + If the error is for another reason it re-raises the error. + + Usage: `shutil.rmtree(path, onerror=onerror)` + """ + if not os.access(path, os.W_OK): + # Is the error an access error ? + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise diff --git a/import_into/BUILD b/import_into/BUILD new file mode 100644 index 0000000..8f886aa --- /dev/null +++ b/import_into/BUILD @@ -0,0 +1,49 @@ +# Implementation of the "import" command. 
+ +load("@rules_python//python:defs.bzl", "py_library", "py_test") +load("@py_deps//:requirements.bzl", "requirement") + +py_library( + name = "import_into", + srcs = [ + "__init__.py", + "import_into.py", + ], + visibility = ["//visibility:public"], + deps = [ + requirement("GitPython"), + requirement("gitdb"), + requirement("smmap"), + "//common", + ], +) + +py_test( + name = "import_into_test", + srcs = [ + "import_into_test.py", + "testutils.py", + ], + deps = [ + ":import_into", + requirement("attrs"), + ], +) + +py_test( + name = "e2e_test", + srcs = [ + "e2e_test.py", + "single_commit.py", + ], + data = [ + ":individual_repos.py", + "//:monorepo_tools", + ], + tags = ["e2e"], + deps = [ + ":import_into", + requirement("attrs"), + "@rules_python//python/runfiles", + ], +) diff --git a/import_into/__init__.py b/import_into/__init__.py new file mode 100644 index 0000000..243f155 --- /dev/null +++ b/import_into/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .import_into import import_into_monorepo, IndividualRepo + +__all__ = ["import_into_monorepo", "IndividualRepo"] diff --git a/import_into/e2e_test.py b/import_into/e2e_test.py new file mode 100644 index 0000000..74877ed --- /dev/null +++ b/import_into/e2e_test.py @@ -0,0 +1,130 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+"""End-to-end tests with "real" remote public GitHub repos, and direct +invocation of the `monorepo_tools` CLI.""" +import os +import unittest +import shutil +import filecmp +import attr +import time +import subprocess +from rules_python.python.runfiles import runfiles +from git import Repo +from monorepo_tools.import_into import import_into_monorepo +from monorepo_tools.common.pathutils import onerror +from single_commit import single_commit +from individual_repos import individual_repos + +REPOS_ROOT = os.path.join(os.environ["TEST_TMPDIR"], "REPOS") + + +@attr.s(frozen = True) +class Algorithm(object): + name = attr.ib() + fun = attr.ib() + options = attr.ib() + + +ALGORITHMS = [ + Algorithm( + name = "import_into_monorepo", fun = import_into_monorepo, + options = {}), + Algorithm( + name = "single_commit", + fun = single_commit, + options = {"workdir": os.path.join(REPOS_ROOT, "workdir")}), +] + + +class E2eTest(unittest.TestCase): + + def setUp(self): + # Removing the `REPOS_ROOT` dir tree here instead of in `tearDown` + # allows post-mortem inspection when combined with the Bazel + # CLI argument `--sandbox_debug`. 
+ if os.path.exists(REPOS_ROOT): + shutil.rmtree(REPOS_ROOT, onerror = onerror) + os.mkdir(REPOS_ROOT) + + def test_working_dir(self): + dest_branch_name = "stitched" + repos = individual_repos(dest_branch_name) + + elapsed = {} + for algorithm in ALGORITHMS: + start = time.clock() + monorepo = Repo.init( + os.path.join(REPOS_ROOT, "monorepo_{}".format(algorithm.name))) + algorithm.fun(monorepo, repos, dest_branch_name, **algorithm.options) + elapsed[algorithm.name] = time.clock() - start + + print("ELAPSED (in seconds): {}".format(elapsed)) + + # Expect the working dirs to all be the same + ref_algorithm = ALGORITHMS[0] + for algorithm in ALGORITHMS[1:]: + if not is_same( + os.path.join(REPOS_ROOT, "monorepo_{}".format(ref_algorithm.name)), + os.path.join(REPOS_ROOT, "monorepo_{}".format(algorithm.name))): + raise Exception(("monorepo for {} does not have the same working " + + "dir as {}").format(algorithm.name, + ref_algorithm.name)) + + def test_cli(self): + """Tests launching the CLI.""" + r = runfiles.Create() + sp = subprocess.Popen( + [ + "python", + r.Rlocation("monorepo_tools/monorepo_tools.zip"), "import", + "--individual_repos", + r.Rlocation("monorepo_tools/import_into/individual_repos.py"), + "--dest_branch", "stitched", "--monorepo_path", + os.path.join(REPOS_ROOT, "monorepo") + ], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE) + out, err = sp.communicate() + if sp.returncode != 0: + raise Exception( + "non-zero return code {};\nstdout:\n{}\nstderr:\n{}".format( + sp.returncode, out.decode('utf8'), err.decode('utf8'))) + + +class dircmp(filecmp.dircmp): + """ + Compare the content of dir1 and dir2. In contrast with filecmp.dircmp, this + subclass compares the content of files with the same path. + """ + + def phase3(self): + """ + Find out differences between common files. + Ensure we are using content comparison with shallow=False. 
+ """ + fcomp = filecmp.cmpfiles( + self.left, self.right, self.common_files, shallow = False) + self.same_files, self.diff_files, self.funny_files = fcomp + + +def is_same(dir1, dir2): + """ + Compare two directory trees content. + Return False if they differ, True is they are the same. + """ + compared = dircmp(dir1, dir2, ignore = [".git"]) + if (compared.left_only or compared.right_only or compared.diff_files or + compared.funny_files): + compared.report_full_closure() + return False + for subdir in compared.common_dirs: + if not is_same(os.path.join(dir1, subdir), os.path.join(dir2, subdir)): + return False + return True + + +if __name__ == "__main__": + unittest.main() diff --git a/import_into/import_into.py b/import_into/import_into.py new file mode 100644 index 0000000..ac04118 --- /dev/null +++ b/import_into/import_into.py @@ -0,0 +1,258 @@ +"""Imports individual repos into a monorepo using a "merge unrelated histories +and move" strategy.""" +import logging +import os +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from git import Actor +import shutil + +DEFAULT_LOGGER_NAME = "monorepo" +DEFAULT_AUTHOR = Actor("monorepo-tools", "monorepo-tools@chauvin.io") +DEFAULT_COMMITTER = DEFAULT_AUTHOR +INITIAL_COMMIT_MESSAGE = "Initial monorepo commit" + + +def import_into_monorepo(monorepo, + individual_repos, + dest_branch_name = "master", + silent = False, + author = DEFAULT_AUTHOR, + committer = DEFAULT_COMMITTER, + logger_name = DEFAULT_LOGGER_NAME): + """Imports individual repos into a monorepo using a "merge unrelated histories + and move" strategy. + + Args: + monorepo: Monorepo object, of type `git.Repo`, to import into. + individual_repos: List of individual repos, of type `IndividualRepo`. + dest_branch_name: The destination branch to put the individual repos in. + silent: Whether to suppress all progress report. 
+ author: The author to use when the import algorithm creates commits. + committer: The committer to use when the import algorithm creates commits. + logger_name: The `logging` logger name to use for all progress reports. + """ + syncer = _MonorepoSyncer(monorepo, individual_repos, author, committer, + logger_name) + if silent: + syncer.logger.setLevel(logging.WARNING) + syncer.create_remotes() + to_update = syncer.create_or_update_individual_repo_branches(dest_branch_name) + syncer.merge_individual_repo_branches(to_update, dest_branch_name) + syncer.logger.info("Done") + + +class IndividualRepo: + """An individual repo to import into a monorepo. + + Attrs: + location: Location of the individual repo. Can be remote (`https://...`) + or a location on the local file system. + branch: The branch to import. + name: The name of the monorepo. Names must be unique across a sequence + of individual repos to import. The name is by default the basename + of the location (if for instance the URL is + `https://github.com/orga/repo.git`, the `name` is `"repo"`). + destination: The destination folder, within the monorepo, where to put + the individual repo. The destination folder can have multiple parts, + e.g., `foo/bar`, in which case the subfolders are recursively created. + The destination folder is by default the `name`. + """ + + def __init__(self, location, branch, name = None, destination = None): + self.location = location + self.branch = branch + self.name = name or _default_repo_name(location) + self.destination = destination or self.name + + +class _MonorepoSyncer: + + def __init__(self, monorepo, individual_repos, author, committer, + logger_name): + self.logger = logging.getLogger(logger_name) + self.monorepo = monorepo + self.individual_repos = individual_repos + self.author = author + self.committer = committer + self.__initial_commit = None + + self._init_environ() + self._init_logger() + + def _init_environ(self): + # We need this because some commands (e.g. 
`pull`) do not allow for + # author/committer override. + os.environ["GIT_AUTHOR_NAME"] = self.author.name + os.environ["GIT_AUTHOR_EMAIL"] = self.author.email + os.environ["GIT_COMMITTER_NAME"] = self.committer.name + os.environ["GIT_COMMITTER_EMAIL"] = self.committer.email + + def _init_logger(self): + self.logger.setLevel(logging.INFO) + self.logger.handlers = [ + h for h in self.logger.handlers + if not isinstance(h, logging.StreamHandler) + ] + ch = logging.StreamHandler() + ch.setFormatter(logging.Formatter("+%(relativeCreated)dms - %(message)s")) + self.logger.addHandler(ch) + + def _initial_commit(self, dest_branch_name): + if not self.__initial_commit: + head = self._maybe_head(dest_branch_name) + if head: + for commit in self.monorepo.iter_commits(dest_branch_name): + if commit.message == INITIAL_COMMIT_MESSAGE: + self.__initial_commit = commit + # Not breaking here but instead going through the full + # list of commits ensure that we are selecting the first + # commit. Indeed, `iter_commits` gives back the commits + # in reverse chronological order. 
+ if not self.__initial_commit: + raise RuntimeError("cannot find initial commit message") + else: + self.__initial_commit = self.monorepo.index.commit( + INITIAL_COMMIT_MESSAGE, + author = self.author, + committer = self.committer) + return self.__initial_commit + + def _remote_exists(self, name): + try: + self.monorepo.remote(name) + return True + except ValueError: + return False + + def _maybe_head(self, branch_name): + try: + return self.monorepo.heads[branch_name] + except IndexError: + return None + + def create_remotes(self): + self.logger.info("Create the individual repo remotes...") + for individual_repo in self.individual_repos: + self.logger.info("For {}".format(individual_repo.name)) + if self._remote_exists(individual_repo.name): + self.monorepo.delete_remote(individual_repo.name) + remote = self.monorepo.create_remote(individual_repo.name, + individual_repo.location) + + def create_or_update_individual_repo_branches(self, + dest_branch_name, + fetch_depth = None): + self.logger.info("Create or update individual repo branches...") + to_update = [] + for individual_repo in self.individual_repos: + repo_name = individual_repo.name + branch_name = _individual_repo_branch_name(dest_branch_name, repo_name) + self.logger.info("{}: pulling...".format(repo_name)) + repo_branch = self._maybe_head(branch_name) + repo_branch_created = False + if not repo_branch: + repo_branch_created = True + repo_branch = self.monorepo.create_head( + branch_name, self._initial_commit(dest_branch_name)) + repo_branch.checkout() + commit_before_pull = repo_branch.commit + self.monorepo.remotes[repo_name].pull( + individual_repo.branch, + allow_unrelated_histories = repo_branch_created, + depth = fetch_depth) + commit_after_pull = repo_branch.commit + if commit_before_pull == commit_after_pull: + self.logger.info("{}: SKIP: up-to-date".format(repo_name)) + continue + to_update.append(repo_name) + self.logger.info( + "{}: create destination directories...".format(repo_name)) + 
repo_branch.checkout() + index = self.monorepo.index + filenames = [] + for (key, value) in index.entries.items(): + filename = key[0] + append = True + for cur in self.individual_repos: + if filename.startswith(cur.destination + "/"): + append = False + break + if append: + filenames.append(filename) + dest_dirs_to_add = [] + for filename in filenames: + dest_dir_rel = os.path.dirname( + os.path.join(individual_repo.destination, filename)) + dest_dir_abs = os.path.join(self.monorepo.working_dir, dest_dir_rel) + dest_dirs_to_add.append(dest_dir_abs) + if not os.path.exists(dest_dir_abs): + os.makedirs(dest_dir_abs) + #index.add(dest_dirs_to_add) + self.logger.info("{}: move files...".format(repo_name)) + for dest_dir_abs in set(dest_dirs_to_add): + dest_dir_rel = os.path.relpath(dest_dir_abs, self.monorepo.working_dir) + src_dir_rel = os.path.relpath(dest_dir_rel, individual_repo.destination) + if src_dir_rel == ".": + src_dir_rel = "" + else: + src_dir_rel += os.path.sep + cur_filenames = [] + for filename in filenames: + if filename.startswith(src_dir_rel.replace(os.path.sep, "/")): + if "/" not in filename[len(src_dir_rel):]: + cur_filenames.append(filename) + cur_filenames.append(dest_dir_rel) + index.move(cur_filenames) + repo_branch.reference = index.commit( + "Move files from repo {} to directory {}".format( + repo_name, individual_repo.destination), + author = self.author, + committer = self.committer) + return to_update + + def merge_individual_repo_branches(self, to_update, dest_branch_name): + self.logger.info("Merge old repo branches...") + dest_branch = self._maybe_head(dest_branch_name) + if not dest_branch: + dest_branch = self.monorepo.create_head( + dest_branch_name, self._initial_commit(dest_branch_name)) + dest_branch.checkout() + for repo_name in to_update: + self.logger.info("{}: merge".format(repo_name)) + source_branch = self.monorepo.heads[_individual_repo_branch_name( + dest_branch_name, repo_name)] + merge_base = 
self.monorepo.merge_base(dest_branch, source_branch) + index = self.monorepo.index + index.merge_tree(source_branch, base = merge_base) + next_commit = index.commit( + "Merge repo {}".format(repo_name), + parent_commits = (source_branch.commit, dest_branch.commit), + author = self.author, + committer = self.committer) + dest_branch.commit = next_commit + self.logger.info("Clean up working directory...") + self.monorepo.head.reference = dest_branch + for entry in os.listdir(self.monorepo.working_dir): + if entry == ".git": + continue + path = os.path.join(self.monorepo.working_dir, entry) + if os.path.isfile(path): + os.remove(path) + else: + shutil.rmtree(path) + self.monorepo.head.reset(index = True, working_tree = True) + + +def _default_repo_name(repo_location): + name = repo_location[repo_location.rindex(os.sep) + 1:] + if name.endswith('.git'): + return name[:-len('.git')] + return name + + +def _individual_repo_branch_name(dest_branch_name, repo_name): + return 'individual_repos/{}/{}'.format(dest_branch_name, repo_name) diff --git a/import_into/import_into_test.py b/import_into/import_into_test.py new file mode 100644 index 0000000..6d72c60 --- /dev/null +++ b/import_into/import_into_test.py @@ -0,0 +1,378 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""Unit tests for the `import_into` module.""" +import unittest +import os +import re +import attr +import shutil +from git import Repo, Actor, NULL_TREE +from monorepo_tools.common.pathutils import onerror +from monorepo_tools.import_into import import_into_monorepo, IndividualRepo +from testutils import (REPOS_ROOT, ExpectedCommits, ExpectedCommit, + ExpectedDiff, repo_file, debug_repos) + +#: If `True`, displays a summary of the repos on test case tear down. 
+DEBUG = False + + +class ImportTest(unittest.TestCase): + + def setUp(self): + # Removing the `REPOS_ROOT` dir tree here instead of in `tearDown` + # allows post-mortem inspection when combined with the Bazel + # CLI argument `--sandbox_debug`. + if os.path.exists(REPOS_ROOT): + shutil.rmtree(REPOS_ROOT, onerror = onerror) + os.mkdir(REPOS_ROOT) + + def tearDown(self): + if DEBUG: + debug_repos() + + def test_two_individual_repos_can_be_merged(self): + monorepo = Repo.init(os.path.join(REPOS_ROOT, "monorepo")) + repo1 = init_repo1() + repo2 = init_repo2() + import_into_monorepo( + monorepo, [repo1, repo2], "develop", silent = not DEBUG) + self.assertSetEqual( + set([repr(ref) for ref in monorepo.refs]), + set([ + '', + '', + '', + '', + '', + '' + ])) + expected_commits = TWO_INDIVIDUAL_REPOS_EXPECTED_COMMITS + self.assert_commits_equal( + expected_commits, + [commit for commit in monorepo.iter_commits("develop")]) + self.assertEqual( + expected_commits.match_head(monorepo, "develop"), "MERGE_MOVED_REPO2") + + def test_no_commit_if_the_individual_repo_did_not_change(self): + monorepo = Repo.init(os.path.join(REPOS_ROOT, "monorepo")) + repo1 = init_repo1() + repo2 = init_repo2() + import_into_monorepo( + monorepo, [repo1, repo2], "develop", silent = not DEBUG) + commit_before = monorepo.rev_parse("develop") + import_into_monorepo( + monorepo, [repo1, repo2], "develop", silent = not DEBUG) + commit_after = monorepo.rev_parse("develop") + self.assertEqual(commit_before.hexsha, commit_after.hexsha) + + def test_incremental_merge_if_the_individual_repo_changed(self): + monorepo = Repo.init(os.path.join(REPOS_ROOT, "monorepo")) + repo1 = init_repo1() + import_into_monorepo(monorepo, [repo1], "develop", silent = not DEBUG) + + repo1_git = Repo(repo1.location) + repo_file(repo1_git, "qux.txt", "QUX") + repo1_git.index.add([os.path.join(repo1_git.working_dir, "qux.txt")]) + commit = repo1_git.index.commit( + "Commit 2", + committer = Actor("Committer2", 
"committer2@domain.test"), + author = Actor("Author2", "author1@domain.test")) + repo1_git.heads["master1"].commit = commit + + import_into_monorepo(monorepo, [repo1], "develop", silent = not DEBUG) + + commits = [commit for commit in monorepo.iter_commits("develop")] + expected_commits = INCREMENTAL_MERGE_EXPECTED_COMMITS + self.assert_commits_equal(expected_commits, commits) + self.assertEqual( + expected_commits.match_head(monorepo, "develop"), "MERGE_MOVED_REPO1_2") + + def test_adding_an_individual_repo_after_another(self): + monorepo = Repo.init(os.path.join(REPOS_ROOT, "monorepo")) + repo1 = init_repo1() + repo2 = init_repo2() + import_into_monorepo(monorepo, [repo1], "develop", silent = not DEBUG) + import_into_monorepo( + monorepo, [repo1, repo2], "develop", silent = not DEBUG) + + commits = [commit for commit in monorepo.iter_commits("develop")] + expected_commits = TWO_INDIVIDUAL_REPOS_EXPECTED_COMMITS + self.assert_commits_equal(expected_commits, commits) + self.assertEqual( + expected_commits.match_head(monorepo, "develop"), "MERGE_MOVED_REPO2") + + def assert_commits_equal(self, expected_commits, actual_commits): + # The number of commits must be the same + self.assertEqual(len(expected_commits.commits), len(actual_commits)) + + # We associate commit hexsha to the names that the commits are + # given in the fixture. 
+ commit_hexsha_to_names = { + actual_commit.hexsha: expected_commits.match(actual_commit)[0] + for actual_commit in actual_commits + } + + for actual_commit in actual_commits: + # The actual commit is matched against an expected commit + (commit_name, expected_commit) = expected_commits.match(actual_commit) + + if expected_commit.committer: + self.assertEqual(actual_commit.committer.name, + expected_commit.committer, + "unexpected committer for {} (sha: {})".format( + commit_name, actual_commit.hexsha)) + if expected_commit.author: + self.assertEqual(actual_commit.author.name, expected_commit.author, + "unexpected author for {} (sha: {})".format( + commit_name, actual_commit.hexsha)) + if expected_commit.parents: + expected = set(expected_commit.parents) + actual = set([ + commit_hexsha_to_names[str(commit)] + for commit in actual_commit.parents + ]) + self.assertSetEqual(expected, actual, + ("unexpected commit parents for {} (sha: {})\n" + + "expected: {}\nactual: {}").format( + commit_name, + actual_commit.hexsha, + expected, + actual, + )) + if expected_commit.diffs_against_null_tree: + expected = set(expected_commit.diffs_against_null_tree) + actual = set([ + ExpectedDiff( + change_type = diff.change_type, + a = (diff.a_path, diff.a_blob.data_stream.read()) + if diff.a_blob else None, + b = (diff.b_path, diff.b_blob.data_stream.read()) + if diff.b_blob else None, + ) for diff in actual_commit.diff(NULL_TREE) + ]) + self.assertSetEqual(expected, actual, + ("unexpected diffs for {} (sha: {})\n" + + "expected: {}\nactual: {}").format( + commit_name, + actual_commit.hexsha, + expected, + actual, + )) + + +def init_repo1(): + repo = Repo.init(os.path.join(REPOS_ROOT, "repo1")) + repo_file(repo, "foo.txt", "FOO") + repo.index.add([os.path.join(repo.working_dir, "foo.txt")]) + commit = repo.index.commit( + "Commit 1", + committer = Actor("Committer1", "committer1@domain.test"), + author = Actor("Author1", "author1@domain.test")) + repo.create_head("master1", commit) 
+ return IndividualRepo(repo.working_dir, "master1") + + +def init_repo2(): + repo = Repo.init(os.path.join(REPOS_ROOT, "repo2")) + repo_file(repo, "bar.txt", "BAR") + repo.index.add([os.path.join(repo.working_dir, "bar.txt")]) + commit = repo.index.commit( + "Commit 2", + committer = Actor("Committer2", "committer2@domain.test"), + author = Actor("Author2", "author2@domain.test")) + repo.create_head("master2", commit) + return IndividualRepo(repo.working_dir, "master2") + + +TWO_INDIVIDUAL_REPOS_EXPECTED_COMMITS = ExpectedCommits({ + "INIT_MONOREPO": + ExpectedCommit( + message = "Initial monorepo commit", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = [], + diffs_against_null_tree = [], + ), + "INIT_REPO1": + ExpectedCommit( + message = "Commit 1", + committer = "Committer1", + author = "Author1", + parents = [], + diffs_against_null_tree = [ + ExpectedDiff(change_type = 'A', b = ('foo.txt', b'FOO')), + ], + ), + "INIT_REPO2": + ExpectedCommit( + message = "Commit 2", + committer = "Committer2", + author = "Author2", + parents = [], + diffs_against_null_tree = [ + ExpectedDiff(change_type = 'A', b = ('bar.txt', b'BAR')), + ], + ), + "PULL_REPO1": + ExpectedCommit( + message = re.compile(r"^Merge branch 'master1' of .*$"), + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["INIT_MONOREPO", "INIT_REPO1"], + diffs_against_null_tree = [], + ), + "PULL_REPO2": + ExpectedCommit( + message = re.compile(r"^Merge branch 'master2' of .*$"), + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["INIT_MONOREPO", "INIT_REPO2"], + diffs_against_null_tree = [], + ), + "MOVE_FILES_REPO1": + ExpectedCommit( + message = "Move files from repo repo1 to directory repo1", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["PULL_REPO1"], + diffs_against_null_tree = [ + ExpectedDiff( + change_type = 'R', + a = ('foo.txt', b'FOO'), + b = ('repo1/foo.txt', b'FOO'), + ), + ], + ), + 
"MOVE_FILES_REPO2": + ExpectedCommit( + message = "Move files from repo repo2 to directory repo2", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["PULL_REPO2"], + diffs_against_null_tree = [ + ExpectedDiff( + change_type = 'R', + a = ('bar.txt', b'BAR'), + b = ('repo2/bar.txt', b'BAR'), + ), + ], + ), + "MERGE_MOVED_REPO1": + ExpectedCommit( + message = "Merge repo repo1", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["MOVE_FILES_REPO1", "INIT_MONOREPO"], + diffs_against_null_tree = [], + ), + "MERGE_MOVED_REPO2": + ExpectedCommit( + message = "Merge repo repo2", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["MOVE_FILES_REPO2", "MERGE_MOVED_REPO1"], + diffs_against_null_tree = [], + ) +}) + +INCREMENTAL_MERGE_EXPECTED_COMMITS = ExpectedCommits({ + "INIT_MONOREPO": + ExpectedCommit( + message = "Initial monorepo commit", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = [], + diffs_against_null_tree = [], + ), + "INIT_REPO1": + ExpectedCommit( + message = "Commit 1", + committer = "Committer1", + author = "Author1", + parents = [], + diffs_against_null_tree = [ + ExpectedDiff(change_type = 'A', b = ('foo.txt', b'FOO')), + ], + ), + "PULL_REPO1_1": + ExpectedCommit( + message = re.compile(r"^Merge branch 'master1' of .*$"), + name_rev = "individual_repos/develop/repo1~3", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["INIT_MONOREPO", "INIT_REPO1"], + diffs_against_null_tree = [], + ), + "MOVE_FILES_REPO1_1": + ExpectedCommit( + message = "Move files from repo repo1 to directory repo1", + name_rev = "individual_repos/develop/repo1~2", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["PULL_REPO1_1"], + diffs_against_null_tree = [ + ExpectedDiff( + change_type = 'R', + a = ('foo.txt', b'FOO'), + b = ('repo1/foo.txt', b'FOO'), + ), + ], + ), + "MERGE_MOVED_REPO1_1": + ExpectedCommit( + message = "Merge repo repo1", 
+ name_rev = "develop^2", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["MOVE_FILES_REPO1_1", "INIT_MONOREPO"], + diffs_against_null_tree = [], + ), + "NEXT_COMMIT_REPO1": + ExpectedCommit( + message = "Commit 2", + committer = "Committer2", + author = "Author2", + parents = [], + diffs_against_null_tree = [ + ExpectedDiff(change_type = 'A', b = ('qux.txt', b'QUX')), + ], + ), + "PULL_REPO1_2": + ExpectedCommit( + message = re.compile(r"^Merge branch 'master1' of .*$"), + name_rev = "individual_repos/develop/repo1~1", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["MOVE_FILES_REPO1_1", "NEXT_COMMIT_REPO1"], + diffs_against_null_tree = [], + ), + "MOVE_FILES_REPO1_2": + ExpectedCommit( + message = "Move files from repo repo1 to directory repo1", + name_rev = "individual_repos/develop/repo1", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["PULL_REPO1_2"], + diffs_against_null_tree = [ + ExpectedDiff( + change_type = 'R', + a = ('qux.txt', b'QUX'), + b = ('repo1/qux.txt', b'QUX'), + ), + ], + ), + "MERGE_MOVED_REPO1_2": + ExpectedCommit( + message = "Merge repo repo1", + name_rev = "develop", + committer = "monorepo-tools", + author = "monorepo-tools", + parents = ["MOVE_FILES_REPO1_2", "MERGE_MOVED_REPO1_1"], + diffs_against_null_tree = [], + ), +}) + +if __name__ == "__main__": + unittest.main() diff --git a/import_into/individual_repos.py b/import_into/individual_repos.py new file mode 100644 index 0000000..0301bab --- /dev/null +++ b/import_into/individual_repos.py @@ -0,0 +1,29 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""Example individual repos for end-to-end tests. This file +is sourced by the CLI and by `e2e_test.py` directly. 
+""" +from monorepo_tools.import_into import IndividualRepo + + +def individual_repos(dest_branch_name): + # This tests that the CLI did pass the correct destination branch name + assert dest_branch_name == "stitched" + + # These repos and branches have been arbitrarily chosen + return [ + IndividualRepo( + location = "https://github.com/reduxjs/redux.git", + branch = "v4.0.4", + name = "redux", + destination = "packages/redux/core", + ), + IndividualRepo( + location = "https://github.com/acdlite/recompose.git", + branch = "v0.30.0", + name = "recompose", + destination = "packages/recompose", + ), + ] diff --git a/import_into/single_commit.py b/import_into/single_commit.py new file mode 100644 index 0000000..363247e --- /dev/null +++ b/import_into/single_commit.py @@ -0,0 +1,126 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""Importing algorithm that creates a single commit with all +the files coming from the individual directories. + +This algorithm discards history, but is useful for testing purposes. +""" +import os +import logging +import tempfile +import shutil +import subprocess +from git import Repo, Actor +from monorepo_tools.import_into import IndividualRepo + +DEFAULT_AUTHOR = Actor("monorepo-tools", "monorepo-tools@chauvin.io") +DEFAULT_COMMITTER = DEFAULT_AUTHOR +DEFAULT_LOGGER_NAME = "monorepo" +TEMP_ROOT_DIR = "~/.monorepo-tools" + + +def single_commit(monorepo, + individual_repos, + dest_branch_name = "master", + workdir = None, + silent = False, + author = DEFAULT_AUTHOR, + committer = DEFAULT_COMMITTER, + logger_name = DEFAULT_LOGGER_NAME): + """Imports individual repos into a monorepo using a "single commit" strategy. + The history of the individual repos is discarded. + + Args: + monorepo: Monorepo object, of type `git.Repo`, to import into. + individual_repos: List of individual repos, of type `IndividualRepo`. 
+ dest_branch_name: The destination branch to put the individual repos in. + workdir: The working directory where to put the shallow clones of the + individual repos. By default, a temporary directory is created. + silent: Whether to suppress all progress report. + author: The author to use when the import algorithm creates commits. + committer: The committer to use when the import algorithm creates commits. + logger_name: The `logging` logger name to use for all progress reports. + """ + syncer = _SingleCommit(monorepo, individual_repos, author, committer, + logger_name) + if silent: + syncer.logger.setLevel(logging.WARNING) + + temp_workdir_holder = None + if not workdir: + temp_root_dir = os.path.expanduser(TEMP_ROOT_DIR) + if not os.path.exists(temp_root_dir): + os.mkdir(temp_root_dir) + temp_workdir_holder = tempfile.TemporaryDirectory(dir = temp_root_dir) + workdir = temp_workdir_holder.name + try: + syncer.clone_single_branches(workdir) + syncer.copy(workdir) + finally: + if temp_workdir_holder: + temp_workdir_holder.cleanup() + syncer.create_dest_branch(dest_branch_name) + syncer.logger.info("Done") + + +class _SingleCommit: + + def __init__(self, monorepo, individual_repos, author, committer, + logger_name): + self.logger = logging.getLogger(logger_name) + self.monorepo = monorepo + self.individual_repos = individual_repos + self.author = author + self.committer = committer + + self._init_environ() + self._init_logger() + + def _init_environ(self): + # We need this because some commands (e.g. `pull`) do not allow for + # author/committer override. 
+ os.environ["GIT_AUTHOR_NAME"] = self.author.name + os.environ["GIT_AUTHOR_EMAIL"] = self.author.email + os.environ["GIT_COMMITTER_NAME"] = self.committer.name + os.environ["GIT_COMMITTER_EMAIL"] = self.committer.email + + def _init_logger(self): + self.logger.setLevel(logging.INFO) + self.logger.handlers = [ + h for h in self.logger.handlers + if not isinstance(h, logging.StreamHandler) + ] + ch = logging.StreamHandler() + ch.setFormatter(logging.Formatter("+%(relativeCreated)dms - %(message)s")) + self.logger.addHandler(ch) + + def clone_single_branches(self, workdir): + self.logger.info("Clone single branches...") + for individual_repo in self.individual_repos: + self.logger.info("For {}: {}".format(individual_repo.name, + individual_repo.branch)) + Repo.clone_from( + individual_repo.location, + os.path.join(workdir, individual_repo.name), + single_branch = True, + branch = individual_repo.branch, + depth = 1) + + def copy(self, workdir): + self.logger.info("Copying files in individual repos...") + for individual_repo in self.individual_repos: + self.logger.info(individual_repo.name) + shutil.copytree( + os.path.join(workdir, individual_repo.name), + os.path.join(self.monorepo.working_dir, individual_repo.destination), + symlinks = True, + ignore = shutil.ignore_patterns(".git")) + + def create_dest_branch(self, dest_branch_name): + self.logger.info("Create destination branch...") + self.monorepo.git.checkout("-b", dest_branch_name) + self.monorepo.git.add("-A") + self.monorepo.git.commit("-m", "Monorepo commit") + self.logger.info("Monorepo commit successfully made") diff --git a/import_into/testutils.py b/import_into/testutils.py new file mode 100644 index 0000000..d559a85 --- /dev/null +++ b/import_into/testutils.py @@ -0,0 +1,112 @@ +# Copyright (c) Hadrien Chauvin +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+"""Utility functions and objects used during unit testing.""" +import attr +import os +from git import Repo, NULL_TREE +import re +import sys + +#: Where to put all the test repos +REPOS_ROOT = os.path.join(os.environ["TEST_TMPDIR"], "REPOS") + + +@attr.s(frozen = True) +class ExpectedCommits(object): + """Sequence of expected commits.""" + + #: Dictionary of name to `ExpectedCommit` objects. + #: Names are used to reference expected commits, e.g. in + #: `ExpectedCommit.parents`. + commits = attr.ib() + + def match(self, actual_commit): + """Tries to match one of the expected commits to an actual commit. + + Args: + actual_commit: Actual commit, of type `git.Commit`. + Returns: + The matching `(name, ExpectedCommit)` pair. + Raises: + `IndexError` if the commit cannot be found. + """ + stripped_name_rev = actual_commit.name_rev.split()[1] + for commit in self.commits.items(): + message_match = False + if sys.version_info >= (3, 0): + cls = str + else: + cls = basestring + if not isinstance(commit[1].message, cls): + if commit[1].message.match(actual_commit.message): + message_match = True + elif commit[1].message == actual_commit.message: + message_match = True + # There is a match if the messages match and either no expected name_rev + # was specified or the name_revs match. + if message_match and (not commit[1].name_rev or + (commit[1].name_rev == stripped_name_rev)): + return commit + raise IndexError("cannot match message '{}' for name rev '{}'".format( + actual_commit.message, stripped_name_rev)) + + def match_head(self, monorepo, name): + """Matches the head of a ref. See `ExpectedCommits.match` for details.""" + return self.match(monorepo.rev_parse(name))[0] + + +@attr.s(frozen = True) +class ExpectedCommit(object): + """An expected commit.""" + message = attr.ib() + committer = attr.ib() + author = attr.ib() + #: Sequence of commit "names" (see `ExpectedCommits`). + parents = attr.ib() + #: Sequence of `ExpectedDiff` objects. 
+ diffs_against_null_tree = attr.ib() + name_rev = attr.ib(default = None) + + +@attr.s(frozen = True) +class ExpectedDiff(object): + change_type = attr.ib() + a = attr.ib(default = None) + b = attr.ib(default = None) + + +def repo_file(repo, filename, content): + with open(os.path.join(repo.working_dir, filename), "w") as f: + f.write(content) + + +def debug_repos(): + """Shows debug infos about all the repos.""" + for repo_name in os.listdir(REPOS_ROOT): + repo = Repo(path = os.path.join(REPOS_ROOT, repo_name)) + print("======================================================") + print("REPO: {}".format(repo_name)) + print("PATH: {}".format(repo.working_dir)) + for branch in repo.branches: + print("------------------------------------------------------") + print("BRANCH: {}".format(branch.name)) + for commit in repo.iter_commits(branch.name): + stripped_name_rev = commit.name_rev.split()[1] + print("{} ({}) - {}".format(stripped_name_rev, commit.hexsha, + commit.message)) + print(" Author: {} - committer: {}".format(commit.author, + commit.committer)) + print(" Parents: {}".format(commit.parents)) + print(" Diff against null tree:") + for diff in commit.diff(NULL_TREE): + if diff.a_blob: + a = (diff.a_path, diff.a_blob.data_stream.read()) + else: + a = None + if diff.b_blob: + b = (diff.b_path, diff.b_blob.data_stream.read()) + else: + b = None + print(" {} | A: {} - B: {}".format(diff.change_type, a, b)) diff --git a/internal/BUILD b/internal/BUILD new file mode 100644 index 0000000..e0235a4 --- /dev/null +++ b/internal/BUILD @@ -0,0 +1,5 @@ +java_binary( + name = "java_format", + main_class = "com.google.googlejavaformat.java.Main", + runtime_deps = ["@google_java_format"], +) diff --git a/internal/format.bzl b/internal/format.bzl new file mode 100644 index 0000000..c3c9ed3 --- /dev/null +++ b/internal/format.bzl @@ -0,0 +1,83 @@ +# Copyright 2018 The Bazel Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Dependencies for linting/formatting. +""" + +load( + "@bazel_tools//tools/build_defs/repo:http.bzl", + "http_archive", + "http_file", +) +load( + "@bazel_tools//tools/build_defs/repo:java.bzl", + "java_import_external", +) + +def _com_github_google_yapf_repository_impl(rctx): + rctx.download_and_extract( + url = "https://github.com/google/yapf/archive/v0.21.0.tar.gz", + sha256 = + "b930c1bc8233a9944671db7bdd6c9dc9ba2343b08b726a2dd0bff37ce1815baa", + stripPrefix = "yapf-0.21.0") + rctx.file("BUILD", """ +alias( + name="yapf", + actual="//yapf:yapf", + visibility = ["//visibility:public"], +) +""") + rctx.file("yapf/BUILD", """ +py_binary( + name="yapf", + srcs=glob(["**/*.py"]), + main="__main__.py", + visibility = ["//visibility:public"], +)""") + +_com_github_google_yapf_repository = repository_rule( + attrs = {}, + implementation = _com_github_google_yapf_repository_impl, +) + +def format_repositories(): + _com_github_google_yapf_repository(name = "com_github_google_yapf") + + java_import_external( + name = "google_java_format", + licenses = ["notice"], # Apache 2.0 + jar_urls = [ + "https://github.com/google/google-java-format/releases/download/google-java-format-1.5/google-java-format-1.5-all-deps.jar" + ], + jar_sha256 = ("7b839bb7534a173f0ed0cd0e9a583181d20850fcec8cf6e3800e4420a1fad184"), + ) + + http_file( + name = "io_bazel_buildifier_linux", + urls = [ + 
"https://github.com/bazelbuild/buildtools/releases/download/0.29.0/buildifier" + ], + sha256 = ( + "4c985c883eafdde9c0e8cf3c8595b8bfdf32e77571c369bf8ddae83b042028d6"), + executable = True, + ) + + http_file( + name = "io_bazel_buildifier_darwin", + urls = [ + "https://github.com/bazelbuild/buildtools/releases/download/0.29.0/buildifier.mac" + ], + sha256 = ( + "860378a2badba9517e523e20f152ef1ca16234e0ca462a1d71e5dbee7d506771"), + executable = True, + ) diff --git a/lint.sh b/lint.sh new file mode 100755 index 0000000..e5f4292 --- /dev/null +++ b/lint.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# Copyright 2018 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eou pipefail + +# Usage +# ===== +# +# - to lint/check conformance to style and best practice of all the files in +# the current working directory: "./lint.sh" or "./lint.sh check". +# - to fix what can be fixed automatically: "./lint.sh fix". +# - to skip a step, e.g. Skylark linting: "FMT_SKYLINT=false ./lint.sh check". 
+# +# +# Linters/formatters featured here +# ================================ +# +# - google-java-format: Java code +# - buildifier: BUILD/WORKSPACE files +# - skylint: Skylark files (*.bzl) - check only +# - yapf: Skylark and Python files +# +# +# An important note concerning trailing commas +# ============================================ +# +# Yapf could generate during fixing this Skylark one-liner: +# ``` +# foo = rule(attrs = {"hello": attr.string()}, implementation = _impl) +# ``` +# from some perfectly normal-looking code: +# ``` +# foo = rule( +# implementation = _impl, +# attrs = { +# "hello": attr.string() +# } +# ) +# ``` +# but this reformatting is not expected to pass validation. What is missing +# is trailing commas, after the last argument to `rule` and after the last +# element of the dictionary. If those are put: +# ``` +# foo = rule( +# implementation = _impl, +# attrs = { +# "hello": attr.string(), +# }, +# ) +# ``` +# then our configuration of Yapf won't touch anything. +# +# +# Implementation details: Why we need Buildifier, Skylint and Yapf +# ================================================================ +# +# Why do we need Buildifier, Skylint and Yapf to validate Bazel/Skylark files? +# Skylark is after all only a dialect of Python! The reasons are as follows: +# +# - Buildifier can fix BUILD/WORKSPACE files but breaks *.bzl files when it attempts +# to fix them. +# +# - Yapf is the only utility used here that can fix *.bzl files because it understands Python +# fully and not only a special subset of it. +# +# - However, Yapf does not enforce certain conventions that people have in *.bzl files, +# related to the fact that they are used to enrich BUILD files. That's where +# Buildifier comes in handy. 
For instance, Yapf could generate this one-liner: +# ``` +# foo = rule(implementation = _impl, attrs = {"hello": attr.string()}) +# ``` +# but this is an error for Buildifier, as it expects every keyword argument +# to be on its own line as well as the attribute dictionary to be split, and the +# `attrs` argument to come before the `implementation` argument (alphabetical order). +# By running Buildifier after Yapf, we ensure that these conventions are respected. +# Here, to force Yapf to split the arguments and the dictionary, we can add a comma after +# the last argument/element. Moreover, Yapf does not reorder keywords. Overall, +# if we supply this snippet to Yapf: +# ``` +# foo = rule(attrs = {"hello": attr.string(),}, implementation = _impl,) +# ``` +# we'll end up, after fixing, with: +# ``` +# foo = rule( +# attrs = { +# "hello": attr.string(), +# }, +# implementation = _impl, +# ) +# ``` +# which passes the Buildifier validation. +# +# - Buildifier only validates *.bzl files with respect to their likeness to BUILD files. +# To validate the semantics specific to Skylark files and ensure good practices are followed +# (documentation, unused imports, ...), Skylint can be used. Skylint only operates in "check" +# mode; it cannot fix anything on its own. (On an unrelated note, Pylint gives meaningless +# results when applied to Skylark files, so that's why Skylint is used here.) +# +# - Overall, this sauce has been chosen because it gives automatic formatting and +# linting warnings that feel natural for Skylark. + +BASE="$(pwd)" +MODE="${1:-check}" + +if [ "$MODE" = "check" ]; then + JAVA_OPTIONS=--dry-run + BUILDIFIER_MODE=check + YAPF_OPTIONS=--diff +else + YAPF_OPTIONS=--in-place + JAVA_OPTIONS=--replace + BUILDIFIER_MODE=fix +fi + +BAZEL_BIN=$(bazel info bazel-bin) +BAZEL_OUTPUT_BASE=$(bazel info output_base) + +function build() { + # NOTE: if and when the Skylint target becomes public, use a sh_binary instead + # of building everything here? 
+ bazel build --color=yes --show_progress_rate_limit=30 \ + //internal:java_format \ + @com_github_google_yapf//:yapf \ + @io_bazel_buildifier_linux//file \ + @io_bazel_buildifier_darwin//file +} + +function format_py_like() { + local PATTERN=$1 + local STYLE=$(cat) + local OUTPUT + + OUTPUT=$(find "$BASE" -name "$PATTERN" -exec "$BAZEL_BIN/external/com_github_google_yapf/yapf/yapf" \ + $YAPF_OPTIONS \ + "--style=$STYLE" \ + {} \;) + if [ $? != 0 ]; then + return 1 + fi + if [ "$MODE" = "check" ] && [ ! -z "$OUTPUT" ]; then + echo "$OUTPUT" + return 1 + fi +} + +function format_skylark() { + format_py_like "*.bzl" <<'EOF' +{ + based_on_style: google, + spaces_around_default_or_named_assign: True, + blank_lines_around_top_level_definition: 1, + indent_width: 2, + allow_split_before_dict_value: False, + each_dict_entry_on_separate_line: True, + split_arguments_when_comma_terminated: True, +} +EOF +} + +function format_python() { + format_py_like "*.py" <<'EOF' +{ + based_on_style: google, + spaces_around_default_or_named_assign: True, + blank_lines_around_top_level_definition: 2, + indent_width: 2, + indent_dictionary_value: True +} +EOF +} + +function format_bazel() { + if [ "$(uname)" = "Darwin" ]; then + BUILDIFIER=$BAZEL_OUTPUT_BASE/external/io_bazel_buildifier_darwin/file/downloaded + else + BUILDIFIER=$BAZEL_OUTPUT_BASE/external/io_bazel_buildifier_linux/file/downloaded + fi + + ERRORS=0 + $BUILDIFIER -mode=$BUILDIFIER_MODE $(find "$BASE" -name BUILD -type f) + ERRORS=$((ERRORS+$?)) + $BUILDIFIER -mode=$BUILDIFIER_MODE $(find "$BASE" -name WORKSPACE -type f) + ERRORS=$((ERRORS+$?)) + + # (buildifier cannot format *.bzl files) + if [ "$MODE" = "check" ] && ! $BUILDIFIER -mode=check $(find "$BASE" -name "*.bzl" -type f) >/dev/null; then + echo "*.bzl BUILDIFIER ERRORS:" + for f in $(find "$BASE" -name "*.bzl" -type f); do + OUTPUT=$($BUILDIFIER -mode=diff $f) + if [ ! 
-z "$OUTPUT" ]; then + echo "$f" + echo "$OUTPUT" + fi + done + # Some errors are false positives. + echo "(buildifier on *.bzl files: not enforced)" + fi + + if [ $ERRORS != 0 ]; then + echo "Errors: $ERRORS" + return 1 + fi +} + +function format_java() { + local OUTPUT + + OUTPUT=$("$BAZEL_BIN/internal/java_format" $JAVA_OPTIONS $(find "$BASE" -name "*.java" -type f)) + if [ "$MODE" = "check" ] && [ ! -z "$OUTPUT" ]; then + echo "$OUTPUT" + return 1 + fi +} + +SUMMARY="" +OVERALL_RESULT=0 + +function record() { + local SECTION_NAME=$1 + local FUNC=$2 + local DO=$3 + local STATUS + + if ! $DO; then + STATUS="Skipped" + elif eval "$FUNC"; then + STATUS="Ok" + else + STATUS="Failure" + OVERALL_RESULT=1 + fi + + SUMMARY+="$SECTION_NAME $STATUS"$'\n' +} + +function summarize() { + echo "============ SUMMARY ============" + echo "$SUMMARY" + return $OVERALL_RESULT +} + +if "${FMT_PREPARE:-true}"; then + build +fi +record skylark format_skylark "${FMT_SKYLARK:-true}" +record python format_python "${FMT_PYTHON:-true}" +record bazel format_bazel "${FMT_BAZEL:-true}" +record java format_java "${FMT_JAVA:-true}" +summarize diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..f45d8f1 --- /dev/null +++ b/renovate.json @@ -0,0 +1,5 @@ +{ + "extends": [ + "config:base" + ] +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bd9b396 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +GitPython==2.1.14 +gitdb==0.6.4 +attrs==19.1.0 +smmap==0.9.0 \ No newline at end of file