Skip to content

Commit

Permalink
Add JVM BUILD generation example (#214)
Browse files Browse the repository at this point in the history
**Problem**
JVM BUILD generation is left to the reader as an exercise.

**Solution**
This implements a working example of BUILD generation for Java,
including the JAR scanning and protobuf-java indexing.
  • Loading branch information
eed3si9n authored Feb 8, 2024
1 parent d519fd7 commit 49ee024
Show file tree
Hide file tree
Showing 11 changed files with 462 additions and 0 deletions.
11 changes: 11 additions & 0 deletions example/MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ module(

bazel_dep(name = "bazel_skylib", version = "1.4.1")
bazel_dep(name = "rules_python", version = "0.24.0")
bazel_dep(name = "rules_jvm_external", version = "4.5")

python = use_extension("@rules_python//python/extensions:python.bzl", "python")
python.toolchain(
python_version = "3.9",
Expand All @@ -18,3 +20,12 @@ pip.parse(
requirements_lock = "//:requirements_lock_3_9.txt",
)
use_repo(pip, "pip", "pip_39")

# Maven artifacts used by the example, declared through the `maven` module
# extension loaded from maven.bzl and exposed as the `maven` repository.
maven = use_extension(":maven.bzl", "maven")
maven.install(
artifacts = [
# NOTE(review): ":::" is not a standard Maven separator. maven_dep.bzl treats
# it as "append the full Scala version suffix"; presumably maven.bzl expands
# it the same way — confirm against maven.bzl.
"ch.epfl.scala:::scalafix-cli:0.11.0",
"org.slf4j:slf4j-api:1.7.28",
],
)
use_repo(maven, "maven")
5 changes: 5 additions & 0 deletions example/build_tools/bazel_rules/jar_scanner/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Runnable wrapper around py_build_commands.py, which writes the batched
# jar-scanner shell script. Publicly visible so it can be referenced from
# outside this package.
py_binary(
name = "py_build_commands",
srcs = ["py_build_commands.py"],
visibility = ["//visibility:public"],
)
96 changes: 96 additions & 0 deletions example/build_tools/bazel_rules/jar_scanner/py_build_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import sys
import json
import os
import zipfile

# Batch-size threshold used when splitting the target list across the
# generated `bazel build` invocations.
BATCH_SIZE = 256

# Header written once at the top of the generated shell script.
# The literal starts directly with the shebang: a `#!` line is only honoured
# when it is the very first line of the file, so no leading newline here.
PRELUDE = """#!/bin/bash
set -efo pipefail
set +x
trap 'echo ERROR in ${BASH_SOURCE[0]}, failed to run command, line with error: $LINENO' ERR
"""
# Shell fragment emitted once per batch; rendered with str.format().
#
# Format fields:
#   {targets}            space-separated labels passed to `bazel build`
#   {output_idx}         index of the batch, used in the progress message and
#                        in the names of the copied JSON files
#   {target_count}       number of targets in the batch (progress message only)
#   {bzl_gen_build_path} value for --override_repository=external_build_tooling_gen
#
# Doubled braces (`${{...}}`) render as literal shell `${...}` expansions.
# The trailing backslashes after the `bazel build` arguments are consumed by
# Python as line continuations inside the literal, so the rendered command is
# a single shell line. The backslash in the grep pattern is written as `\\.`
# so that the literal contains `\.` without relying on an invalid escape
# sequence.
TEMPLATE = """
echo -n "Running scan of 3rdparty files in batches, working on batch {output_idx}, with {target_count} targets in it"
START_BATCH=$(date +%s)
set +e
bazel build {targets} \
--aspects build_tools/bazel_rules/jar_scanner/rule.bzl%jar_scanner_aspect \
--output_groups=+jar_scanner_out \
--override_repository=external_build_tooling_gen={bzl_gen_build_path} \
--show_result=1000000 2> /tmp/cmd_out
RET=$?
if [ "$RET" != "0" ]; then
cat /tmp/cmd_out
exit $RET
fi
set -e
set +o pipefail
inner_idx=0
for f in `cat $OUTPUT_BASE/command.log |
grep ".*\\.json$" |
sed -e 's/^[^ ]*//' |
sed -e 's/^[^A-Za-z0-9/]*//' |
sed 's/^ *//;s/ *$//'`; do
if [ -f "$f" ]; then
cp $f ${{BZL_BUILD_GEN_EXTERNAL_FILES_PATH}}/{output_idx}_${{inner_idx}}_jar_scanner.json
inner_idx=$((inner_idx + 1))
fi
done
set -o pipefail
END_BATCH=$(date +%s)
echo "...complete in $(($END_BATCH-$START_BATCH)) seconds"
"""


def __transform_target(t):
    """Rewrite a ``//external:<name>`` label as ``@<name>//:jar``.

    Labels that do not start with ``//external:`` are returned unchanged.
    """
    prefix = "//external:"
    if t.startswith(prefix):
        # Slice the prefix off explicitly. str.lstrip() takes a *set of
        # characters*, so it would also eat leading characters of the name
        # itself (e.g. "//external:lateral" would collapse to "").
        return "@%s//:jar" % t[len(prefix):]
    return t


def write_command(file, output_idx, command_list, bzl_gen_build_path):
    """Append the shell fragment for one batch of targets to ``file``.

    Args:
        file: writable text file object receiving the rendered fragment.
        output_idx: index of this batch.
        command_list: target labels belonging to the batch.
        bzl_gen_build_path: path substituted into the repository override.
    """
    rewritten_labels = map(__transform_target, command_list)
    fragment = TEMPLATE.format(
        bzl_gen_build_path=bzl_gen_build_path,
        target_count=len(command_list),
        output_idx=output_idx,
        targets=" ".join(rewritten_labels),
    )
    file.write(fragment)


if __name__ == "__main__":
    # Usage: py_build_commands.py <targets_file> <output_script> <bzl_gen_build_path>
    #   targets_file:       one target label per line
    #   output_script:      shell script to (over)write
    #   bzl_gen_build_path: forwarded to every generated batch command
    input_file = sys.argv[1]
    output_file_path = sys.argv[2]
    bzl_gen_build_path = sys.argv[3]
    external_targets = []
    output_idx = 0
    with open(input_file, "r") as file1, open(output_file_path, "w") as output_file:
        output_file.write(PRELUDE)
        # Stream the file line by line instead of loading it with readlines().
        for line in file1:
            target = line.strip()
            if not target:
                # A blank line is not a target; passing it on would inflate
                # the reported target count with an empty argument.
                continue
            external_targets.append(target)
            # Flush as soon as the batch holds BATCH_SIZE targets (a strict
            # ">" would let every batch grow to BATCH_SIZE + 1).
            if len(external_targets) >= BATCH_SIZE:
                write_command(
                    output_file, output_idx, external_targets, bzl_gen_build_path
                )
                output_idx += 1
                external_targets = []
        # Emit whatever is left over as the final, possibly smaller, batch.
        if external_targets:
            write_command(
                output_file, output_idx, external_targets, bzl_gen_build_path
            )
91 changes: 91 additions & 0 deletions example/build_tools/bazel_rules/jar_scanner/rule.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# jar_scanner_aspect, invoked by the script that py_build_commands.py generates,
# is intended to scan exactly one JAR file, typically one exposed by scala_import(...),
# and generates a JSON file listing the class names used by bzl-gen-build.

# Aspect implementation: for a qualifying target, narrows its JARs down to
# exactly one, registers a JarScanner action on it, and exposes the resulting
# JSON file through the `jar_scanner_out` output group.
#
# Args:
#   target: the target the aspect is applied to.
#   ctx: the aspect context.
#
# Returns:
#   [] for targets that are skipped, otherwise a list holding one OutputGroupInfo.
#
# NOTE(review): leading indentation is not visible in this copy of the file, so
# the nesting described in the comments below is inferred from the
# if/elif/else pairing — confirm against the checked-in rule.bzl.
def _jar_scanner_impl(target, ctx):
label = str(target.label)
name = target.label.name
# Skip everything except targets whose workspace name starts with
# "_main~maven~maven" and targets whose label ends in proto_java / proto_scala.
if ((not target.label.workspace_name.startswith("_main~maven~maven")) and
(not label.endswith("proto_java")) and
(not label.endswith("proto_scala"))):
return []

# Declare the JSON output file, named after the target.
out = ctx.actions.declare_file("%s_jar_scanner.json" % (target.label.name))
files = ctx.rule.files
all_jars = []

# For protobuf-generated targets, we end up with multiple JAR files
# returned by info.files, so here I am manually narrowing down to exactly 1 JAR file.
if hasattr(files, "deps"):
info = target[DefaultInfo]

# Drop source JARs, and drop scala-reflect / scalapb-runtime JARs unless the
# target being scanned is that library itself.
for jar in info.files.to_list():
if jar.basename.endswith("-src.jar"):
None
elif (jar.basename == "scala-reflect.jar") and (not label.startswith("@@_main~maven~maven//:org_scala_lang__scala_reflect")):
None
elif ("scalapb-runtime" in jar.basename) and (not label.startswith("@@_main~maven~maven//:com_thesamet_scalapb_scalapb_runtime")):
None
else:
all_jars.append(jar)

if len(all_jars) > 1:
# Proto targets return many JARs, so we need to pick one ourselves:
# strip a trailing "_java" / "_scala" from the target name, prefer the last
# JAR whose short_path contains that name, and otherwise fall back to the
# last JAR collected.
if name.endswith("_java"):
name = name[:-5]
elif name.endswith("_scala"):
name = name[:-6]
all_jars0 = [jar for jar in all_jars if name in jar.short_path]

if len(all_jars0) == 0:
all_jars = [all_jars[-1]]
else:
all_jars = [all_jars0[-1]]
elif hasattr(files, "jars"):
# this is a scala_import (it seems.... :|)
# Keep every JAR that is not a "-sources.jar".
for jar in files.jars:
if not jar.basename.endswith("-sources.jar"):
all_jars.append(jar)

# The scanner is run on a single JAR; anything else is a hard failure.
if len(all_jars) != 1:
fail("%s (%s) has incorrect jars: %s" % (label, name, all_jars))

# Compute the path handed to --relative-path by dropping a prefix of
# short_path: the workspace name plus "/" for labels starting with "@jvm",
# and 3 more characters for a leading "../".
# NOTE(review): whether the "../" adjustment is nested under the "@jvm" check
# cannot be determined from this view. Labels compared above use the
# "@@_main~maven~maven//" form, so the "@jvm" branch may never match — confirm.
short = all_jars[0].short_path
prefix_len = 0
if label.startswith("@jvm"):
len_workspace = len(target.label.workspace_name)
prefix_len = len_workspace + 1 # workspace + /
if short.startswith("../"):
prefix_len = 3 + prefix_len
relative = short[prefix_len:]

# Command line for the jarscanner executable.
args = ctx.actions.args()
args.add("--label")
args.add(label)
args.add("--input-jar")
args.add(all_jars[0])
args.add("--relative-path")
args.add(relative)
args.add("--out")
args.add(out)
ctx.actions.run(
outputs = [out],
inputs = all_jars,
executable = ctx.files._jarscanner_exe[0],
mnemonic = "JarScanner",
arguments = [args],
)
return [OutputGroupInfo(jar_scanner_out = depset([out]))]

# Aspect that produces a `<target>_jar_scanner.json` file (output group
# `jar_scanner_out`) for the targets it is applied to.
jar_scanner_aspect = aspect(
    implementation = _jar_scanner_impl,
    # Empty list: the aspect does not propagate along any attribute.
    attr_aspects = [],
    attrs = {
        # Scanner executable run by the JarScanner action.
        "_jarscanner_exe": attr.label(
            default = Label("@external_build_tooling_gen//:jarscanner"),
            allow_files = True,
            # The tool is executed as part of a build action, so it belongs in
            # the exec configuration. `cfg = "host"` is deprecated and is
            # rejected by newer Bazel releases.
            cfg = "exec",
        ),
    },
)
63 changes: 63 additions & 0 deletions example/build_tools/bazel_rules/maven_dep.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Suffix appended to the artifact id for "::" coordinates (is_scala).
# Presumably the Scala binary version 2.12 — confirm.
TARGET_SCALA_SUFFIX="_2_12"

# Suffix appended to the artifact id for ":::" coordinates (full_scala).
# Presumably the full Scala version 2.12.18 — confirm.
TARGET_FULL_SCALA_SUFFIX="_2_12_18"

def maven_dep(coordinates_string):
    """Builds the `@maven//:` label for a Maven-style coordinate string.

    The label name is `<group>_<artifact>[_<version>]`, where the artifact id
    receives TARGET_FULL_SCALA_SUFFIX for ":::" coordinates or
    TARGET_SCALA_SUFFIX for "::" coordinates, and every "." and "-" in the
    whole label is replaced by "_".

    Args:
      coordinates_string: coordinate accepted by _parse_maven_coordinates.

    Returns:
      The label string.
    """
    coord = _parse_maven_coordinates(coordinates_string)

    artifact_id = coord["artifact_id"]
    if coord["full_scala"]:
        artifact_id += TARGET_FULL_SCALA_SUFFIX
    elif coord["is_scala"]:
        artifact_id += TARGET_SCALA_SUFFIX

    segments = [coord["group_id"], artifact_id]
    if "version" in coord:
        segments.append(coord["version"])

    label = "@maven//:" + "_".join(segments)
    return label.replace(".", "_").replace("-", "_")

def _parse_maven_coordinates(coordinates_string):
    """Splits a Maven-style coordinate string into a dict of its parts.

    The group id is separated from the remainder by ":", "::" or ":::"; the
    remainder has the usual `artifact[:packaging[:classifier]][:version]`
    shape. See also
    https://github.com/bazelbuild/rules_jvm_external/blob/4.3/specs.bzl

    Args:
      coordinates_string: the coordinate to parse.

    Returns:
      A dict with `group_id`, `artifact_id`, `is_scala` (separator was "::"
      or ":::") and `full_scala` (separator was ":::"), plus `version`,
      `packaging` and `classifier` when present in the input.
    """

    # Longest separator first, so ":::" is not mistaken for "::" or ":".
    separator = None
    for candidate in (":::", "::", ":"):
        if candidate in coordinates_string:
            separator = candidate
            break
    if separator == None:
        fail("failed to parse '{}'".format(coordinates_string))

    cut = coordinates_string.find(separator)
    fields = coordinates_string[cut + len(separator):].split(":")
    if len(fields) > 4:
        fail("failed to parse '{}'".format(coordinates_string))

    result = dict(group_id = coordinates_string[:cut], artifact_id = fields[0])
    if len(fields) >= 3:
        result["packaging"] = fields[1]
    if len(fields) == 4:
        result["classifier"] = fields[2]
    if len(fields) >= 2:
        # The version is always the last field when more than one is given.
        result["version"] = fields[-1]
    result["is_scala"] = separator != ":"
    result["full_scala"] = separator == ":::"
    return result
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"includes": [
"build_tools/lang_support/create_lang_build_files/bazel_jvm_modules_java.json"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"configurations": {
"java": {
"file_extensions": [
"java"
],
"build_config": {
"main": {
"headers": [],
"function_name": "java_library"
},
"test": {
"headers": [
{
"load_from": "//build_tools/lang_support/java/test:junit.bzl",
"load_value": "junit_tests"
}
],
"function_name": "junit_tests"
},
"binary_application": {
"headers": [
{
"load_from": "//build_tools/lang_support/java:java_binary.bzl",
"load_value": "java_binary"
}
],
"function_name": "java_binary"
}
},
"main_roots": [
"src/main/java"
],
"test_roots": [
"src/test/java"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env bash
#
# Drives BUILD-file generation for the JVM flavor:
#   1. collects the targets to scan from `bazel query`,
#   2. generates and runs the batched jar-scanner build script,
#   3. hands over to run_system_apps with the JVM module configuration.
#
# OUTPUT_BASE, TMP_WORKING_STATE, BZL_BUILD_GEN_TOOLS_LOCAL_PATH and the helper
# functions generate_cache_key / run_system_apps are not defined in this file;
# presumably bzl_gen_build_common.sh provides them — confirm there.

set -o errexit # abort on nonzero exitstatus
set -o nounset # abort on unbound variable
set -o pipefail # don't hide errors within pipes

if [ -n "${INVOKED_VIA_BAZEL:-}" ]; then
  REPO_ROOT="$BUILD_WORKING_DIRECTORY"
else
  REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd ../../../ && pwd )"
fi

GEN_FLAVOR=jvm
source "$REPO_ROOT/build_tools/lang_support/create_lang_build_files/bzl_gen_build_common.sh"
set -x

# The query's stdout is discarded; the results are read back from bazel's
# command.log instead.
bazel query 'kind(jvm_import, @maven//...)' > /dev/null
grep '@maven' "$OUTPUT_BASE/command.log" > "$TMP_WORKING_STATE/external_targets"

bazel query 'kind("java|scala", com/...)' > /dev/null
grep '//' "$OUTPUT_BASE/command.log" >> "$TMP_WORKING_STATE/external_targets"


cat "$TMP_WORKING_STATE/external_targets"

# CACHE_KEY is only consumed by the remote-cache calls, which are currently
# commented out below.
CACHE_KEY="$(generate_cache_key "$TMP_WORKING_STATE/external_targets" "$REPO_ROOT/WORKSPACE" "$REPO_ROOT/MODULE.bazel")"
rm -rf "$TMP_WORKING_STATE/external_files" &> /dev/null || true
# try_fetch_from_remote_cache "remote_jvm_${CACHE_KEY}"

# if [ ! -d $TMP_WORKING_STATE/external_files ]; then
# log "cache wasn't ready or populated"

# Write the batched `bazel build` script for every collected target.
bazel run build_tools/bazel_rules/jar_scanner:py_build_commands -- \
  "$TMP_WORKING_STATE/external_targets" \
  "$TMP_WORKING_STATE/external_targets_commands.sh" \
  "$BZL_BUILD_GEN_TOOLS_LOCAL_PATH"

cat "${TMP_WORKING_STATE}/external_targets_commands.sh"

chmod +x "${TMP_WORKING_STATE}/external_targets_commands.sh"
mkdir -p "$TMP_WORKING_STATE/external_files"
# `:-` keeps `set -o nounset` from aborting the script when
# TOOLING_WORKING_DIRECTORY is not set; an empty value simply fails the -d test.
if [[ -d "${TOOLING_WORKING_DIRECTORY:-}" ]]; then
  BZL_GEN_BUILD_TOOLS_PATH="$TOOLING_WORKING_DIRECTORY" "${TMP_WORKING_STATE}/external_targets_commands.sh"
else
  BZL_GEN_BUILD_TOOLS_PATH="$BZL_BUILD_GEN_TOOLS_LOCAL_PATH" "${TMP_WORKING_STATE}/external_targets_commands.sh"
fi

# update_remote_cache "remote_jvm_${CACHE_KEY}"
# fi

run_system_apps "build_tools/lang_support/create_lang_build_files/bazel_${GEN_FLAVOR}_modules.json"
Loading

0 comments on commit 49ee024

Please sign in to comment.