Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce ocrd/core-cuda #1041

Merged
merged 19 commits into from
Jun 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV PYTHONIOENCODING utf8
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV PIP=pip3
ENV PIP=pip

WORKDIR /build-ocrd
COPY ocrd ./ocrd
Expand All @@ -24,7 +24,6 @@ RUN apt-get update && apt-get -y install software-properties-common \
&& apt-get update && apt-get -y install \
ca-certificates \
python3-dev \
python3-pip \
python3-venv \
gcc \
make \
Expand All @@ -34,11 +33,11 @@ RUN apt-get update && apt-get -y install software-properties-common \
sudo \
git \
&& make deps-ubuntu \
&& pip3 install --upgrade pip setuptools \
&& python3 -m venv /usr/local \
&& hash -r \
kba marked this conversation as resolved.
Show resolved Hide resolved
&& pip install --upgrade pip setuptools \
&& make install \
&& apt-get remove -y gcc \
&& apt-get autoremove -y \
&& $FIXUP \
&& eval $FIXUP \
&& rm -rf /build-ocrd

WORKDIR /data
Expand Down
61 changes: 61 additions & 0 deletions Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Image to build on top of; passed in with --build-arg BASE_IMAGE=...
ARG BASE_IMAGE
FROM $BASE_IMAGE

# Location the micromamba binary gets installed to (i.e. it is exposed as `conda`).
ENV MAMBA_EXE=/usr/local/bin/conda
# Root prefix of the micromamba/conda installation.
ENV MAMBA_ROOT_PREFIX=/conda
# Put the prefix's bin directory in front of the search path.
# (Must come after MAMBA_ROOT_PREFIX: ENV substitution only sees earlier instructions.)
ENV PATH=$MAMBA_ROOT_PREFIX/bin:$PATH
# The root prefix doubles as the conda environment prefix.
ENV CONDA_PREFIX=$MAMBA_ROOT_PREFIX
# NOTE(review): presumably marks the environment as already activated - confirm.
ENV CONDA_SHLVL='1'

# Relative paths in the following instructions resolve against the root prefix.
WORKDIR $MAMBA_ROOT_PREFIX

# Bootstrap micromamba: download the static binary, install it as $MAMBA_EXE
# (so it can be called as `conda`), create the lib/ and include/ directories
# under $CONDA_PREFIX and register the lib directory with the dynamic linker.
# All steps share one RUN so the intermediate bin/micromamba copy does not
# persist in a layer of its own. No `hash -r` is needed here: every RUN starts
# a fresh shell, so there is no command hash table to reset.
# curl flags: -f fail on HTTP errors instead of piping an error page into tar,
#             -sS stay quiet but still report errors, -L follow redirects.
RUN curl -fsSL https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba \
 && mv bin/micromamba "$MAMBA_EXE" \
 && mkdir -p "$CONDA_PREFIX/lib" "$CONDA_PREFIX/include" \
 && echo "$CONDA_PREFIX/lib" >> /etc/ld.so.conf.d/conda.conf
# Get the CUDA compiler (cuda-nvcc) and cuda-cccl from the nvidia channel.
# The complete toolkit is not installed from there, because the nvidia channels
# do not provide a (recent) cudnn (needed for Torch, TF etc):
#RUN conda install -c nvidia/label/cuda-11.8.0 cuda && conda clean -a
# The conda-forge channel has cudnn but no cudatoolkit-dev anymore,
# so the idea was to combine nvidia and conda-forge (same lib versions, no waste of space).
# After installing, clean up conda's caches and delete all static archives
# (*_static.a) below $CONDA_PREFIX (presumably to keep the image small).
RUN conda install -c nvidia/label/cuda-11.8.0 \
cuda-nvcc \
cuda-cccl \
&& conda clean -a \
&& find $CONDA_PREFIX -name "*_static.a" -delete
# Currently not installed from the nvidia channel (kept for reference):
# cuda-cudart-dev \
# cuda-libraries-dev \
# Disabled alternative: runtime libraries and cudnn from conda-forge.
#RUN conda install -c conda-forge \
# cudatoolkit=11.8.0 \
# cudnn=8.8.* && \
# conda clean -a && \
# find $CONDA_PREFIX -name "*_static.a" -delete
kba marked this conversation as resolved.
Show resolved Hide resolved
# Since Torch will pull in the CUDA libraries (as Python packages) anyway,
# install them directly from the NGC index (via nvidia-pyindex) and then
# share them with the rest of the system, so that native compilation/linking
# works, too.
# For each package, every versioned shared library (lib*.so.*) is symlinked
# into $CONDA_PREFIX/lib under two names: the file name with ".so" appended,
# and the unversioned name (everything from ".so." on replaced by ".so").
# The package's headers are symlinked into $CONDA_PREFIX/include.
# Finally, ldconfig rebuilds the dynamic linker cache.
# NOTE(review): the site-packages path is hard-coded for Python 3.8 under
# /usr/local - it must be adapted if the base image's Python changes.
# NOTE(review): the first link name ($base.so) keeps the version suffix,
# e.g. libfoo.so.8.so - confirm that this is intended.
RUN pip3 install nvidia-pyindex \
&& pip3 install nvidia-cudnn-cu11==8.6.0.163 \
nvidia-cublas-cu11 \
nvidia-cusparse-cu11 \
nvidia-cusolver-cu11 \
nvidia-curand-cu11 \
nvidia-cufft-cu11 \
nvidia-cuda-runtime-cu11 \
nvidia-cuda-nvrtc-cu11 \
&& for pkg in cudnn cublas cusparse cusolver curand cufft cuda_runtime cuda_nvrtc; do \
for lib in /usr/local/lib/python3.8/site-packages/nvidia/$pkg/lib/lib*.so.*; do \
base=$(basename $lib); \
ln -s $lib $CONDA_PREFIX/lib/$base.so; \
ln -s $lib $CONDA_PREFIX/lib/${base%.so.*}.so; \
done \
&& ln -s /usr/local/lib/python3.8/site-packages/nvidia/$pkg/include/* $CONDA_PREFIX/include/; \
done \
&& ldconfig
# gputil/nvidia-smi would be nice, too – but that drags in Python as a conda dependency...

# Default working directory for containers started from this image.
WORKDIR /data

# Without an explicit command, print the ocrd command-line help.
CMD ["/usr/local/bin/ocrd", "--help"]

61 changes: 17 additions & 44 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,18 @@ help:
@echo " docs-clean Clean docs"
@echo " docs-coverage Calculate docstring coverage"
@echo " docker Build docker image"
@echo " docker-cuda Build docker GPU / CUDA image"
@echo " cuda-ubuntu Install native CUDA toolkit in different versions"
@echo " docker-cuda Build docker image for GPU / CUDA"
@echo " pypi Build wheels and source dist and twine upload them"
@echo ""
@echo " Variables"
@echo ""
@echo " DOCKER_TAG Docker tag. Default: '$(DOCKER_TAG)'."
@echo " DOCKER_BASE_IMAGE Docker base image. Default: '$(DOCKER_BASE_IMAGE)'."
@echo " DOCKER_TAG Docker target image tag. Default: '$(DOCKER_TAG)'."
@echo " DOCKER_BASE_IMAGE Docker source image tag. Default: '$(DOCKER_BASE_IMAGE)'."
@echo " DOCKER_ARGS Additional arguments to docker build. Default: '$(DOCKER_ARGS)'"
@echo " PIP_INSTALL pip install command. Default: $(PIP_INSTALL)"

# END-EVAL

# Docker tag. Default: '$(DOCKER_TAG)'.
DOCKER_TAG = ocrd/core

# Docker base image. Default: '$(DOCKER_BASE_IMAGE)'.
DOCKER_BASE_IMAGE = ubuntu:20.04

# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
DOCKER_ARGS =

# pip install command. Default: $(PIP_INSTALL)
PIP_INSTALL = $(PIP) install

Expand All @@ -68,12 +58,13 @@ deps-test:

# (Re)install the tool
install:
$(PIP) install -U pip wheel setuptools fastentrypoints
$(PIP) install -U pip wheel setuptools
@# speedup for end-of-life builds
@# we cannot use pip config here due to pip#11988
if $(PYTHON) -V | fgrep -e 3.5 -e 3.6; then $(PIP) install --prefer-binary opencv-python-headless numpy; fi
for mod in $(BUILD_ORDER);do (cd $$mod ; $(PIP_INSTALL) .);done
@# workaround for shapely#1598
$(PIP) install --no-binary shapely --force-reinstall shapely
kba marked this conversation as resolved.
Show resolved Hide resolved
$(PIP) config set global.no-binary shapely

# Install with pip install -e
install-dev: uninstall
Expand Down Expand Up @@ -214,40 +205,22 @@ pyclean:

.PHONY: docker docker-cuda

# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
DOCKER_ARGS =

# Build docker image
docker docker-cuda:
docker build -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
docker: DOCKER_BASE_IMAGE = ubuntu:20.04
docker: DOCKER_TAG = ocrd/core
docker: DOCKER_FILE = Dockerfile

# Build docker GPU / CUDA image
docker-cuda: DOCKER_BASE_IMAGE = nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04
docker-cuda: DOCKER_BASE_IMAGE = ocrd/core
docker-cuda: DOCKER_TAG = ocrd/core-cuda
docker-cuda: DOCKER_ARGS += --build-arg FIXUP="make cuda-ubuntu cuda-ldconfig"
docker-cuda: DOCKER_FILE = Dockerfile.cuda

#
# CUDA
#
docker-cuda: docker

.PHONY: cuda-ubuntu cuda-ldconfig

# Install native CUDA toolkit in different versions
cuda-ubuntu: cuda-ldconfig
apt-get -y install --no-install-recommends cuda-runtime-11-0 cuda-runtime-11-1 cuda-runtime-11-3 cuda-runtime-11-7 cuda-runtime-12-1

cuda-ldconfig: /etc/ld.so.conf.d/cuda.conf
ldconfig

/etc/ld.so.conf.d/cuda.conf:
@echo > $@
@echo /usr/local/cuda-11.0/lib64 >> $@
@echo /usr/local/cuda-11.0/targets/x86_64-linux/lib >> $@
@echo /usr/local/cuda-11.1/lib64 >> $@
@echo /usr/local/cuda-11.1/targets/x86_64-linux/lib >> $@
@echo /usr/local/cuda-11.3/lib64 >> $@
@echo /usr/local/cuda-11.3/targets/x86_64-linux/lib >> $@
@echo /usr/local/cuda-11.7/lib64 >> $@
@echo /usr/local/cuda-11.7/targets/x86_64-linux/lib >> $@
@echo /usr/local/cuda-12.1/lib64 >> $@
@echo /usr/local/cuda-12.1/targets/x86_64-linux/lib >> $@
kba marked this conversation as resolved.
Show resolved Hide resolved
docker docker-cuda:
docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .

# Build wheels and source dist and twine upload them
pypi: uninstall install
Expand Down
2 changes: 1 addition & 1 deletion ocrd/ocrd/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd.
"""
from ocrd_utils.package_resources import resource_filename
from pkg_resources import resource_filename
kba marked this conversation as resolved.
Show resolved Hide resolved

__all__ = [
'TMP_PREFIX',
Expand Down
4 changes: 2 additions & 2 deletions ocrd/ocrd/processor/builtin/dummy_processor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pylint: disable=missing-module-docstring,invalid-name
from os.path import join, basename
from ocrd_utils.package_resources import resource_string
from pkg_resources import resource_string

import click

Expand All @@ -17,7 +17,7 @@
)
from ocrd_modelfactory import page_from_file

OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8'))

class DummyProcessor(Processor):
"""
Expand Down
16 changes: 0 additions & 16 deletions ocrd/ocrd/resource_list.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,3 @@ ocrd-sbb-binarize:
type: archive
path_in_archive: models
size: 1654623597
ocrd-sbb-textline-detector:
- url: https://qurator-data.de/sbb_textline_detector/models.tar.gz
description: default models provided by github.com/qurator-spk
name: default
type: archive
size: 1194551551
ocrd-kraken-segment:
- url: https://github.com/mittagessen/kraken/raw/master/kraken/blla.mlmodel
description: Pretrained baseline segmentation model
name: blla.mlmodel
size: 5046835
ocrd-kraken-recognize:
- url: https://zenodo.org/record/2577813/files/en_best.mlmodel?download=1
name: en_best.mlmodel
description: This model has been trained on a large corpus of modern printed English text\naugmented with ~10000 lines of historical pages
size: 2930723
2 changes: 1 addition & 1 deletion ocrd/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module
from distutils.dir_util import copy_tree
from pkg_resources import get_distribution

from ocrd_utils import (
pushd_popd,
Expand All @@ -22,7 +23,6 @@
from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import to_xml
from ocrd_utils.package_resources import get_distribution

from .workspace import Workspace

Expand Down
1 change: 0 additions & 1 deletion ocrd/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
import fastentrypoints
from setuptools import setup, find_packages
from ocrd_utils import VERSION

Expand Down
2 changes: 1 addition & 1 deletion ocrd_models/ocrd_models/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd_models.
"""
from ocrd_utils.package_resources import resource_string
from pkg_resources import resource_string
import re

__all__ = [
Expand Down
3 changes: 1 addition & 2 deletions ocrd_utils/ocrd_utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""
Constants for ocrd_utils.
"""
from pkg_resources import get_distribution
from re import compile as regex_compile
from os import environ
from os.path import join, expanduser

from ocrd_utils.package_resources import get_distribution

__all__ = [
'EXT_TO_MIME',
'LOG_FORMAT',
Expand Down
49 changes: 0 additions & 49 deletions ocrd_utils/ocrd_utils/package_resources.py

This file was deleted.

2 changes: 0 additions & 2 deletions ocrd_utils/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@ Pillow >= 7.2.0
# tensorflow versions might require different versions
numpy
atomicwrites >= 1.3.0
importlib_metadata;python_version<'3.8'
importlib_resources;python_version<'3.9'
frozendict>=2.3.4
2 changes: 1 addition & 1 deletion ocrd_validators/ocrd_validators/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Constants for ocrd_validators.
"""
import yaml
from ocrd_utils.package_resources import resource_string, resource_filename
from pkg_resources import resource_string, resource_filename

__all__ = [
'PROCESSING_SERVER_CONFIG_SCHEMA',
Expand Down