Skip to content

Commit

Permalink
Merge pull request #1 from actuarialopensource/honest-effort
Browse files Browse the repository at this point in the history
Implement memory optimizations
  • Loading branch information
MatthewCaseres authored Mar 22, 2024
2 parents 1921f6f + 3d6fad4 commit a7f6b70
Show file tree
Hide file tree
Showing 16 changed files with 1,716 additions and 2,137 deletions.
37 changes: 37 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.8-bookworm",
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {},
"ghcr.io/dhoeric/features/act:1": {},
"ghcr.io/hspaans/devcontainer-features/pytest:1": {},
"ghcr.io/meaningful-ooo/devcontainer-features/fish:1": {},
"ghcr.io/stuartleeks/dev-container-features/shell-history:0": {}
},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to pip install the package in editable mode.
// "postCreateCommand": "pip install -e .[dev]",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"github.vscode-github-actions",
"ms-toolsai.jupyter",
"ms-python.black-formatter",
"tamasfe.even-better-toml"
]
}
},

// Configure tool-specific properties.
// "customizations": {},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for more information:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# https://containers.dev/guide/dependabot

version: 2
updates:
- package-ecosystem: "devcontainers"
directory: "/"
schedule:
interval: weekly
40 changes: 40 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: Python package

on:
workflow_dispatch:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
env:
PYTHON: ${{ matrix.python-version }}

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pytest
python -m pip install -e .[dev]
- name: Test with pytest
run: |
pytest --cov=src tests/
- run: ls
- name: Upload coverage to codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"python.analysis.extraPaths": [
"./src"
]
}
18 changes: 0 additions & 18 deletions Pipfile

This file was deleted.

1,945 changes: 0 additions & 1,945 deletions Pipfile.lock

This file was deleted.

24 changes: 24 additions & 0 deletions developer-setup/environment-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
## Installation

We provide a devcontainer, so the environment is set up automatically when the repo is opened in GitHub Codespaces. The same devcontainer can also be run locally.

Otherwise, you should run this command
```
pip install -e .[dev]
```

## Running tests

The install command above also installs the optional development dependencies, including `pytest` and `pytest-cov`.

```sh
# test
pytest
# with coverage reporting
pytest --cov=src tests/
```

In the devcontainer we have act installed, allowing us to verify that pytest runs in the CI/CD pipeline as well.
```
act -j build -s "CODECOV_TOKEN=your-codecov-token-abc555-5555"
```
18 changes: 18 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
src-dir = "src"

[project]
name = "heavylight"
Expand All @@ -17,6 +19,22 @@ dependencies = [
"pandas>=1.2",
]

[project.optional-dependencies]
dev = [
"pytest==7.4.3",
"pytest-cov",
"pytest-timeout",
"numpy"
]

[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib",
]
testpaths = [
"tests",
]

classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
Expand Down
1,195 changes: 1,195 additions & 0 deletions scratch.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/heavylight/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .heavylight import Model, Table
from .heavylight import Model
from .cache_graph import CacheGraph
112 changes: 112 additions & 0 deletions src/heavylight/cache_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from typing import Callable
from collections import defaultdict
from dataclasses import dataclass
from functools import wraps
from typing import Any, Callable, Tuple, Union, FrozenSet

@dataclass(frozen=True)
class FunctionCall:
    """Hashable record of a single call: function name plus its arguments.

    Attributes:
        func_name: Name of the called function.
        args: Positional arguments of the call.
        kwargs: Keyword arguments as a frozenset of ``(name, value)`` pairs,
            which keeps the record hashable.
    """

    func_name: str
    args: tuple
    kwargs: FrozenSet[Tuple[str, Any]]

    def __repr__(self):
        """Render the call roughly the way it would be written in source."""
        if not self.kwargs:
            if len(self.args) == 1:
                # A one-element tuple would print as "f(1,)"; show "f(1)".
                return f"{self.func_name}({self.args[0]})"
            return f"{self.func_name}{self.args}"
        # Build one flat list so a call without positional arguments does not
        # start with a dangling ", ".  Keyword pairs are sorted by name
        # because frozenset iteration order is not stable between runs.
        parts = [str(arg) for arg in self.args]
        parts.extend(
            f"{name}={value}"
            for name, value in sorted(self.kwargs, key=lambda pair: pair[0])
        )
        return f"{self.func_name}({', '.join(parts)})"

# Key type of the per-function result caches: (positional args, frozen keyword items).
ArgsHash = Tuple[Tuple, frozenset]

class CacheGraph:
    """Memoizing decorator factory that also records which cached call uses which.

    Calling an instance (optionally with a ``storage_func``) returns a
    decorator. Decorated functions have their results cached per function
    name and argument tuple, and every call made while another decorated
    function is running is recorded as an edge in ``graph``. After a run,
    ``optimize()`` derives ``can_clear``, which later runs use to drop cache
    entries as soon as the call that last needed them has finished.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Replace all caches and bookkeeping with fresh, empty containers."""
        # Calls currently being evaluated; the innermost one is last.
        self.stack: list[FunctionCall] = []
        # Results of function calls: caches[func_name][(args, kwargs)] = result
        self.caches: defaultdict[str, dict[ArgsHash, Any]] = defaultdict(dict)
        # Call graph: graph[caller] = {callee1, callee2, ...}
        self.graph: defaultdict[FunctionCall, set[FunctionCall]] = defaultdict(set)
        # Typically aggregated results for a function at a timestep.
        self.stored_results: defaultdict[str, dict[int, Any]] = defaultdict(dict)
        # The last caller that needed the result of a call; drives cache clearing.
        self.last_needed_by: dict[FunctionCall, FunctionCall] = {}
        # can_clear[caller] = [callee1, ...]: once caller has been computed,
        # the cached results of these callees may be dropped.
        self.can_clear: dict[FunctionCall, list[FunctionCall]] = defaultdict(list)
        # Every call that had to be computed (cache miss) at least once.
        self.all_calls: set[FunctionCall] = set()
        # Number of times each call had to be computed.
        self.cache_misses: defaultdict[FunctionCall, int] = defaultdict(int)

    def check_if_cached(self, function_call: "FunctionCall") -> bool:
        """Return True if a result for ``function_call`` is currently cached.

        The function name is tested first so that probing never creates an
        empty per-function cache in the ``caches`` defaultdict.
        """
        if function_call.func_name not in self.caches:
            return False
        key = (function_call.args, function_call.kwargs)
        return key in self.caches[function_call.func_name]

    def optimize(self):
        """Rebuild ``can_clear`` from the calls recorded so far.

        Each callee becomes clearable by the caller that needed it last.
        Calls that no other call needed are marked as clearing themselves.
        """
        self.can_clear = defaultdict(list)
        for callee, caller in self.last_needed_by.items():
            self.can_clear[caller].append(callee)
        for call in self.all_calls - set(self.last_needed_by):
            self.can_clear[call].append(call)

    def optimize_and_reset(self):
        """Compute the clearing plan, then reset everything except that plan."""
        self.optimize()
        plan = self.can_clear
        self.reset()
        self.can_clear = plan

    def __call__(self, storage_func: Union[Callable[[int], Any], None] = None):
        """Return a decorator that caches a function and tracks its calls.

        Args:
            storage_func: Optional callable applied to the raw result of every
                freshly computed call. Its return value is kept in
                ``stored_results`` (see ``_store_result``).

        Returns:
            A decorator that wraps a function in a ``_Cache`` object.
        """
        def custom_cache_decorator(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                frozen_kwargs = frozenset(kwargs.items())
                function_call = FunctionCall(func.__name__, args, frozen_kwargs)
                if self.stack:
                    # Record the edge even on a cache hit: the caller still
                    # depends on this result.
                    caller = self.stack[-1]
                    self.graph[caller].add(function_call)
                    self.last_needed_by[function_call] = caller
                if self.check_if_cached(function_call):
                    return self.caches[func.__name__][(args, frozen_kwargs)]

                self.all_calls.add(function_call)
                self.cache_misses[function_call] += 1
                self.stack.append(function_call)
                try:
                    result = func(*args, **kwargs)
                finally:
                    # Pop even if func raises; a stale frame would otherwise
                    # be recorded as the caller of every later call.
                    self.stack.pop()
                self.caches[func.__name__][(args, frozen_kwargs)] = result
                # .get avoids adding an empty list to can_clear for every call.
                for clearable_call in self.can_clear.get(function_call, ()):
                    # Tolerate entries that are not cached: a reused clearing
                    # plan may name calls this run never computed.
                    self.caches[clearable_call.func_name].pop(
                        (clearable_call.args, clearable_call.kwargs), None
                    )
                self._store_result(storage_func, func, args, kwargs, result)
                return result

            return _Cache(self, wrapper)

        return custom_cache_decorator

    def _store_result(self, storage_func: Union[Callable, None], func: Callable, args: tuple, kwargs: dict, raw_result: Any):
        """Store ``storage_func(raw_result)`` under the function name and timestep.

        Does nothing when ``storage_func`` is None. Otherwise the call must
        have exactly one positional ``int`` argument (the timestep) and no
        keyword arguments.
        """
        if storage_func is None:
            return
        # These conditions are not expected to trigger, which is why they are
        # assertions rather than raised exceptions.
        assert len(args) == 1 and isinstance(args[0], int)
        assert len(kwargs) == 0
        # store the processed result
        timestep = args[0]
        self.stored_results[func.__name__][timestep] = storage_func(raw_result)

    def size(self):
        """Return the total number of cached results across all functions."""
        return sum(len(cache) for cache in self.caches.values())

class _Cache:
    """Callable wrapper around a cached function that also allows seeding its cache.

    Calling the object forwards to the wrapped function. ``obj[key] = value``
    writes ``value`` into the owning graph's cache for that function.
    """

    def __init__(self, cache_graph: "CacheGraph", func: Callable):
        # Keep the graph rather than one of its dicts: the graph replaces its
        # ``caches`` mapping on reset, and a dict captured here would then
        # silently go stale.
        self._cache_graph = cache_graph
        self._func = func
        # Touch the entry so the function's cache exists in the graph from
        # decoration time onward.
        cache_graph.caches[func.__name__]

    @property
    def cache(self):
        """The graph's current result cache for the wrapped function."""
        return self._cache_graph.caches[self._func.__name__]

    def __setitem__(self, key, value):
        """Seed the cache with ``value`` for positional arguments ``key``.

        A bare ``int`` key is treated as a single positional argument; any
        other key is used as the positional-argument tuple as given. Seeded
        entries always have empty keyword arguments.
        """
        if isinstance(key, int):
            self.cache[((key,), frozenset())] = value
        else:
            self.cache[(key, frozenset())] = value

    def __repr__(self):
        return f"<Cache Function: {self._func.__name__}, Size: {len(self.cache)}>"

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """Invoke the wrapped function with the given arguments."""
        return self._func(*args, **kwds)
Loading

0 comments on commit a7f6b70

Please sign in to comment.