-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathflake.nix
81 lines (76 loc) · 1.79 KB
/
flake.nix
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Flake for TGI development.
#
# Outputs:
#   formatter.<system>                 - the Nix formatter used for this repo
#   packages.<system>.python3Packages  - selected Python packages from the overlaid package set
#   packages.<system>.all              - symlinkJoin of every package in python3Packages
#   lib.config                         - the nixpkgs config used to instantiate the package set
#   overlays.default                   - the overlay from ./overlay.nix
#
# Only x86_64-linux is enumerated as a system.
{
  description = "TGI development";

  inputs = {
    flake-utils.url = "github:numtide/flake-utils";
    # nixpkgs is pinned to a branch of a fork rather than to upstream.
    nixpkgs.url = "github:danieldk/nixpkgs/outlines-v0.1.4-tgi";
    # NOTE(review): not referenced inside `outputs`; presumably consumed by a
    # default.nix/shell.nix compatibility shim elsewhere in the repo - confirm.
    flake-compat.url = "github:edolstra/flake-compat";
  };

  outputs =
    {
      self,
      flake-compat,
      flake-utils,
      nixpkgs,
    }:
    let
      # nixpkgs configuration used for the package set below; also re-exported
      # unchanged as `lib.config`.
      config = {
        allowUnfree = true;
        cudaSupport = true;
        cudaCapabilities = [
          "7.5"
          "8.0"
          "8.6"
          "8.9"
          "9.0"
          "9.0a"
        ];
      };

      # Applied to nixpkgs below and re-exported as `overlays.default`.
      overlay = import ./overlay.nix;
    in
    flake-utils.lib.eachSystem [ flake-utils.lib.system.x86_64-linux ] (
      system:
      let
        pkgs = import nixpkgs {
          inherit config system;
          overlays = [ overlay ];
        };

        # Bound with `let` (instead of a `rec` attrset) so that `packages.all`
        # can refer to it without making the whole set recursive. The source
        # set is named explicitly in `inherit (...)` rather than pulled in
        # through a `with` scope.
        python3Packages = {
          inherit (pkgs.python3.pkgs)
            attention-kernels
            awq-inference-engine
            causal-conv1d
            compressed-tensors
            eetq
            exllamav2
            flash-attn
            flash-attn-layer-norm
            flash-attn-rotary
            flash-attn-v1
            flashinfer
            hf-transfer
            mamba-ssm
            marlin-kernels
            moe-kernels
            opentelemetry-instrumentation-grpc
            outlines
            punica-kernels
            torch
            ;
        };
      in
      {
        formatter = pkgs.nixfmt-rfc-style;

        packages = {
          inherit python3Packages;

          # Single derivation that links together every package listed in
          # python3Packages.
          all = pkgs.symlinkJoin {
            name = "all";
            paths = pkgs.lib.attrsets.attrValues python3Packages;
          };
        };
      }
    )
    # These outputs do not depend on `system`, so they are merged in outside
    # of eachSystem.
    // {
      # Cheating a bit to conform to the schema.
      lib.config = config;
      overlays.default = overlay;
    };
}