Skip to content

File tree

1 file changed

+53
-13
lines changed
  • pkgs/development/python-modules/transformers

1 file changed

+53
-13
lines changed

pkgs/development/python-modules/transformers/default.nix

Lines changed: 53 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,36 +1,76 @@
11
{ buildPythonPackage
22
, stdenv
33
, fetchFromGitHub
4-
, sacremoses
5-
, requests
6-
, sentencepiece
74
, boto3
8-
, tqdm
5+
, filelock
96
, regex
7+
, requests
108
, numpy
11-
, pytest
9+
, sacremoses
10+
, sentencepiece
11+
, timeout-decorator
12+
, tokenizers
13+
, tqdm
14+
, pytestCheckHook
1215
}:
1316

1417
buildPythonPackage rec {
1518
pname = "transformers";
16-
version = "2.2.1";
19+
version = "3.0.1";
1720

1821
src = fetchFromGitHub {
1922
owner = "huggingface";
2023
repo = pname;
2124
rev = "v${version}";
22-
sha256 = "1p8p3lhhiyk1xl9gpgq4vbchyz57v3w7hhvsj1r90zs3cckindl8";
25+
sha256 = "1l8l82zi021sq5dnzlbjx3wx0n4yy7k96n3m2fr893y9lfkhhd8z";
2326
};
2427

25-
propagatedBuildInputs = [ numpy sacremoses requests sentencepiece boto3 tqdm regex ];
28+
propagatedBuildInputs = [
29+
boto3
30+
filelock
31+
numpy
32+
regex
33+
requests
34+
sacremoses
35+
sentencepiece
36+
tokenizers
37+
tqdm
38+
];
39+
40+
checkInputs = [
41+
pytestCheckHook
42+
timeout-decorator
43+
];
2644

27-
checkInputs = [ pytest ];
28-
# pretrained tries to download from s3
29-
checkPhase = ''
30-
cd transformers # avoid importing local files
31-
HOME=$TMPDIR pytest -k 'not pretrained_tokenizers'
45+
postPatch = ''
46+
substituteInPlace setup.py \
47+
--replace "tokenizers == 0.8.0-rc4" "tokenizers>=0.8,<0.9"
3248
'';
3349

50+
preCheck = ''
51+
export HOME="$TMPDIR"
52+
cd tests
53+
'';
54+
55+
# Disable tests that require network access.
56+
disabledTests = [
57+
"test_all_tokenizers"
58+
"test_batch_encoding_is_fast"
59+
"test_batch_encoding_pickle"
60+
"test_config_from_model_shortcut"
61+
"test_config_model_type_from_model_identifier"
62+
"test_from_pretrained_use_fast_toggle"
63+
"test_hf_api"
64+
"test_outputs_can_be_shorter"
65+
"test_outputs_not_longer_than_maxlen"
66+
"test_pretokenized_tokenizers"
67+
"test_tokenizer_equivalence_en_de"
68+
"test_tokenizer_from_model_type"
69+
"test_tokenizer_from_model_type"
70+
"test_tokenizer_from_pretrained"
71+
"test_tokenizer_identifier_with_correct_config"
72+
];
73+
3474
meta = with stdenv.lib; {
3575
homepage = "https://github.com/huggingface/transformers";
3676
description = "State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch";

0 commit comments

Comments (0)