sci-libs/tokenizers: enable test

Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
author: Alfredo Tupone <tupone@gentoo.org> 2024-04-07 18:50:04 +0200
committer: Alfredo Tupone <tupone@gentoo.org> 2024-04-14 15:03:26 +0200
commit: 99e70c7721b929f98c3da0451fffcdccd834d996 (patch)
tree: ab0d93bf1043bcac3961d2804f1fddf0e5c6a1cc /sci-libs/tokenizers
parent: sys-apps/cpuid: switch to app-alternatives/gzip (diff)
download: gentoo-99e70c7721b929f98c3da0451fffcdccd834d996.tar.gz gentoo-99e70c7721b929f98c3da0451fffcdccd834d996.tar.bz2 gentoo-99e70c7721b929f98c3da0451fffcdccd834d996.zip
2 files changed, 49 insertions, 5 deletions
diff --git a/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
new file mode 100644
index 000000000000..01a872cb846a
--- /dev/null
+++ b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
@@ -0,0 +1,39 @@
+--- a/tests/bindings/test_trainers.py	2024-04-07 18:21:19.443506351 +0200
++++ b/tests/bindings/test_trainers.py	2024-04-07 18:21:54.893466083 +0200
+@@ -295,8 +295,8 @@
+         tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
+             [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
+         )
+-        tokenizer.train(files=["data/big.txt"], trainer=trainer)
++        tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
+ 
+-        tokenizer.save("data/tokenizer.json")
++        tokenizer.save("tests/data/tokenizer.json")
+ 
+-        tokenizer.from_file("data/tokenizer.json")
++        tokenizer.from_file("tests/data/tokenizer.json")
+--- a/tests/documentation/test_tutorial_train_from_iterators.py	2024-04-07 18:19:08.653593406 +0200
++++ b/tests/documentation/test_tutorial_train_from_iterators.py	2024-04-07 18:19:39.206906910 +0200
+@@ -40,7 +40,7 @@
+     def setup_gzip_files(self, train_files):
+         with open(train_files["small"], "rt") as small:
+             for n in range(3):
+-                path = f"data/my-file.{n}.gz"
++                path = f"tests/data/my-file.{n}.gz"
+                 with gzip.open(path, "wt") as f:
+                     f.write(small.read())
+ 
+@@ -87,11 +87,11 @@
+         # START single_gzip
+         import gzip
+ 
+-        with gzip.open("data/my-file.0.gz", "rt") as f:
++        with gzip.open("tests/data/my-file.0.gz", "rt") as f:
+             tokenizer.train_from_iterator(f, trainer=trainer)
+         # END single_gzip
+         # START multi_gzip
+-        files = ["data/my-file.0.gz", "data/my-file.1.gz", "data/my-file.2.gz"]
++        files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"]
+ 
+         def gzip_iterator():
+             for path in files:
diff --git a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild
index d2da8b88ac9b..ed6b224ac702 100644
--- a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild
+++ b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild
@@ -6,8 +6,9 @@
 EAPI=8
 
 DISTUTILS_USE_PEP517=maturin
-PYTHON_COMPAT=( python3_{9..12} )
+PYTHON_COMPAT=( python3_{10..12} )
 DISTUTILS_EXT=1
+DISTUTILS_SINGLE_IMPL=1
 
 CRATES="
 	adler@1.0.2
@@ -290,9 +291,13 @@ LICENSE+="
 "
 SLOT="0"
 KEYWORDS="~amd64"
-RESTRICT="test"
 
-BDEPEND="dev-python/setuptools-rust[${PYTHON_USEDEP}]"
+BDEPEND="
+	test? ( sci-libs/datasets[${PYTHON_SINGLE_USEDEP}] )
+	$(python_gen_cond_dep '
+		dev-python/setuptools-rust[${PYTHON_USEDEP}]
+	')
+"
 
 distutils_enable_tests pytest
 
@@ -305,6 +310,7 @@ src_unpack() {
 src_prepare() {
 	default
 	cd bindings/python
+	eapply "${FILESDIR}"/${P}-test.patch
 	distutils-r1_src_prepare
 }
 
@@ -327,8 +333,7 @@ src_test() {
 	# Tests do not work
 	#cargo_src_test
 	cd ../bindings/python
-	# Need dataset module
-	#distutils-r1_src_test
+	distutils-r1_src_test
 }
 
 src_install() {
author	Alfredo Tupone <tupone@gentoo.org>	2024-04-07 18:50:04 +0200
committer	Alfredo Tupone <tupone@gentoo.org>	2024-04-14 15:03:26 +0200
commit	99e70c7721b929f98c3da0451fffcdccd834d996 (patch)
tree	ab0d93bf1043bcac3961d2804f1fddf0e5c6a1cc /sci-libs/tokenizers
parent	sys-apps/cpuid: switch to app-alternatives/gzip (diff)
download	gentoo-99e70c7721b929f98c3da0451fffcdccd834d996.tar.gz gentoo-99e70c7721b929f98c3da0451fffcdccd834d996.tar.bz2 gentoo-99e70c7721b929f98c3da0451fffcdccd834d996.zip