From f091707389cd5eba7cff31996d5a60bc94b466fe Mon Sep 17 00:00:00 2001 From: kheffah Date: Sun, 25 Jul 2021 18:59:43 -0500 Subject: [PATCH 1/3] allow only keeping a subset of slides for SlideSet -- this would be useful when, for example, you want to exclude slides used in machine learning model training for cross validation. --- histolab/slide.py | 19 +++++++++++++++---- tests/unit/test_slide.py | 21 +++++++++++++++++---- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/histolab/slide.py b/histolab/slide.py index 7c1311180..68c951e7e 100644 --- a/histolab/slide.py +++ b/histolab/slide.py @@ -510,11 +510,16 @@ class SlideSet: """Slideset object. It is considered a collection of Slides.""" def __init__( - self, slides_path: str, processed_path: str, valid_extensions: list + self, + slides_path: str, + processed_path: str, + valid_extensions: list, + keep_slides: list = None, ) -> None: self._slides_path = slides_path self._processed_path = processed_path self._valid_extensions = valid_extensions + self._keep_slides = keep_slides def __iter__(self) -> Iterator[Slide]: """Slides of the slideset @@ -523,11 +528,17 @@ def __iter__(self) -> Iterator[Slide]: ------- generator of `Slide` objects. 
""" + slide_names = [ + _name + for _name in os.listdir(self._slides_path) + if (os.path.splitext(_name)[1] in self._valid_extensions) + ] + if self._keep_slides is not None: + slide_names = [_name for _name in slide_names if _name in self._keep_slides] return iter( [ - Slide(os.path.join(self._slides_path, _path), self._processed_path) - for _path in os.listdir(self._slides_path) - if os.path.splitext(_path)[1] in self._valid_extensions + Slide(os.path.join(self._slides_path, _name), self._processed_path) + for _name in slide_names ] ) diff --git a/tests/unit/test_slide.py b/tests/unit/test_slide.py index e7dd8b386..3b34efe69 100644 --- a/tests/unit/test_slide.py +++ b/tests/unit/test_slide.py @@ -453,15 +453,22 @@ def it_constructs_from_args(self, request): _slides_path = "/foo/bar/" _processed_path = "/foo/bar/wsislides/processed" _valid_extensions = [".svs", ".tiff"] + _keep_slides = ["mywsi.svs"] - slideset = SlideSet(_slides_path, _processed_path, _valid_extensions) + slideset = SlideSet( + _slides_path, _processed_path, _valid_extensions, keep_slides=_keep_slides + ) _init_.assert_called_once_with( - ANY, _slides_path, _processed_path, _valid_extensions + ANY, + _slides_path, + _processed_path, + _valid_extensions, + keep_slides=_keep_slides, ) assert isinstance(slideset, SlideSet) - def it_can_constructs_slides(self, request, tmpdir, Slide_): + def it_can_construct_slides(self, request, tmpdir, Slide_): tmp_path_ = tmpdir.mkdir("myslide") slides_ = method_mock(request, SlideSet, "__iter__") slides_.return_value = [Slide_ for _ in range(10)] @@ -475,9 +482,15 @@ def it_can_constructs_slides(self, request, tmpdir, Slide_): def it_knows_its_slides(self, tmpdir): tmp_path_ = tmpdir.mkdir("myslide") image = PILIMG.RGBA_COLOR_500X500_155_249_240 - image.save(os.path.join(tmp_path_, "mywsi.svs"), "TIFF") + image.save(os.path.join(tmp_path_, "mywsi1.svs"), "TIFF") + image.save(os.path.join(tmp_path_, "mywsi2.svs"), "TIFF") slideset = SlideSet(tmp_path_, "proc", 
[".svs"]) + assert len(slideset) == 2 + + # it can keep a subset of slides + slideset = SlideSet(tmp_path_, "proc", [".svs"], keep_slides=["mywsi1.svs"]) + assert len(slideset) == 1 slideset = SlideSet(None, "proc", [".svs"]) From f8886fcaef486b909551a7861ab70a72f7db8f1f Mon Sep 17 00:00:00 2001 From: Mohamed Amgad Tageldin Date: Mon, 26 Jul 2021 13:09:47 -0500 Subject: [PATCH 2/3] Typing improvements and parameter style changes according to CR Co-authored-by: Alessia Marcolini <98marcolini@gmail.com> --- histolab/slide.py | 4 ++-- tests/unit/test_slide.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/histolab/slide.py b/histolab/slide.py index 68c951e7e..1c3854b6f 100644 --- a/histolab/slide.py +++ b/histolab/slide.py @@ -513,8 +513,8 @@ def __init__( self, slides_path: str, processed_path: str, - valid_extensions: list, - keep_slides: list = None, + valid_extensions: List[str], + keep_slides: List[str] = None, ) -> None: self._slides_path = slides_path self._processed_path = processed_path diff --git a/tests/unit/test_slide.py b/tests/unit/test_slide.py index 3b34efe69..0f4f534b9 100644 --- a/tests/unit/test_slide.py +++ b/tests/unit/test_slide.py @@ -456,7 +456,7 @@ def it_constructs_from_args(self, request): _keep_slides = ["mywsi.svs"] slideset = SlideSet( - _slides_path, _processed_path, _valid_extensions, keep_slides=_keep_slides + _slides_path, _processed_path, _valid_extensions, _keep_slides ) _init_.assert_called_once_with( @@ -464,7 +464,7 @@ def it_constructs_from_args(self, request): _slides_path, _processed_path, _valid_extensions, - keep_slides=_keep_slides, + _keep_slides, ) assert isinstance(slideset, SlideSet) From 3c0da7ddba3df955b81203c4559879f607013a73 Mon Sep 17 00:00:00 2001 From: kheffah Date: Sat, 31 Jul 2021 08:27:28 -0500 Subject: [PATCH 3/3] remove underscore from list comprehension in SlideSet class --- histolab/slide.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/histolab/slide.py b/histolab/slide.py index 1c3854b6f..88d4ed21c 100644 --- a/histolab/slide.py +++ b/histolab/slide.py @@ -529,16 +529,16 @@ def __iter__(self) -> Iterator[Slide]: generator of `Slide` objects. """ slide_names = [ - _name - for _name in os.listdir(self._slides_path) - if (os.path.splitext(_name)[1] in self._valid_extensions) + name + for name in os.listdir(self._slides_path) + if (os.path.splitext(name)[1] in self._valid_extensions) ] if self._keep_slides is not None: - slide_names = [_name for _name in slide_names if _name in self._keep_slides] + slide_names = [name for name in slide_names if name in self._keep_slides] return iter( [ - Slide(os.path.join(self._slides_path, _name), self._processed_path) - for _name in slide_names + Slide(os.path.join(self._slides_path, name), self._processed_path) + for name in slide_names ] )