"""Define classes and methods for dataset information."""from__future__importannotationsfromabcimportABC,abstractmethodfrompathlibimportPathfromtiatoolboximportrcParamfromtiatoolbox.utilsimportdownload_data,unzip_datafromtiatoolbox.utils.miscimportgrab_files_from_dir
class DatasetInfoABC(ABC):
    """Define an abstract class for holding dataset information.

    Enforces that the following attributes are always defined by the
    subclass. A subclass may satisfy the requirement either with a
    property or with a plain class attribute of the same name.

    Attributes:
        inputs (list):
            A list of paths where each path points to a sample image.
        labels (list):
            A list of `int` where each is the label of the sample at the
            same index.
        label_names (dict):
            A dict giving the name associated with each label value.

    """

    @property
    @abstractmethod
    def inputs(self: DatasetInfoABC) -> list:
        """A list of paths where each path points to a sample image."""
        raise NotImplementedError

    @property
    @abstractmethod
    def labels(self: DatasetInfoABC) -> list[int]:
        """A list of labels where each is the label of the sample at the same index."""
        raise NotImplementedError

    @property
    @abstractmethod
    def label_names(self: DatasetInfoABC) -> dict:
        """A dict giving the name associated with each label value."""
        raise NotImplementedError
class KatherPatchDataset(DatasetInfoABC):
    """Define a class for holding the Kather dataset information.

    Args:
        save_dir_path (:class:`pathlib.Path` or str or None):
            Path to directory containing the Kather dataset. This is
            assumed to be the same form after the data is initially
            downloaded, i.e. one sub-directory per class name holding
            the `*.tif` images. If the argument is `None`, the dataset
            is downloaded and extracted under
            `rcParam["TIATOOLBOX_HOME"] / "dataset"` (unless already
            present) and read from its `kather100k-validation`
            sub-directory.

    Attributes:
        inputs (list):
            A list of paths where each path points to a sample image.
        labels (list):
            A list of `int` where each value corresponds to the label of
            the sample at the same index.
        label_names (dict):
            A dict mapping each unique label value to the associated
            class name as a string.

    Raises:
        ValueError:
            If the dataset directory does not exist, or if no `*.tif`
            image is found in any of the class sub-directories.

    """

    # We pre-define to follow enforcement, actual initialization in init
    inputs = None
    labels = None
    label_names = None

    def __init__(
        self: KatherPatchDataset,
        save_dir_path: Path | None = None,
    ) -> None:
        """Initialize :class:`KatherPatchDataset`."""
        # The position of a name in this list is its integer label.
        label_names = [
            "BACK",
            "NORM",
            "DEB",
            "TUM",
            "ADI",
            "MUC",
            "MUS",
            "STR",
            "LYM",
        ]

        if save_dir_path is None:  # pragma: no cover
            save_dir_path = rcParam["TIATOOLBOX_HOME"] / "dataset"
            extracted_dir = save_dir_path / "kather100k-validation"
            if not extracted_dir.exists():
                save_zip_path = save_dir_path / "Kather.zip"
                url = (
                    "https://tiatoolbox.dcs.warwick.ac.uk/datasets"
                    "/kather100k-train-nonorm-subset-20k.zip"
                )
                download_data(url, save_path=save_zip_path)
                unzip_data(save_zip_path, save_dir_path)
            save_dir_path = extracted_dir

        # Checked outside the download branch so that a failed download
        # is reported the same way as a wrong user-supplied path.
        save_dir_path = Path(save_dir_path)
        if not save_dir_path.exists():
            msg = f"Dataset does not exist at `{save_dir_path}`"
            raise ValueError(msg)

        # TODO: What will happen if downloaded data get corrupted?
        uid_name_map = {}
        inputs = []
        labels = []
        for label_id, label_name in enumerate(label_names):
            # Sort within each class so the sample order is deterministic.
            paths = sorted(
                grab_files_from_dir(
                    f"{save_dir_path}/{label_name}/",
                    file_types="*.tif",
                ),
            )
            inputs.extend(paths)
            labels.extend([label_id] * len(paths))
            uid_name_map[label_id] = label_name

        # Fail with a clear message instead of an opaque unpacking error
        # when the directory exists but holds no images.
        if not inputs:
            msg = (
                f"No `*.tif` images found in the class folders of "
                f"`{save_dir_path}`"
            )
            raise ValueError(msg)

        self.label_names = uid_name_map
        self.inputs = inputs
        self.labels = labels