Source code for wsireg.utils.shape_utils

import json
import zipfile
from copy import deepcopy
from pathlib import Path

import cv2
import geojson
import numpy as np
import SimpleITK as sitk

from wsireg.reg_transforms.reg_transform import RegTransform
from wsireg.utils.tform_utils import wsireg_transforms_to_itk_composite

GJ_SHAPE_TYPE = {
    "polygon": geojson.Polygon,
    "multipolygon": geojson.MultiPolygon,
    "point": geojson.Point,
    "multipoint": geojson.MultiPoint,
    "multilinestring": geojson.MultiLineString,
    "linestring": geojson.LineString,
}
GJ_SHAPE_TYPE_NAME = {
    "polygon": "Polygon",
    "multipolygon": "MultiPolygon",
    "point": "Point",
    "multipoint": "MultiPoint",
    "multilinestring": "MultiLineString",
    "linestring": "LineString",
}


[docs] def gj_to_np(gj: dict): """ Convert geojson representation to np.ndarray representation of shape Parameters ---------- gj : dict GeoJSON data stored as python dict Returns ------- dict containing keys "array": np.ndarray - x,y point data in array "shape_type": str - indicates GeoJSON shape_type (Polygon, MultiPolygon, etc.) "shape_name": str - name inherited from QuPath GeoJSON """ if gj.get("geometry").get("type") == "MultiPolygon": pts = [] for geo in gj.get("geometry").get("coordinates"): pts.append(np.squeeze(np.array(geo))) pts = np.vstack(pts) elif gj.get("geometry").get("type") == "Polygon": pts = np.squeeze(np.asarray(gj.get("geometry").get("coordinates"))) elif gj.get("geometry").get("type") == "Point": pts = np.expand_dims( np.asarray(gj.get("geometry").get("coordinates")), 0 ) elif gj.get("geometry").get("type") == "MultiPoint": pts = np.asarray(gj.get("geometry").get("coordinates")) elif gj.get("geometry").get("type") == "LineString": pts = np.asarray(gj.get("geometry").get("coordinates")) if gj.get("properties").get("classification") is None: shape_name = "unnamed" else: shape_name = gj.get("properties").get("classification").get("name") if len(pts.shape) == 1: return { "array": np.asarray(pts[0]).astype(np.double), "shape_type": gj.get("geometry").get("type"), "shape_name": shape_name, } else: return { "array": pts.astype(np.double), "shape_type": gj.get("geometry").get("type"), "shape_name": shape_name, }
[docs] def add_unamed(gj): if gj.get("properties").get("classification") is None: gj.get("properties").update({"classification": {"name": "unnamed"}}) return gj
[docs] def read_geojson(json_file: str): """Read GeoJSON files (and some QuPath metadata). Parameters ---------- json_file : str file path of QuPath exported GeoJSON Returns ------- gj_data : dict dict of GeoJSON information shapes_np : dict dict of GeoJSON information stored in np.ndarray "array": np.ndarray - x,y point data in array "shape_type": str - indicates GeoJSON shape_type (Polygon, MultiPolygon, etc.) "shape_name": str - name inherited from QuPath GeoJSON """ if Path(json_file).suffix != ".zip": gj_data = json.load(open(json_file, "r")) else: with zipfile.ZipFile(json_file, "r") as z: for filename in z.namelist(): with z.open(filename) as f: data = f.read() gj_data = json.loads(data.decode("utf-8")) if isinstance(gj_data, dict): gj_data = [gj_data] shapes_np = [gj_to_np(s) for s in gj_data] gj_data = [add_unamed(gj) for gj in gj_data] return gj_data, shapes_np
[docs] def np_to_geojson( np_array: np.ndarray, shape_type="polygon", shape_name="unnamed" ): """convert np.ndarray to GeoJSON dict Parameters ---------- np_array: np.ndarray coordinates of data shape_type:str GeoJSON shape type shape_name:str Name of the shape Returns ------- shape_gj : dict dict of GeoJSON information shape_np : dict dict of GeoJSON information stored in np.ndarray "array": np.ndarray - x,y point data in array "shape_type": str - indicates GeoJSON shape_type (Polygon, MultiPolygon, etc.) "shape_name": str - name inherited from QuPath GeoJSON """ sh_type = shape_type.lower() gj_func = GJ_SHAPE_TYPE[sh_type] if sh_type == "polygon": np_array = np.vstack([np_array, np_array[0, :]]) geometry = gj_func([np_array.tolist()]) elif sh_type in ["multipoint", "linestring"]: geometry = gj_func(np_array.transpose().tolist()) else: geometry = gj_func(np_array.tolist()) shape_gj = { "type": "Feature", "id": "annotation", "geometry": geometry, "properties": { "classification": {"name": shape_name, "colorRGB": -1}, "isLocked": False, }, } shape_np = { "array": np_array, "shape_type": shape_type.lower(), "shape_name": shape_name, } return shape_gj, shape_np
[docs] def shape_reader(shape_data, **kwargs): """ Read shape data for transformation Shape data is stored as numpy arrays for operations but also as GeoJSON to contain metadata and interface with QuPath Parameters ---------- shape_data: list of np.ndarray or str if str, will read data as GeoJSON file, if np.ndarray with assume it is coordinates kwargs keyword args passed to np_to_geojson convert Returns ------- shapes_gj: list of dicts list of dicts of GeoJSON information shapes_np: list of dicts list of dicts of GeoJSON information stored in np.ndarray "array": np.ndarray - x,y point data in array "shape_type": str - indicates GeoJSON shape_type (Polygon, MultiPolygon, etc.) "shape_name": str - name inherited from QuPath GeoJSON """ if isinstance(shape_data, list) is False: shape_data = [shape_data] shapes_gj = [] shapes_np = [] for sh in shape_data: if isinstance(sh, dict): out_shape_gj, out_shape_np = np_to_geojson( sh["array"], sh["shape_type"], sh["shape_name"] ) elif isinstance(sh, np.ndarray): out_shape_gj, out_shape_np = np_to_geojson(sh, **kwargs) else: if Path(sh).is_file(): sh_fp = Path(sh) if sh_fp.suffix in [".json", ".geojson", ".zip"]: out_shape_gj, out_shape_np = read_geojson(str(sh_fp)) # elif sh_fp.suffix == ".cz": # out_shape_gj = read_zen_shapes(str(sh_fp)) # out_shape_np = [gj_to_np(s) for s in out_shape_gj] else: raise ValueError( "{} is not a geojson or numpy array".format(str(sh_fp)) ) else: raise FileNotFoundError( "{} file not found".format(str(Path(sh).as_posix())) ) if isinstance(out_shape_gj, list): shapes_gj.extend(out_shape_gj) else: shapes_gj.append(out_shape_gj) if isinstance(out_shape_np, list): shapes_np.extend(out_shape_np) else: shapes_np.append(out_shape_np) return shapes_gj, shapes_np
[docs] def scale_shape_coordinates(poly: dict, scale_factor: float): """ Scale coordinates by a factor Parameters ---------- poly: dict dict of coordinate data contain np.ndarray in "array" key scale_factor: float isotropic scaling factor for the coordinates Returns ------- poly: dict dict containing coordinates scaled by scale_factor """ poly_coords = poly["array"] poly_coords = poly_coords * scale_factor poly["array"] = poly_coords return poly
[docs] def invert_nonrigid_transforms(itk_transforms: list): """ Check list of sequential ITK transforms for non-linear (i.e., bspline) transforms Transformations need to be inverted to transform from moving to fixed space as transformations are mapped from fixed space to moving. This will first convert any non-linear transforms to a displacement field then invert the displacement field using ITK methods. It usually works quite well but is not an exact solution. Linear transforms can be inverted on the fly when transforming points Parameters ---------- itk_transforms:list list of itk.Transform Returns ------- itk_transforms:list list of itk.Transform where any non-linear transforms are replaced with an inverted displacement field """ tform_linear = [t.is_linear for t in itk_transforms] if all(tform_linear): return itk_transforms else: nl_idxs = np.where(np.array(tform_linear) == 0)[0] for nl_idx in nl_idxs: if not itk_transforms[nl_idx].inverse_transform: print( f"transform at index {nl_idx} is non-linear and the inverse has not been computed\n" "inverting displacement field(s)...\n" "this can take some time" ) itk_transforms[nl_idx].compute_inverse_nonlinear() return itk_transforms
[docs] def prepare_pt_transformation_data(transformations, compute_inverse=True): """ Read and prepare wsireg transformation data for point set transformation Parameters ---------- transformations list of dict containing elastix transformation data or str to wsireg .json file containing elastix transformation data compute_inverse : bool whether or not to compute the inverse transformation for moving to fixed point transformations Returns ------- itk_pt_transforms:list list of transformation data ready to operate on points target_res: physical spacing of the final transformation in the transform sequence This is needed to map coordinates defined as pixel indices to physical coordinates and then back """ if all([isinstance(t, RegTransform) for t in transformations]) is False: _, transformations = wsireg_transforms_to_itk_composite( transformations ) if compute_inverse: transformations = invert_nonrigid_transforms(transformations) target_res = float(transformations[-1].output_spacing[0]) return transformations, target_res
[docs] def itk_transform_pts( pt_data: np.ndarray, itk_transforms: list, px_idx=True, source_res=1, output_idx=True, target_res=2, ): """ Transforms x,y points stored in np.ndarray using list of ITK transforms All transforms are in physical coordinates, so all points must be converted to physical coordinates before transformation, but this function allows converting back to pixel indices after transformation Can intake points in physical coordinates is px_idx == False Can output points in physical coordinates if output_idx == False Parameters ---------- pt_data : np.ndarray array where rows are points and columns are x,y itk_transforms: list list of ITK transforms, non-linear transforms should be inverted px_idx: bool whether points are specified in physical coordinates (i.e., microns) or in pixel indices source_res: float resolution of the image on which annotations were made output_idx: bool whether transformed points should be output in physical coordinates (i.e., microns) or in pixel indices target_res: float resolution of the final target image for conversion back to pixel indices Returns ------- tformed_pts:np.ndarray transformed points array where rows are points and columns are x,y """ tformed_pts = [] for pt in pt_data: if px_idx is True: pt = pt * source_res for idx, t in enumerate(itk_transforms): if idx == 0: t_pt = t.inverse_transform.TransformPoint(pt) else: t_pt = t.inverse_transform.TransformPoint(t_pt) t_pt = np.array(t_pt) if output_idx is True: t_pt *= 1 / target_res tformed_pts.append(t_pt) return np.stack(tformed_pts)
[docs] def transform_shapes( shape_data: list, itk_transforms: list, px_idx=True, source_res=1, output_idx=True, target_res=2, ): """ Convenience function to apply itk_transform_pts to a list of shape data Parameters ---------- shape_data: list of arrays where rows are points and columns are x,y itk_transforms: list list of ITK transforms, non-linear transforms should be inverted px_idx: bool whether points are specified in physical coordinates (i.e., microns) or in pixel indices source_res: float resolution of the image on which annotations were made output_idx: bool whether transformed points should be output in physical coordinates (i.e., microns) or in pixel indices target_res: float resolution of the final target image for conversion back to pixel indices Returns ------- transformed_shape_data:list list of transformed np.ndarray data where rows are points and columns are x,y """ transformed_shape_data = [] for sh in shape_data: t_ptset = deepcopy(sh) ptset = sh.get("array") t_pts = itk_transform_pts( ptset, itk_transforms, px_idx=px_idx, source_res=source_res, output_idx=output_idx, target_res=target_res, ) t_ptset["array"] = t_pts transformed_shape_data.append(t_ptset) return transformed_shape_data
[docs] def insert_transformed_pts_gj(gj_data: list, np_data: list): """ insert point data into a list of geojson data Parameters ---------- shape_gj : dict dict of GeoJSON information shape_np : dict transformed point data in wsireg shape dict Returns ------- shape_gj : dict dict of GeoJSON information with updated coordinate information """ gj_data_t = deepcopy(gj_data) for sh, gj in zip(np_data, gj_data_t): shape_type = gj.get("geometry").get("type") if shape_type == "Polygon": gj.get("geometry").update({"coordinates": [sh["array"].tolist()]}) elif shape_type == "Point": gj.get("geometry").update( {"coordinates": np.squeeze(sh["array"]).tolist()} ) elif shape_type == "MultiPoint": gj.get("geometry").update({"coordinates": sh["array"].tolist()}) elif shape_type == "LineString": gj.get("geometry").update({"coordinates": sh["array"].tolist()}) return gj_data_t
[docs] def get_int_dtype(value: int): """ Determine appropriate bit precision for indexed image Parameters ---------- value:int number of shapes Returns ------- dtype:np.dtype apppropriate data type for index mask """ if value <= np.iinfo(np.uint8).max: return np.uint8 if value <= np.iinfo(np.uint16).max: return np.uint16 if value <= np.iinfo(np.uint32).max: return np.int32 else: raise ValueError("Too many shapes")
[docs] def get_all_shape_coords(shapes: list): return np.vstack( [np.squeeze(sh["geometry"]["coordinates"][0]) for sh in shapes] )
# code below is for managing transforms as masks rather than point sets # will probably not reimplement, if segmentation data can is expressed # as a mask, it can be transformed as an image (using attachment_modality)
[docs] def approx_polygon_contour(mask: np.ndarray, percent_arc_length=0.01): """ Approximate binary mask contours to polygon vertices using cv2. Parameters ---------- mask : numpy.ndarray 2-d numpy array of datatype np.uint8. percent_arc_length : float scaling of epsilon for polygon approximate vertices accuracy. maximum distance of new vertices from original. Returns ------- numpy.ndarray returns an 2d array of vertices, rows: points, columns: y,x """ contours, hierarchy = cv2.findContours( mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE ) if len(contours) > 1: contours = [contours[np.argmax([cnt.shape[0] for cnt in contours])]] epsilon = percent_arc_length * cv2.arcLength(contours[0], True) approx = cv2.approxPolyDP(contours[0], epsilon, True) return np.squeeze(approx).astype(np.uint32)
[docs] def index_mask_to_shapes(index_mask, shape_name, tf_shapes): """ Find the polygons of a transformed shape mask, conveting binary mask to list of polygon verteces and sorting by numerical index Parameters ---------- index_mask:np.ndarray mask where each shape is defined by it's index shape_name:str name of the shape tf_shapes:list original list of shape GeoJSON data to be updated Returns ------- updated_shapes:list dict of GeoJSON information with updated coordinate information """ labstats = sitk.LabelShapeStatisticsImageFilter() labstats.SetBackgroundValue(0) labstats.Execute(index_mask) index_mask = sitk.GetArrayFromImage(index_mask) updated_shapes = deepcopy(tf_shapes) for idx, shape in enumerate(tf_shapes): if shape["properties"]["classification"]["name"] == shape_name: label_bb = labstats.GetBoundingBox(idx + 1) x_min = label_bb[0] x_len = label_bb[2] y_min = label_bb[1] y_len = label_bb[3] sub_mask = index_mask[y_min : y_min + y_len, x_min : x_min + x_len] sub_mask[sub_mask == idx + 1] = 255 yx_coords = approx_polygon_contour(sub_mask, 0.00001) xy_coords = yx_coords xy_coords = np.append(xy_coords, xy_coords[:1, :], axis=0) xy_coords = xy_coords + [x_min, y_min] updated_shapes[idx]["geometry"]["coordinates"] = [ xy_coords.tolist() ] return updated_shapes
# don't intend to maintain # def read_zen_shapes(zen_fp): # """Read Zeiss Zen Blue .cz ROIs files to wsimap shapely format. # # Parameters # ---------- # zen_fp : str # file path of Zen .cz. # # Returns # ------- # list # list of wsimap shapely rois # # """ # # root = etree.parse(zen_fp) # # rois = root.xpath("//Elements")[0] # shapes_out = [] # for shape in rois: # try: # ptset_name = shape.find("Attributes/Name").text # except AttributeError: # ptset_name = "unnamed" # # if shape.tag == "Polygon": # ptset_cz = shape.find("Geometry/Points") # # ptset_type = "Polygon" # # poly_str = ptset_cz.text # poly_str = poly_str.split(" ") # poly_str = [poly.split(",") for poly in poly_str] # poly_str = [[float(pt[0]), float(pt[1])] for pt in poly_str] # # poly = { # "geometry": geojson.Polygon(poly_str), # "properties": {"classification": {"name": ptset_name}}, # } # # shapes_out.append(poly) # # if shape.tag == "Rectangle": # rect_pts = shape.find("Geometry") # # x = float(rect_pts.findtext("Left")) # y = float(rect_pts.findtext("Top")) # width = float(rect_pts.findtext("Width")) # height = float(rect_pts.findtext("Height")) # # rect = geojson.Polygon( # [ # [x, y], # [x + width, y], # [x + width, y + height], # [x, y + height], # [x, y], # ] # ) # # rect = { # "geometry": rect, # "properties": {"classification": {"name": ptset_name}}, # } # # shapes_out.append(rect) # # return shapes_out