# Source: chlamy-comparison/code/python/morphology_2d/segment_chlamy.py (Arcadia-Science/chlamy-comparison, branch main)

"""
Segment cells in probability-map TIFF images and export their measurements.

This Python script performs image segmentation and analysis of cells in a set
of TIFF images. It uses scientific libraries such as NumPy, SciPy,
scikit-image, and pandas.

- The `segment_cells` function reads a "probability map" image, performs
  thresholding, and labels individual cell regions, filtering them based on
  area constraints defined by the `min_diameter` and `max_diameter` parameters.
- The `save_measurements_to_csv` function takes these labeled regions,
  collects properties such as area, perimeter, axis lengths, eccentricity, and
  centroids, and saves them to a CSV file. Metadata extracted from the file
  path, such as experiment details and species, is saved alongside them.
- The `process_directory` function walks through a given root directory,
  looks for the relevant TIFF images, and applies the functions above.

When executed, the script runs `process_directory` on a specified base
directory, enabling batch processing of cell images for analysis.
"""

# Import required libraries
import numpy as np  # For numerical operations like array manipulations
import os  # For operating system-dependent functionality like reading or writing to the file system
import re  # For regular expression operations
from scipy.ndimage import label, find_objects, sum as ndi_sum  # For image processing
from skimage.io import imread, imsave  # For reading and saving image files
import pandas as pd  # For data manipulation and analysis
from skimage.measure import regionprops  # For measuring properties of labeled image regions

# Function to segment cells in an image
def segment_cells(prob_map_path, threshold=32767.5, min_diameter=3, max_diameter=40):
    """Threshold a probability-map image and keep the objects within a size range.

    Args:
        prob_map_path: Path of the image to read with ``imread``.
        threshold: Pixels strictly greater than this value are foreground.
            The default, 32767.5, is the midpoint of the unsigned 16-bit range
            (presumably a 50 % cut-off for uint16 probability maps -- confirm
            against the files being processed).
        min_diameter: Diameter of the smallest circle whose area an object must
            reach to be kept.
        max_diameter: Diameter of the largest circle whose area an object may
            have to be kept.

    Returns:
        A tuple ``(mask, filtered_properties)``:
        ``mask`` is a ``uint8`` array that is 255 on the pixels of the kept
        objects and 0 elsewhere; ``filtered_properties`` is the list of
        ``regionprops`` entries of those same objects.
    """
    # Read the image
    prob_map = imread(prob_map_path)
    # Multi-channel images: keep only the first channel. A plain 2-D
    # (single-channel) image is used as it is instead of raising IndexError.
    if prob_map.ndim >= 3:
        prob_map = prob_map[:, :, 0]

    # Apply threshold to create a binary image
    binary_map = np.where(prob_map > threshold, 1, 0)
    # Label connected regions in the binary image
    labeled_map, _ = label(binary_map)

    # Area limits are the areas of circles with the given diameters
    min_area = np.pi * (min_diameter / 2) ** 2
    max_area = np.pi * (max_diameter / 2) ** 2

    # Measure the labeled regions and keep those whose area is within limits
    properties = regionprops(labeled_map)
    filtered_properties = [prop for prop in properties if min_area <= prop.area <= max_area]

    # Build the output mask from the labels that passed the filter, so the
    # image agrees with the returned measurements. Selecting by label value
    # (rather than blanking bounding boxes) cannot erase a neighbouring object
    # that merely overlaps a rejected object's bounding box.
    # labeled_map itself is left unmodified: the region objects above were
    # built from it and callers read further properties from them later.
    kept_labels = [prop.label for prop in filtered_properties]
    kept_mask = np.isin(labeled_map, kept_labels)

    # Convert to 8-bit image for saving
    return (kept_mask * 255).astype(np.uint8), filtered_properties

# Function to save properties of segmented cells to a CSV file
def save_measurements_to_csv(prob_map_path, filtered_properties):
    """Write one CSV row per segmented object, appending if the file exists.

    The CSV is named 'object_measurements.csv' and is placed in the directory
    of ``prob_map_path`` with 'prob_maps_organized' replaced by 'objects'.
    Rows are ordered by the distance of each object's centroid from the
    coordinate origin. ``Object_ID`` is the object's 1-based position in
    ``filtered_properties`` (i.e. before that ordering is applied).

    Args:
        prob_map_path: Path of the image the objects were segmented from; it
            determines the CSV location, the 'Image' column and the metadata
            columns.
        filtered_properties: Sequence of region-property objects exposing
            ``area``, ``perimeter``, ``major_axis_length``,
            ``minor_axis_length``, ``eccentricity`` and ``centroid``.
    """
    # Where the CSV goes: the 'objects' counterpart of the input directory
    target_dir = os.path.dirname(prob_map_path).replace('prob_maps_organized', 'objects')
    csv_save_path = os.path.join(target_dir, 'object_measurements.csv')

    n_objects = len(filtered_properties)

    # Rank the objects by how far their centroid lies from (0, 0)
    origin_distances = [
        np.sqrt(region.centroid[0] ** 2 + region.centroid[1] ** 2)
        for region in filtered_properties
    ]
    order = np.argsort(origin_distances)
    ranked = [filtered_properties[idx] for idx in order]

    # One list per output column, all in ranked order
    measurements = {
        'Object_ID': [idx + 1 for idx in order],
        'Image': [os.path.basename(prob_map_path)] * n_objects,
        'Area': [region.area for region in ranked],
        'Perimeter': [region.perimeter for region in ranked],
        'MajorAxisLength': [region.major_axis_length for region in ranked],
        'MinorAxisLength': [region.minor_axis_length for region in ranked],
        'Eccentricity': [region.eccentricity for region in ranked],
        'Center_Y': [region.centroid[0] for region in ranked],
        'Center_X': [region.centroid[1] for region in ranked]
    }

    # Path-derived metadata is repeated on every row
    path_metadata = extract_metadata_from_path(prob_map_path)
    measurements.update(
        {column: [entry] * n_objects for column, entry in path_metadata.items()}
    )

    # Append without a header when the CSV is already there, otherwise create it
    table = pd.DataFrame(measurements)
    already_there = os.path.exists(csv_save_path)
    table.to_csv(
        csv_save_path,
        mode='a' if already_there else 'w',
        header=not already_there,
        index=False,
    )
    print(f"Appended measurements to {csv_save_path}")
# Helper to cut out the text that sits between two markers of a string
def _text_between(text, start_marker, end_marker, end_from_right=False):
    """Return the part of ``text`` after ``start_marker`` and before ``end_marker``.

    ``start_marker`` is located at its first occurrence. ``end_marker`` is
    located at its first occurrence, or at its last one when
    ``end_from_right`` is true. Returns ``None`` if either marker is absent.
    """
    start = text.find(start_marker)
    end = text.rfind(end_marker) if end_from_right else text.find(end_marker)
    # Test the raw find() results: once the marker length has been added to
    # ``start`` a "not found" (-1) can no longer be recognised.
    if start == -1 or end == -1:
        return None
    return text[start + len(start_marker):end]


# Function to extract metadata from a file path
def extract_metadata_from_path(path):
    """Derive experiment, species, pool, frame and sequence metadata from a path.

    Args:
        path: File path whose components are separated by ``os.sep``.

    Returns:
        A dict with the keys 'metadata_experiment', 'metadata_species',
        'metadata_pool_id', 'metadata_frames' and 'metadata_sequence'.
        A value is ``None`` when the piece it is derived from is missing
        (path with fewer than three components, or a marker absent from the
        file name).
    """
    # Split the path into its components
    components = path.split(os.sep)

    # The 'experiment' is the third path component (index 2); None for paths
    # that are too short rather than raising IndexError
    metadata_experiment = components[2] if len(components) > 2 else None

    # Extract the filename from the full path
    filename = os.path.basename(path)

    # The 'species' is everything before the first underscore of the filename
    metadata_species = filename.split('_')[0]

    # The 'pool_id' sits between 'pools_' and the last '_seq'
    metadata_pool_id = _text_between(filename, 'pools_', '_seq', end_from_right=True)

    # The 'frames' sit between the first '_f' and '_Prob'
    metadata_frames = _text_between(filename, '_f', '_Prob')

    # The 'sequence' sits between the first 'seq' and the first '_f'
    metadata_seq = _text_between(filename, 'seq', '_f')

    # Return a dictionary containing all extracted metadata
    return {
        'metadata_experiment': metadata_experiment,
        'metadata_species': metadata_species,
        'metadata_pool_id': metadata_pool_id,
        'metadata_frames': metadata_frames,
        'metadata_sequence': metadata_seq
    }

# Function to process a directory containing image files
def process_directory(root_directory):
    """Segment every '.tif' file found in probability-map directories.

    Walks ``root_directory`` recursively. In each directory whose path
    contains 'prob_maps_org', every file ending in '.tif' is segmented, the
    resulting mask is saved under the same file name in the output directory,
    and the object measurements are written to the CSV file.

    The output directory is the input directory path with
    'prob_maps_organized' replaced by 'objects'. A directory for which that
    replacement changes nothing is skipped, because writing the mask there
    would overwrite the input image of the same name.

    Args:
        root_directory: Directory at which the recursive search starts.
    """
    # Loop through each sub-directory and file in the root directory
    for root, dirs, files in os.walk(root_directory):
        # Only process directories that contain 'prob_maps_org' in their path
        if 'prob_maps_org' not in root:
            continue

        # Only process files with the '.tif' extension; sorted so that the
        # processing (and CSV row) order does not depend on the file system
        tif_files = sorted(name for name in files if name.endswith('.tif'))
        if not tif_files:
            continue

        # Output directory: 'prob_maps_organized' replaced with 'objects'
        output_dir = root.replace('prob_maps_organized', 'objects')
        if output_dir == root:
            # The looser 'prob_maps_org' match let this directory through, but
            # outputs would land on top of the inputs -- never overwrite them
            print(f"Skipping {root}: path does not contain 'prob_maps_organized', "
                  "outputs would overwrite the input images")
            continue

        # Create the output directory if it doesn't exist yet
        os.makedirs(output_dir, exist_ok=True)

        for filename in tif_files:
            input_path = os.path.join(root, filename)
            output_path = os.path.join(output_dir, filename)

            # Run the cell segmentation function on the input file
            binary_map, properties = segment_cells(input_path)
            # Save the segmented image
            imsave(output_path, binary_map)
            # Save the properties of the segmented cells to a CSV file
            save_measurements_to_csv(input_path, properties)

# Script entry point: the code below runs only when this file is executed
# directly, not when it is imported as a module
if __name__ == "__main__":
    # Directory at which the recursive search starts; "." is the current
    # working directory (change this to your specific directory if needed)
    base_directory = "."
    # Segment every matching image found below base_directory
    process_directory(base_directory)