Example: run MultiSolSegment and validate

This example shows how to download the weights for MultiSolSegment and run the model on an image to segment crack, dark, and busbar masks. It then uses pvimage to look at the crack count and crack shape parameters.

[1]:
from pathlib import Path
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

from pvcracks.utils import viz_functions, train_functions

import requests

from pvimage import features
[2]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.simplefilter(action="ignore", category=pd.errors.SettingWithCopyWarning)

Step 1: Download Model

[4]:
# Download the pretrained weights once and cache them next to the notebook.
weight_path = "multisolsegment.pt"
weight_url = (
    "https://datahub.duramat.org/dataset/24d7ed98-956e-4ebc-bf21-6584ecec2ff1/"
    "resource/e6d5dee2-0aef-439e-978c-d8c572e7039e/download/model.pt"
)

if os.path.exists(weight_path):
    print("Weights already downloaded.")
else:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(weight_url, timeout=60)
    # Fail loudly on HTTP errors instead of saving an error page as "weights".
    response.raise_for_status()

    # Write to a temporary name and rename at the end, so an interrupted write
    # never leaves a truncated file that the check above would accept next time.
    partial_weight_path = weight_path + ".part"
    with open(partial_weight_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_weight_path, weight_path)
Weights already downloaded.

Step 2: Download and Load Images

[ ]:
from zipfile import ZipFile
from io import BytesIO
import os

# Zip archive with the labeled EL cell images.
url = (
    "https://datahub.duramat.org/"
    "dataset/5e7587ad-6ad1-4d6f-8432-70940a6d7ca1/"
    "resource/8f12be45-b929-4f5c-9400-84dde27b0e90/"
    "download/labeledelcells.zip"
)

out_dir = os.path.expanduser("labeled_image_data")
if os.path.exists(out_dir):
    print("Images already downloaded.")
else:
    # Download and validate BEFORE creating out_dir: the existence check above is
    # the only cache test, so a folder left behind by a failed download would be
    # reported as "already downloaded" on the next run.
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()

    # Extract into a scratch folder and rename it once extraction has finished,
    # so out_dir only ever exists with a complete copy of the archive.
    partial_out_dir = out_dir + ".partial"
    with ZipFile(BytesIO(resp.content)) as z:
        z.extractall(path=partial_out_dir)
    os.replace(partial_out_dir, out_dir)
Images already downloaded.
[6]:
# Show the folder that holds the extracted images.
out_dir
[6]:
'labeled_image_data'

Step 3: Set local paths

[ ]:
# Root folder of the extracted dataset, built from the download folder above.
files_root = f"{out_dir}/LabeledELcells/"
# To use a copy of the dataset stored elsewhere, point files_root at it instead, e.g.:
# files_root = "/path/to/Channeled_Combined_CWRU_LBNL_ASU_No_Empty/"

# If you have already downloaded the model weights, replace weight_path with the local path, e.g.:
# weight_path = "/path/to/checkpoints/epoch_19/model.pt"

Step 4: Load in everything

[8]:
# Channel index -> class name; handed to the model loader and the plotting helper.
category_mapping = dict(enumerate(("dark", "busbar", "crack", "non-cell")))

root = Path(files_root)
img_root = root.joinpath("img", "all")

# load_dataset is unpacked into a train and a validation dataset; only the
# validation one is wrapped in a loader (one image per batch, fixed order).
train_dataset, val_dataset = train_functions.load_dataset(root)
val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)

# Build the model from the downloaded weights and get the device it runs on.
device, model = train_functions.load_device_and_model(
    category_mapping,
    existing_weight_path=weight_path,
)

Step 5: Pick image and threshold

Index selects which image of the validation set to run the model on.

Threshold is the minimum predicted probability (between 0 and 1) a pixel needs in order to count as a detection rather than background noise.

[9]:
# Same channel index -> class name mapping as defined in Step 4.
category_mapping = dict(enumerate(("dark", "busbar", "crack", "non-cell")))

idx = 7  # position of the image inside val_dataset
threshold = 0.5  # default; sigmoid outputs above this count as a detection

Step 6: Inference and Show

[ ]:
# Run the model on validation image `idx` and display the result via the pvcracks helper.
viz_functions.channeled_inference_and_show(
    val_loader,
    device=device,
    model=model,
    category_mapping=category_mapping,
    idx=idx,
    threshold=threshold,
)
# plt.savefig("mss_model_prediction.png", dpi=300)
../_images/Examples_example_download_run_multisolsegment_15_0.png
<Figure size 640x480 with 0 Axes>

Step 7: Investigate dark area %

[11]:
def channel_activation_percentages(mask, category_mapping):
    """
    Report, per class, how much of the image area its mask channel covers.

    Args:
        mask: array-like that converts to a 3D array laid out as
            (n_channels, height, width).
        category_mapping (dict): channel index -> class name. Entries are processed
            in the mapping's iteration order.

    Returns:
        dict: class name -> (sum of that channel / (height * width)) * 100.

    Raises:
        ValueError: if the mask is not 3D, if height * width is zero, or if a channel
            index in ``category_mapping`` is >= the number of channels.
    """
    mask_np = np.asarray(mask)
    shape = mask_np.shape

    if len(shape) != 3:
        raise ValueError(
            f"Expected a 3D mask of shape (channels, height, width); got {shape}."
        )

    n_channels = shape[0]
    total_pixels = shape[1] * shape[2]
    if total_pixels == 0:
        raise ValueError("Mask must contain at least one pixel.")

    def _share_of_image(channel_idx, class_name):
        # Bounds are checked per entry, right before that channel is read.
        if channel_idx >= n_channels:
            raise ValueError(
                f"Channel index {channel_idx} for class '{class_name}' is out of bounds "
                f"for mask with {n_channels} channel(s)."
            )
        active = mask_np[channel_idx].sum()
        return float(active) / float(total_pixels) * 100.0

    return {
        class_name: _share_of_image(channel_idx, class_name)
        for channel_idx, class_name in category_mapping.items()
    }


def print_channel_activation_percentages(percentages, label="Predicted"):
    """
    Print per-class activation percentages followed by the unassigned remainder.

    Args:
        percentages (dict): class name -> percentage, as returned by
            ``channel_activation_percentages``. Classes are printed in the dict's
            own order, so the function no longer depends on a global mapping.
        label (str): word that starts the header line. Defaults to "Predicted",
            which reproduces the previous output; pass e.g. "Ground-truth" when
            printing statistics of a labeled mask.
    """
    print(f"{label} channel activation (% of image):")
    assigned = 0  # running total; avoids shadowing the built-in sum()
    for class_name, percentage in percentages.items():
        print(f"\t{class_name}: {percentage:.2f}%")
        assigned += percentage
    # Whatever is not covered by any listed class is reported as plain solar cell.
    print("\tRemaining (solar cell): %.2f%%\n" % (100 - assigned))
[ ]:
# Compare how much of image `idx` each class covers in the label and in the prediction.
img, mask = val_dataset[idx]
img = img.to(device)
img_path = val_dataset.get_img_path(idx)

# Inference only: no_grad stops PyTorch from recording an autograd graph.
with torch.no_grad():
    logits = model(img.unsqueeze(0)).detach().cpu()
probs = torch.sigmoid(logits)
# Binarise at `threshold` and drop the batch dimension added by unsqueeze(0).
pred_mask = (probs > threshold).float().squeeze(0).numpy()

gt_mask = mask.cpu().numpy()

gt_percentages = channel_activation_percentages(gt_mask, category_mapping)
pred_percentages = channel_activation_percentages(pred_mask, category_mapping)

# NOTE: the first block printed below is the ground truth, the second the model
# prediction, even though the helper's header reads "Predicted" for both.
print_channel_activation_percentages(gt_percentages)
print_channel_activation_percentages(pred_percentages)
Predicted channel activation (% of image):
        dark: 11.16%
        busbar: 7.75%
        crack: 3.05%
        non-cell: 3.70%
        Remaining (solar cell): 74.34%

Predicted channel activation (% of image):
        dark: 10.31%
        busbar: 7.81%
        crack: 3.08%
        non-cell: 3.70%
        Remaining (solar cell): 75.10%

Step 8: Crack mask features with PVImage

Extract predicted crack masks, compute crack statistics with PVImage, and visualize the distribution of key metrics.

[ ]:
# Predict the masks for image `idx` and show the crack channel on its own.
img, mask = val_dataset[idx]
img = img.to(device)
img_path = val_dataset.get_img_path(idx)

# Inference only: no_grad stops PyTorch from recording an autograd graph.
with torch.no_grad():
    logits = model(img.unsqueeze(0)).detach().cpu()
probs = torch.sigmoid(logits)
pred_mask = (probs > threshold).float().squeeze(0).numpy()

fig, ax = plt.subplots()
# Channel 2 is "crack" in category_mapping.
ax.set_title("Predicted crack mask")
ax.imshow(pred_mask[2], cmap="viridis")
<matplotlib.image.AxesImage at 0x7f5ceb69b890>
../_images/Examples_example_download_run_multisolsegment_20_1.png
[14]:
# Rescale the binary crack channel (index 2) from {0, 1} to {0, 255} as uint8.
crack_mask = (pred_mask[2] * 255).astype(np.uint8)

# One-row table with the image path in the "impath" column.
dfinfo = pd.DataFrame({"impath": [str(img_path)]})

# pvimage takes a list of masks plus the matching path table.
dffeaturesreal = features.feature_extraction_crack_mask([crack_mask], dfinfo)
dffeaturesreal.head()
[14]:
cell_number i prop.perimeter slope prop.convex_area prop.area prop.orientation
0 mxy_0540 4 745.404112 -2.728464 13499.0 5157.0 -1.459944
0 mxy_0540 0 332.468037 -1.693958 3134.0 1898.0 -1.429506
0 mxy_0540 2 149.59798 -1.927778 949.0 789.0 -1.273929
[15]:
def violinplt(data, palette=None, figsize=(5, 4), y_axis_log=False):
    """
    Draw one violin per column of ``data`` and annotate each with its mean.

    Args:
        data: DataFrame, Series, or anything ``pd.DataFrame`` accepts.
        palette: forwarded to ``sns.violinplot``.
        figsize: forwarded to ``plt.subplots``.
        y_axis_log (bool): when true, the y axis is switched to log scale.

    Returns:
        The matplotlib figure that holds the plot.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Normalise the input to a DataFrame so .mean() yields one value per column.
    if isinstance(data, pd.Series):
        data = data.to_frame()
    elif not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)

    fig, ax = plt.subplots(figsize=figsize)

    # inner="quartile" draws quartile lines; cut=0 keeps violins within the data range.
    sns.violinplot(
        data=data, ax=ax, palette=palette, inner="quartile", cut=0, linewidth=1
    )

    # Column means, drawn as points on top of the violins.
    means = data.mean().to_numpy()
    x_pos = np.arange(len(means))
    ax.scatter(x_pos, means, zorder=3)

    mean_labels = [f"{m:.2f}" for m in means]

    # Lift the labels by 3% of the spread of the means (3% of 1.0 if any mean is not finite).
    if np.all(np.isfinite(means)):
        spread = np.nanmax(means) - np.nanmin(means)
    else:
        spread = 1.0
    y_offset = 0.03 * spread

    # With more than three violins: smaller font, white label box, rotated ticks.
    crowded = len(x_pos) > 3
    label_size = 10 if crowded else 12

    for pos, mean_value, text in zip(x_pos, means, mean_labels):
        annotation = ax.text(
            pos,
            mean_value + y_offset,
            text,
            ha="center",
            va="bottom",
            fontsize=label_size,
            color="black",
            weight="semibold",
        )
        if crowded:
            # subtle white box for readability
            annotation.set_bbox(
                dict(
                    facecolor="white",
                    alpha=0.5,
                    edgecolor="none",
                    boxstyle="round,pad=0.15",
                )
            )

    if crowded:
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    ax.set_ylabel("Value")
    ax.set_xlabel("")
    if y_axis_log:
        ax.set_yscale("log")
    ax.margins(x=0.05)
    fig.tight_layout()
    return fig
[16]:
# Tag every feature row with the number of rows that share its cell_number.
dffeaturesreal["count_features"] = (
    dffeaturesreal.groupby("cell_number")["cell_number"].transform("count")
)

# Keep one row per cell (the first after sorting by cell_number).
dffeaturesrealcount = (
    dffeaturesreal
    .sort_values(by="cell_number")
    .drop_duplicates("cell_number", keep="first")
)
[17]:
dffeaturesrealcount = dffeaturesrealcount.reset_index(drop=True)

# Single-column frame of per-cell feature counts, labelled "Original" for plotting.
featurescounted = dffeaturesrealcount["count_features"].to_frame(name="Original")
featurescounted.head()
[17]:
Original
0 3
[18]:
from sklearn.preprocessing import MinMaxScaler


dffeaturesall = dffeaturesreal


# Violin of the per-cell crack feature count.
vp = violinplt(featurescounted[["Original"]], figsize=(5, 4))


# Min-max scale the shape features to [0, 1] so they fit on one axis.
scaler = MinMaxScaler()
columnstonormalize = [
    " prop.perimeter",
    " slope",
    " prop.area",
    " prop.orientation",
]
# Build the scaled frame directly rather than assigning into a column slice of
# dffeaturesall: assigning into a slice is chained assignment, which pandas warns
# about and which can silently fail to update. The original index is kept.
dffeaturesall_norm = pd.DataFrame(
    scaler.fit_transform(dffeaturesall[columnstonormalize]),
    columns=columnstonormalize,
    index=dffeaturesall.index,
)
fig = violinplt(dffeaturesall_norm[columnstonormalize], figsize=None)
# fig.savefig("violins.png", bbox_inches="tight")
../_images/Examples_example_download_run_multisolsegment_25_0.png
../_images/Examples_example_download_run_multisolsegment_25_1.png

Step 9: PVImage stats for many images

[ ]:
m_idx = len(val_dataset)  # number of validation images to process
masks = []
impaths = []

# Inference only: one no_grad block around the loop keeps PyTorch from recording
# an autograd graph for each of the forward passes.
with torch.no_grad():
    for i in range(m_idx):
        img, _ = val_dataset[i]
        img = img.to(device)
        img_path = val_dataset.get_img_path(i)

        # model inference
        logits = model(img.unsqueeze(0)).detach().cpu()
        probs = torch.sigmoid(logits)
        pred_mask = (probs > threshold).float().squeeze(0).numpy()  # (n_classes, H, W)

        # Extract crack channel (index 2) and convert to uint8 [0,255]
        crack_mask = (pred_mask[2] * 255).astype(np.uint8)

        masks.append(crack_mask)
        impaths.append(str(img_path))

# Build dataframe of image paths expected by feature_extraction_crack_mask
dfinfo_multi = pd.DataFrame({"impath": impaths})

# Run PVImage feature extraction across all predicted crack masks
dffeatures_multi = features.feature_extraction_crack_mask(masks, dfinfo_multi)
# These are model predictions, not ground truth, so the message says so.
print(
    f"Processed {len(masks)} predicted crack masks, extracted {len(dffeatures_multi)} feature rows."
)
display(dffeatures_multi.head())
Processed 585 ground-truth images, extracted 601 feature rows.
cell_number i prop.perimeter slope prop.convex_area prop.area prop.orientation
0 mxy_sa19965_sub_EL_9 2 271.740115 -0.298467 2296.0 1497.0 -0.486834
0 mxy_sa19965_sub_EL_9 8 266.468037 -0.155785 1826.0 1403.0 -0.386939
0 mxy_sa19965_sub_EL_9 4 235.39697 -1.476617 2109.0 1289.0 -1.394845
0 mxy_sa19965_sub_EL_9 5 233.39697 0.500036 2353.0 1422.0 0.739917
0 SW2 1 157.556349 1.625017 887.0 789.0 1.500654
[20]:
# Tag every feature row with the number of rows that share its cell_number.
dffeatures_multi["count_features"] = (
    dffeatures_multi.groupby("cell_number")["cell_number"].transform("count")
)

# One row per cell, renumbered from zero.
dffeatures_multi_count = (
    dffeatures_multi
    .sort_values(by="cell_number")
    .drop_duplicates("cell_number", keep="first")
    .reset_index(drop=True)
)

# Single-column frame of per-cell counts, labelled "Original" for plotting.
featurescounted_multi = dffeatures_multi_count["count_features"].to_frame(
    name="Original"
)

# Show basic outputs
print(f"Processed {len(masks)} images, extracted {len(dffeatures_multi)} feature rows.")
display(featurescounted_multi.head())
Processed 585 images, extracted 601 feature rows.
Original
0 1
1 2
2 1
3 3
4 3
[21]:
# Save the predicted-mask features to disk and read them straight back.
# Only unpickle files you wrote yourself: pickle can execute code on load.
mss_features_pickle = "mms_dffeatures.pickle"
dffeatures_multi.to_pickle(mss_features_pickle)
dffeatures_multi = pd.read_pickle(mss_features_pickle)
[22]:
# Distribution of the per-cell crack feature counts from the model predictions.
vp = violinplt(featurescounted_multi[["Original"]], figsize=(5, 4))
../_images/Examples_example_download_run_multisolsegment_30_0.png
[23]:
# Min-max scale the shape features of the predicted cracks to [0, 1].
scaler = MinMaxScaler()
columnstonormalize = [
    " prop.perimeter",
    " slope",
    " prop.area",
    " prop.orientation",
]
# Build the scaled frame directly rather than assigning into a column slice of
# dffeatures_multi: assigning into a slice is chained assignment, which pandas
# warns about and which can silently fail to update. The original index is kept.
dffeatures_multi_norm = pd.DataFrame(
    scaler.fit_transform(dffeatures_multi[columnstonormalize]),
    columns=columnstonormalize,
    index=dffeatures_multi.index,
)
fig = violinplt(dffeatures_multi_norm[columnstonormalize], figsize=None)
../_images/Examples_example_download_run_multisolsegment_31_0.png

Step 10: PVImage stats for ground truth crack masks

[ ]:
# Collect the labeled crack channel of every validation image; no model involved.
gt_masks = []
gt_impaths = []

for i in range(m_idx):
    _, gt_mask = val_dataset[i]
    img_path = val_dataset.get_img_path(i)

    # Channel 2 is the crack channel; rescale to 0-255 and store as uint8.
    gt_channels = gt_mask.cpu().numpy()
    crack_mask_gt = (gt_channels[2] * 255).astype(np.uint8)

    gt_masks.append(crack_mask_gt)
    gt_impaths.append(str(img_path))

# Same pvimage call as for the predictions, now fed with the labeled masks.
dfinfo_gt = pd.DataFrame({"impath": gt_impaths})
dffeatures_gt = features.feature_extraction_crack_mask(gt_masks, dfinfo_gt)


display(dffeatures_gt.head())
cell_number i prop.perimeter slope prop.convex_area prop.area prop.orientation
0 mxy_sa19965_sub_EL_9 7 214.083261 -1.170103 1893.0 1167.0 -1.370287
0 mxy_sa19965_sub_EL_9 14 206.325902 -0.380381 1568.0 1059.0 -1.052619
0 mxy_sa19965_sub_EL_9 4 162.426407 -0.890218 1256.0 887.0 -0.862461
0 SW2 3 119.313708 1.319687 616.0 546.0 1.455919
0 my_SW1 0 454.575685 -0.897546 5464.0 2804.0 -0.985487
[25]:
# Tag every ground-truth feature row with the number of rows sharing its cell_number.
dffeatures_gt["count_features"] = (
    dffeatures_gt.groupby("cell_number")["cell_number"].transform("count")
)

# One row per cell, renumbered from zero.
dffeatures_gt_count = (
    dffeatures_gt
    .sort_values(by="cell_number")
    .drop_duplicates("cell_number", keep="first")
    .reset_index(drop=True)
)

# Single-column frame of per-cell counts, labelled "Ground Truth" for plotting.
featurescounted_gt = dffeatures_gt_count["count_features"].to_frame(
    name="Ground Truth"
)

display(featurescounted_gt.head())
print(
    f"Processed {len(gt_masks)} ground-truth images, extracted {len(dffeatures_gt)} feature rows."
)
Ground Truth
0 1
1 2
2 1
3 4
4 4
Processed 585 ground-truth images, extracted 590 feature rows.
[26]:
# Distribution of the per-cell crack feature counts in the ground-truth masks.
vp = violinplt(featurescounted_gt[["Ground Truth"]], figsize=(5, 4))

../_images/Examples_example_download_run_multisolsegment_35_0.png
[27]:
# Min-max scale the shape features of the ground-truth cracks to [0, 1].
scaler = MinMaxScaler()
columnstonormalize = [
    " prop.perimeter",
    " slope",
    " prop.area",
    " prop.orientation",
]
# Build the scaled frame directly rather than assigning into a column slice of
# dffeatures_gt: assigning into a slice is chained assignment, which pandas warns
# about and which can silently fail to update. The original index is kept.
dffeatures_gt_norm = pd.DataFrame(
    scaler.fit_transform(dffeatures_gt[columnstonormalize]),
    columns=columnstonormalize,
    index=dffeatures_gt.index,
)
fig = violinplt(dffeatures_gt_norm[columnstonormalize], figsize=None)
../_images/Examples_example_download_run_multisolsegment_36_0.png

Step 11: Compare PVImage stats (predicted vs. ground truth)

Join the PVImage outputs to see how the model’s crack counts and shape metrics differ from the labeled data.

[28]:
# Number of feature rows per cell, for predictions and for ground truth.
pred_counts = dffeatures_multi.groupby("cell_number").size().rename("pred_crack_count")
gt_counts = dffeatures_gt.groupby("cell_number").size().rename("gt_crack_count")

# Align the two counts on cell_number; a cell missing on one side gets 0.
count_comparison = (
    pd.concat([pred_counts, gt_counts], axis=1)
    .fillna(0)
    .astype(int)
    .reset_index()
)
count_comparison.columns = ["cell_number", "pred_crack_count", "gt_crack_count"]

# Positive: the model found more cracks than were labeled; negative: fewer.
count_comparison["count_diff"] = count_comparison["pred_crack_count"].sub(
    count_comparison["gt_crack_count"]
)

display(count_comparison.head())

cell_number pred_crack_count gt_crack_count count_diff
0 0563 1 1 0
1 0774 2 2 0
2 0809 1 1 0
3 0_EL_18.09.2023 3 4 -1
4 1090 3 4 -1
[29]:
# Side-by-side violins of the per-cell crack counts, with readable column labels.
vp = violinplt(
    count_comparison.rename(
        columns={
            "pred_crack_count": "Predicted",
            "gt_crack_count": "Ground Truth",
        }
    )[["Predicted", "Ground Truth"]],
    figsize=(6, 4),
)

../_images/Examples_example_download_run_multisolsegment_39_0.png
[30]:
feature_cols = [
    " prop.perimeter",
    " slope",
    " prop.area",
    " prop.orientation",
]

# Mean of each scaled feature, predictions next to ground truth.
# NOTE(review): the two frames were scaled by separately fitted scalers, so their
# [0, 1] ranges refer to different minima/maxima — confirm this comparison is intended.
feature_comparison = pd.concat(
    [
        dffeatures_multi_norm[feature_cols].mean().rename("Predicted_mean"),
        dffeatures_gt_norm[feature_cols].mean().rename("GroundTruth_mean"),
    ],
    axis=1,
)
feature_comparison["abs_diff"] = (
    feature_comparison["Predicted_mean"]
    .sub(feature_comparison["GroundTruth_mean"])
    .abs()
)
feature_comparison

[30]:
Predicted_mean GroundTruth_mean abs_diff
prop.perimeter 0.151889 0.170860 0.018971
slope 0.485083 0.457917 0.027167
prop.area 0.163280 0.153094 0.010186
prop.orientation 0.345932 0.337139 0.008792
[31]:
# For each feature, build a "Predicted" / "Ground Truth" column pair. Rows are
# paired by position only and the shorter column is padded with NaN.
feature_pairs = []

for col in feature_cols:
    in_both_frames = (
        col in dffeatures_multi_norm.columns and col in dffeatures_gt_norm.columns
    )
    if not in_both_frames:
        print(f"Skipping {col}: not found in features dataframe.")
        continue

    label = col.strip()
    pred = dffeatures_multi_norm[col].reset_index(drop=True)
    gt = dffeatures_gt_norm[col].reset_index(drop=True)

    max_len = max(len(pred), len(gt))
    if max_len == 0:
        print(f"Skipping {label}: no data to plot.")
        continue

    # Extend both columns to the common length; the new positions become NaN.
    pred = pred.reindex(range(max_len))
    gt = gt.reindex(range(max_len))

    pair = pd.DataFrame({f"Predicted {label}": pred, f"Ground Truth {label}": gt})
    feature_pairs.append(pair)

if feature_pairs:
    # Join all pairs side by side, coerce to numbers, and drop all-NaN columns.
    feature_violin_df = (
        pd.concat(feature_pairs, axis=1)
        .apply(pd.to_numeric, errors="coerce")
        .dropna(axis=1, how="all")
    )

    if feature_violin_df.empty:
        print("No numeric feature data available for violin plots.")
    else:
        # Render comparison violins
        vp = violinplt(feature_violin_df, figsize=(10, 5))
else:
    print("No feature data available for violin plots.")
    feature_violin_df = pd.DataFrame()

feature_violin_df

[31]:
Predicted prop.perimeter Ground Truth prop.perimeter Predicted slope Ground Truth slope Predicted prop.area Ground Truth prop.area Predicted prop.orientation Ground Truth prop.orientation
0 0.078961 0.057378 0.517969 0.420981 0.077577 0.047678 0.344652 0.063179
1 0.076532 0.053472 0.528462 0.483016 0.070977 0.041025 0.376496 0.164383
2 0.062214 0.031365 0.431319 0.442967 0.062974 0.030430 0.055206 0.224965
3 0.061292 0.009654 0.576696 0.616562 0.072311 0.009425 0.735703 0.963567
4 0.026345 0.178485 0.659435 0.442391 0.027871 0.148515 0.978202 0.185770
... ... ... ... ... ... ... ... ...
596 0.000177 NaN 0.475078 NaN 0.009407 NaN 0.201521 NaN
597 0.120884 NaN 0.463893 NaN 0.148343 NaN 0.233929 NaN
598 0.086195 NaN 0.501708 NaN 0.098568 NaN 0.284622 NaN
599 0.096668 NaN 0.521463 NaN 0.103131 NaN 0.054135 NaN
600 0.003081 NaN 0.533490 NaN 0.011303 NaN 0.027597 NaN

601 rows × 8 columns

../_images/Examples_example_download_run_multisolsegment_41_1.png

Do the same for pv-vision

[ ]:
# Load the pv-vision U-Net and its weights, to compare against the model above.
from tutorials.unet_model import construct_unet  # this tutorial is from pv-vision
from torch.nn import DataParallel

# NOTE: this rebinds the global `device`; cells run after this one that reference
# `device` get the CPU device.
device = torch.device("cpu")

# Build the network, then wrap it in DataParallel before the weights are loaded.
# NOTE(review): 5 is presumably the number of output channels — confirm against construct_unet.
unet = construct_unet(5)
unet = DataParallel(unet)

# Load the state dict exactly as stored in the file — no key prefixing is applied here.
# Presumably the stored keys already match the DataParallel-wrapped model; verify.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
state_dict = torch.load(
    "unet_oversample_low_final_model_for_paper/model.pt", map_location=device
)

unet.load_state_dict(state_dict)
# Take the underlying module back out of the DataParallel wrapper and place it on `device`.
model_pvvision = unet.module.to(device)
[ ]:
# Channel index -> class name used for the pv-vision model.
# NOTE(review): the model above is built with construct_unet(5) while only four
# channels are named here — confirm the channel order and count.
category_mapping = dict(
    enumerate(
        (
            "dark",
            "busbar",
            "crack",
            "cross",  # I think this is the third category "x-cracks"
        )
    )
)

idx = 7  # position of the image inside val_dataset
threshold = 0.5  # default
[ ]:
# Run the pv-vision model on validation image `idx` and display the result.
viz_functions.channeled_inference_and_show(
    val_loader,
    device=device,
    model=model_pvvision,
    category_mapping=category_mapping,
    idx=idx,
    # Use the shared `threshold` variable rather than a second literal 0.5, so the
    # figure always matches the threshold used by the feature extraction below.
    threshold=threshold,
)
# NOTE(review): the recorded output shows an empty "0 Axes" figure after the helper
# call, so this may save a blank image — confirm the saved file has content.
plt.savefig("pvvision_model_prediction.png", dpi=300)
../_images/Examples_example_download_run_multisolsegment_45_0.png
<Figure size 640x480 with 0 Axes>
[ ]:
# All images: predict crack masks with pv-vision and extract pvimage features.
m_idx = len(val_dataset)  # number of validation images to process
masks = []
impaths = []

# Inference only: one no_grad block around the loop keeps PyTorch from recording
# an autograd graph for each of the forward passes.
with torch.no_grad():
    for i in range(m_idx):
        img, _ = val_dataset[i]
        img = img.to(device)
        img_path = val_dataset.get_img_path(i)

        # model inference
        logits = model_pvvision(img.unsqueeze(0)).detach().cpu()
        probs = torch.sigmoid(logits)
        pred_mask = (probs > threshold).float().squeeze(0).numpy()  # (n_classes, H, W)

        # Extract crack channel (index 2) and convert to uint8 [0,255]
        crack_mask = (pred_mask[2] * 255).astype(np.uint8)

        masks.append(crack_mask)
        impaths.append(str(img_path))

# Build dataframe of image paths expected by feature_extraction_crack_mask
dfinfo_multi = pd.DataFrame({"impath": impaths})

# Run PVImage feature extraction across all predicted crack masks
dffeatures_multi_pvvision = features.feature_extraction_crack_mask(masks, dfinfo_multi)
# Report the row count of the pv-vision frame built in this cell, not the earlier
# dffeatures_multi frame; these are predictions, not ground truth.
print(
    f"Processed {len(masks)} predicted crack masks, extracted {len(dffeatures_multi_pvvision)} feature rows."
)
display(dffeatures_multi_pvvision.head())

# Tag every feature row with the number of rows that share its cell_number.
dffeatures_multi_pvvision["count_features"] = dffeatures_multi_pvvision.groupby(
    "cell_number"
)["cell_number"].transform("count")
# NOTE: dffeatures_multi_count and featurescounted_multi are rebound here and now
# describe pv-vision instead of the earlier model.
dffeatures_multi_count = dffeatures_multi_pvvision.sort_values(
    by="cell_number"
).drop_duplicates("cell_number", keep="first")
dffeatures_multi_count.reset_index(drop=True, inplace=True)

featurescounted_multi = pd.concat(
    [dffeatures_multi_count["count_features"]], axis=1, ignore_index=True
)
featurescounted_multi.columns = ["Original"]

# Show basic outputs
print(
    f"Processed {len(masks)} images, extracted {len(dffeatures_multi_pvvision)} feature rows."
)
display(featurescounted_multi.head())


# Save the pv-vision features to disk and read them straight back.
# Only unpickle files you wrote yourself: pickle can execute code on load.
dffeatures_multi_pvvision.to_pickle("pvvision_dffeatures.pickle")
dffeatures_multi_pvvision = pd.read_pickle("pvvision_dffeatures.pickle")
Processed 585 ground-truth images, extracted 572 feature rows.
cell_number i prop.perimeter slope prop.convex_area prop.area prop.orientation
0 mxy_sa19965_sub_EL_9 1 552.83557 -0.524613 7743.0 3487.0 -0.699696
0 mxy_sa19965_sub_EL_9 4 198.325902 -0.461665 1710.0 1447.0 -0.683089
0 SW2 0 153.59798 -0.985689 1174.0 924.0 -1.113709
0 SW2 10 127.313708 0.557848 809.0 630.0 1.319049
0 SW2 2 115.213203 0.222527 666.0 580.0 0.33296
Processed 585 images, extracted 572 feature rows.
Original
0 1
1 2
2 1
3 2
4 9
[ ]:
# Add ground truth: per-cell crack counts, pv-vision predictions vs. labels.
pred_counts = (
    dffeatures_multi_pvvision.groupby("cell_number").size().rename("pred_crack_count")
)
gt_counts = dffeatures_gt.groupby("cell_number").size().rename("gt_crack_count")

# Align on cell_number; a cell missing on one side gets a count of 0.
count_comparison = (
    pd.concat([pred_counts, gt_counts], axis=1).fillna(0).astype(int).reset_index()
)
count_comparison.columns = ["cell_number", "pred_crack_count", "gt_crack_count"]
count_comparison["count_diff"] = (
    count_comparison["pred_crack_count"] - count_comparison["gt_crack_count"]
)

# Min-max scale the shape features of the pv-vision cracks to [0, 1].
scaler = MinMaxScaler()
columnstonormalize = [
    " prop.perimeter",
    " slope",
    " prop.area",
    " prop.orientation",
]
# Build the scaled frame directly rather than assigning into a column slice:
# assigning into a slice is chained assignment, which pandas warns about and
# which can silently fail to update. The original index is kept.
# NOTE: dffeatures_multi_norm is rebound here and now holds pv-vision features.
dffeatures_multi_norm = pd.DataFrame(
    scaler.fit_transform(dffeatures_multi_pvvision[columnstonormalize]),
    columns=columnstonormalize,
    index=dffeatures_multi_pvvision.index,
)
fig = violinplt(dffeatures_multi_norm[columnstonormalize], figsize=None)
../_images/Examples_example_download_run_multisolsegment_47_0.png
[57]:
# Side-by-side violins of the per-cell crack counts for pv-vision vs. ground truth.
vp = violinplt(
    count_comparison.rename(
        columns={
            "pred_crack_count": "Predicted",
            "gt_crack_count": "Ground Truth",
        }
    )[["Predicted", "Ground Truth"]],
    figsize=(6, 4),
)
plt.title("pv-vision")
[57]:
Text(0.5, 1.0, 'pv-vision')
../_images/Examples_example_download_run_multisolsegment_48_1.png
[58]:
feature_cols = [
    " prop.perimeter",
    " slope",
    " prop.area",
    " prop.orientation",
]

# Mean of each scaled feature, pv-vision predictions next to ground truth.
# NOTE(review): the two frames were scaled by separately fitted scalers, so their
# [0, 1] ranges refer to different minima/maxima — confirm this comparison is intended.
feature_comparison = pd.concat(
    [
        dffeatures_multi_norm[feature_cols].mean().rename("Predicted_mean"),
        dffeatures_gt_norm[feature_cols].mean().rename("GroundTruth_mean"),
    ],
    axis=1,
)
feature_comparison["abs_diff"] = (
    feature_comparison["Predicted_mean"]
    .sub(feature_comparison["GroundTruth_mean"])
    .abs()
)
feature_comparison

[58]:
Predicted_mean GroundTruth_mean abs_diff
prop.perimeter 0.122926 0.170860 0.047934
slope 0.562446 0.457917 0.104529
prop.area 0.134435 0.153094 0.018659
prop.orientation 0.394483 0.337139 0.057343
[59]:
# For each feature, build a "Predicted" / "Ground Truth" column pair. Rows are
# paired by position only and the shorter column is padded with NaN.
feature_pairs = []

for col in feature_cols:
    in_both_frames = (
        col in dffeatures_multi_norm.columns and col in dffeatures_gt_norm.columns
    )
    if not in_both_frames:
        print(f"Skipping {col}: not found in features dataframe.")
        continue

    label = col.strip()
    pred = dffeatures_multi_norm[col].reset_index(drop=True)
    gt = dffeatures_gt_norm[col].reset_index(drop=True)

    max_len = max(len(pred), len(gt))
    if max_len == 0:
        print(f"Skipping {label}: no data to plot.")
        continue

    # Extend both columns to the common length; the new positions become NaN.
    pred = pred.reindex(range(max_len))
    gt = gt.reindex(range(max_len))

    pair = pd.DataFrame({f"Predicted {label}": pred, f"Ground Truth {label}": gt})
    feature_pairs.append(pair)

if feature_pairs:
    # Join all pairs side by side, coerce to numbers, and drop all-NaN columns.
    feature_violin_df = (
        pd.concat(feature_pairs, axis=1)
        .apply(pd.to_numeric, errors="coerce")
        .dropna(axis=1, how="all")
    )

    if feature_violin_df.empty:
        print("No numeric feature data available for violin plots.")
    else:
        # Render comparison violins
        vp = violinplt(feature_violin_df, figsize=(10, 5))
        plt.title("pv-vision")
else:
    print("No feature data available for violin plots.")
    feature_violin_df = pd.DataFrame()

feature_violin_df

[59]:
Predicted prop.perimeter Ground Truth prop.perimeter Predicted slope Ground Truth slope Predicted prop.area Ground Truth prop.area Predicted prop.orientation Ground Truth prop.orientation
0 0.224055 0.057378 0.551249 0.420981 0.216021 0.047678 0.277144 0.063179
1 0.048634 0.053472 0.556411 0.483016 0.075988 0.041025 0.282431 0.164383
2 0.026501 0.031365 0.513440 0.442967 0.040088 0.030430 0.145335 0.224965
3 0.013495 0.009654 0.640013 0.616562 0.019907 0.009425 0.919853 0.963567
4 0.007507 0.178485 0.612516 0.442391 0.016474 0.148515 0.605911 0.185770
... ... ... ... ... ... ... ... ...
1168 0.018054 NaN 0.626138 NaN 0.036793 NaN 0.738026 NaN
1169 0.012385 NaN 0.602619 NaN 0.018808 NaN 0.542269 NaN
1170 0.001909 NaN 0.631208 NaN 0.006796 NaN 0.664104 NaN
1171 0.131226 NaN 0.599652 NaN 0.177993 NaN 0.994440 NaN
1172 0.001499 NaN 0.592021 NaN 0.012287 NaN 0.003767 NaN

1173 rows × 8 columns

../_images/Examples_example_download_run_multisolsegment_50_1.png
[ ]: