Source code for Fishing_Line_Material_Properties_Analysis.analysis

"""Material analysis module for fishing line properties."""

import logging
import os
import types
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import numpy as np
import pandas as pd
from kneed import KneeLocator


try:
    from scipy.integrate import trapezoid as trapz
except ImportError:
    # Fallback for older scipy versions
    from scipy.integrate import trapz


class MaterialAnalyzer:
    """Analyzer for fishing line material properties.

    Loads force/stroke tensile-test CSV files, derives stress/strain curves,
    estimates an elastic cutoff, integrates the force-stroke curve up to that
    cutoff and converts the resulting energy into projectile velocities for a
    set of effective line lengths. Results are attached to each loaded
    DataFrame as ``df.meta`` (a ``types.SimpleNamespace``).
    """

    # Fallbacks used when the file name does not encode the specimen details.
    _DEFAULT_SIZE_CODE = 21  # tenths of a millimetre -> 2.1 mm diameter
    _DEFAULT_CTYPE = "crimp"
    _DEFAULT_LENGTH_MM = 254.0  # 10 inches

    def __init__(
        self,
        efficiency: float = 0.5,
        projectile_mass_kg: float = 0.045,
        line_accel_length_m: float = 1.0,
    ) -> None:
        """Initialize the MaterialAnalyzer.

        Args:
            efficiency: Energy conversion efficiency (0-1), accounts for losses.
            projectile_mass_kg: Mass of projectile in kg (default 45 g).
            line_accel_length_m: Length of line that accelerates with the
                projectile, in metres (default 1 m).
        """
        self.log = logging.getLogger(__name__)
        self.efficiency = efficiency
        self.projectile_mass_kg = projectile_mass_kg
        self.line_accel_length_m = line_accel_length_m
        # Line density estimate (kg/m^3) - adjust based on actual material.
        # Typical monofilament: ~1100 kg/m^3.
        self.line_density_kg_m3 = 1100.0

    def load_file(self, filepath: str) -> pd.DataFrame:
        """Load material test data from a CSV file.

        Args:
            filepath: Path to the CSV file (``str`` or ``os.PathLike``).

        Returns:
            DataFrame with numeric ``Force``/``Stroke`` columns, derived
            ``Stress``/``Strain`` columns and a ``meta`` attribute holding the
            parsed metadata and the computed material properties.

        Raises:
            ValueError: If the file is not a CSV file, if the ``Force`` or
                ``Stroke`` column is missing, or if no numeric rows remain.
        """
        filepath = os.fspath(filepath)
        self.log.debug("Loading file: %s", filepath)
        # Extension check is case-insensitive so "RUN.CSV" is accepted too.
        if not filepath.lower().endswith(".csv"):
            raise ValueError(f"File must be CSV format: {filepath}")

        df = pd.read_csv(filepath, skiprows=self._count_preamble_rows(filepath))

        # Some exports carry a units row ("sec", "N", "mm") below the header.
        if len(df) > 0 and self._is_units_row(df.iloc[0]):
            df = df.drop(df.index[0]).reset_index(drop=True)
            self.log.debug("Removed units row from data")

        required_columns = ["Force", "Stroke"]
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            available_cols = list(df.columns)
            raise ValueError(
                f"Missing required columns: {missing_columns}. "
                f"Available columns: {available_cols}"
            )

        # Parse metadata from filename and directory structure.
        metadata = self._parse_metadata(filepath)

        # Coerce to numbers; anything unparsable becomes NaN and is dropped.
        df["Force"] = pd.to_numeric(df["Force"], errors="coerce")
        df["Stroke"] = pd.to_numeric(df["Stroke"], errors="coerce")
        df = df.dropna(subset=["Force", "Stroke"]).reset_index(drop=True)
        if df.empty:
            # Fail with a clear message instead of an opaque reduction error
            # from numpy further down the pipeline.
            raise ValueError(f"No numeric Force/Stroke data found in: {filepath}")

        # Stress and strain (still useful for visualization).
        area = np.pi * 0.25 * (metadata.size * 1e-3) ** 2  # diameter mm -> m
        if area <= 0:
            self.log.warning("Zero or negative area detected, using default")
            area = np.pi * 0.25 * (2.1 * 1e-3) ** 2
        df["Stress"] = (df["Force"] - df["Force"].min()) / area
        df["Strain"] = (df["Stroke"] - df["Stroke"].min()) / metadata.length

        # Attach metadata, then extend it with the derived properties.
        df.meta = metadata
        self._calculate_material_properties(df)

        self.log.debug("File loaded successfully")
        return df

    def _count_preamble_rows(self, filepath: str) -> int:
        """Return 1 if the file starts with an extra line above the header, else 0."""
        # "utf-8-sig" transparently strips a BOM so the prefix checks still
        # match; errors="replace" keeps this sniffing step from crashing on
        # stray bytes.
        with open(filepath, encoding="utf-8-sig", errors="replace") as f:
            first_line = f.readline().strip()
            second_line = f.readline().strip()

        has_extra_header = (
            first_line.startswith(('"1 _ 1"', "1 _ 1"))
            or "Unnamed" in first_line
            or ("Time" not in first_line and "Time" in second_line)
        )
        if has_extra_header:
            self.log.debug(
                "Detected extra header row: %r, skipping first line", first_line
            )
            return 1
        return 0

    @staticmethod
    def _is_units_row(row: pd.Series) -> bool:
        """Return True if any cell of *row* looks like a unit label."""
        for cell in row.values:
            text = str(cell).strip().strip("()[]").strip().lower()
            # "n" must match the whole cell: as a substring it also matches
            # "nan", which made real data rows look like unit rows.
            if text in ("n", "kn"):
                return True
            if any(unit in text for unit in ("sec", "mm", "pa")):
                return True
        return False

    def _parse_metadata(self, filepath: str) -> types.SimpleNamespace:
        """Parse metadata from file path and name.

        The file name is expected to look like ``test--line-crimp-XX--Y.csv``
        where ``XX`` is the diameter in tenths of a millimetre and ``Y`` the
        run number. Anything that cannot be parsed falls back to the defaults
        (size code 21, type ``"crimp"``, run 0).

        Args:
            filepath: Path of the data file.

        Returns:
            Namespace with ``filepath``, ``size`` (diameter in mm), ``ctype``,
            ``test_run`` and ``length`` (gauge length in mm).
        """
        # Treat both separators alike so Windows-style paths also work.
        normalized = filepath.replace("\\", "/")
        dname, _, fname = normalized.rpartition("/")

        size_code = self._DEFAULT_SIZE_CODE
        ctype = self._DEFAULT_CTYPE
        run_num = 0

        slugs = fname.split("--")
        if len(slugs) >= 3:
            spec = slugs[1].split("-")
            if len(spec) >= 3:
                ctype = spec[1]
                try:
                    size_code = int(spec[2])
                except ValueError:
                    # A non-numeric size slug must not abort the whole load.
                    self.log.warning(
                        "Could not parse line size from %r, using default %s",
                        fname,
                        self._DEFAULT_SIZE_CODE,
                    )
            try:
                run_num = int(slugs[2].split(".")[0])
            except (ValueError, IndexError):
                run_num = 0

        metadata = types.SimpleNamespace()
        metadata.filepath = filepath
        metadata.size = size_code / 10  # diameter in mm
        metadata.ctype = ctype
        metadata.test_run = run_num
        metadata.length = self._parse_length(dname.split("/"), run_num)  # mm
        return metadata

    def _parse_length(self, dname_parts: List[str], run_num: int) -> float:
        """Parse specimen length from directory structure.

        Args:
            dname_parts: Directory components; the first one shaped like
                ``"10in"`` (or ``"7.5in"``) provides the length in inches.
            run_num: Test run number. Not used by the current implementation;
                kept for interface compatibility.

        Returns:
            Gauge length in mm, or 254.0 (10 inches) when no component yields
            a positive, finite length.
        """
        for part in dname_parts:
            if "in" not in part or not any(char.isdigit() for char in part):
                continue
            try:
                length_inches = float(part.replace("in", ""))
            except ValueError:
                continue
            # A zero or negative gauge length would make strain undefined.
            if np.isfinite(length_inches) and length_inches > 0:
                return length_inches * 25.4  # Convert to mm
        return self._DEFAULT_LENGTH_MM

    @staticmethod
    def _initial_slope_modulus(strain: np.ndarray, stress: np.ndarray) -> Any:
        """Fit a line through the first 20% of the curve up to peak stress.

        Returns the fitted slope, or 0 when fewer than two points would be
        used for the fit.
        """
        fit_max = int(int(np.argmax(stress)) * 0.2)
        if fit_max > 1:
            slope, _ = np.polyfit(strain[:fit_max], stress[:fit_max], 1)
            return slope
        return 0

    def _calculate_material_properties(self, df: pd.DataFrame) -> None:
        """Calculate material properties using the force-stroke method.

        This method:
        1. Finds the elastic cutoff via ``_find_yield_point``
        2. Computes recoverable energy from the F-S curve (J)
        3. Normalizes to energy per meter (J/m)
        4. Scales to several effective lengths and converts to velocity

        All results are stored on ``df.meta``; nothing is returned.

        Args:
            df: Frame with ``Force``, ``Stroke``, ``Stress`` and ``Strain``
                columns and a ``meta`` namespace providing ``length`` (mm)
                and ``size`` (mm).
        """
        force = df["Force"].values
        stroke_mm = df["Stroke"].values
        stroke_m = (stroke_mm - stroke_mm.min()) / 1000.0  # Convert to meters
        stress = df["Stress"].values
        strain = df["Strain"].values

        knee_strain = self._find_yield_point(df, "Stress", "Strain")

        # Yield point is stored for visualization.
        yield_point_strain = None
        yield_point_stress = None
        modulus = 0

        if knee_strain is not None and len(knee_strain) > 0:
            knee_strain_val = knee_strain[0]
            yield_point_strain = knee_strain_val

            # Positional index of the sample closest to the knee strain.
            cutoff_idx = int(np.argmin(np.abs(strain - knee_strain_val)))
            yield_point_stress = stress[cutoff_idx]

            # Modulus from the elastic region when it has enough samples,
            # otherwise from the initial slope of the curve.
            elastic_data = df[df["Strain"] <= knee_strain_val]
            if len(elastic_data) > 5:
                modulus, _ = np.polyfit(
                    elastic_data["Strain"], elastic_data["Stress"], 1
                )
            else:
                modulus = self._initial_slope_modulus(strain, stress)
        else:
            # Fallback: cut at 70% of the way to the maximum-strain sample.
            cutoff_idx = int(int(np.argmax(strain)) * 0.7)
            modulus = self._initial_slope_modulus(strain, stress)

        # 1. Recoverable energy from the F-S curve (up to the elastic cutoff).
        e_sample_j = trapz(force[:cutoff_idx], stroke_m[:cutoff_idx])

        # 2. Normalize by gauge length to get energy per meter.
        l_gauge_m = df.meta.length / 1000.0  # mm to m
        e_per_m = e_sample_j / l_gauge_m

        # 3. Line properties.
        diameter_m = df.meta.size / 1000.0  # mm to m
        area_m2 = np.pi * 0.25 * diameter_m**2
        line_mass_per_m = self.line_density_kg_m3 * area_m2

        # 4. Scale to multiple effective lengths for sensitivity analysis.
        l_eff_options = [0.5, 1.0, 2.0, 5.0, 10.0, 20.0]  # meters

        # Effective mass = projectile + the portion of line that accelerates.
        m_eff = self.projectile_mass_kg + line_mass_per_m * self.line_accel_length_m

        velocities = []
        kinetic_energies = []
        for l_eff in l_eff_options:
            e_total = e_per_m * l_eff  # recoverable energy at this length
            if m_eff > 0 and e_total > 0:
                # v = sqrt(2 * efficiency * E / m_eff)
                v = np.sqrt(2 * self.efficiency * e_total / m_eff)
                ke = 0.5 * self.projectile_mass_kg * v**2
            else:
                v = 0
                ke = 0
            velocities.append(v)
            kinetic_energies.append(ke)

        # The 1 m effective length is the reference for the primary values.
        idx_1m = l_eff_options.index(1.0)

        meta = df.meta
        meta.modulus = modulus
        meta.yield_stress = float(stress.max())
        meta.max_force = float(force.max())
        meta.yield_point_strain = yield_point_strain
        meta.yield_point_stress = yield_point_stress

        # Energy calculation results.
        meta.E_sample_J = e_sample_j
        meta.E_per_m = e_per_m
        meta.L_gauge_m = l_gauge_m
        meta.line_mass_per_m = line_mass_per_m

        # Velocity/energy for the different effective lengths.
        meta.L_eff_options = l_eff_options
        meta.velocities = velocities
        meta.kinetic_energies = kinetic_energies

        # Primary values (1 m reference).
        meta.velocity = velocities[idx_1m]
        meta.kinetic_energy = kinetic_energies[idx_1m]

        self.log.info(
            "Energy per meter: %.4f J/m | Velocity range: %.2f-%.2f m/s "
            "(L_eff: %.1f-%.1f m)",
            e_per_m,
            min(velocities),
            max(velocities),
            min(l_eff_options),
            max(l_eff_options),
        )

    def _knee_candidate(self, subset_strain: pd.Series, subset_stress: pd.Series) -> Any:
        """Return the knee strain found by ``KneeLocator``, or None."""
        try:
            kn = KneeLocator(
                subset_strain,
                subset_stress,
                curve="concave",
                direction="increasing",
                S=50,
            )
            return kn.knee
        except Exception as e:  # best-effort: the next method is tried instead
            self.log.warning("Knee detection failed: %s", e)
            return None

    def _modulus_drop_candidate(
        self, subset_strain: pd.Series, subset_stress: pd.Series
    ) -> Any:
        """Return the strain where the local slope first drops below 70%.

        The local slope is a straight-line fit over a sliding window; the
        reference slope is the mean of the first five windows. Returns None
        when there are too few windows or no drop is found.
        """
        try:
            window_size = max(5, len(subset_stress) // 20)
            moduli = []
            strain_points = []
            for i in range(window_size, len(subset_stress) - window_size):
                window_strain = subset_strain.iloc[i - window_size : i + window_size]
                window_stress = subset_stress.iloc[i - window_size : i + window_size]
                if len(window_strain) > 3:
                    slope, _ = np.polyfit(window_strain, window_stress, 1)
                    moduli.append(slope)
                    strain_points.append(subset_strain.iloc[i])

            if len(moduli) > 10:
                moduli_array = np.array(moduli)
                threshold_modulus = np.mean(moduli_array[:5]) * 0.7
                drop_indices = np.where(moduli_array < threshold_modulus)[0]
                if len(drop_indices) > 0:
                    return np.array(strain_points)[drop_indices[0]]
        except Exception as e:  # best-effort: the caller falls back
            self.log.warning("Modulus drop detection failed: %s", e)
        return None

    def _find_yield_point(
        self, df: pd.DataFrame, stress_col: str, strain_col: str
    ) -> List[float]:
        """Estimate the yield strain of a stress-strain curve.

        Only the samples between 40% and 85% of the way to the peak-stress
        sample are searched. Knee detection is tried first, then
        modulus-drop detection; a candidate is accepted only if it lies
        between 0.15 and 90% of the maximum strain. Otherwise a fixed
        fraction of the maximum strain is returned.

        Args:
            df: Frame holding the curve.
            stress_col: Name of the stress column.
            strain_col: Name of the strain column.

        Returns:
            Single-element list with the estimated yield strain.
        """
        try:
            stress = df[stress_col]
            strain = df[strain_col]

            # Position (not label) of the peak, since it is used for slicing.
            max_stress_idx = int(np.argmax(stress.values))
            start_idx = int(max_stress_idx * 0.4)
            end_idx = int(max_stress_idx * 0.85)
            if end_idx - start_idx < 20:
                return [strain.max() * 0.7]

            subset_stress = stress.iloc[start_idx:end_idx]
            subset_strain = strain.iloc[start_idx:end_idx]
            max_strain = strain.max()

            for detector in (self._knee_candidate, self._modulus_drop_candidate):
                candidate = detector(subset_strain, subset_stress)
                if candidate is not None and 0.15 < candidate < (max_strain * 0.9):
                    return [candidate]

            # Fallback: fixed fraction of the maximum strain.
            if max_strain > 0.4:
                return [max_strain * 0.65]
            return [max_strain * 0.75]
        except Exception as e:
            self.log.warning("Yield detection failed: %s", e)
            return [df[strain_col].max() * 0.7]

    def calculate_summary_stats(self, data_list: List[pd.DataFrame]) -> Dict[str, Any]:
        """Calculate summary statistics for a group of test data.

        Args:
            data_list: Frames as returned by ``load_file``. Frames without a
                ``meta`` attribute are counted in ``sample_count`` but do not
                contribute to the averages.

        Returns:
            Empty dict for empty input; otherwise a dict with
            ``sample_count``, ``<name>_avg``/``<name>_std`` for ``modulus``,
            ``yield_stress``, ``max_force`` and ``E_per_m`` (0 when no value
            is available), plus ``length``, ``size`` and ``ctype`` taken from
            the first frame.
        """
        if not data_list:
            return {}

        # A frame may have lost its custom attribute (e.g. after a copy).
        metas = [getattr(df, "meta", None) for df in data_list]

        stats: Dict[str, Any] = {"sample_count": len(data_list)}
        for key, attr in (
            ("modulus", "modulus"),
            ("yield_stress", "yield_stress"),
            ("max_force", "max_force"),
            ("E_per_m", "E_per_m"),
        ):
            values = [getattr(m, attr) for m in metas if hasattr(m, attr)]
            stats[f"{key}_avg"] = np.mean(values) if values else 0
            stats[f"{key}_std"] = np.std(values) if values else 0

        first_meta = metas[0]
        stats["length"] = getattr(first_meta, "length", self._DEFAULT_LENGTH_MM)
        # meta.size is a diameter in mm, so the fallback is 2.1 (not the raw
        # file-name code 21).
        stats["size"] = getattr(first_meta, "size", self._DEFAULT_SIZE_CODE / 10)
        stats["ctype"] = getattr(first_meta, "ctype", self._DEFAULT_CTYPE)
        return stats

    def generate_summary_report(
        self, group_results: Dict[str, Any], output_dir: str
    ) -> None:
        """Generate a summary report of all test results.

        Writes ``summary_report.txt`` (UTF-8) into *output_dir*, creating the
        directory if needed.

        Args:
            group_results: Mapping of group name -> mapping of length name ->
                stats dict as produced by ``calculate_summary_stats``.
            output_dir: Directory that receives the report.
        """
        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        output_path = out_dir / "summary_report.txt"

        # Explicit UTF-8: the report contains "±", which the platform default
        # encoding cannot always represent.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("Fishing Line Material Properties Analysis Summary\n")
            f.write("=" * 50 + "\n\n")
            f.write("Energy Calculation Method: Force-Stroke Integration\n")
            f.write(f"Efficiency Factor: {self.efficiency}\n")
            # __init__ does not define alpha_mass_fraction; report it only if
            # something else set it, otherwise report the accelerating line
            # length that the energy model actually uses.
            alpha = getattr(self, "alpha_mass_fraction", None)
            if alpha is not None:
                f.write(f"Line Mass Fraction: {alpha}\n")
            else:
                f.write(f"Line Accel Length: {self.line_accel_length_m}m\n")
            f.write(f"Projectile Mass: {self.projectile_mass_kg * 1000:.1f}g\n\n")

            for group_name, group_data in group_results.items():
                f.write(f"Group: {group_name}\n")
                f.write("-" * 30 + "\n")
                for length_name, stats in group_data.items():
                    f.write(f" Length: {length_name}\n")
                    f.write(f" Sample Count: {stats.get('sample_count', 0)}\n")
                    f.write(
                        f" Energy/meter: {stats.get('E_per_m_avg', 0):.4f} ± "
                        f"{stats.get('E_per_m_std', 0):.4f} J/m\n"
                    )
                    f.write(
                        f" Modulus: {stats.get('modulus_avg', 0):.2e} ± "
                        f"{stats.get('modulus_std', 0):.2e} Pa\n"
                    )
                    f.write(
                        f" Yield Stress: {stats.get('yield_stress_avg', 0):.2e} ± "
                        f"{stats.get('yield_stress_std', 0):.2e} Pa\n"
                    )
                    f.write(
                        f" Max Force: {stats.get('max_force_avg', 0):.2f} ± "
                        f"{stats.get('max_force_std', 0):.2f} N\n"
                    )
                    f.write("\n")
                f.write("\n")

        self.log.info("Summary report saved to %s", output_path)