Land Cover Classification from Hyperspectral Satellite Imagery: Does Deep Learning Actually Beat Random Forest?¶
An engineer's look at the other side of the pipeline
I build data pipelines and visualization tools for hyperspectral imagery -- I'm the person who makes sure the data gets from satellite to scientist in one piece. I work with this data every day, but usually on the engineering side: ingestion, transformation, storage, and making it accessible to the people who actually interpret it.
I got curious: what happens after my pipeline hands the data off? Specifically, what does the classification workflow look like, and does the deep learning hype hold up against the simpler methods I see researchers reaching for?
So I built this notebook to find out. I'm not a remote sensing scientist -- I'm a software engineer who wanted to understand the downstream side of the data I move. If you do this for a living, I'd genuinely love to hear what I'm getting wrong or oversimplifying.
What you'll find here:
- Loading and exploring hyperspectral satellite data (224 spectral bands)
- A Random Forest baseline -- the approach I see researchers use most often
- A 1D Convolutional Neural Network -- treating spectral signatures as signals
- An honest comparison from someone learning out loud
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (classification_report, confusion_matrix,
accuracy_score, cohen_kappa_score)
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import time
import warnings
warnings.filterwarnings('ignore')
# Reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {DEVICE}")
# Plot style
sns.set_theme(style="whitegrid")
plt.rcParams['figure.dpi'] = 120
plt.rcParams['figure.figsize'] = (10, 6)
Using device: cuda
The Data¶
About EnMAP¶
EnMAP (Environmental Mapping and Analysis Program) is a German hyperspectral satellite that launched in April 2022. It captures 224 spectral bands spanning 420-2450 nm -- visible, near-infrared, and shortwave infrared. That's an order of magnitude more spectral detail than Sentinel-2 or Landsat.
I've moved EnMAP data through pipelines but never sat down to actually explore what's in it from the analysis side. Each pixel contains 224 measurements -- essentially a reflectance value at each wavelength. Different materials (water, vegetation, soil, etc.) reflect light differently across those wavelengths, creating distinct spectral "signatures." That's what makes classification possible.
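To make the "spectral signature" idea concrete, here's a toy sketch (not part of the pipeline below) of the kind of index people derive from just two of those 224 measurements. The simplified reflectance values and the helper names are made up for illustration; the wavelengths match EnMAP's range.

```python
import numpy as np

# Toy illustration: two idealized "pixels" sampled at EnMAP-like wavelengths.
wavelengths = np.linspace(420, 2450, 224)  # nm

# Crude signatures: vegetation is dark in visible red, bright in NIR; water is dark in both.
vegetation = np.where(wavelengths < 700, 0.05, 0.45)
water = np.where(wavelengths < 700, 0.06, 0.01)

def band_index(target_nm):
    """Index of the band closest to a target wavelength."""
    return int(np.argmin(np.abs(wavelengths - target_nm)))

def ndvi(spectrum):
    """NDVI from red (~670 nm) and NIR (~850 nm) reflectance."""
    red, nir = spectrum[band_index(670)], spectrum[band_index(850)]
    return (nir - red) / (nir + red)

print(f"NDVI vegetation: {ndvi(vegetation):.2f}")  # strongly positive: vegetation
print(f"NDVI water:      {ndvi(water):.2f}")       # negative: water
```

Classification generalizes this idea: instead of one hand-picked ratio, a model gets all 224 bands and learns which combinations separate the classes.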
EnMAP data is freely available from the DLR EOWEB portal, though you need to register (free, takes 1-2 days for approval).
For this demo: The notebook supports loading real EnMAP/AVIRIS GeoTIFF data. If you're running this without a downloaded scene, it generates synthetic hyperspectral data with realistic spectral signatures for six land cover types. The analysis pipeline is identical either way -- swap in your data and everything just works.
To use your own data: Set DATA_MODE = "enmap" or DATA_MODE = "aviris" and update the file paths in the next cell.
# ================================================================
# DATA CONFIGURATION — Change this to use real satellite data
# ================================================================
DATA_MODE = "synthetic" # Options: "enmap", "aviris", "synthetic"
# Only needed if DATA_MODE is "enmap" or "aviris":
DATA_PATH = "data/enmap_scene.tif" # Path to hyperspectral GeoTIFF
LABELS_PATH = "data/labels.tif" # Path to land cover labels GeoTIFF
# Scene parameters (used for synthetic generation)
HEIGHT, WIDTH = 200, 200
N_BANDS = 224
N_CLASSES = 6
CLASS_NAMES = ['Water', 'Forest', 'Cropland', 'Urban', 'Bare Soil', 'Wetland']
CLASS_COLORS = ['#2166ac', '#1b7837', '#a6d96a', '#d73027', '#f4a582', '#80cdc1']
def load_enmap_data(data_path, labels_path):
"""Load EnMAP or AVIRIS hyperspectral GeoTIFF data."""
import rasterio
with rasterio.open(data_path) as src:
data = src.read() # (bands, height, width)
data = np.transpose(data, (1, 2, 0)) # -> (height, width, bands)
with rasterio.open(labels_path) as src:
labels = src.read(1) # (height, width)
# Get unique classes
unique_classes = np.unique(labels)
unique_classes = unique_classes[unique_classes > 0] # drop background/nodata
# Remap to 0-indexed
label_map = np.zeros_like(labels)
class_names = [f"Class {i}" for i in unique_classes]
for new_idx, old_idx in enumerate(unique_classes):
label_map[labels == old_idx] = new_idx
wavelengths = np.linspace(420, 2450, data.shape[2])  # approximation — real band centers should come from the product metadata
return data, label_map, class_names, wavelengths
def generate_synthetic_scene(height, width, n_bands, n_classes, class_names):
"""
Generate synthetic hyperspectral data with physically-motivated
spectral signatures for common land cover types.
Designed to produce realistic classification difficulty — some classes
are spectrally similar (forest/cropland, water/wetland), creating the
kind of confusion you see with real satellite data.
"""
wavelengths = np.linspace(420, 2450, n_bands) # EnMAP spectral range in nm
wl = wavelengths # shorthand
# --- Define spectral signatures based on known reflectance characteristics ---
signatures = np.zeros((n_classes, n_bands))
# Water: Low overall, drops after ~700nm, near zero in SWIR
signatures[0] = (0.04 * np.exp(-((wl - 480) ** 2) / (2 * 80 ** 2)) +
0.02 * np.exp(-((wl - 560) ** 2) / (2 * 100 ** 2)) +
0.005 * np.exp(-((wl - 900) ** 2) / (2 * 300 ** 2)))
signatures[0] = np.clip(signatures[0], 0.001, None)
# Forest: Chlorophyll absorption, strong red edge, NIR plateau, water absorption
veg_base = 0.03 + 0.02 * (wl - 420) / 2030
chlorophyll_abs = 0.04 * np.exp(-((wl - 480) ** 2) / (2 * 30 ** 2))
chlorophyll_red = 0.06 * np.exp(-((wl - 670) ** 2) / (2 * 25 ** 2))
red_edge = 0.4 / (1 + np.exp(-(wl - 720) / 15))
nir_plateau = 0.45 * np.exp(-((wl - 900) ** 2) / (2 * 400 ** 2))
water_abs1 = 0.15 * np.exp(-((wl - 1400) ** 2) / (2 * 40 ** 2))
water_abs2 = 0.15 * np.exp(-((wl - 1900) ** 2) / (2 * 50 ** 2))
cellulose = 0.03 * np.exp(-((wl - 2100) ** 2) / (2 * 60 ** 2))
signatures[1] = veg_base - chlorophyll_abs - chlorophyll_red + red_edge + nir_plateau - water_abs1 - water_abs2 - cellulose
signatures[1] = np.clip(signatures[1], 0.01, 0.55)
# Cropland: Very similar to forest — this is the realistic hard pair
signatures[2] = signatures[1] * 0.85 + 0.02
signatures[2][wl > 1300] *= 0.88
signatures[2] = np.clip(signatures[2], 0.02, 0.45)
# Urban: Relatively flat, moderate reflectance
urban_base = 0.12 + 0.05 * (wl - 420) / 2030
urban_variation = 0.02 * np.sin(wl / 200)
signatures[3] = urban_base + urban_variation
signatures[3] = np.clip(signatures[3], 0.08, 0.25)
# Bare Soil: Gradually increasing, iron absorption features
soil_base = 0.08 + 0.15 * (wl - 420) / 2030
iron_abs = 0.04 * np.exp(-((wl - 900) ** 2) / (2 * 80 ** 2))
clay_abs = 0.03 * np.exp(-((wl - 2200) ** 2) / (2 * 40 ** 2))
signatures[4] = soil_base - iron_abs - clay_abs
signatures[4] = np.clip(signatures[4], 0.05, 0.30)
# Wetland: Mix of water and vegetation — should confuse with both
signatures[5] = 0.35 * signatures[0] + 0.65 * signatures[1]
signatures[5] = np.clip(signatures[5], 0.01, 0.40)
# --- Create spatial label map ---
label_map = np.zeros((height, width), dtype=int)
label_map[10:35, 20:180] = 0 # river
label_map[50:90, 120:175] = 0 # lake
label_map[40:100, 10:110] = 1
label_map[130:190, 10:70] = 1
label_map[100:130, 10:100] = 2
label_map[100:150, 100:180] = 2
label_map[150:195, 80:180] = 3
label_map[0:10, :] = 4
label_map[130:155, 70:100] = 4
label_map[35:50, 20:120] = 5
label_map[90:100, 120:180] = 5
from scipy.ndimage import gaussian_filter
for cls in range(n_classes):
mask = (label_map == cls).astype(float)
smoothed = gaussian_filter(mask, sigma=2)
label_map[smoothed > 0.7] = cls
# --- Generate pixel spectra with AGGRESSIVE noise for realistic difficulty ---
data = np.zeros((height, width, n_bands), dtype=np.float32)
from scipy.ndimage import gaussian_filter1d
# Sub-class definitions: list of (weight_offset_from_base, noise_scale) tuples
# Some sub-classes deliberately overlap with neighboring classes
sub_class_defs = {
0: [ # Water sub-types
(0.0, 0.012), # clear water (close to base)
(0.015, 0.015), # turbid water (brighter, noisier)
(-0.005, 0.010), # deep water (darker)
],
1: [ # Forest sub-types
(0.0, 0.04), # mixed forest
(0.03, 0.05), # dense broadleaf (brighter NIR)
(-0.03, 0.045), # sparse/stressed (shifts toward cropland!)
(-0.05, 0.05), # very sparse canopy (significant overlap with cropland)
],
2: [ # Cropland sub-types
(0.0, 0.045), # healthy crops
(0.02, 0.05), # mature/senescent (shifts toward forest)
(-0.02, 0.04), # young/fallow (lower reflectance)
(0.04, 0.055), # mixed vegetation (high overlap with forest)
],
3: [ # Urban sub-types
(0.0, 0.035), # concrete/asphalt
(0.03, 0.04), # bright rooftops
(-0.03, 0.04), # dark surfaces (overlap with bare soil)
],
4: [ # Bare Soil sub-types
(0.0, 0.03), # dry soil
(-0.02, 0.035), # moist soil (darker, toward urban)
(0.025, 0.035), # bright/sandy soil
],
5: [ # Wetland sub-types
(0.0, 0.04), # typical wetland
(0.03, 0.045), # heavily vegetated (toward forest)
(-0.02, 0.035), # open water wetland (toward water)
],
}
for cls in range(n_classes):
mask = label_map == cls
n_pixels = mask.sum()
if n_pixels == 0:
continue
base = signatures[cls]
sub_defs = sub_class_defs[cls]
n_sub = len(sub_defs)
# Create sub-class centroids with spectrally-correlated offsets
sub_centroids = np.zeros((n_sub, n_bands))
for s, (offset, _) in enumerate(sub_defs):
spectral_shift = np.random.randn(n_bands) * 0.03
spectral_shift = gaussian_filter1d(spectral_shift, sigma=10)
sub_centroids[s] = base + offset + spectral_shift
# Assign pixels to sub-classes (weighted toward first/most common)
weights = np.array([1.0 / (s + 1) for s in range(n_sub)])
weights /= weights.sum()
sub_assignments = np.random.choice(n_sub, size=n_pixels, p=weights)
# Generate spectra
pixel_spectra = np.zeros((n_pixels, n_bands), dtype=np.float32)
for i in range(n_pixels):
s = sub_assignments[i]
noise_scale = sub_defs[s][1]
noise = np.random.randn(n_bands) * noise_scale
noise = gaussian_filter1d(noise, sigma=2)
pixel_spectra[i] = sub_centroids[s] + noise
# Multiplicative brightness variation (illumination + topography)
brightness = 0.7 + 0.6 * np.random.rand(n_pixels, 1)
pixel_spectra *= brightness
data[mask] = pixel_spectra
# Global sensor noise
sensor_noise = np.random.randn(height, width, n_bands).astype(np.float32) * 0.008
data += sensor_noise
# Mixed pixels at class boundaries (3-pixel wide transition zone)
from scipy.ndimage import uniform_filter
for b in range(n_bands):
boundary_smooth = uniform_filter(data[:, :, b], size=5)
boundary_mask = np.zeros((height, width), dtype=bool)
for d in range(1, 3):
if d < height:
boundary_mask[d:, :] |= (label_map[d:, :] != label_map[:-d, :])
if d < width:
boundary_mask[:, d:] |= (label_map[:, d:] != label_map[:, :-d])
mix_factor = 0.4 + 0.2 * np.random.rand(*boundary_mask.shape)
data[boundary_mask, b] = (mix_factor[boundary_mask] * data[boundary_mask, b] +
(1 - mix_factor[boundary_mask]) * boundary_smooth[boundary_mask])
data = np.clip(data, 0.0, 1.0)
return data, label_map, class_names, wavelengths
# --- Load or generate data ---
if DATA_MODE == "synthetic":
print("Generating synthetic hyperspectral scene...")
data, labels, class_names, wavelengths = generate_synthetic_scene(
HEIGHT, WIDTH, N_BANDS, N_CLASSES, CLASS_NAMES)
print(f" Scene: {data.shape[0]}x{data.shape[1]} pixels, {data.shape[2]} bands")
print(f" Wavelength range: {wavelengths[0]:.0f}–{wavelengths[-1]:.0f} nm")
print(f" Classes: {class_names}")
else:
print(f"Loading {DATA_MODE.upper()} data from {DATA_PATH}...")
data, labels, class_names, wavelengths = load_enmap_data(DATA_PATH, LABELS_PATH)
N_CLASSES = len(class_names)
CLASS_NAMES = class_names
HEIGHT, WIDTH = data.shape[:2]
N_BANDS = data.shape[2]
print(f" Scene: {HEIGHT}x{WIDTH} pixels, {N_BANDS} bands")
print(f" Classes: {class_names}")
print(f"\nPixels per class:")
for i, name in enumerate(CLASS_NAMES):
count = (labels == i).sum()
print(f" {name}: {count:,} ({100 * count / labels.size:.1f}%)")
Generating synthetic hyperspectral scene...
  Scene: 200x200 pixels, 224 bands
  Wavelength range: 420–2450 nm
  Classes: ['Water', 'Forest', 'Cropland', 'Urban', 'Bare Soil', 'Wetland']

Pixels per class:
  Water: 15,350 (38.4%)
  Forest: 8,700 (21.8%)
  Cropland: 6,700 (16.8%)
  Urban: 4,400 (11.0%)
  Bare Soil: 2,750 (6.9%)
  Wetland: 2,100 (5.2%)
Exploring the Scene¶
Let's look at what we're working with. Hyperspectral data is rich -- 224 bands means 224 different "views" of the same area, each at a slightly different wavelength. A standard RGB composite uses three of those bands to approximate natural color.
This is the part of the data I rarely look at in my day-to-day work. I build the pipelines that load, transform, and serve this imagery -- but I usually hand it off before anyone visualizes it for analysis. Slowing down to actually look at it is eye-opening.
def find_nearest_band(wavelengths, target_nm):
"""Find the band index closest to a target wavelength."""
return np.argmin(np.abs(wavelengths - target_nm))
# RGB composite (approximate true color)
r_idx = find_nearest_band(wavelengths, 650) # Red
g_idx = find_nearest_band(wavelengths, 550) # Green
b_idx = find_nearest_band(wavelengths, 450) # Blue
# NIR false color composite (classic vegetation highlight)
nir_idx = find_nearest_band(wavelengths, 850)
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
# True color RGB
rgb = np.stack([data[:, :, r_idx], data[:, :, g_idx], data[:, :, b_idx]], axis=-1)
rgb = (rgb - rgb.min()) / (rgb.max() - rgb.min() + 1e-8)
axes[0].imshow(rgb)
axes[0].set_title(f'RGB Composite\n(R={wavelengths[r_idx]:.0f}, G={wavelengths[g_idx]:.0f}, B={wavelengths[b_idx]:.0f} nm)')
axes[0].axis('off')
# NIR false color
nir_fc = np.stack([data[:, :, nir_idx], data[:, :, r_idx], data[:, :, g_idx]], axis=-1)
nir_fc = (nir_fc - nir_fc.min()) / (nir_fc.max() - nir_fc.min() + 1e-8)
axes[1].imshow(nir_fc)
axes[1].set_title(f'NIR False Color\n(NIR={wavelengths[nir_idx]:.0f}, R={wavelengths[r_idx]:.0f}, G={wavelengths[g_idx]:.0f} nm)')
axes[1].axis('off')
# Ground truth labels
cmap = mcolors.ListedColormap(CLASS_COLORS[:N_CLASSES])
bounds = np.arange(N_CLASSES + 1) - 0.5
norm = mcolors.BoundaryNorm(bounds, cmap.N)
im = axes[2].imshow(labels, cmap=cmap, norm=norm)
axes[2].set_title('Ground Truth Labels')
axes[2].axis('off')
cbar = plt.colorbar(im, ax=axes[2], ticks=range(N_CLASSES), shrink=0.8)
cbar.ax.set_yticklabels(CLASS_NAMES)
plt.tight_layout()
plt.show()
# Plot mean spectral signatures per class
fig, ax = plt.subplots(figsize=(12, 6))
for i, (name, color) in enumerate(zip(CLASS_NAMES, CLASS_COLORS)):
mask = labels == i
if mask.sum() == 0:
continue
spectra = data[mask]
mean_spectrum = spectra.mean(axis=0)
std_spectrum = spectra.std(axis=0)
ax.plot(wavelengths, mean_spectrum, color=color, label=name, linewidth=2)
ax.fill_between(wavelengths, mean_spectrum - std_spectrum, mean_spectrum + std_spectrum,
color=color, alpha=0.15)
# Mark key spectral features
ax.axvline(x=670, color='gray', linestyle=':', alpha=0.5, linewidth=0.8)
ax.text(675, ax.get_ylim()[1] * 0.95, 'Chlorophyll\nabsorption', fontsize=7, color='gray')
ax.axvline(x=720, color='gray', linestyle=':', alpha=0.5, linewidth=0.8)
ax.text(725, ax.get_ylim()[1] * 0.95, 'Red\nedge', fontsize=7, color='gray')
ax.axvline(x=1400, color='gray', linestyle=':', alpha=0.5, linewidth=0.8)
ax.text(1405, ax.get_ylim()[1] * 0.95, 'Water\nabs.', fontsize=7, color='gray')
ax.axvline(x=1900, color='gray', linestyle=':', alpha=0.5, linewidth=0.8)
ax.text(1905, ax.get_ylim()[1] * 0.95, 'Water\nabs.', fontsize=7, color='gray')
ax.set_xlabel('Wavelength (nm)', fontsize=12)
ax.set_ylabel('Reflectance', fontsize=12)
ax.set_title('Mean Spectral Signatures by Land Cover Class', fontsize=14)
ax.legend(loc='upper right', fontsize=10)
plt.tight_layout()
plt.show()
print("\nThis is why hyperspectral data is powerful — you can see how different")
print("materials have distinct spectral 'fingerprints' across the full wavelength range.")
print("The question is: can a neural network learn these patterns better than a")
print("Random Forest that sees the raw values?")
This is why hyperspectral data is powerful — you can see how different materials have distinct spectral 'fingerprints' across the full wavelength range. The question is: can a neural network learn these patterns better than a Random Forest that sees the raw values?
Preprocessing¶
Nothing fancy here -- standard normalization and a stratified train/test split. Both models get exactly the same data, same split.
This part felt familiar -- reshaping arrays, splitting data, scaling features. The engineering of getting data into the right shape for a model is the same whether you're doing spectral classification or any other ML task. It's once the model starts interpreting the spectral content that I'm out of my usual lane.
# Reshape to (n_pixels, n_bands)
X = data.reshape(-1, N_BANDS)
y = labels.reshape(-1)
# Remove any nodata pixels (if applicable)
valid_mask = np.all(np.isfinite(X), axis=1) & (y >= 0)
X = X[valid_mask]
y = y[valid_mask]
print(f"Total valid pixels: {len(X):,}")
# Stratified train/test split (70/30)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=SEED, stratify=y)
print(f"Training pixels: {len(X_train):,}")
print(f"Test pixels: {len(X_test):,}")
# Normalize (fit on train, transform both)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print(f"\nFeature scaling: mean={X_train_scaled.mean():.4f}, std={X_train_scaled.std():.4f}")
Total valid pixels: 40,000
Training pixels: 28,000
Test pixels: 12,000

Feature scaling: mean=0.0000, std=1.0000
The Simple Approach: Random Forest¶
From what I've seen working alongside researchers, Random Forest is the workhorse of spectral classification. It's fast, requires minimal tuning, handles high-dimensional data well, and gives you feature importance for free.
The approach is straightforward: each pixel is a row of 224 features (one per spectral band), and the model learns which combinations of band values correspond to which land cover class. No feature engineering, no dimensionality reduction -- just throw the raw spectral values at 200 trees and see what happens.
# Train Random Forest
print("Training Random Forest...")
t0 = time.time()
rf = RandomForestClassifier(
n_estimators=200,
max_depth=None,
min_samples_split=5,
min_samples_leaf=2,
max_features='sqrt',
n_jobs=-1,
random_state=SEED
)
rf.fit(X_train_scaled, y_train)
rf_train_time = time.time() - t0
print(f"Training time: {rf_train_time:.2f}s")
# Predict
t0 = time.time()
rf_pred = rf.predict(X_test_scaled)
rf_pred_time = time.time() - t0
rf_accuracy = accuracy_score(y_test, rf_pred)
rf_kappa = cohen_kappa_score(y_test, rf_pred)
print(f"Prediction time: {rf_pred_time:.4f}s")
print(f"\nOverall Accuracy: {rf_accuracy:.4f} ({rf_accuracy*100:.2f}%)")
print(f"Cohen's Kappa: {rf_kappa:.4f}")
print(f"\n{classification_report(y_test, rf_pred, target_names=CLASS_NAMES)}")
Training Random Forest...
Training time: 1.58s
Prediction time: 0.0925s
Overall Accuracy: 0.9698 (96.98%)
Cohen's Kappa: 0.9600
precision recall f1-score support
Water 1.00 1.00 1.00 4605
Forest 0.95 0.99 0.97 2610
Cropland 0.92 0.98 0.95 2010
Urban 1.00 1.00 1.00 1320
Bare Soil 1.00 1.00 1.00 825
Wetland 0.90 0.54 0.68 630
accuracy 0.97 12000
macro avg 0.96 0.92 0.93 12000
weighted avg 0.97 0.97 0.97 12000
# RF confusion matrix + classification map
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Confusion matrix
cm = confusion_matrix(y_test, rf_pred, normalize='true')
sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues', xticklabels=CLASS_NAMES,
yticklabels=CLASS_NAMES, ax=axes[0], cbar=False)
axes[0].set_xlabel('Predicted')
axes[0].set_ylabel('True')
axes[0].set_title('Random Forest — Normalized Confusion Matrix')
# Classification map (predict on full scene)
t0 = time.time()
X_full_scaled = scaler.transform(data.reshape(-1, N_BANDS))
rf_map = rf.predict(X_full_scaled).reshape(HEIGHT, WIDTH)
rf_full_time = time.time() - t0
cmap = mcolors.ListedColormap(CLASS_COLORS[:N_CLASSES])
bounds = np.arange(N_CLASSES + 1) - 0.5
norm = mcolors.BoundaryNorm(bounds, cmap.N)
im = axes[1].imshow(rf_map, cmap=cmap, norm=norm)
axes[1].set_title(f'Random Forest Classification Map\nOA={rf_accuracy*100:.1f}%')
axes[1].axis('off')
cbar = plt.colorbar(im, ax=axes[1], ticks=range(N_CLASSES), shrink=0.8)
cbar.ax.set_yticklabels(CLASS_NAMES)
plt.tight_layout()
plt.show()
# Feature importance — which wavelengths matter most?
fig, ax = plt.subplots(figsize=(12, 4))
importances = rf.feature_importances_
ax.fill_between(wavelengths, importances, alpha=0.5, color='steelblue')
ax.plot(wavelengths, importances, color='steelblue', linewidth=1)
ax.set_xlabel('Wavelength (nm)')
ax.set_ylabel('Feature Importance')
ax.set_title('Random Forest — Which Wavelengths Drive Classification?')
plt.tight_layout()
plt.show()
print(f"\nFull scene prediction time: {rf_full_time:.3f}s")
print(f"Top 5 most important wavelengths:")
top_idx = np.argsort(importances)[-5:][::-1]
for idx in top_idx:
print(f" {wavelengths[idx]:.0f} nm (band {idx}) — importance: {importances[idx]:.4f}")
Full scene prediction time: 0.126s
Top 5 most important wavelengths:
  1922 nm (band 165) — importance: 0.0302
  1913 nm (band 164) — importance: 0.0285
  1877 nm (band 160) — importance: 0.0282
  1831 nm (band 155) — importance: 0.0220
  1904 nm (band 163) — importance: 0.0204
The Modern Approach: 1D Convolutional Neural Network¶
Here's where it gets interesting from an engineering perspective. Instead of treating each spectral band as an independent feature (which is what RF does), a 1D CNN treats the entire spectral signature as a signal and slides learned filters along it.
The intuition: a convolution can learn patterns like "reflectance drops sharply between these bands" or "there's an absorption dip in this region" -- essentially learning the shape of spectral features rather than just the raw values at each band.
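A toy sketch of that intuition (not the model's learned filters): a hand-crafted "dip detector" kernel slid along a synthetic spectrum, which is exactly the operation a Conv1d layer performs, except the CNN learns its kernel weights from data. The spectrum and kernel here are invented for illustration.

```python
import numpy as np

# A flat spectrum with one absorption feature: reflectance dips over bands 20-24.
spectrum = np.ones(50) * 0.4
spectrum[20:25] -= 0.2

# Zero-sum kernel: responds to local minima, ignores flat regions.
dip_kernel = np.array([1.0, 1.0, -2.0, -2.0, 1.0, 1.0])
response = np.convolve(spectrum, dip_kernel, mode='same')

# The response is zero on flat regions and peaks where the dip sits under
# the negative taps with shoulders under the positive ones.
print(f"Strongest response at band {int(np.argmax(response))}")
```

The filter fires on the *shape* of the spectrum (a dip, regardless of its absolute reflectance) rather than on raw per-band values, which is what distinguishes the CNN's view of the data from Random Forest's.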
Architecture¶
I kept this deliberately simple -- three convolutional blocks followed by global average pooling and a small classifier. The question isn't "can a massive model overfit this data?" -- it's "does the convolutional approach to spectral data actually help?"
class SpectralCNN(nn.Module):
"""1D CNN for pixel-wise spectral classification.
Treats each pixel's spectral signature as a 1D signal
and learns spectral features via convolution.
"""
def __init__(self, n_bands, n_classes):
super().__init__()
self.features = nn.Sequential(
# Block 1: wide kernel to capture broad spectral features
nn.Conv1d(1, 32, kernel_size=7, padding=3),
nn.BatchNorm1d(32),
nn.ReLU(),
nn.MaxPool1d(2),
# Block 2: medium kernel for intermediate features
nn.Conv1d(32, 64, kernel_size=5, padding=2),
nn.BatchNorm1d(64),
nn.ReLU(),
nn.MaxPool1d(2),
# Block 3: narrow kernel for fine spectral details
nn.Conv1d(64, 128, kernel_size=3, padding=1),
nn.BatchNorm1d(128),
nn.ReLU(),
# Global average pooling — collapse spatial dimension
nn.AdaptiveAvgPool1d(1)
)
self.classifier = nn.Sequential(
nn.Linear(128, 64),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(64, n_classes)
)
def forward(self, x):
# x shape: (batch, n_bands)
x = x.unsqueeze(1) # -> (batch, 1, n_bands) — single "channel"
x = self.features(x)
x = x.squeeze(-1) # -> (batch, 128)
return self.classifier(x)
model = SpectralCNN(N_BANDS, N_CLASSES).to(DEVICE)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Model architecture:\n{model}")
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
Model architecture:
SpectralCNN(
(features): Sequential(
(0): Conv1d(1, 32, kernel_size=(7,), stride=(1,), padding=(3,))
(1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(4): Conv1d(32, 64, kernel_size=(5,), stride=(1,), padding=(2,))
(5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): ReLU()
(7): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(8): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
(9): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(10): ReLU()
(11): AdaptiveAvgPool1d(output_size=1)
)
(classifier): Sequential(
(0): Linear(in_features=128, out_features=64, bias=True)
(1): ReLU()
(2): Dropout(p=0.3, inplace=False)
(3): Linear(in_features=64, out_features=6, bias=True)
)
)
Total parameters: 44,358
Trainable parameters: 44,358
# Prepare PyTorch datasets
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.LongTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_test_tensor = torch.LongTensor(y_test)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
BATCH_SIZE = 256
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE * 2, shuffle=False)
# Training config
N_EPOCHS = 30
LR = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=N_EPOCHS)
# Training loop
print(f"Training for {N_EPOCHS} epochs (batch size={BATCH_SIZE}, lr={LR})...")
print("-" * 60)
train_losses = []
train_accs = []
test_accs = []
t0 = time.time()
for epoch in range(N_EPOCHS):
# Train
model.train()
epoch_loss = 0
correct = 0
total = 0
for X_batch, y_batch in train_loader:
X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
optimizer.zero_grad()
outputs = model(X_batch)
loss = criterion(outputs, y_batch)
loss.backward()
optimizer.step()
epoch_loss += loss.item() * X_batch.size(0)
_, predicted = outputs.max(1)
correct += predicted.eq(y_batch).sum().item()
total += y_batch.size(0)
scheduler.step()
train_loss = epoch_loss / total
train_acc = correct / total
train_losses.append(train_loss)
train_accs.append(train_acc)
# Evaluate
model.eval()
correct = 0
total = 0
with torch.no_grad():
for X_batch, y_batch in test_loader:
X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
outputs = model(X_batch)
_, predicted = outputs.max(1)
correct += predicted.eq(y_batch).sum().item()
total += y_batch.size(0)
test_acc = correct / total
test_accs.append(test_acc)
if (epoch + 1) % 5 == 0 or epoch == 0:
print(f"Epoch {epoch+1:3d}/{N_EPOCHS} | "
f"Loss: {train_loss:.4f} | "
f"Train Acc: {train_acc*100:.2f}% | "
f"Test Acc: {test_acc*100:.2f}%")
cnn_train_time = time.time() - t0
print("-" * 60)
print(f"Total training time: {cnn_train_time:.2f}s")
Training for 30 epochs (batch size=256, lr=0.001)...
------------------------------------------------------------
Epoch   1/30 | Loss: 0.6596 | Train Acc: 73.00% | Test Acc: 75.48%
Epoch   5/30 | Loss: 0.2642 | Train Acc: 89.31% | Test Acc: 88.95%
Epoch  10/30 | Loss: 0.2063 | Train Acc: 91.67% | Test Acc: 74.01%
Epoch  15/30 | Loss: 0.1890 | Train Acc: 92.43% | Test Acc: 77.91%
Epoch  20/30 | Loss: 0.1653 | Train Acc: 93.42% | Test Acc: 92.61%
Epoch  25/30 | Loss: 0.1564 | Train Acc: 93.80% | Test Acc: 93.88%
Epoch  30/30 | Loss: 0.1532 | Train Acc: 93.90% | Test Acc: 94.17%
------------------------------------------------------------
Total training time: 8.51s
# Training curves
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
axes[0].plot(train_losses, color='#d73027', linewidth=1.5)
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training Loss')
axes[1].plot(train_accs, label='Train', color='#2166ac', linewidth=1.5)
axes[1].plot(test_accs, label='Test', color='#d73027', linewidth=1.5)
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy')
axes[1].set_title('Accuracy over Training')
axes[1].legend()
axes[1].set_ylim([0.5, 1.02])
plt.tight_layout()
plt.show()
# Final evaluation
model.eval()
all_preds = []
t0 = time.time()
with torch.no_grad():
for X_batch, y_batch in test_loader:
X_batch = X_batch.to(DEVICE)
outputs = model(X_batch)
_, predicted = outputs.max(1)
all_preds.extend(predicted.cpu().numpy())
cnn_pred = np.array(all_preds)
cnn_pred_time = time.time() - t0
cnn_accuracy = accuracy_score(y_test, cnn_pred)
cnn_kappa = cohen_kappa_score(y_test, cnn_pred)
print(f"Overall Accuracy: {cnn_accuracy:.4f} ({cnn_accuracy*100:.2f}%)")
print(f"Cohen's Kappa: {cnn_kappa:.4f}")
print(f"Prediction time: {cnn_pred_time:.4f}s")
print(f"\n{classification_report(y_test, cnn_pred, target_names=CLASS_NAMES)}")
Overall Accuracy: 0.9417 (94.17%)
Cohen's Kappa: 0.9227
Prediction time: 0.0767s
precision recall f1-score support
Water 1.00 1.00 1.00 4605
Forest 0.93 0.95 0.94 2610
Cropland 0.80 0.94 0.86 2010
Urban 1.00 1.00 1.00 1320
Bare Soil 1.00 1.00 1.00 825
Wetland 0.86 0.32 0.47 630
accuracy 0.94 12000
macro avg 0.93 0.87 0.88 12000
weighted avg 0.94 0.94 0.94 12000
# CNN confusion matrix + classification map
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Confusion matrix
cm_cnn = confusion_matrix(y_test, cnn_pred, normalize='true')
sns.heatmap(cm_cnn, annot=True, fmt='.2f', cmap='Reds', xticklabels=CLASS_NAMES,
yticklabels=CLASS_NAMES, ax=axes[0], cbar=False)
axes[0].set_xlabel('Predicted')
axes[0].set_ylabel('True')
axes[0].set_title('1D CNN — Normalized Confusion Matrix')
# Full scene classification map
t0 = time.time()
model.eval()
X_full_tensor = torch.FloatTensor(X_full_scaled).to(DEVICE)
with torch.no_grad():
# Process in batches to avoid memory issues
cnn_map_flat = []
for i in range(0, len(X_full_tensor), BATCH_SIZE * 4):
batch = X_full_tensor[i:i + BATCH_SIZE * 4]
outputs = model(batch)
_, predicted = outputs.max(1)
cnn_map_flat.extend(predicted.cpu().numpy())
cnn_map = np.array(cnn_map_flat).reshape(HEIGHT, WIDTH)
cnn_full_time = time.time() - t0
cmap = mcolors.ListedColormap(CLASS_COLORS[:N_CLASSES])
bounds = np.arange(N_CLASSES + 1) - 0.5
norm = mcolors.BoundaryNorm(bounds, cmap.N)
im = axes[1].imshow(cnn_map, cmap=cmap, norm=norm)
axes[1].set_title(f'1D CNN Classification Map\nOA={cnn_accuracy*100:.1f}%')
axes[1].axis('off')
cbar = plt.colorbar(im, ax=axes[1], ticks=range(N_CLASSES), shrink=0.8)
cbar.ax.set_yticklabels(CLASS_NAMES)
plt.tight_layout()
plt.show()
The Honest Comparison¶
This is the part I was most curious about. From the engineering side, I see researchers reach for Random Forest by default -- and I've heard the "just use deep learning" argument from the ML side. What does the data actually say?
# Side-by-side classification maps
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
cmap = mcolors.ListedColormap(CLASS_COLORS[:N_CLASSES])
bounds = np.arange(N_CLASSES + 1) - 0.5
norm_cmap = mcolors.BoundaryNorm(bounds, cmap.N)
# Ground truth
im = axes[0].imshow(labels, cmap=cmap, norm=norm_cmap)
axes[0].set_title('Ground Truth', fontsize=14)
axes[0].axis('off')
# Random Forest
axes[1].imshow(rf_map, cmap=cmap, norm=norm_cmap)
axes[1].set_title(f'Random Forest\nOA={rf_accuracy*100:.1f}% | κ={rf_kappa:.3f}', fontsize=14)
axes[1].axis('off')
# 1D CNN
axes[2].imshow(cnn_map, cmap=cmap, norm=norm_cmap)
axes[2].set_title(f'1D CNN\nOA={cnn_accuracy*100:.1f}% | κ={cnn_kappa:.3f}', fontsize=14)
axes[2].axis('off')
# Shared colorbar
cbar = fig.colorbar(im, ax=axes, ticks=range(N_CLASSES), shrink=0.6,
orientation='horizontal', pad=0.08)
cbar.ax.set_xticklabels(CLASS_NAMES, fontsize=10)
plt.suptitle('Land Cover Classification Comparison', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()
# Detailed metrics comparison
kappa_label = "Cohen's Kappa"
print("=" * 65)
print(" COMPARISON SUMMARY")
print("=" * 65)
print(f"{'Metric':<30} {'Random Forest':>15} {'1D CNN':>15}")
print("-" * 65)
print(f"{'Overall Accuracy':<30} {rf_accuracy*100:>14.2f}% {cnn_accuracy*100:>14.2f}%")
print(f"{kappa_label:<30} {rf_kappa:>15.4f} {cnn_kappa:>15.4f}")
print(f"{'Training Time':<30} {rf_train_time:>14.2f}s {cnn_train_time:>14.2f}s")
print(f"{'Prediction Time (test set)':<30} {rf_pred_time:>14.4f}s {cnn_pred_time:>14.4f}s")
print(f"{'Full Scene Prediction':<30} {rf_full_time:>14.3f}s {cnn_full_time:>14.3f}s")
print("-" * 65)
# Per-class comparison
print("\nPer-Class Accuracy")
print(f"{'Class':<20} {'RF':>10} {'CNN':>10} {'Winner':>10}")
print("-" * 55)
rf_cm = confusion_matrix(y_test, rf_pred)
cnn_cm = confusion_matrix(y_test, cnn_pred)
rf_wins = 0
cnn_wins = 0
ties = 0
for i, name in enumerate(CLASS_NAMES):
    rf_class_acc = rf_cm[i, i] / rf_cm[i].sum() if rf_cm[i].sum() > 0 else 0
    cnn_class_acc = cnn_cm[i, i] / cnn_cm[i].sum() if cnn_cm[i].sum() > 0 else 0
    diff = cnn_class_acc - rf_class_acc
    if abs(diff) < 0.005:
        winner = "Tie"
        ties += 1
    elif diff > 0:
        winner = "CNN"
        cnn_wins += 1
    else:
        winner = "RF"
        rf_wins += 1
    print(f"{name:<20} {rf_class_acc*100:>9.1f}% {cnn_class_acc*100:>9.1f}% {winner:>10}")
print("-" * 55)
print(f"\nClass wins: RF={rf_wins} CNN={cnn_wins} Tie={ties}")
# Compute cost comparison
print("\nCompute Cost")
slowdown = cnn_train_time / rf_train_time if rf_train_time > 0 else float('inf')
print(f"CNN training took {slowdown:.1f}x longer than RF")
print(f"Model parameters: RF has {rf.n_estimators} trees, CNN has {total_params:,} parameters")
=================================================================
                     COMPARISON SUMMARY
=================================================================
Metric                           Random Forest          1D CNN
-----------------------------------------------------------------
Overall Accuracy                        96.98%          94.17%
Cohen's Kappa                           0.9600          0.9227
Training Time                            1.58s           8.51s
Prediction Time (test set)             0.0925s         0.0767s
Full Scene Prediction                   0.126s          0.071s
-----------------------------------------------------------------

Per-Class Accuracy
Class                        RF        CNN     Winner
-------------------------------------------------------
Water                    100.0%     100.0%        Tie
Forest                    98.8%      94.6%         RF
Cropland                  97.9%      93.6%         RF
Urban                     99.9%     100.0%        Tie
Bare Soil                100.0%     100.0%        Tie
Wetland                   54.4%      32.1%         RF
-------------------------------------------------------

Class wins: RF=3 CNN=0 Tie=3

Compute Cost
CNN training took 5.4x longer than RF
Model parameters: RF has 200 trees, CNN has 44,358 parameters
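As an aside, the per-class loop above is equivalent to taking the confusion-matrix diagonal over its row sums (which is just per-class recall). A vectorized sketch with toy labels -- the arrays here are illustrative, not the notebook's data:

```python
import numpy as np
from sklearn.metrics import confusion_matrix

# Toy labels standing in for y_test / predictions (values are made up)
y_true = np.array([0, 0, 0, 1, 1, 2, 2, 2, 2])
y_hat  = np.array([0, 0, 1, 1, 1, 2, 2, 0, 2])

cm = confusion_matrix(y_true, y_hat)
# Per-class accuracy = correct pixels per class / total pixels per class,
# i.e. the diagonal divided by the row sums (this is per-class recall)
row_sums = cm.sum(axis=1)
per_class_acc = np.where(row_sums > 0, cm.diagonal() / row_sums, 0.0)
print(per_class_acc)  # one value per class
```

Same numbers as the loop, without the `if ... else 0` guard repeated per class.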
# Where do the models disagree?
agreement = (rf_map == cnn_map)
disagreement_pct = 100 * (1 - agreement.mean())
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Agreement/disagreement map
axes[0].imshow(~agreement, cmap='RdYlGn_r', interpolation='nearest')
axes[0].set_title(f'Model Disagreement Map\n({disagreement_pct:.1f}% of pixels differ)', fontsize=13)
axes[0].axis('off')
# Where do errors cluster?
rf_errors = (rf_map != labels)
cnn_errors = (cnn_map != labels)
error_diff = cnn_errors.astype(int) - rf_errors.astype(int)
# -1 = CNN correct but RF wrong, 0 = both same, +1 = RF correct but CNN wrong
im = axes[1].imshow(error_diff, cmap='RdBu', vmin=-1, vmax=1)
axes[1].set_title('Error Difference\n(Blue = CNN better, Red = RF better)', fontsize=13)
axes[1].axis('off')
cbar = plt.colorbar(im, ax=axes[1], shrink=0.8, ticks=[-1, 0, 1])
cbar.ax.set_yticklabels(['CNN better', 'Same', 'RF better'])
plt.tight_layout()
plt.show()
# Where specifically does each model struggle?
print("Misclassified pixel analysis:")
print(f" RF total errors: {rf_errors.sum():,} pixels ({100*rf_errors.mean():.1f}%)")
print(f" CNN total errors: {cnn_errors.sum():,} pixels ({100*cnn_errors.mean():.1f}%)")
print(f" Both wrong: {(rf_errors & cnn_errors).sum():,} pixels")
print(f" Only RF wrong: {(rf_errors & ~cnn_errors).sum():,} pixels")
print(f" Only CNN wrong: {(~rf_errors & cnn_errors).sum():,} pixels")
Misclassified pixel analysis:
  RF total errors: 373 pixels (0.9%)
  CNN total errors: 2,256 pixels (5.6%)
  Both wrong: 312 pixels
  Only RF wrong: 61 pixels
  Only CNN wrong: 1,944 pixels
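Those discordant counts (only RF wrong vs. only CNN wrong) are exactly what McNemar's test uses to ask whether the accuracy gap is statistically meaningful, not just noise. A sketch using the printed counts -- with the caveat that neighboring pixels are spatially correlated, so the effective sample size is smaller than the pixel count and this overstates significance:

```python
from scipy.stats import chi2

# Discordant counts from the analysis above
only_rf_wrong = 61     # pixels where only RF errs
only_cnn_wrong = 1944  # pixels where only the CNN errs

# McNemar's chi-squared statistic with continuity correction
n01, n10 = only_rf_wrong, only_cnn_wrong
stat = (abs(n01 - n10) - 1) ** 2 / (n01 + n10)
p_value = chi2.sf(stat, df=1)
print(f"chi2 = {stat:.1f}, p = {p_value:.3g}")
# A tiny p-value means the two error rates genuinely differ
# (here, overwhelmingly in RF's favor)
```

Treat this as indicative rather than rigorous: with ~1,900 vs. 61 discordant pixels, the direction is unambiguous even after discounting for spatial autocorrelation.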
What I Took Away¶
Caveat up front: I'm an engineer, not a remote sensing scientist. Take my interpretations with that context. I'd genuinely appreciate corrections from people who do this daily.
The short version: On this task, Random Forest wins outright. It matches or beats the 1D CNN on every class, pulls well ahead on the hardest one (Wetland: 54.4% vs 32.1%), trains about 5x faster, and is dramatically simpler to deploy.
What stood out to me:
RF is impressively effective for how simple it is. Each pixel is just a row of 224 numbers, and RF figures out which combinations matter. No architecture decisions, no learning rate tuning, no GPU required. As someone who builds production pipelines, I understand why researchers default to this.
The CNN learns spectral shape, not just values. The convolutional filters pick up on patterns across neighboring bands -- slopes, dips, edges. That's a fundamentally different approach than RF's "split on band X at value Y." Whether that matters depends on the data.
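To make "learning shape, not values" concrete, here is a toy sketch (not the notebook's trained model): a hand-set first-difference filter run through F.conv1d responds to a step edge in a spectrum but produces zeros on a flat one, regardless of absolute reflectance.

```python
import torch
import torch.nn.functional as F

# A hand-set first-difference filter: it responds to spectral slope,
# not absolute level (the values are illustrative, not learned weights)
kernel = torch.tensor([[[-1.0, 1.0]]])  # shape: (out_ch, in_ch, width)

flat = torch.ones(1, 1, 8)  # flat spectrum: constant reflectance
edge = torch.cat([torch.zeros(1, 1, 4), torch.ones(1, 1, 4)], dim=2)  # step edge

print(F.conv1d(flat, kernel))  # all zeros: no shape to detect
print(F.conv1d(edge, kernel))  # a spike at the band where reflectance jumps
```

A learned 1D CNN stacks many such filters and tunes their weights, but the underlying operation is the same: responses to local spectral structure rather than per-band thresholds.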
The engineering overhead of DL is real. The CNN required choices about architecture, learning rate, batch size, epochs, and hardware. RF required setting n_estimators=200. In a production pipeline where I'm the one maintaining it, that simplicity has real value.
I can see where spatial context would change everything. This demo classifies each pixel independently. But from building visualization tools, I know that context matters -- a vegetation pixel surrounded by water is probably wetland. 2D/3D approaches that use spatial neighborhoods seem like the natural next step, and that's where DL's advantage over RF should be clearest.
From an engineering perspective: The model choice matters less than the pipeline around it -- data quality, preprocessing consistency, the ability to retrain on new scenes, and serving predictions reliably. That's where I live, and it's the same regardless of whether RF or CNN is in the middle.
What I'd Explore Next¶
This was a deliberately constrained experiment -- pixel-level only, simple architectures, no hyperparameter sweeps. Here's what I'd dig into next:
Spatial-spectral approaches. Classifying pixels independently ignores spatial context. A 2D or 3D CNN that takes a small patch (e.g., 9x9 pixels x 224 bands) can learn that "a vegetation pixel surrounded by water is probably wetland, not forest." From what I've read, this is where CNNs consistently outperform RF.
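As a sketch of what the preprocessing for that might look like -- the dimensions and the edge-padding choice are my assumptions, not something from this notebook:

```python
import numpy as np

def extract_patch(cube, row, col, size=9):
    """Grab a size x size spatial patch (all bands) around one pixel,
    edge-padding so border pixels still get a full patch."""
    half = size // 2
    padded = np.pad(cube, ((half, half), (half, half), (0, 0)), mode='edge')
    return padded[row:row + size, col:col + size, :]

# Toy cube: 20 x 20 pixels x 5 bands (stand-in for HEIGHT x WIDTH x 224)
cube = np.random.rand(20, 20, 5)
patch = extract_patch(cube, 0, 0)  # a corner pixel still yields a full patch
print(patch.shape)  # (9, 9, 5)
```

A 2D/3D CNN would then consume these patches instead of single spectra; the label stays attached to the center pixel.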
The pipeline engineering angle. From my day job, the questions I'd want to answer are more operational: How do you retrain when a new scene comes in? How do you detect when model performance degrades? How do you serve predictions at scale? The model is one component -- the system around it is where most of the engineering lives.
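On the degradation question, one lightweight check I'd reach for from the pipeline side is comparing per-band statistics of an incoming scene against the training distribution, before any model runs. The threshold and the z-score heuristic here are illustrative assumptions, not a standard method:

```python
import numpy as np

def band_drift(train_pixels, new_pixels, z_thresh=3.0):
    """Flag spectral bands whose mean in a new scene drifts far from the
    training distribution (a cheap proxy for model-degradation risk).
    The z-score threshold is illustrative, not a calibrated standard."""
    mu = train_pixels.mean(axis=0)
    sigma = train_pixels.std(axis=0) + 1e-12
    z = np.abs(new_pixels.mean(axis=0) - mu) / sigma
    return np.where(z > z_thresh)[0]  # indices of drifted bands

rng = np.random.default_rng(0)
train = rng.normal(0.3, 0.05, size=(1000, 224))  # toy flattened pixel spectra
scene = train.copy()
scene[:, 100] += 0.5                             # inject drift into band 100
print(band_drift(train, scene))
```

It won't catch label drift, but it's cheap enough to run on every ingested scene and flags the obvious sensor or atmospheric shifts before predictions go stale silently.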
Foundation models for remote sensing. There's emerging work on pre-trained models (SpectralGPT, SatMAE, etc.) that train on massive amounts of unlabeled satellite imagery, then fine-tune on small labeled datasets. As someone who builds data infrastructure, the idea of a general-purpose model that reduces per-scene training is compelling -- but I'd want to understand the deployment story.
Questions for the Community¶
I'm coming at this from the engineering side, so I'm especially curious about the practitioner perspective:
What does your classification pipeline actually look like end-to-end? I build the data infrastructure side, but I don't have great visibility into the analysis workflows downstream. What tools, what preprocessing steps, what's manual vs automated?
How do you handle the engineering challenges of retraining? Do you retrain per-scene, or aim for a general model that transfers? In my experience building pipelines, this is where the operational complexity lives.
What's the biggest bottleneck in your workflow -- and is it the model, or everything around it? I have a hunch that data preparation, format wrangling, and visualization take more time than the actual classification. Curious if that matches your experience.
For those using ML in production: what does deployment look like? Batch processing? Real-time? How do you monitor model performance over time?
I'm genuinely learning here -- would love to hear what's working (or not working) in your workflows.
Built with Python, scikit-learn, and PyTorch. Full notebook available for anyone who wants to reproduce or extend this.