import { defaultInventoryEnvConfig, skuIdToIndex } from "@/lib/inventoryRl/catalog";
import { buildInventoryObservation } from "@/lib/inventoryRl/features";
import type {
  EnvironmentContext,
  ExpertPolicyEvaluation,
  InventoryDatasetRow,
  InventoryEnvConfig,
  SkuDefinition,
  TrainingSample,
  TrainingTransition,
  ValidationError,
  ValidationResult,
} from "@/lib/inventoryRl/types";

/**
 * Snake_case column names advertised as required for an ingested dataset.
 * A copy of this list is returned by `getIngestionRequiredColumns`.
 *
 * NOTE(review): `parseRecord` only reports `outlet_id`, `date`, `hour` and
 * `sku_id` as missing; the other columns silently fall back to defaults there.
 * Confirm whether that asymmetry is intended.
 */
const REQUIRED_COLUMNS = [
  "outlet_id",
  "date",
  "hour",
  "sku_id",
  "demand",
  "sales",
  "inventory_start_of_hour",
  "lost_sales",
  "is_weekend",
  "is_holiday",
  "temp",
  "precip",
] as const;

/**
 * Lookup from a canonicalised header name (see `canonical`) to the matching
 * `InventoryDatasetRow` property.
 *
 * Each multi-word field is listed twice: `canonical` lower-cases the header and
 * strips everything except `a-z`, `0-9` and `_`, so a camelCase or
 * space-separated header collapses to the squashed form (`outletId` ->
 * `outletid`) while a snake_case header keeps its underscores (`outlet_id`).
 * Headers that do not resolve to an entry here are ignored by `parseRecord`.
 */
const FIELD_ALIASES: Record<string, keyof InventoryDatasetRow> = {
  outletid: "outletId",
  outlet_id: "outletId",
  date: "date",
  hour: "hour",
  skuid: "skuId",
  sku_id: "skuId",
  demand: "demand",
  sales: "sales",
  inventorystartofhour: "inventoryStartOfHour",
  inventory_start_of_hour: "inventoryStartOfHour",
  inventoryendofhour: "inventoryEndOfHour",
  inventory_end_of_hour: "inventoryEndOfHour",
  lostsales: "lostSales",
  lost_sales: "lostSales",
  isweekend: "isWeekend",
  is_weekend: "isWeekend",
  isholiday: "isHoliday",
  is_holiday: "isHoliday",
  temp: "temp",
  precip: "precip",
  weekday: "weekday",
  ispickuphour: "isPickupHour",
  is_pickup_hour: "isPickupHour",
  ishighdemandhour: "isHighDemandHour",
  is_high_demand_hour: "isHighDemandHour",
  isslowhour: "isSlowHour",
  is_slow_hour: "isSlowHour",
  capacity: "capacity",
  orderqty: "orderQty",
  order_qty: "orderQty",
};

/**
 * Result shape shared by the CSV and JSON dataset parsers.
 */
type ParsedOutput = {
  /** Parsed rows; the parsers return an empty array here when row validation fails. */
  rows: InventoryDatasetRow[];
  /** Combined parse and validation outcome (validity flag, errors, warnings). */
  validation: ValidationResult;
};

/**
 * Normalises a header name for alias lookup: surrounding whitespace is removed,
 * the text is lower-cased, and every character other than `a-z`, `0-9` or `_`
 * is dropped.
 *
 * @param name Raw header / property name.
 * @returns The canonical lookup key.
 */
function canonical(name: string): string {
  const disallowed = /[^a-z0-9_]/g;
  const lowered = name.trim().toLowerCase();
  return lowered.replace(disallowed, "");
}

/**
 * Splits one CSV line into trimmed field values.
 *
 * Double quotes toggle a "quoted" mode in which commas are literal; a doubled
 * quote (`""`) inside quoted mode yields a single `"` character. The quote
 * characters themselves are never part of the output.
 *
 * @param line A single line of CSV text (no line terminators).
 * @returns The fields in order; always at least one element.
 */
function parseCsvLine(line: string): string[] {
  const fields: string[] = [];
  let buffer = "";
  let quoted = false;
  let pos = 0;

  while (pos < line.length) {
    const ch = line.charAt(pos);
    if (ch === '"') {
      if (quoted && line.charAt(pos + 1) === '"') {
        // Escaped quote inside a quoted field: emit one quote, consume both.
        buffer += '"';
        pos += 2;
      } else {
        quoted = !quoted;
        pos += 1;
      }
    } else if (ch === "," && !quoted) {
      fields.push(buffer.trim());
      buffer = "";
      pos += 1;
    } else {
      buffer += ch;
      pos += 1;
    }
  }

  // The last field has no trailing comma to flush it.
  fields.push(buffer.trim());
  return fields;
}

/**
 * Converts text to a finite number, returning `fallback` when that is not possible.
 *
 * Blank input (empty or whitespace-only) is treated as "no value" and yields
 * `fallback`. Without this guard `Number("")` evaluates to `0`, so a blank
 * cell would never reach the caller-supplied fallback.
 *
 * @param value Text to convert.
 * @param fallback Value returned for blank, non-numeric or non-finite input (default `0`).
 * @returns The parsed finite number, or `fallback`.
 */
function toNumber(value: string, fallback = 0): number {
  const text = value.trim();
  if (text === "") return fallback;
  const n = Number(text);
  return Number.isFinite(n) ? n : fallback;
}

/**
 * Interprets a text flag as a boolean.
 *
 * After trimming and lower-casing, only `1`, `true`, `yes` and `y` count as
 * true; every other value (including the empty string) is false.
 *
 * @param value Raw flag text.
 * @returns Whether the text is one of the accepted truthy spellings.
 */
function toBoolean(value: string): boolean {
  switch (value.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "y":
      return true;
    default:
      return false;
  }
}

/**
 * Normalises a date string to `YYYY-MM-DD`, or returns the input unchanged when
 * it cannot be parsed.
 *
 * The result is the calendar date as it appears in the data, independent of the
 * machine's time zone:
 *  - Input starting with `YYYY-MM-DD` (alone, or followed by `T`/whitespace and
 *    a time) yields that written date. A date-time without an offset is parsed
 *    by `Date` as local time, so formatting it through `toISOString()` (UTC)
 *    could shift it by one day.
 *  - Other formats carrying an explicit zone (`GMT`, `UTC`, trailing `Z`, or a
 *    `hh:mm±hh:mm` offset) are formatted in UTC.
 *  - Remaining formats (e.g. `01/15/2024`) are parsed by `Date` as local time
 *    and therefore formatted from the local calendar fields.
 *
 * @param value Raw date text.
 * @returns The normalised `YYYY-MM-DD` string, or `value` itself when unparseable.
 */
function toIsoDate(value: string): string {
  const text = value.trim();
  const parsed = new Date(text);
  if (Number.isNaN(parsed.getTime())) {
    return value;
  }

  const isoPrefix = /^(\d{4})-(\d{2})-(\d{2})(?=$|[T\s])/.exec(text);
  if (isoPrefix) {
    // Rebuild the written calendar date in UTC. setUTCFullYear avoids the
    // two-digit-year remapping of Date.UTC.
    const asWritten = new Date(0);
    asWritten.setUTCFullYear(Number(isoPrefix[1]), Number(isoPrefix[2]) - 1, Number(isoPrefix[3]));
    return asWritten.toISOString().slice(0, 10);
  }

  const hasExplicitZone = /(?:GMT|UTC|\bUT\b|Z\s*$|\d:\d{2}(?::\d{2}(?:\.\d+)?)?\s*[+-]\d{2}:?\d{2}\b)/i.test(text);
  if (hasExplicitZone) {
    return parsed.toISOString().slice(0, 10);
  }

  // Zone-less, non-ISO text was interpreted in local time; read it back the same way.
  const pad = (n: number, width: number): string => String(n).padStart(width, "0");
  return `${pad(parsed.getFullYear(), 4)}-${pad(parsed.getMonth() + 1, 2)}-${pad(parsed.getDate(), 2)}`;
}

/**
 * Derives a Monday-based weekday index (Monday = 0 ... Sunday = 6) from a date
 * string, using the UTC day of the parsed timestamp.
 *
 * @param isoDate Date text, normally `YYYY-MM-DD`.
 * @returns The weekday index, or `0` when the text cannot be parsed.
 */
function inferWeekday(isoDate: string): number {
  const timestamp = Date.parse(isoDate);
  if (Number.isNaN(timestamp)) {
    return 0;
  }
  // getUTCDay is Sunday-based (0 = Sunday); rotate so Monday becomes 0.
  const sundayBased = new Date(timestamp).getUTCDay();
  return (sundayBased + 6) % 7;
}

/**
 * Converts one raw key/value record into an `InventoryDatasetRow`.
 *
 * Keys are resolved through `FIELD_ALIASES` after canonicalisation; unknown
 * keys are ignored. Blank values are treated as absent rather than coerced, so
 * that:
 *  - a blank `outlet_id` / `hour` is reported as missing instead of becoming `0`;
 *  - blank optional fields (`order_qty`, `inventory_end_of_hour`, `capacity`,
 *    `weekday`, ...) stay `undefined` instead of turning into an observed `0`;
 *  - blank defaulted fields (`temp`, ...) receive their documented default.
 *
 * @param record Raw record: header name -> cell text.
 * @param rowNumber Row number used in error reports.
 * @returns `row` when the identifying fields (`outlet_id`, `date`, `hour`,
 *   `sku_id`) are all present; otherwise only `errors`, one per missing field.
 */
function parseRecord(record: Record<string, string>, rowNumber: number): { row?: InventoryDatasetRow; errors: ValidationError[] } {
  const errors: ValidationError[] = [];
  const mapped: Partial<InventoryDatasetRow> = {};

  for (const [rawKey, rawValue] of Object.entries(record)) {
    const alias = FIELD_ALIASES[canonical(rawKey)];
    if (!alias) continue;
    const value = rawValue.trim();
    // A blank cell carries no information: leave the field unset so the
    // "missing" checks and `??` defaults below can apply.
    if (value === "") continue;
    if (alias === "date") mapped.date = toIsoDate(value);
    else if (alias === "skuId") mapped.skuId = value;
    else if (alias === "isWeekend" || alias === "isHoliday" || alias === "isPickupHour" || alias === "isHighDemandHour" || alias === "isSlowHour") {
      mapped[alias] = toBoolean(value);
    } else if (alias === "outletId" || alias === "hour" || alias === "weekday" || alias === "capacity") {
      // Integer-valued fields: drop any fractional part.
      mapped[alias] = Math.trunc(toNumber(value));
    } else {
      mapped[alias] = toNumber(value);
    }
  }

  if (mapped.outletId === undefined) errors.push({ row: rowNumber, field: "outlet_id", message: "missing outlet_id" });
  if (!mapped.date) errors.push({ row: rowNumber, field: "date", message: "missing date" });
  if (mapped.hour === undefined) errors.push({ row: rowNumber, field: "hour", message: "missing hour" });
  if (!mapped.skuId) errors.push({ row: rowNumber, field: "sku_id", message: "missing sku_id" });

  if (errors.length > 0) return { errors };

  return {
    row: {
      outletId: mapped.outletId ?? 0,
      date: mapped.date ?? "",
      hour: mapped.hour ?? 0,
      skuId: mapped.skuId ?? "",
      demand: mapped.demand ?? 0,
      sales: mapped.sales ?? 0,
      inventoryStartOfHour: mapped.inventoryStartOfHour ?? 0,
      inventoryEndOfHour: mapped.inventoryEndOfHour,
      lostSales: mapped.lostSales ?? 0,
      isWeekend: mapped.isWeekend ?? false,
      isHoliday: mapped.isHoliday ?? false,
      temp: mapped.temp ?? 60,
      precip: mapped.precip ?? 0,
      weekday: mapped.weekday,
      isPickupHour: mapped.isPickupHour,
      isHighDemandHour: mapped.isHighDemandHour,
      isSlowHour: mapped.isSlowHour,
      capacity: mapped.capacity,
      orderQty: mapped.orderQty,
    },
    errors,
  };
}

/**
 * Runs dataset-level checks over already-parsed rows.
 *
 * Errors: unknown `sku_id`, `hour` outside [0, 23], negative
 * demand/sales/inventory/lost_sales. Warnings: sales or lost sales exceeding
 * demand, and an empty dataset.
 *
 * @param rows Parsed rows.
 * @param skus SKU catalogue; a row's `skuId` must match one of the `id`s.
 * @param rowNumbers Optional source row number for each entry of `rows`
 *   (parallel array). When omitted, row `i` is reported as `i + 2`. Supplying
 *   it keeps reported numbers aligned with the input when earlier records were
 *   dropped during parsing.
 * @returns Validity flag plus the collected errors and warnings.
 */
function validateRows(
  rows: InventoryDatasetRow[],
  skus: SkuDefinition[],
  rowNumbers?: readonly number[]
): ValidationResult {
  const errors: ValidationError[] = [];
  const warnings: string[] = [];
  const skuSet = new Set<string>(skus.map((s) => s.id));

  rows.forEach((row, i) => {
    const rowNum = rowNumbers?.[i] ?? i + 2;
    if (!skuSet.has(row.skuId)) {
      errors.push({ row: rowNum, field: "sku_id", message: `unknown sku_id '${row.skuId}'` });
    }
    if (row.hour < 0 || row.hour > 23) {
      errors.push({ row: rowNum, field: "hour", message: "hour must be in [0,23]" });
    }
    if (row.demand < 0 || row.sales < 0 || row.inventoryStartOfHour < 0 || row.lostSales < 0) {
      errors.push({ row: rowNum, field: "values", message: "demand/sales/inventory/lost_sales must be non-negative" });
    }
    // Small epsilon so float noise does not trigger data-quality warnings.
    if (row.sales > row.demand + 1e-9) {
      warnings.push(`row ${rowNum}: sales > demand; keeping row but review data quality`);
    }
    if (row.lostSales > row.demand + 1e-9) {
      warnings.push(`row ${rowNum}: lost_sales > demand; keeping row but review data quality`);
    }
  });

  if (rows.length === 0) {
    warnings.push("dataset is empty after parsing");
  }

  return { valid: errors.length === 0, errors, warnings };
}

/**
 * Splits CSV text into records keyed by the header row.
 *
 * Line endings are normalised and blank lines are discarded before parsing.
 * Lines are split before quotes are interpreted, so a quoted field that
 * contains a line break is not supported. Cells missing from a short line
 * become empty strings.
 *
 * @param csvText Raw CSV content including the header line.
 * @returns One record per non-blank data line; empty when there is no content.
 */
function parseCsvToRecords(csvText: string): Record<string, string>[] {
  const lines = csvText.replace(/\r\n/g, "\n").replace(/\r/g, "\n").split("\n").filter((line) => line.trim().length > 0);
  if (lines.length === 0) return [];
  const headers = parseCsvLine(lines[0] ?? "");
  const output: Record<string, string>[] = [];
  for (let i = 1; i < lines.length; i++) {
    const values = parseCsvLine(lines[i] ?? "");
    const row: Record<string, string> = {};
    for (let j = 0; j < headers.length; j++) {
      row[headers[j] ?? ""] = values[j] ?? "";
    }
    output.push(row);
  }
  return output;
}

/**
 * Validates the successfully parsed rows and merges the outcome with the
 * record-level parse errors into the parsers' common result shape.
 * Rows are only returned when row validation passes.
 */
function finalizeParsedRows(
  rows: InventoryDatasetRow[],
  rowNumbers: readonly number[],
  parseErrors: ValidationError[],
  skus: SkuDefinition[]
): ParsedOutput {
  const result = validateRows(rows, skus, rowNumbers);
  return {
    rows: result.valid ? rows : [],
    validation: {
      valid: parseErrors.length === 0 && result.valid,
      errors: [...parseErrors, ...result.errors],
      warnings: result.warnings,
    },
  };
}

/**
 * Parses and validates an inventory dataset supplied as CSV text.
 *
 * Row numbers in errors and warnings count non-blank lines, with the header as
 * row 1 (so the first data line is row 2). Records that fail parsing are
 * reported and skipped; the remaining rows keep their original row numbers in
 * validation messages.
 *
 * @param csvText CSV content including a header line.
 * @param configRaw Overrides merged over `defaultInventoryEnvConfig()`.
 * @returns Parsed rows (empty when row validation fails) and the validation result.
 */
export function parseInventoryDatasetCsv(csvText: string, configRaw: Partial<InventoryEnvConfig> = {}): ParsedOutput {
  const config = { ...defaultInventoryEnvConfig(), ...configRaw };
  const records = parseCsvToRecords(csvText);
  const rows: InventoryDatasetRow[] = [];
  const rowNumbers: number[] = [];
  const errors: ValidationError[] = [];

  for (let i = 0; i < records.length; i++) {
    const rowNumber = i + 2;
    const parsed = parseRecord(records[i] ?? {}, rowNumber);
    if (parsed.errors.length > 0) {
      errors.push(...parsed.errors);
      continue;
    }
    if (parsed.row) {
      rows.push(parsed.row);
      rowNumbers.push(rowNumber);
    }
  }

  return finalizeParsedRows(rows, rowNumbers, errors, config.skus);
}

/**
 * Parses and validates an inventory dataset supplied as an already-decoded
 * JSON value.
 *
 * The value must be an array of objects. Each property value is stringified
 * (`null`/`undefined` become the empty string) and then handled like a CSV
 * cell. Array element `i` is reported as row `i + 2`, mirroring the CSV
 * numbering; elements that fail parsing are reported and skipped without
 * shifting the row numbers of later elements.
 *
 * @param json Decoded JSON payload.
 * @param configRaw Overrides merged over `defaultInventoryEnvConfig()`.
 * @returns Parsed rows (empty when row validation fails) and the validation result.
 */
export function parseInventoryDatasetJson(
  json: unknown,
  configRaw: Partial<InventoryEnvConfig> = {}
): ParsedOutput {
  const config = { ...defaultInventoryEnvConfig(), ...configRaw };
  if (!Array.isArray(json)) {
    return {
      rows: [],
      validation: {
        valid: false,
        errors: [{ row: 1, field: "json", message: "expected an array of row objects" }],
        warnings: [],
      },
    };
  }

  const rows: InventoryDatasetRow[] = [];
  const rowNumbers: number[] = [];
  const parseErrors: ValidationError[] = [];
  for (let i = 0; i < json.length; i++) {
    const rowNumber = i + 2;
    const item = json[i];
    if (typeof item !== "object" || item === null) {
      parseErrors.push({ row: rowNumber, field: "row", message: "row must be an object" });
      continue;
    }
    const record: Record<string, string> = {};
    for (const [k, v] of Object.entries(item)) {
      record[k] = String(v ?? "");
    }
    const parsed = parseRecord(record, rowNumber);
    if (parsed.errors.length > 0) {
      parseErrors.push(...parsed.errors);
      continue;
    }
    if (parsed.row) {
      rows.push(parsed.row);
      rowNumbers.push(rowNumber);
    }
  }

  return finalizeParsedRows(rows, rowNumbers, parseErrors, config.skus);
}

/**
 * Lists the column names the ingestion format advertises as required.
 *
 * @returns A fresh array, so callers may mutate it without affecting the
 *   module-level constant.
 */
export function getIngestionRequiredColumns(): string[] {
  return Array.from(REQUIRED_COLUMNS);
}

/**
 * All dataset rows for one (outlet, date, hour) combined into a single record.
 *
 * Scalar context fields are taken from the first row seen for the bucket. The
 * array fields have one slot per configured SKU, addressed by the index that
 * `skuIdToIndex(config.skus)` returns for the row's `skuId` (presumably the
 * SKU's position in `config.skus` — confirm against the catalog helper).
 */
type HourAggregate = {
  outletId: number;
  date: string;
  hour: number;
  /** Row-provided weekday, or inferred from `date` when the row has none. */
  weekday: number;
  isWeekend: boolean;
  isHoliday: boolean;
  temp: number;
  precip: number;
  /** Row-provided capacity, or the configured capacity when the row has none. */
  capacity: number;
  /** Start-of-hour inventory per SKU (last row for the SKU wins). */
  inventoryStart: number[];
  /** Summed demand per SKU. */
  demand: number[];
  /** Summed sales per SKU. */
  sales: number[];
  /** Summed lost sales per SKU. */
  lostSales: number[];
  /** Observed order quantity per SKU; `NaN` where no finite quantity was recorded. */
  orderObserved: number[];
};

/**
 * Per-outlet, per-SKU summary signals fed into the observation context.
 *
 * Each array is the outlet's dataset-wide total for the SKU divided by the
 * number of distinct dates seen for that outlet and multiplied by 7, i.e. a
 * 7-day-equivalent average rather than a true trailing window.
 */
type OutletRollingSignals = {
  sales7d: number[];
  lost7d: number[];
  waste7d: number[];
};

/**
 * Restricts a value to the range [min, max].
 *
 * @param v Value to bound.
 * @param min Lower bound; also the result for any non-finite `v` (NaN or ±Infinity).
 * @param max Upper bound.
 * @returns The bounded value.
 */
function clamp(v: number, min: number, max: number): number {
  if (Number.isFinite(v)) {
    const upperBounded = Math.min(max, v);
    return Math.max(min, upperBounded);
  }
  return min;
}

/**
 * Bounds a value to [min, max] via `clamp` and then discards any fractional
 * part (truncation toward zero).
 *
 * @param v Value to bound.
 * @param min Lower bound.
 * @param max Upper bound.
 * @returns The truncated, bounded value.
 */
function clampInt(v: number, min: number, max: number): number {
  const bounded = clamp(v, min, max);
  return Math.trunc(bounded);
}

/**
 * Turns a raw order quantity into a valid per-SKU action.
 *
 * The quantity is bounded to an integer in [0, maxOrderPerSku]. A non-positive
 * result means "no order" (0); any positive result is raised to at least
 * `minOrder`.
 *
 * @param v Raw order quantity.
 * @param minOrder Smallest quantity allowed for a non-zero order.
 * @param maxOrderPerSku Upper bound applied before the minimum is enforced.
 * @returns The normalised order quantity.
 */
function normalizeOfflineAction(v: number, minOrder: number, maxOrderPerSku: number): number {
  const bounded = clampInt(v, 0, maxOrderPerSku);
  return bounded <= 0 ? 0 : Math.max(minOrder, bounded);
}

/**
 * Groups dataset rows into per-hour aggregates and derives per-outlet summary
 * signals.
 *
 * @param rows Parsed dataset rows (one row per outlet/date/hour/SKU is the
 *   expected shape; duplicates are summed, see below).
 * @param config Environment configuration (SKU list, capacity, waste rate,
 *   business hours).
 * @returns `byDay`: aggregates keyed by `"outletId|date"`, restricted to
 *   business hours and sorted by hour; `rollingSignalsByOutlet`: 7-day-equivalent
 *   sales / lost-sales / waste averages per outlet.
 */
function buildHourlyAggregates(
  rows: InventoryDatasetRow[],
  config: InventoryEnvConfig
): { byDay: Map<string, HourAggregate[]>; rollingSignalsByOutlet: Map<number, OutletRollingSignals> } {
  const skus = config.skus;
  const skuIndex = skuIdToIndex(skus);
  // One aggregate per "outlet|date|hour".
  const grouped = new Map<string, HourAggregate>();
  // Dataset-wide per-SKU totals for each outlet (includes rows outside business hours).
  const outletTotals = new Map<number, { sales: number[]; lost: number[]; waste: number[] }>();
  // Distinct dates seen per outlet; used to turn totals into daily averages.
  const outletDates = new Map<number, Set<string>>();

  for (const row of rows) {
    const key = `${row.outletId}|${row.date}|${row.hour}`;
    let agg = grouped.get(key);
    if (!agg) {
      // Context fields come from the first row encountered for this bucket.
      agg = {
        outletId: row.outletId,
        date: row.date,
        hour: row.hour,
        weekday: row.weekday ?? inferWeekday(row.date),
        isWeekend: row.isWeekend,
        isHoliday: row.isHoliday,
        temp: row.temp,
        precip: row.precip,
        capacity: row.capacity ?? config.capacity,
        inventoryStart: new Array<number>(skus.length).fill(0),
        demand: new Array<number>(skus.length).fill(0),
        sales: new Array<number>(skus.length).fill(0),
        lostSales: new Array<number>(skus.length).fill(0),
        // NaN marks "no order quantity observed" for the SKU.
        orderObserved: new Array<number>(skus.length).fill(NaN),
      };
      grouped.set(key, agg);
    }

    const totals = outletTotals.get(row.outletId) ?? {
      sales: new Array<number>(skus.length).fill(0),
      lost: new Array<number>(skus.length).fill(0),
      waste: new Array<number>(skus.length).fill(0),
    };
    const idx = skuIndex.get(row.skuId);
    // Rows whose SKU is not in the configured catalogue contribute nothing per-SKU.
    if (idx !== undefined) {
      const sales = Math.max(0, row.sales);
      const lost = Math.max(0, row.lostSales);
      const invStart = Math.max(0, row.inventoryStartOfHour);
      const invEnd = Math.max(0, row.inventoryEndOfHour ?? 0);
      // If end inventory is available, use inventory flow proxy; otherwise use spoilage proxy on residual stock.
      const wasteProxy =
        row.inventoryEndOfHour !== undefined ? Math.max(0, invStart - sales - invEnd) : Math.max(0, invStart - sales) * config.wasteRatePerHour;
      totals.sales[idx] += sales;
      totals.lost[idx] += lost;
      totals.waste[idx] += wasteProxy;
      // Inventory is overwritten (last row wins) while demand/sales/lost sales accumulate.
      agg.inventoryStart[idx] = Math.max(0, row.inventoryStartOfHour);
      agg.demand[idx] += Math.max(0, row.demand);
      agg.sales[idx] += sales;
      agg.lostSales[idx] += lost;
      if (Number.isFinite(row.orderQty ?? NaN)) {
        agg.orderObserved[idx] = Math.max(0, row.orderQty ?? 0);
      }
    }
    outletTotals.set(row.outletId, totals);

    const dateSet = outletDates.get(row.outletId) ?? new Set<string>();
    dateSet.add(row.date);
    outletDates.set(row.outletId, dateSet);
  }

  const rollingSignalsByOutlet = new Map<number, OutletRollingSignals>();
  for (const [outletId, totals] of outletTotals) {
    // Average per distinct day, then scale to a 7-day equivalent.
    const days = Math.max(1, outletDates.get(outletId)?.size ?? 1);
    rollingSignalsByOutlet.set(outletId, {
      sales7d: totals.sales.map((v) => (v / days) * 7),
      lost7d: totals.lost.map((v) => (v / days) * 7),
      waste7d: totals.waste.map((v) => (v / days) * 7),
    });
  }

  const byDay = new Map<string, HourAggregate[]>();
  for (const agg of grouped.values()) {
    const key = `${agg.outletId}|${agg.date}`;
    const arr = byDay.get(key) ?? [];
    arr.push(agg);
    byDay.set(key, arr);
  }
  // Keep only hours in [open, close) and order each day chronologically.
  // A day with no in-hours aggregates keeps its key with an empty list.
  for (const [dayKey, list] of byDay) {
    const inHours = list
      .filter((x) => x.hour >= config.businessHours.open && x.hour < config.businessHours.close)
      .sort((a, b) => a.hour - b.hour);
    byDay.set(dayKey, inHours);
  }

  return { byDay, rollingSignalsByOutlet };
}

/**
 * Builds the observation vector for one hourly aggregate.
 *
 * The environment context combines the aggregate's own fields with the
 * outlet's rolling signals. When the outlet has no signals, sales fall back to
 * `max(1, demand * 7)` per SKU and lost-sales / waste fall back to zeros.
 *
 * @param agg Hourly aggregate to describe.
 * @param config Environment configuration (SKU count, demand windows).
 * @param rollingSignalsByOutlet Rolling signals keyed by outlet id.
 * @returns The observation produced by `buildInventoryObservation`.
 */
function buildStateFromAggregate(
  agg: HourAggregate,
  config: InventoryEnvConfig,
  rollingSignalsByOutlet: Map<number, OutletRollingSignals>
): number[] {
  const outletSignals = rollingSignalsByOutlet.get(agg.outletId);
  const zeroPerSku = (): number[] => new Array<number>(config.skus.length).fill(0);
  const demandBasedSales = (): number[] => agg.demand.map((units) => Math.max(1, units * 7));

  const context: EnvironmentContext = {
    hour: agg.hour,
    weekday: agg.weekday,
    isWeekend: agg.isWeekend,
    isHoliday: agg.isHoliday,
    temp: agg.temp,
    precip: agg.precip,
    sales7d: outletSignals?.sales7d ?? demandBasedSales(),
    lost7d: outletSignals?.lost7d ?? zeroPerSku(),
    waste7d: outletSignals?.waste7d ?? zeroPerSku(),
    capacity: agg.capacity,
  };

  return buildInventoryObservation({
    inventory: agg.inventoryStart,
    context,
    demandWindows: config.demandWindows,
  });
}

/**
 * Number of hours from `hour` until the configured closing hour, never less
 * than 1.
 *
 * The closing hour is bounded to [1, 24] and the current hour to [0, 23]
 * before subtracting.
 *
 * @param hour Current hour of day.
 * @param config Environment configuration providing `businessHours.close`.
 * @returns Remaining open hours (at least 1).
 */
function remainingOpenHours(hour: number, config: InventoryEnvConfig): number {
  const closingHour = clampInt(config.businessHours.close, 1, 24);
  const hoursLeft = closingHour - clampInt(hour, 0, 23);
  return Math.max(1, hoursLeft);
}

/**
 * Scales an order vector down so it fits into the remaining storage capacity.
 *
 * Remaining capacity is `capacity` minus the positive part of current
 * inventory (floored at 0). If the positive part of the order already fits, a
 * copy of the order is returned unchanged. If nothing fits (or nothing is
 * ordered), every entry becomes 0. Otherwise each entry is multiplied by
 * `remaining / total`, rounded down, and floored at 0.
 *
 * @param action Proposed order quantity per SKU.
 * @param inventoryStart Current inventory per SKU.
 * @param capacity Total storage capacity.
 * @returns A new array holding the capacity-respecting order.
 */
function capActionByCapacity(action: number[], inventoryStart: number[], capacity: number): number[] {
  const sumPositive = (values: number[]): number =>
    values.reduce((running, value) => running + Math.max(0, value), 0);

  const room = Math.max(0, capacity - sumPositive(inventoryStart));
  const requested = sumPositive(action);

  if (requested <= room) {
    return action.slice();
  }
  if (requested <= 0 || room <= 0) {
    return action.map(() => 0);
  }

  const scale = room / requested;
  return action.map((qty) => Math.max(0, Math.floor(qty * scale)));
}

/**
 * Derives the heuristic ("expert") order quantity per SKU for one hourly
 * aggregate.
 *
 * For each SKU a target availability is computed from the expected remaining
 * demand (scaled by the target service level), a boost for recently lost
 * sales, a safety amount, and a deduction for waste pressure. The shortfall
 * versus current inventory becomes the "math" order. When the data contains an
 * observed order for the SKU, the result is a 40/60 blend of the observed and
 * math orders. Finally the whole vector is scaled to fit storage capacity.
 *
 * @param agg Hourly aggregate providing inventory, demand, sales, lost sales
 *   and any observed orders.
 * @param config Environment configuration (SKUs, service target, order cap, capacity).
 * @returns Order quantity per SKU, in `config.skus` order.
 */
function buildActionFromAggregate(agg: HourAggregate, config: InventoryEnvConfig): number[] {
  const action = new Array<number>(config.skus.length).fill(0);
  const remHours = remainingOpenHours(agg.hour, config);
  const serviceTarget = clamp(config.targetServiceLevel, 0, 1);
  for (let i = 0; i < config.skus.length; i++) {
    const sku = config.skus[i];
    if (!sku) continue;
    const minOrder = sku.minOrder ?? 0;
    const invNow = Math.max(0, agg.inventoryStart[i] ?? 0);
    const demandNow = Math.max(0, agg.demand[i] ?? 0);
    const salesNow = Math.max(0, agg.sales[i] ?? 0);
    const lostNow = Math.max(0, agg.lostSales[i] ?? 0);
    const observed = agg.orderObserved[i];

    // Estimated current waste pressure proxy from overhang inventory.
    const wastePressure = Math.max(0, invNow - salesNow - Math.max(0, demandNow - lostNow));
    // Hourly demand estimate: at least 1, and at least sales + lost sales.
    const demandSignal = Math.max(1, demandNow, salesNow + lostNow);
    // Assume the current hourly demand persists for every remaining open hour.
    const expectedRemainingDemand = demandSignal * remHours;
    const serviceNeed = expectedRemainingDemand * serviceTarget;
    // Over-compensate (x1.25) for demand that was lost this hour.
    const recoveryBoost = lostNow * 1.25;
    // Safety amount: 10% of the demand signal, but no less than the minimum order.
    const safety = Math.max(minOrder, Math.round(demandSignal * 0.1));
    // Ratio of remaining hours to shelf life, capped at 1.
    const shelfRisk = Math.min(1, remHours / Math.max(1, sku.shelfLifeHours));
    // Deduct between 35% and 100% of the waste pressure depending on shelf risk.
    const wasteGuard = wastePressure * (0.35 + 0.65 * shelfRisk);
    const desiredAvailable = serviceNeed + recoveryBoost + safety - wasteGuard;
    const mathOrder = normalizeOfflineAction(desiredAvailable - invNow, minOrder, config.maxOrderPerSku);

    if (Number.isFinite(observed)) {
      // Keep imitation anchor while biasing toward corrective math order.
      const observedOrder = normalizeOfflineAction(observed ?? 0, minOrder, config.maxOrderPerSku);
      action[i] = normalizeOfflineAction(Math.round(observedOrder * 0.4 + mathOrder * 0.6), minOrder, config.maxOrderPerSku);
    } else {
      action[i] = mathOrder;
    }
  }
  // A falsy aggregate capacity (e.g. 0) falls back to the configured capacity.
  return capActionByCapacity(action, agg.inventoryStart, agg.capacity || config.capacity);
}

/**
 * Total stockout penalty across SKUs.
 *
 * Per SKU the penalty has a linear part, `price * multiplier * lost`, and a
 * severity part, `price * multiplier * lost² / demand`, which is only applied
 * when demand is positive. Negative lost-sales / demand values are treated as 0.
 *
 * @param args.demand Demand per SKU.
 * @param args.lostSales Lost sales per SKU.
 * @param args.config Environment configuration (SKU prices, stockout multiplier).
 * @returns The summed penalty.
 */
function computeStockoutPenalty(args: { demand: number[]; lostSales: number[]; config: InventoryEnvConfig }): number {
  const { skus, stockoutPenaltyMultiplier } = args.config;
  let penaltySum = 0;
  let k = 0;
  while (k < skus.length) {
    const price = skus[k]?.unitPrice ?? 0;
    const unitsLost = Math.max(0, args.lostSales[k] ?? 0);
    const unitsRequested = Math.max(0, args.demand[k] ?? 0);
    const linearPart = price * stockoutPenaltyMultiplier * unitsLost;
    let severityPart = 0;
    if (unitsRequested > 0) {
      severityPart = price * stockoutPenaltyMultiplier * ((unitsLost * unitsLost) / unitsRequested);
    }
    penaltySum += linearPart + severityPart;
    k += 1;
  }
  return penaltySum;
}

/**
 * Penalty for serving less than the target share of demand.
 *
 * The target service level is bounded to [0, 1]; a target of 0 or less yields
 * no penalty. Per SKU, the shortfall is `max(0, demand * target - sales)` and
 * is priced at `unitPrice * serviceShortfallPenaltyMultiplier`.
 *
 * @param args.sales Units sold per SKU.
 * @param args.demand Units demanded per SKU.
 * @param args.config Environment configuration (prices, target, multiplier).
 * @returns The summed penalty.
 */
function computeServiceShortfallPenalty(args: {
  sales: number[];
  demand: number[];
  config: InventoryEnvConfig;
}): number {
  const cfg = args.config;
  const serviceTarget = Math.max(0, Math.min(1, cfg.targetServiceLevel));
  if (serviceTarget <= 0) {
    return 0;
  }

  let penaltySum = 0;
  for (let k = 0; k < cfg.skus.length; k += 1) {
    const price = cfg.skus[k]?.unitPrice ?? 0;
    const unitsRequested = Math.max(0, args.demand[k] ?? 0);
    const unitsServed = Math.max(0, args.sales[k] ?? 0);
    const unitsRequired = unitsRequested * serviceTarget;
    const unitsShort = Math.max(0, unitsRequired - unitsServed);
    penaltySum += price * cfg.serviceShortfallPenaltyMultiplier * unitsShort;
  }
  return penaltySum;
}

/**
 * Estimated cost of stock expected to go to waste after this hour.
 *
 * Per SKU: stock left after ordering and selling, multiplied by the hourly
 * waste rate, the remaining open hours, a shelf-life risk factor
 * (`min(1, remainingHours / shelfLifeHours)`), and the unit cost.
 *
 * @param args.inventoryStart Inventory per SKU at the start of the hour.
 * @param args.sales Units sold per SKU.
 * @param args.action Units ordered per SKU.
 * @param args.hour Current hour of day.
 * @param args.config Environment configuration (SKUs, waste rate, business hours).
 * @returns The summed expected waste cost.
 */
function computeExpectedWasteCost(args: {
  inventoryStart: number[];
  sales: number[];
  action: number[];
  hour: number;
  config: InventoryEnvConfig;
}): number {
  const cfg = args.config;
  const hoursLeft = remainingOpenHours(args.hour, cfg);
  let costSum = 0;
  for (let k = 0; k < cfg.skus.length; k += 1) {
    const sku = cfg.skus[k];
    if (!sku) continue;
    const costPerUnit = sku.unitCost ?? 0;
    const stockAfterOrder = Math.max(0, (args.inventoryStart[k] ?? 0) + (args.action[k] ?? 0));
    const stockAfterSales = Math.max(0, stockAfterOrder - (args.sales[k] ?? 0));
    const shelfRisk = Math.min(1, hoursLeft / Math.max(1, sku.shelfLifeHours));
    // Leftover stock, exposure time and shelf risk all scale the expected waste.
    costSum += stockAfterSales * cfg.wasteRatePerHour * hoursLeft * shelfRisk * costPerUnit;
  }
  return costSum;
}

/**
 * Penalty for holding less stock (after ordering) than the desired level.
 *
 * The desired level per SKU is `demand * target + max(0, minOrder)`, with the
 * target service level bounded to [0, 1]; a target of 0 or less yields no
 * penalty. Each missing unit costs `unitPrice * stockoutPenaltyMultiplier * 0.4`.
 *
 * @param args.inventoryStart Inventory per SKU at the start of the hour.
 * @param args.action Units ordered per SKU.
 * @param args.demand Units demanded per SKU.
 * @param args.config Environment configuration (SKUs, target, multiplier).
 * @returns The summed penalty.
 */
function computeUnderstockRiskPenalty(args: {
  inventoryStart: number[];
  action: number[];
  demand: number[];
  config: InventoryEnvConfig;
}): number {
  const cfg = args.config;
  const serviceTarget = Math.max(0, Math.min(1, cfg.targetServiceLevel));
  if (serviceTarget <= 0) {
    return 0;
  }

  let penaltySum = 0;
  for (let k = 0; k < cfg.skus.length; k += 1) {
    const sku = cfg.skus[k];
    if (!sku) continue;
    const stockAfterOrder = Math.max(0, (args.inventoryStart[k] ?? 0) + (args.action[k] ?? 0));
    const unitsRequested = Math.max(0, args.demand[k] ?? 0);
    const desiredStock = unitsRequested * serviceTarget + Math.max(0, sku.minOrder);
    const unitsMissing = Math.max(0, desiredStock - stockAfterOrder);
    penaltySum += sku.unitPrice * cfg.stockoutPenaltyMultiplier * 0.4 * unitsMissing;
  }
  return penaltySum;
}

/**
 * Importance weight of a training sample derived from one hourly aggregate.
 *
 * The weight is `1 + s(margin) - s(totalPenalty)`, floored at 0.1, where
 * `s(x) = x / (1 + |x|)` squashes its input into (-1, 1). For non-negative
 * inputs this equals `x / (1 + x)`. Using `|x|` in the denominator keeps the
 * score bounded and monotone for negative margins (unit cost above unit
 * price): without it the expression divides by zero at -1 and flips sign
 * below -1. A non-finite result falls back to the 0.1 floor, because
 * `Math.max(0.1, NaN)` would otherwise return `NaN`.
 *
 * @param agg Hourly aggregate being weighted.
 * @param config Environment configuration (prices, costs, penalty multipliers).
 * @returns A finite weight of at least 0.1.
 */
function computeWeight(agg: HourAggregate, config: InventoryEnvConfig): number {
  const action = buildActionFromAggregate(agg, config);
  let margin = 0;
  for (let i = 0; i < config.skus.length; i++) {
    margin += ((config.skus[i]?.unitPrice ?? 0) - (config.skus[i]?.unitCost ?? 0)) * (agg.sales[i] ?? 0);
  }
  const stockoutPenalty = computeStockoutPenalty({ demand: agg.demand, lostSales: agg.lostSales, config });
  const servicePenalty = computeServiceShortfallPenalty({ sales: agg.sales, demand: agg.demand, config });
  const expectedWasteCost = computeExpectedWasteCost({
    inventoryStart: agg.inventoryStart,
    sales: agg.sales,
    action,
    hour: agg.hour,
    config,
  });
  const understockRiskPenalty = computeUnderstockRiskPenalty({
    inventoryStart: agg.inventoryStart,
    action,
    demand: agg.demand,
    config,
  });
  const totalPenalty = stockoutPenalty + servicePenalty + expectedWasteCost + understockRiskPenalty;

  const squash = (x: number): number => x / (1 + Math.abs(x));
  const weight = 1 + squash(margin) - squash(totalPenalty);
  return Number.isFinite(weight) ? Math.max(0.1, weight) : 0.1;
}

/**
 * Reward for taking `action` in the hour described by `agg`.
 *
 * Reward = gross margin on sales, minus the stockout, service-shortfall and
 * understock-risk penalties, minus expected waste cost, minus holding cost.
 * Holding cost is charged on start-of-hour inventory left after sales (the
 * order itself is not included in that term).
 *
 * @param agg Hourly aggregate (inventory, demand, sales, lost sales).
 * @param action Order quantity per SKU.
 * @param config Environment configuration (prices, costs, multipliers).
 * @returns The scalar reward.
 */
function computeReward(agg: HourAggregate, action: number[], config: InventoryEnvConfig): number {
  let grossMargin = 0;
  let holdingCost = 0;
  for (let k = 0; k < config.skus.length; k += 1) {
    const sku = config.skus[k];
    const unitsSold = agg.sales[k] ?? 0;
    grossMargin += ((sku?.unitPrice ?? 0) - (sku?.unitCost ?? 0)) * unitsSold;
    const leftover = Math.max(0, (agg.inventoryStart[k] ?? 0) - unitsSold);
    holdingCost += leftover * config.holdingCostPerUnit;
  }

  const { demand, sales, lostSales, inventoryStart, hour } = agg;
  const stockoutPenalty = computeStockoutPenalty({ demand, lostSales, config });
  const servicePenalty = computeServiceShortfallPenalty({ sales, demand, config });
  const expectedWasteCost = computeExpectedWasteCost({ inventoryStart, sales, action, hour, config });
  const understockRiskPenalty = computeUnderstockRiskPenalty({ inventoryStart, action, demand, config });

  return grossMargin - stockoutPenalty - servicePenalty - understockRiskPenalty - expectedWasteCost - holdingCost;
}

/**
 * Builds supervised (state, action, weight) samples from dataset rows.
 *
 * Rows are aggregated per outlet/date/hour; each in-business-hours aggregate
 * yields one sample whose action is the heuristic expert order.
 *
 * @param rows Parsed dataset rows.
 * @param configRaw Overrides merged over `defaultInventoryEnvConfig()`.
 * @returns One sample per hourly aggregate, grouped by day.
 */
export function buildTrainingSamplesFromRows(
  rows: InventoryDatasetRow[],
  configRaw: Partial<InventoryEnvConfig> = {}
): TrainingSample[] {
  const config = { ...defaultInventoryEnvConfig(), ...configRaw };
  const aggregates = buildHourlyAggregates(rows, config);

  const toSample = (agg: HourAggregate): TrainingSample => ({
    state: buildStateFromAggregate(agg, config, aggregates.rollingSignalsByOutlet),
    action: buildActionFromAggregate(agg, config),
    weight: computeWeight(agg, config),
    meta: {
      outletId: agg.outletId,
      date: agg.date,
      hour: agg.hour,
    },
  });

  return [...aggregates.byDay.values()].flatMap((hoursOfDay) => hoursOfDay.map(toSample));
}

/**
 * Builds (state, action, reward, nextState, done) transitions from dataset rows.
 *
 * Within each outlet-day the hourly aggregates are chained in hour order. The
 * last aggregate of a day is terminal (`done = true`) and uses a copy of its
 * own state as `nextState`.
 *
 * @param rows Parsed dataset rows.
 * @param configRaw Overrides merged over `defaultInventoryEnvConfig()`.
 * @returns One transition per hourly aggregate, grouped by day.
 */
export function buildTrainingTransitionsFromRows(
  rows: InventoryDatasetRow[],
  configRaw: Partial<InventoryEnvConfig> = {}
): TrainingTransition[] {
  const config = { ...defaultInventoryEnvConfig(), ...configRaw };
  const { byDay, rollingSignalsByOutlet } = buildHourlyAggregates(rows, config);
  const observe = (agg: HourAggregate): number[] => buildStateFromAggregate(agg, config, rollingSignalsByOutlet);
  const transitions: TrainingTransition[] = [];

  for (const hoursOfDay of byDay.values()) {
    hoursOfDay.forEach((current, position) => {
      if (!current) return;
      const action = buildActionFromAggregate(current, config);
      const state = observe(current);
      const following = hoursOfDay[position + 1];
      const isTerminal = !following;
      transitions.push({
        state,
        action,
        reward: computeReward(current, action, config),
        // Terminal steps have no successor; reuse a copy of the current state.
        nextState: following ? observe(following) : state.slice(),
        done: isTerminal,
        weight: computeWeight(current, config),
        meta: {
          outletId: current.outletId,
          date: current.date,
          hour: current.hour,
        },
      });
    });
  }

  return transitions;
}

/**
 * Scores the heuristic expert policy on a dataset.
 *
 * For every in-business-hours hourly aggregate the expert action is computed
 * and rewarded. Outlet-days are processed in lexicographic order of their
 * `"outletId|date"` key; days without any in-hours aggregate are skipped.
 *
 * @param rows Parsed dataset rows.
 * @param configRaw Overrides merged over `defaultInventoryEnvConfig()`.
 * @returns Step and day counts, total reward, per-step and per-day averages
 *   (0 when there is nothing to average), and the per-day reward totals.
 */
export function evaluateExpertPolicyFromRows(
  rows: InventoryDatasetRow[],
  configRaw: Partial<InventoryEnvConfig> = {}
): ExpertPolicyEvaluation {
  const config = { ...defaultInventoryEnvConfig(), ...configRaw };
  const { byDay } = buildHourlyAggregates(rows, config);
  const orderedKeys = Array.from(byDay.keys()).sort((left, right) => left.localeCompare(right));

  const dayRewards: number[] = [];
  let totalReward = 0;
  let steps = 0;

  orderedKeys.forEach((key) => {
    const hoursOfDay = byDay.get(key) ?? [];
    if (hoursOfDay.length === 0) return;
    let rewardForDay = 0;
    for (const agg of hoursOfDay) {
      const stepReward = computeReward(agg, buildActionFromAggregate(agg, config), config);
      rewardForDay += stepReward;
      totalReward += stepReward;
      steps += 1;
    }
    dayRewards.push(rewardForDay);
  });

  const days = dayRewards.length;
  const expectedStepReward = steps > 0 ? totalReward / steps : 0;
  const expectedDayReward = days > 0 ? totalReward / days : 0;
  return { steps, days, totalReward, expectedStepReward, expectedDayReward, dayRewards };
}
