#!/usr/bin/env python3
"""
dp_frag.py  --  C28x data-page fragmentation analyzer  (v3)
=============================================================

Recommended usage (XML + .out for maximum accuracy):
    python dp_frag.py --xml yourfile_link.xml --out yourfile.out [--section .ebss] [--html report.html]

Map file fallback (less robust format, same accuracy):
    python dp_frag.py yourfile.map --out yourfile.out [--section .ebss] [--html report.html]

Address-delta only (cannot detect intra-page gaps -- not recommended):
    python dp_frag.py yourfile.map [--section .ebss]

How to generate the XML:
    Add  --xml_link_info=yourfile_link.xml  to the linker command line (lnk2000 / cl2000 -z).
    The XML is generated every build alongside the .map file.

Why XML over .map?
------------------
The text .map format is not guaranteed stable across TI toolchain versions -- column
layout and section headers can change.  The --xml_link_info XML has a documented
schema, is machine-parseable without fragile regex, and has been stable across CCS
versions.  The map file parser is kept as a fallback only.

Why you need --out
------------------
Neither the XML nor the .map file contains per-symbol sizes.  They only have addresses.
Address-delta sizing (size = next_addr - this_addr) = actual_size + gap_after -- it
CANNOT distinguish the symbol from the gap following it.

When --out is supplied the script reads DWARF debug info via ofd2000.  DWARF stores
the exact C type of every variable; DW_AT_byte_size gives the true size in 16-bit
words (C28x native unit).  True gaps are then:
    gap = next_addr - (this_addr + dwarf_size)

Requires the .out to be built with -g (debug info on).

ofd2000 search order:
    1. --ofd-path argument
    2. PATH
    3. Common CCS install locations under C:\\ti\\ccs*

C28x DP page = 64 words.  The linker will not split a symbol across a DP boundary,
so any tail too small for the next symbol becomes a permanent hole.
"""

import re
import sys
import os
import shutil
import subprocess
import argparse
import xml.etree.ElementTree as ET
from collections import defaultdict
from pathlib import Path

# Size of one C28x data page (DP) in 16-bit words.  A symbol's page index is
# computed throughout this file as addr // DP_SIZE.
DP_SIZE = 64  # words per data page on C28x


def _dedup_sorted(pairs):
    """Deduplicate (addr, name) pairs and return sorted by addr."""
    return sorted(set(pairs), key=lambda x: x[0])


# DWARF tags that carry no size themselves — size resolved by following type_ref chain
_FORWARD_TAGS = frozenset({
    'typedef', 'const_type', 'volatile_type', 'restrict_type',
    'TI_far_type', 'TI_restrict_type',
})


# ---------------------------------------------------------------------------
# ofd2000 discovery and DWARF extraction
# ---------------------------------------------------------------------------

def _find_ofd2000():
    """Search PATH then common CCS install locations for ofd2000."""
    found = shutil.which('ofd2000')
    if found:
        return found
    ti = Path('C:/ti')
    if ti.is_dir():
        candidates = sorted(ti.glob('ccs*/ccs/tools/compiler/ti-cgt-c2000_*/bin/ofd2000.exe'),
                            reverse=True)
        if candidates:
            return str(candidates[0])
    return None


def run_ofd2000_dwarf(out_file, ofd_path=None, timeout=60):
    """
    Run ofd2000 with DWARF .debug_info + type display on out_file.

    Parameters
      out_file : path of the linked .out file to inspect
      ofd_path : explicit path to ofd2000; when falsy, _find_ofd2000() is used
      timeout  : seconds to wait for ofd2000 before giving up (default 60)

    Returns (text, error_string).  error_string is None on success; on any
    failure text is None and error_string says what went wrong.  This function
    does not raise for a missing, non-executable, slow or failing ofd2000.
    """
    exe = ofd_path or _find_ofd2000()
    if not exe:
        return None, (
            'ofd2000 not found.  Install CCS or pass --ofd-path.\n'
            'Running without DWARF: sizes will be address-delta estimates.'
        )

    cmd = [
        exe, '-g',
        '--dwarf_display=dinfo,'
        'nodabbrev,nodaranges,nodframe,nodline,nodloc,'
        'nodmacinfo,nodpubnames,nodpubtypes,nodranges,nodstr,nodtypes,'
        'noregtable,types',
        str(out_file),
    ]
    try:
        # errors='replace': a stray undecodable byte in the dump must not
        # abort the whole analysis with UnicodeDecodeError.
        result = subprocess.run(cmd, capture_output=True, text=True,
                                errors='replace', timeout=timeout)
    except FileNotFoundError:
        return None, f'ofd2000 not found at: {exe}'
    except subprocess.TimeoutExpired:
        return None, 'ofd2000 timed out'
    except OSError as e:
        # e.g. PermissionError when the path is a directory or not executable
        return None, f'ofd2000 could not be executed ({exe}): {e}'

    # A non-zero exit with nothing on stdout means there is no dump at all.
    # Report the tool's own message instead of handing its stderr text to the
    # DWARF parser, which would only produce a misleading "no variables found".
    if result.returncode != 0 and not result.stdout.strip():
        stderr_lines = [ln.strip() for ln in result.stderr.splitlines()
                        if ln.strip()]
        detail = f': {stderr_lines[0]}' if stderr_lines else ''
        return None, (f'ofd2000 failed (exit code {result.returncode}) '
                      f'for {out_file}{detail}')

    text = result.stdout + result.stderr
    if not text.strip():
        return None, f'ofd2000 produced no output for {out_file}'
    return text, None


# ---------------------------------------------------------------------------
# DWARF text parser
# ---------------------------------------------------------------------------

# Line patterns for the text dump parsed by _parse_dwarf.  All hex fields are
# matched as lowercase only ([0-9a-f]).

# DIE header line: leading whitespace, a hex offset, a decimal number, then
# DW_TAG_<tag>, optionally followed by "(<section>)" where the section name
# must start with '.'.
#   group 1 = DIE offset (hex digits)
#   group 2 = tag name without the DW_TAG_ prefix
#   group 3 = section name, or None when the annotation is absent
_DIE_RE = re.compile(
    r'^\s+([0-9a-f]+)\s+\d+\s+DW_TAG_(\S+?)(?:\s+\((\.[^\)]+)\))?$'
)
# Attribute line has many spaces where the abbrev number would be
# (at least five whitespace characters between the offset and DW_AT_...).
#   group 1 = attribute name (DW_AT_...), group 2 = rest of the line (value)
_ATTR_RE = re.compile(r'^\s+[0-9a-f]+\s{5,}(DW_AT_\S+)\s+(.*)')
# Applied to a DW_AT_type value: captures the hex digits after "+ 0x".
_TYPEREF_RE = re.compile(r'\+\s*0x([0-9a-f]+)')
# Applied to a DW_AT_location value: captures the hex address after DW_OP_addr.
_ADDR_RE = re.compile(r'DW_OP_addr\s+0x([0-9a-f]+)')


def _parse_dwarf(text, section='.ebss'):
    """
    Parse the DWARF text dump into a type table and a variable list.

    Step 1 groups every attribute line under the DIE header line that
    precedes it.  Step 2 turns each group into a small dict and, for
    DW_TAG_variable DIEs annotated with the requested section, records the
    variable.

    Returns (type_table, var_list) where
      type_table = {offset_int: {'tag': str,
                                 'byte_size': int   (if present and decimal),
                                 'type_ref': int    (if DW_AT_type has a ref),
                                 'addr_class': str  (if present)}}
      var_list   = [(addr_int, name_str, type_ref_int)] in dump order
    """
    # Step 1: collect (offset, tag, section, attrs) records in dump order.
    records = []
    attrs = None  # attribute dict of the DIE being read; None before any DIE
    for line in text.splitlines():
        header = _DIE_RE.match(line)
        if header:
            attrs = {}
            records.append((int(header.group(1), 16), header.group(2),
                            header.group(3), attrs))
            continue
        if attrs is None:
            continue  # attribute-looking line before the first DIE: ignore
        attribute = _ATTR_RE.match(line)
        if attribute:
            # A repeated attribute name overwrites the earlier value.
            attrs[attribute.group(1)] = attribute.group(2).strip()

    # Step 2: interpret each record.
    type_table = {}
    var_list = []
    for offset, tag, die_section, attrs in records:
        die = {'tag': tag}

        raw_size = attrs.get('DW_AT_byte_size')
        if raw_size is not None:
            try:
                die['byte_size'] = int(raw_size)
            except ValueError:
                pass  # non-decimal size text: leave byte_size unset

        raw_type = attrs.get('DW_AT_type')
        if raw_type is not None:
            ref = _TYPEREF_RE.search(raw_type)
            if ref:
                die['type_ref'] = int(ref.group(1), 16)

        if 'DW_AT_address_class' in attrs:
            die['addr_class'] = attrs['DW_AT_address_class'].strip()

        if tag == 'variable' and die_section == section:
            location = _ADDR_RE.search(attrs.get('DW_AT_location', ''))
            addr = int(location.group(1), 16) if location else None

            # The TI symbol name takes precedence over the plain DWARF name.
            name = attrs.get('DW_AT_TI_symbol_name', attrs.get('DW_AT_name'))
            if name is not None:
                name = name.strip()

            type_ref = die.get('type_ref')
            if None not in (addr, name, type_ref):
                var_list.append((addr, name, type_ref))

        # A later DIE with the same offset replaces the earlier entry.
        type_table[offset] = die

    return type_table, var_list


def _resolve_size(offset, type_table, depth=0):
    """
    Resolve a DWARF type offset to its size in C28x words.
    Returns int, or None if not resolvable.
    """
    if depth > 20:
        return None  # cycle guard
    die = type_table.get(offset)
    if die is None:
        return None

    tag = die['tag']

    # Direct size types
    if 'byte_size' in die:
        return die['byte_size']

    # Pointer: no byte_size in DWARF, infer from address class
    if tag == 'pointer_type':
        ac = die.get('addr_class', '')
        return 2 if 'PTR32' in ac else 1

    # Reference type (C++ &) -- same as pointer
    if tag == 'reference_type':
        ac = die.get('addr_class', '')
        return 2 if 'PTR32' in ac else 1

    # Subroutine/function type (bare, not a pointer) -- zero data size
    if tag == 'subroutine_type':
        return 0

    if tag in _FORWARD_TAGS and 'type_ref' in die:
        return _resolve_size(die['type_ref'], type_table, depth + 1)

    return None  # unknown


def get_dwarf_symbol_sizes(out_file, section='.ebss', ofd_path=None):
    """
    Obtain true per-variable sizes for one section from the DWARF info.

    Runs ofd2000 on out_file, parses the dump and resolves each variable's
    type to a word count.

    Returns (sizes_dict, warning_str).
      sizes_dict:  {addr_int: size_int} for every variable whose type resolved
                   to a positive size; empty when ofd2000 failed or no
                   variables were found.
      warning_str: None when every variable resolved; otherwise a message
                   describing the failure or listing the unresolved symbols.
    """
    dump, failure = run_ofd2000_dwarf(out_file, ofd_path)
    if dump is None:
        return {}, failure

    type_table, variables = _parse_dwarf(dump, section=section)
    if not variables:
        return {}, (
            f'ofd2000 DWARF output parsed but no {section} variables found.\n'
            'Make sure the .out was built with -g (debug info enabled).'
        )

    sizes = {}
    missing = []
    for addr, name, type_ref in variables:
        words = _resolve_size(type_ref, type_table)
        if words is None or words <= 0:
            missing.append(name)
            continue
        sizes[addr] = words

    if not missing:
        return sizes, None

    # Name at most five offenders; an ellipsis marks the truncation.
    shown = ", ".join(missing[:5])
    more = "..." if len(missing) > 5 else ""
    return sizes, (f'DWARF type unresolved for {len(missing)} symbol(s): '
                   f'{shown}{more}.\n'
                   f'Those symbols will use address-delta sizing.')


# ---------------------------------------------------------------------------
# Parsing -- .map file (fallback input; the --xml_link_info XML is preferred)
# ---------------------------------------------------------------------------

def _parse_section_ranges(text):
    """
    Return dict {section_name: (start_addr, end_addr)} from the
    SECTION ALLOCATION MAP block.
    """
    ranges = {}
    in_map = False
    pending_name = None

    for line in text.splitlines():
        if 'SECTION ALLOCATION MAP' in line:
            in_map = True
            continue
        if not in_map:
            continue
        if pending_name:
            m = re.match(r'^\*\s+\d+\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)', line)
            if m:
                start = int(m.group(1), 16)
                length = int(m.group(2), 16)
                ranges[pending_name] = (start, start + length)
            pending_name = None
            continue
        m = re.match(r'^(\.?\w+)\s+(\d+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)', line)
        if m:
            name = m.group(1)
            start = int(m.group(3), 16)
            length = int(m.group(4), 16)
            ranges[name] = (start, start + length)
            continue
        m = re.match(r'^([A-Za-z_]\w*)\s*$', line)
        if m:
            candidate = m.group(1)
            if candidate not in ('GLOBAL', 'LOCAL', 'output', 'section',
                                 'attributes', 'MEMORY', 'PAGE'):
                pending_name = candidate
    return ranges


def _parse_global_symbols_by_address(text):
    """
    Return list of (addr, name) from 'GLOBAL SYMBOLS: SORTED BY Symbol Address'.
    Also captures local symbols when --mapfile_contents=sym_defs was used.
    """
    symbols = []
    in_block = False

    for line in text.splitlines():
        if 'SORTED BY Symbol Address' in line:
            in_block = True
            continue
        if not in_block:
            continue
        # Format: "page  00001234  _symbolname"  (standard COFF map)
        m = re.match(r'^\d+\s+([0-9a-fA-F]{8})\s+(\S+)', line)
        if m:
            symbols.append((int(m.group(1), 16), m.group(2)))
    return symbols


def parse_map_file(text, section_filter='.ebss', dwarf_sizes=None):
    """
    Parse a TI COFF linker .map file into sized symbol records.

    text           -- full contents of the .map file
    section_filter -- output section to analyse; a falsy value selects every
                      section found in the allocation map
    dwarf_sizes    -- optional {addr_int: actual_size_words} from DWARF.
                      Addresses present in it get their true size; all other
                      symbols fall back to address-delta sizing
                      (size = next symbol's address - this address, or the
                      section end for the last symbol).

    Returns a list of dicts {name, addr, size, size_source, section} where
    size_source is 'dwarf' or 'delta'.  Symbols whose size works out to zero
    or less are dropped.

    Raises ValueError when no matching section is found.
    """
    section_ranges = _parse_section_ranges(text)

    if section_filter:
        targets = {}
        if section_filter in section_ranges:
            targets[section_filter] = section_ranges[section_filter]
    else:
        targets = section_ranges

    if not targets:
        available = ', '.join(sorted(section_ranges.keys()))
        raise ValueError(
            f"Section '{section_filter}' not found in map file.\n"
            f"Available sections: {available}"
        )

    every_symbol = _parse_global_symbols_by_address(text)

    symbols = []
    # Walk the sections in ascending start-address order.
    for sec_name, (sec_start, sec_end) in sorted(targets.items(),
                                                  key=lambda item: item[1][0]):
        if sec_end == sec_start:
            continue  # empty section

        members = _dedup_sorted(
            [(addr, name) for addr, name in every_symbol
             if sec_start <= addr < sec_end])

        # Address that bounds each symbol from above: the next symbol's
        # address, or the section end for the last one.
        upper_bounds = [addr for addr, _ in members[1:]] + [sec_end]

        for (addr, name), bound in zip(members, upper_bounds):
            if dwarf_sizes and addr in dwarf_sizes:
                size, size_source = dwarf_sizes[addr], 'dwarf'
            else:
                size, size_source = bound - addr, 'delta'

            if size <= 0:
                continue
            symbols.append({
                'name': name,
                'addr': addr,
                'size': size,
                'size_source': size_source,
                'section': sec_name,
            })
    return symbols


# ---------------------------------------------------------------------------
# Parsing -- --xml_link_info XML (preferred input, replaces map file)
# ---------------------------------------------------------------------------

def parse_xml_link_info(xml_text, section_filter='.ebss', dwarf_sizes=None):
    """
    Parse TI linker --xml_link_info XML output into sized symbol records.

    Elements used:
      <object_component> -- input section piece (id, name, run_address, size);
                            maps a symbol to the section it lives in
      <logical_group>    -- output section (name, run_address, size)
      <symbol>           -- symbol (name, value=address, object_component_ref)

    xml_text       -- full XML document as a string
    section_filter -- output section to analyse; a falsy value keeps symbols
                      of every section that has a <logical_group>
    dwarf_sizes    -- optional {addr_int: actual_size_words}; addresses found
                      in it use that size, all others use address-delta sizing

    Returns a list of dicts {name, addr, size, size_source, section} where
    size_source is 'dwarf' or 'delta'.

    Raises ValueError if the XML is malformed (the ParseError is chained as
    __cause__) or if section_filter names a section that is not in the file.
    """
    try:
        root = ET.fromstring(xml_text)
    except ET.ParseError as e:
        # Chain the cause so the traceback shows where parsing failed.
        raise ValueError(f'XML parse error: {e}') from e

    def _hex_child(parent, tag):
        """Return the hex value of child <tag>, or None if absent/invalid."""
        el = parent.find(tag)
        if el is None:
            return None
        try:
            return int(el.text, 16)
        except (ValueError, TypeError):
            return None

    # object_component id -> section name.  Components without a usable
    # run_address and size are ignored, so symbols that reference them end up
    # with no section.
    oc_section = {}
    for oc in root.findall('.//object_component'):
        oc_id = oc.get('id')
        name_el = oc.find('name')
        if oc_id is None or name_el is None:
            continue
        if _hex_child(oc, 'run_address') is None or _hex_child(oc, 'size') is None:
            continue
        # Section name is the part before any colon
        # (e.g. ".ebss:__lock" -> ".ebss").
        oc_section[oc_id] = name_el.text.split(':')[0] if name_el.text else ''

    # Output-section address ranges, end exclusive.
    section_ranges = {}
    for lg in root.findall('.//logical_group'):
        name_el = lg.find('name')
        start = _hex_child(lg, 'run_address')
        length = _hex_child(lg, 'size')
        if name_el is None or start is None or length is None:
            continue
        if name_el.text is None:
            # An empty <name/> would add a None key, and sorting None together
            # with strings for the error message below raises TypeError.
            continue
        section_ranges[name_el.text] = (start, start + length)

    if section_filter and section_filter not in section_ranges:
        available = ', '.join(sorted(section_ranges.keys()))
        raise ValueError(
            f"Section '{section_filter}' not found in XML.\n"
            f"Available sections: {available}"
        )

    target_range = section_ranges.get(section_filter)  # None = all sections

    # Group the symbols of interest by section.
    by_section = defaultdict(list)
    for sym in root.findall('.//symbol'):
        name_el = sym.find('name')
        if name_el is None:
            continue
        addr = _hex_child(sym, 'value')
        if addr is None:
            continue

        # Determine the section through the object_component reference.
        sec_name = ''
        oc_ref_el = sym.find('object_component_ref')
        if oc_ref_el is not None:
            sec_name = oc_section.get(oc_ref_el.get('idref'), '')

        if section_filter:
            if sec_name != section_filter:
                continue
            if target_range and not (target_range[0] <= addr < target_range[1]):
                continue
        elif sec_name not in section_ranges:
            continue

        by_section[sec_name].append((addr, name_el.text))

    symbols = []
    for sec_name, entries in by_section.items():
        _, sec_end = section_ranges.get(sec_name, (0, 0))
        unique = _dedup_sorted(entries)

        for i, (addr, name) in enumerate(unique):
            if dwarf_sizes and addr in dwarf_sizes:
                size = dwarf_sizes[addr]
                size_source = 'dwarf'
            else:
                if i + 1 < len(unique):
                    size = unique[i + 1][0] - addr
                else:
                    # Last symbol: runs to the section end; 1 word when the
                    # section end is unknown or not beyond the symbol.
                    size = sec_end - addr if sec_end > addr else 1
                size_source = 'delta'

            if size > 0:
                symbols.append({
                    'name': name,
                    'addr': addr,
                    'size': size,
                    'size_source': size_source,
                    'section': sec_name,
                })
    return symbols


# ---------------------------------------------------------------------------
# Parsing -- ofd2000 --obj_display=none,symbols (secondary input mode)
# ---------------------------------------------------------------------------

def parse_ofd_symbols(text, section_filter='.ebss'):
    """
    Parse ofd2000 --obj_display=none,symbols output (no DWARF sizes).

    Only rows whose binding is 'global' and whose type is 'object' are kept,
    restricted to section_filter when it is truthy.  All sizes are
    address-delta estimates; the last symbol of each section gets size 1
    because nothing bounds it.

    Returns a list of dicts {name, addr, size, size_source, section}.
    """
    # Columns: id  name  value  kind  section  binding  type
    row = re.compile(
        r'^\s*\d+\s+(\S+)\s+0x([0-9a-fA-F]+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)',
        re.MULTILINE
    )

    grouped = defaultdict(list)
    for hit in row.finditer(text):
        name, addr_hex, section, binding, typ = hit.groups()
        is_global_object = binding == 'global' and typ == 'object'
        in_scope = not section_filter or section == section_filter
        if is_global_object and in_scope:
            grouped[section].append((int(addr_hex, 16), name))

    symbols = []
    for sec_name, entries in grouped.items():
        ordered = _dedup_sorted(entries)
        # Pair every symbol with its successor; None marks the last one.
        for (addr, name), successor in zip(ordered, ordered[1:] + [None]):
            size = 1 if successor is None else successor[0] - addr
            symbols.append({
                'name': name, 'addr': addr, 'size': size,
                'size_source': 'delta', 'section': sec_name,
            })
    return symbols


# ---------------------------------------------------------------------------
# Analysis
# ---------------------------------------------------------------------------

def assign_pages(symbols, page_size=None):
    """
    Group symbols by the data page(s) they occupy.

    symbols   -- iterable of symbol dicts with at least 'addr'; 'size' and
                 'name' are used when a symbol has to be split
    page_size -- words per page; defaults to DP_SIZE

    A symbol that fits inside one page is filed, unchanged (same dict object),
    under that page.  A symbol that runs past the end of its page (e.g. an
    array larger than one page) is split into one fragment per page it
    touches.  Each fragment is a copy of the symbol dict with 'addr' and
    'size' narrowed to that page; fragments after the first get " (cont.)"
    appended to their name.  The input dicts are never modified.

    Without the split, the words such a symbol occupies in following pages
    were invisible to the per-page analysis: the next page reported them as a
    wasted gap in front of its first symbol, and pages covered entirely by
    the symbol were not counted at all.

    Returns {page_index: [symbol_dict, ...]} ordered by page index.
    """
    words_per_page = DP_SIZE if page_size is None else page_size

    pages = defaultdict(list)
    for s in symbols:
        start = s['addr']
        end = start + max(s.get('size', 0), 0)
        first_page = start // words_per_page

        # Common case: the symbol ends on or before its page boundary.
        if end <= (first_page + 1) * words_per_page:
            pages[first_page].append(s)
            continue

        cursor = start
        while cursor < end:
            pg = cursor // words_per_page
            piece_end = min(end, (pg + 1) * words_per_page)
            piece = dict(s)
            piece['addr'] = cursor
            piece['size'] = piece_end - cursor
            if cursor != start:
                piece['name'] = f"{s.get('name', '?')} (cont.)"
            pages[pg].append(piece)
            cursor = piece_end

    return dict(sorted(pages.items()))


def analyze_page(page_idx, syms, page_size=None):
    """
    Measure occupancy and holes for one data page.

    page_idx  -- page index (address // page size)
    syms      -- symbol dicts ('name', 'addr', 'size') filed under this page
    page_size -- words per page; defaults to DP_SIZE

    The symbols are walked in address order with a cursor that only ever
    moves forward.  Words between the cursor and the next symbol are a gap;
    words between the last covered address and the page end are the tail.
    Symbols that overlap (aliases at one address, or a symbol lying inside
    another) are counted once: letting the cursor fall back to the end of a
    shorter overlapping symbol would invent a gap that does not exist and
    count the shared words twice in 'used'.

    Returns a dict:
      'page'      page index
      'base'      first address of the page
      'syms'      the symbols sorted by address
      'used'      words covered by at least one symbol
      'gaps'      [(gap_addr, gap_words, name_of_symbol_after_gap)]
      'gap_words' total of gap_words
      'tail'      free words after the last covered address (never negative)
    """
    words_per_page = DP_SIZE if page_size is None else page_size
    base = page_idx * words_per_page
    sorted_syms = sorted(syms, key=lambda s: s['addr'])

    cursor = base
    gaps = []
    used = 0
    for s in sorted_syms:
        start = s['addr']
        end = start + s['size']
        if start > cursor:
            gaps.append((cursor, start - cursor, s['name']))
            cursor = start
        if end > cursor:
            # Count only the words not already covered by an earlier symbol.
            used += end - cursor
            cursor = end

    tail = max(0, base + words_per_page - cursor)

    return {
        'page': page_idx,
        'base': base,
        'syms': sorted_syms,
        'used': used,
        'gaps': gaps,
        'gap_words': sum(g[1] for g in gaps),
        'tail': tail,
    }


def global_stats(page_analyses):
    """
    Aggregate per-page results (as returned by analyze_page) into totals.

    Returns a dict:
      'pages'     number of analysed pages
      'capacity'  pages * DP_SIZE, in words
      'used'      sum of the pages' 'used'
      'gap'       sum of the pages' 'gap_words'
      'tail'      sum of the pages' 'tail'
      'waste_pct' gap as a percentage of capacity (0 when there are no pages)
    """
    n_pages = len(page_analyses)
    capacity = n_pages * DP_SIZE

    def _total(field):
        return sum(page[field] for page in page_analyses)

    gap_words = _total('gap_words')
    if capacity:
        waste_pct = gap_words / capacity * 100
    else:
        waste_pct = 0

    return {
        'pages': n_pages,
        'capacity': capacity,
        'used': _total('used'),
        'gap': gap_words,
        'tail': _total('tail'),
        'waste_pct': waste_pct,
    }


# ---------------------------------------------------------------------------
# Reorder suggestions
# ---------------------------------------------------------------------------

def suggest_reorder(page_analyses):
    """
    Estimate, per page, how many gap words a largest-first ordering would save.

    For each page that has gaps, its symbols are laid out again from the page
    base in descending size order.  Each symbol is placed at the next multiple
    of min(8, next power of two >= its size); the padding this requires is the
    simulated gap.  Pages where the simulated gap is smaller than the current
    one yield a suggestion.

    Returns a list of dicts {page, current_gap, simulated_gap, savings, order}
    sorted by savings, largest first.  'order' is the list of symbol names in
    the proposed order.
    """
    proposals = []
    for page in page_analyses:
        current_gap = page['gap_words']
        if current_gap == 0:
            continue

        # Sort largest-first to minimize cross-page penalties
        largest_first = sorted(page['syms'], key=lambda s: s['size'],
                               reverse=True)

        padding = 0
        position = page['base']
        for sym in largest_first:
            alignment = min(8, _next_pow2(sym['size']))
            remainder = position % alignment
            placed_at = position if remainder == 0 else position + alignment - remainder
            padding += placed_at - position
            position = placed_at + sym['size']

        saved = current_gap - padding
        if saved <= 0:
            continue
        proposals.append({
            'page': page['page'],
            'current_gap': current_gap,
            'simulated_gap': padding,
            'savings': saved,
            'order': [sym['name'] for sym in largest_first],
        })

    proposals.sort(key=lambda entry: entry['savings'], reverse=True)
    return proposals


def _next_pow2(n):
    if n <= 1:
        return 1
    p = 1
    while p < n:
        p <<= 1
    return p


# ---------------------------------------------------------------------------
# Console report
# ---------------------------------------------------------------------------

def print_report(page_analyses, stats, suggestions, has_dwarf, warnings):
    """
    Print the fragmentation report to stdout.

    page_analyses -- list of per-page dicts as returned by analyze_page
    stats         -- totals dict as returned by global_stats
    suggestions   -- list as returned by suggest_reorder (top five are shown)
    has_dwarf     -- True when sizes came from DWARF, False for delta sizing
    warnings      -- iterable of warning strings (may be empty or None)

    The report always ends with a closing rule, including the short form
    printed when there are no gaps at all.
    """
    W = 72
    rule = '=' * W
    dashes = '  ' + '-' * 68

    print(rule)
    print('  C28x DP Fragmentation Report')
    mode = 'DWARF actual sizes' if has_dwarf else 'address-delta sizes (estimate)'
    print(f'  Size source: {mode}')
    print(rule)
    print(f"  Data pages occupied : {stats['pages']}")
    print(f"  Total capacity      : {stats['capacity']} words ({stats['capacity']*2} bytes)")
    print(f"  Used by symbols     : {stats['used']} words")
    print(f"  Alignment gaps      : {stats['gap']} words  ({stats['waste_pct']:.1f}% of capacity)")
    print(f"  Free tail space     : {stats['tail']} words")
    print()

    if warnings:
        for w in warnings:
            print(f'  WARNING: {w}')
        print()

    if stats['gap'] == 0:
        print('  No alignment gaps found.')
        if stats['tail'] > 0:
            print(f"  {stats['tail']} words of tail space remain (partial last page).")
        if not has_dwarf:
            print()
            print('  NOTE: Without --out, gaps WITHIN the last delta cannot be detected.')
            print('  Supply --out yourfile.out (built with -g) for exact gap analysis.')
        # Close the frame here too, so both report forms end the same way.
        print(rule)
        return

    print('  Per-page breakdown (pages with gaps only):')
    print(dashes)
    print(f"  {'DP':>4}  {'base':>8}  {'used':>5}  {'gaps':>5}  {'tail':>5}  map")
    print(dashes)

    for pa in page_analyses:
        if pa['gap_words'] == 0:
            continue
        bar = _ascii_bar(pa['syms'], pa['base'], width=24)
        print(f"  DP{pa['page']:<4}  0x{pa['base']:04X}     {pa['used']:>4}w  "
              f"{pa['gap_words']:>4}w  {pa['tail']:>4}w  {bar}")
        cursor = pa['base']
        for s in sorted(pa['syms'], key=lambda x: x['addr']):
            if s['addr'] > cursor:
                gw = s['addr'] - cursor
                print(f"           0x{cursor:04X}  {'*** GAP ***':<26} {gw:>3}w  <-- WASTED")
            tag = '' if s['size_source'] == 'dwarf' else ' (delta)'
            print(f"           0x{s['addr']:04X}  {s['name']:<26} {s['size']:>3}w{tag}")
            # Never move backwards: a short symbol overlapping a longer one
            # must not make the already-covered words look like a gap.
            cursor = max(cursor, s['addr'] + s['size'])
        print()

    if suggestions:
        print('  Reorder suggestions (by estimated word savings):')
        print(dashes)
        for sg in suggestions[:5]:
            print(f"  DP{sg['page']}: current gap={sg['current_gap']}w  "
                  f"after reorder~{sg['simulated_gap']}w  saves~{sg['savings']}w")
            print(f"    suggested order: {', '.join(sg['order'])}")
        print()

    if not has_dwarf:
        print('  NOTE: Sizes from address delta -- gaps WITHIN a delta are invisible.')
        print('  Use --out yourfile.out (built with -g) for exact gap accounting.')
    print(rule)


def _ascii_bar(syms, base, width=24):
    """
    Render one page as a fixed-width occupancy bar.

    Each of the `width` characters stands for a slice of the page: '#' when
    at least one symbol word maps into the slice, '.' otherwise.  Words that
    map outside the bar are ignored.
    """
    occupied = set()
    for sym in syms:
        first_word = sym['addr'] - base
        for word in range(first_word, first_word + sym['size']):
            slot = int(word / DP_SIZE * width)
            if 0 <= slot < width:
                occupied.add(slot)
    return ''.join('#' if slot in occupied else '.' for slot in range(width))


# ---------------------------------------------------------------------------
# HTML report
# ---------------------------------------------------------------------------

# Static HTML + CSS skeleton of the report page.  The upper-case tokens
# SIZE_MODE, WARN_HTML, WASTE_PCT, GAP_W, USED_W, N_PAGES, PAGE_CONTENT and
# SIZE_NOTE are placeholders for generated content.
# NOTE(review): presumably filled in by build_html through text substitution --
# confirm against the part of build_html that uses this template.
# The CSS class names (seg-used, seg-gap, seg-free, gap-row, tail-row, badge-*,
# page-block, ...) are the ones emitted by _build_page_table and build_html.
HTML_TEMPLATE = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>C28x DP Fragmentation Report</title>
<style>
body{font-family:system-ui,sans-serif;background:#f8f7f4;color:#2c2c2a;margin:0;padding:24px;max-width:900px}
h1{font-size:18px;font-weight:500;margin:0 0 4px}
.mode{font-size:11px;color:#888;margin-bottom:16px}
.stats{display:flex;gap:12px;flex-wrap:wrap;margin-bottom:20px}
.stat{background:#fff;border:0.5px solid #d3d1c7;border-radius:8px;padding:10px 16px;min-width:140px}
.stat-label{font-size:11px;color:#888780}
.stat-val{font-size:22px;font-weight:500;font-family:monospace}
.legend{font-size:11px;color:#888;margin-bottom:12px;display:flex;gap:14px}
.legend span{display:inline-flex;align-items:center;gap:4px}
.swatch{width:12px;height:10px;border-radius:2px;display:inline-block}
.page-block{margin-bottom:16px}
.page-row{display:flex;align-items:center;margin-bottom:4px;gap:0}
.page-label{font-size:11px;color:#888780;width:60px;font-family:monospace;flex-shrink:0}
.bar-wrap{flex:1;height:22px;display:flex;border-radius:4px;overflow:hidden;border:0.5px solid #d3d1c7}
.seg-used{background:#378ADD;height:100%}
.seg-gap{background:repeating-linear-gradient(45deg,#EF9F27,#EF9F27 3px,#FAC775 3px,#FAC775 6px);height:100%}
.seg-free{background:#f1efe8;height:100%}
.pct{font-size:10px;color:#888780;width:36px;text-align:right;font-family:monospace;padding-left:4px;flex-shrink:0}
.detail{margin-left:60px;background:#fff;border:0.5px solid #d3d1c7;border-radius:6px;overflow:hidden}
table{width:100%;border-collapse:collapse;font-size:11px;font-family:monospace}
th{text-align:left;font-size:10px;color:#888;border-bottom:0.5px solid #d3d1c7;padding:4px 8px;font-family:system-ui;background:#fafaf8}
td{padding:3px 8px;border-bottom:0.5px solid #f1efe8}
tr.gap-row{background:#FFF8EC}
tr.gap-row td{color:#854F0B}
tr.tail-row td{color:#aaa;font-style:italic}
.gap-label{font-weight:600}
.badge{font-size:9px;padding:1px 5px;border-radius:3px}
.badge-used{background:#E6F1FB;color:#185FA5}
.badge-delta{background:#EEE;color:#888}
.badge-gap{background:#FAEEDA;color:#854F0B;font-weight:600}
.section-hdr{font-size:12px;font-weight:500;margin:24px 0 8px;color:#444}
.note{font-size:10px;color:#aaa;margin-top:16px}
.warn{font-size:11px;color:#854F0B;background:#FAEEDA;border-radius:4px;padding:6px 10px;margin-bottom:12px}
.clean-note{font-size:10px;color:#aaa;margin-left:60px;margin-top:2px}
</style>
</head>
<body>
<h1>C28x DP Fragmentation Report</h1>
<div class="mode">Size source: SIZE_MODE</div>
WARN_HTML
<div class="stats">
  <div class="stat"><div class="stat-label">alignment waste</div>
    <div class="stat-val" style="color:#BA7517">WASTE_PCT%</div></div>
  <div class="stat"><div class="stat-label">gap words</div>
    <div class="stat-val">GAP_W w</div></div>
  <div class="stat"><div class="stat-label">used words</div>
    <div class="stat-val">USED_W w</div></div>
  <div class="stat"><div class="stat-label">data pages</div>
    <div class="stat-val">N_PAGES</div></div>
</div>
<div class="legend">
  <span><span class="swatch" style="background:#378ADD"></span>used</span>
  <span><span class="swatch" style="background:repeating-linear-gradient(45deg,#EF9F27,#EF9F27 3px,#FAC775 3px,#FAC775 6px)"></span>gap / hole</span>
  <span><span class="swatch" style="background:#f1efe8;border:0.5px solid #d3d1c7"></span>free tail</span>
</div>
PAGE_CONTENT
<p class="note">SIZE_NOTE</p>
</body>
</html>
"""


def _build_page_table(pa):
    """
    Return the HTML detail table for one page analysis.

    pa -- per-page dict as returned by analyze_page ('base' and 'syms' are
          used)

    The table lists the page's symbols in address order, with a highlighted
    row for every gap in front of a symbol and a final row for the free tail,
    if any.  Symbol names are HTML-escaped: C++ names can contain '<', '>'
    and '&', which would otherwise corrupt the markup.
    """
    # Local import so this function does not depend on a module-level import.
    from html import escape

    base = pa['base']
    cursor = base
    rows = []
    for s in sorted(pa['syms'], key=lambda s: s['addr']):
        if s['addr'] > cursor:
            gw = s['addr'] - cursor
            rows.append(
                f'<tr class="gap-row">'
                f'<td class="gap-label">&#9658; GAP</td>'
                f'<td>0x{cursor:04X}</td>'
                f'<td>{gw}w</td>'
                f'<td><span class="badge badge-gap">{gw}w wasted</span></td>'
                f'</tr>\n'
            )
        src = ('<span class="badge badge-used">DWARF</span>'
               if s['size_source'] == 'dwarf'
               else '<span class="badge badge-delta">delta</span>')
        rows.append(
            f'<tr>'
            f'<td>{escape(str(s["name"]))}</td>'
            f'<td>0x{s["addr"]:04X}</td>'
            f'<td>{s["size"]}w</td>'
            f'<td>{src}</td>'
            f'</tr>\n'
        )
        # Never move backwards: a short symbol overlapping a longer one must
        # not make already-covered words show up as a gap or as free tail.
        cursor = max(cursor, s['addr'] + s['size'])

    page_end = base + DP_SIZE
    if cursor < page_end:
        rows.append(
            f'<tr class="tail-row">'
            f'<td>[free tail]</td>'
            f'<td>0x{cursor:04X}</td>'
            f'<td></td>'
            f'<td>{page_end - cursor}w free</td>'
            f'</tr>\n'
        )

    body = ''.join(rows)
    return (
        f'<div class="detail">'
        f'<table><thead><tr>'
        f'<th>symbol</th><th>address</th><th>size</th><th>source</th>'
        f'</tr></thead><tbody>\n{body}</tbody></table>'
        f'</div>\n'
    )


def build_html(page_analyses, stats, has_dwarf, warnings):
    """Render the standalone HTML fragmentation report.

    Args:
        page_analyses: Iterable of page-analysis dicts.  Each supplies
            ``base``, ``page``, ``gap_words`` and ``syms`` (symbol dicts with
            ``name``, ``addr`` and ``size``; ``size_source`` is read by the
            per-page table).
        stats: Dict with ``waste_pct``, ``gap``, ``used`` and ``pages``.
        has_dwarf: True when symbol sizes came from DWARF; selects the mode
            line and the footer note.
        warnings: Iterable of warning strings (may be empty or None); each is
            rendered as a ``<div class="warn">``.

    Returns:
        ``HTML_TEMPLATE`` with every placeholder substituted.
    """
    # Local import under its own name so it cannot clash with local variables.
    from html import escape

    def _bar(pa):
        """Return the proportional used/gap/free bar for one page."""
        base = pa['base']
        cursor = base
        segs = []
        for s in sorted(pa['syms'], key=lambda sym: sym['addr']):
            if s['addr'] > cursor:
                w = s['addr'] - cursor
                segs.append(
                    f'<div class="seg-gap" style="width:{w/DP_SIZE*100:.2f}%"'
                    f' title="GAP {w}w WASTED"></div>'
                )
            # The tooltip lives inside a double-quoted attribute: escape the
            # symbol name so quotes or angle brackets cannot break the markup.
            title = escape(f'{s["name"]} ({s["size"]}w)')
            segs.append(
                f'<div class="seg-used" style="width:{s["size"]/DP_SIZE*100:.2f}%"'
                f' title="{title}"></div>'
            )
            cursor = s['addr'] + s['size']
        tail = base + DP_SIZE - cursor
        if tail > 0:
            segs.append(
                f'<div class="seg-free" style="width:{tail/DP_SIZE*100:.2f}%"></div>'
            )
        waste_pct = round(pa['gap_words'] / DP_SIZE * 100)
        pct_color = '#BA7517' if waste_pct > 0 else '#888'
        bar = ''.join(segs)
        return (
            f'<div class="page-row">'
            f'<div class="page-label">DP{pa["page"]}</div>'
            f'<div class="bar-wrap">{bar}</div>'
            f'<div class="pct" style="color:{pct_color}">{waste_pct}%</div>'
            f'</div>\n'
        )

    # Separate pages with and without gaps.
    gapped = [pa for pa in page_analyses if pa['gap_words'] > 0]
    clean = [pa for pa in page_analyses if pa['gap_words'] == 0]

    parts = []

    # --- Pages with gaps: bar + inline table ---
    if gapped:
        parts.append('<div class="section-hdr">Pages with gaps</div>\n')
        for pa in gapped:
            parts.append('<div class="page-block">\n')
            parts.append(_bar(pa))
            parts.append(_build_page_table(pa))
            parts.append('</div>\n')

    # --- Clean pages: bars only ---
    if clean:
        parts.append('<div class="section-hdr">Pages with no gaps</div>\n')
        for pa in clean:
            parts.append('<div class="page-block">\n')
            parts.append(_bar(pa))
            parts.append('<div class="clean-note">no holes &mdash; all symbols pack cleanly</div>\n')
            parts.append('</div>\n')

    content = ''.join(parts)

    mode = 'DWARF actual sizes (exact)' if has_dwarf else 'address-delta estimates (intra-symbol gaps invisible)'
    # Warnings are plain text (they are also printed to the console), so
    # escape them before embedding in the page.
    warn_html = ''.join(
        f'<div class="warn">{escape(str(w))}</div>\n' for w in (warnings or ())
    )
    if not has_dwarf:
        note = ('Sizes from address delta. Gaps inside a delta cannot be detected. '
                'Build with -g and supply --out yourfile.out for exact analysis.')
    else:
        note = ('Symbol sizes from DWARF debug info (exact). Gaps shown are true DP holes '
                'the linker cannot backfill. DWARF badge = exact size. delta badge = fallback.')

    fields = {
        'SIZE_MODE': mode,
        'WARN_HTML': warn_html,
        'WASTE_PCT': f"{stats['waste_pct']:.1f}",
        'GAP_W': str(stats['gap']),
        'USED_W': str(stats['used']),
        'N_PAGES': str(stats['pages']),
        'PAGE_CONTENT': content,
        'SIZE_NOTE': note,
    }
    # Substitute every placeholder in ONE pass over the template.  Chained
    # str.replace() calls re-scan text that was already inserted, so a symbol
    # whose name contains a later placeholder (e.g. "SIZE_NOTE") would be
    # overwritten.  A callable replacement also keeps backslashes in the
    # inserted text literal.
    placeholder_re = re.compile('|'.join(re.escape(key) for key in fields))
    return placeholder_re.sub(lambda m: fields[m.group(0)], HTML_TEMPLATE)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """Command-line entry point: parse arguments, analyze, and report.

    Loads symbols from one of three sources (linker XML via ``--xml``,
    ofd2000 symbol output via ``--ofd``, or a linker .map file), optionally
    refines symbol sizes with DWARF data from ``--out``, prints the text
    report, and writes the HTML report when ``--html`` is given.

    Exits with status 1 (diagnostics on stderr) when an input cannot be
    read or parsed, when no symbols are found, or when the HTML report
    cannot be written.
    """
    parser = argparse.ArgumentParser(
        description='C28x DP fragmentation analyzer (v3, DWARF-aware).',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument('input', nargs='?', default=None,
                        help='Linker .map file (or ofd2000 symbol output with --ofd). '
                             'Not needed when --xml is used.')
    parser.add_argument('--xml', metavar='FILE',
                        help='Linker --xml_link_info XML file (preferred over .map). '
                             'Generate with: lnk2000 --xml_link_info=out_link.xml ...')
    parser.add_argument('--out', metavar='FILE',
                        help='Corresponding .out file (COFF, built with -g). '
                             'Enables exact DWARF-based symbol sizing.')
    parser.add_argument('--ofd-path', metavar='PATH',
                        help='Path to ofd2000 executable (auto-detected if omitted)')
    parser.add_argument('--ofd', action='store_true',
                        help='Input is ofd2000 --obj_display=none,symbols output')
    parser.add_argument('--section', default='.ebss',
                        help='Data section to analyze (default: .ebss)')
    parser.add_argument('--all-sections', action='store_true',
                        help='Analyze all data sections')
    parser.add_argument('--html', metavar='FILE',
                        help='Write standalone HTML report to FILE')
    args = parser.parse_args()

    if args.xml is None and args.input is None and not args.ofd:
        parser.error('Provide a .map file, --xml FILE, or use --ofd with stdin.')

    def _fail(*lines):
        """Print diagnostic lines to stderr and exit with status 1."""
        for line in lines:
            print(line, file=sys.stderr)
        sys.exit(1)

    def _read_file(path):
        """Return the text of *path*, exiting cleanly if it cannot be read."""
        try:
            return Path(path).read_text(errors='replace')
        except OSError as e:
            _fail(f'ERROR: cannot read {path}: {e}')

    def _read_input():
        """Return the positional input's text; '-' or no argument means stdin."""
        if args.input is None or args.input == '-':
            return sys.stdin.read()
        return _read_file(args.input)

    section = None if args.all_sections else args.section
    warnings = []
    dwarf_sizes = {}
    has_dwarf = False

    # Get DWARF sizes if --out supplied.
    # When --all-sections, default to .ebss for DWARF (most relevant section).
    dwarf_section = section or '.ebss'
    if args.out and args.ofd:
        # The ofd symbol path does not take DWARF sizes; say so rather than
        # dropping the option silently.
        print('NOTE: --out is ignored when --ofd is used.', file=sys.stderr)
    elif args.out:
        print(f'Reading DWARF from {args.out} ...', flush=True)
        dwarf_sizes, warn = get_dwarf_symbol_sizes(
            args.out, section=dwarf_section,
            ofd_path=args.ofd_path
        )
        if warn:
            warnings.append(warn)
        if dwarf_sizes:
            has_dwarf = True
            print(f'  DWARF sizes resolved for {len(dwarf_sizes)} symbol(s).')
        else:
            print('  DWARF unavailable -- falling back to address-delta sizing.')

    sizes_arg = dwarf_sizes if has_dwarf else None

    if args.xml:
        xml_text = _read_file(args.xml)
        try:
            symbols = parse_xml_link_info(
                xml_text, section_filter=section,
                dwarf_sizes=sizes_arg
            )
        except ValueError as e:
            _fail(f'ERROR: {e}')
        source = 'XML link info'
    elif args.ofd:
        symbols = parse_ofd_symbols(_read_input(), section_filter=section)
        source = 'ofd2000'
    else:
        try:
            symbols = parse_map_file(_read_input(), section_filter=section,
                                     dwarf_sizes=sizes_arg)
        except ValueError as e:
            _fail(f'ERROR: {e}')
        source = 'map file'

    if not symbols:
        hints = [f'ERROR: no symbols found in {section or "any section"} via {source}.']
        if args.xml:
            hints.append('       Verify the XML was generated with --xml_link_info and the section name is correct.')
        elif not args.ofd:
            hints.append('       Verify the .map file is from the TI COFF linker (lnk2000).')
        hints.append('       Try --all-sections to see all available sections.')
        _fail(*hints)

    dwarf_count = sum(1 for s in symbols if s.get('size_source') == 'dwarf')
    delta_count = len(symbols) - dwarf_count
    print(f'Analyzed {len(symbols)} symbols ({dwarf_count} DWARF, {delta_count} delta)')

    pages = assign_pages(symbols)
    page_analyses = [analyze_page(idx, syms) for idx, syms in pages.items()]
    stats = global_stats(page_analyses)
    suggestions = suggest_reorder(page_analyses)

    print_report(page_analyses, stats, suggestions, has_dwarf, warnings)

    if args.html:
        report = build_html(page_analyses, stats, has_dwarf, warnings)
        try:
            Path(args.html).write_text(report, encoding='utf-8')
        except OSError as e:
            _fail(f'ERROR: cannot write {args.html}: {e}')
        print(f'HTML report written to {args.html}')

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
