manuais-e-documentacao-itguys/.gemini/standardize_filenames.py

import os
import re
import shutil

ROOT_DIR = os.getcwd()

# Mapping codes to levels
LEVEL_MAP = {
    "ITGCLI": "Nível 0",
    "ITGSUP": "Nível 1",
    "ITGINF": "Nível 2",
    "ITGENG": "Nível 3"
}

def sanitize_filename(name):
    # Remove invalid chars for Windows filenames
    return re.sub(r'[<>:"/\\|?*]', '', name).strip()

def get_metadata(filepath):
    """
    Parses the markdown file to extract:
    1. Title (H1)
    2. Level Code (ITGxxx)
    """
    title = None
    level = None

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

        # Regex for Title: # MANUAL TÉCNICO - [TITLE] - [SYSTEM] or just # [TITLE]
        # We want to capture the core title.
        # Pattern 1: # MANUAL TÉCNICO - TITLE - SYSTEM
        match_title_complex = re.search(r'^#\s*MANUAL TÉCNICO\s*-\s*(.*?)\s*-\s*', content, re.MULTILINE | re.IGNORECASE)
        # Pattern 2: # TITLE
        match_title_simple = re.search(r'^#\s*(.+)$', content, re.MULTILINE)

        if match_title_complex:
            title = match_title_complex.group(1).strip()
        elif match_title_simple:
            # If simple, we need to be careful not to include "MANUAL TÉCNICO" if it's there
            raw_title = match_title_simple.group(1).strip()
            if "MANUAL TÉCNICO" in raw_title.upper():
                # Try to extract subpart if possible, or just use it as is but title case
                parts = raw_title.split('-')
                if len(parts) > 1:
                    title = parts[1].strip()
                else:
                    title = raw_title
            else:
                title = raw_title

        # Regex for Code: **Código:** ITGSUP 0001/26
        match_code = re.search(r'\*\*Código:\*\*\s*(ITG[A-Z]{3})', content)
        if match_code:
            code = match_code.group(1).strip()
            level = LEVEL_MAP.get(code, "Nível ?")

    return title, level

def infer_level_from_filename(filename):
    """
    Fallback: Extract level from filename prefixes like N1_, N2_, Nivel_1, etc.
    """
    if re.match(r'^N1_', filename, re.IGNORECASE): return "Nível 1"
    if re.match(r'^N2_', filename, re.IGNORECASE): return "Nível 2"
    if re.match(r'^N3_', filename, re.IGNORECASE): return "Nível 3"
    if re.match(r'^N0_', filename, re.IGNORECASE): return "Nível 0"

    # Try Nivel_X_...
    match = re.match(r'^Nivel_(\d+)_', filename, re.IGNORECASE)
    if match:
        return f"Nível {match.group(1)}"

    return None

def standardize_filenames():
    print("Starting Filename Standardization...")

    count = 0

    for item in os.listdir(ROOT_DIR):
        theme_dir = os.path.join(ROOT_DIR, item)

        # We only care about "documentacao *" folders
        if os.path.isdir(theme_dir) and item.startswith("documentacao "):
            print(f"Scanning: {item}")

            for filename in os.listdir(theme_dir):
                if not filename.endswith(".md") or filename.lower() == "readme.md":
                    continue

                filepath = os.path.join(theme_dir, filename)

                # Check if file is already normalized: Starts with [Nível
                if filename.startswith("[Nível"):
                    continue

                print(f"  Processing: {filename}")
                title, level = get_metadata(filepath)

                # Fallback to filename inference if Metadata missing
                if not level:
                    level = infer_level_from_filename(filename)
                    if level:
                        print(f"    Inferred Level '{level}' from filename.")

                if title and level:
                    # Construct new name: [Nível X] Title.md
                    # Title Case for better readability
                    title_clean = title.title()
                    new_filename = f"[{level}] {title_clean}.md"
                    new_filename = sanitize_filename(new_filename)

                    new_filepath = os.path.join(theme_dir, new_filename)

                    if filepath != new_filepath:
                         # Handle collision
                        if os.path.exists(new_filepath):
                            print(f"    Collision! {new_filename} exists. Skipping rename.")
                        else:
                            try:
                                os.rename(filepath, new_filepath)
                                print(f"    Renamed to: {new_filename}")
                                count += 1

                                # Also rename PDF if it exists
                                pdf_old = filepath.replace(".md", ".pdf")
                                pdf_new = new_filepath.replace(".md", ".pdf")
                                if os.path.exists(pdf_old):
                                    if not os.path.exists(pdf_new):
                                        os.rename(pdf_old, pdf_new)
                            except OSError as e:
                                print(f"    Error renaming: {e}")
                else:
                    print(f"    Skipping {filename}: Could not extract Metadata (Title: {title}, Level: {level})")

    print(f"Done. Renamed {count} files.")

if __name__ == "__main__":
    standardize_filenames()