# manuais-e-documentacao-itguys/.gemini/standardize_filenames.py
#
# 142 lines
# 5.4 KiB
# Python

import os
import re
import shutil
# Directory scanned for "documentacao *" theme folders. It is the current
# working directory, so the script must be launched from the directory that
# contains those folders.
ROOT_DIR = os.getcwd()

# Document code (as found after "**Código:**" in a manual) -> support level
# label used in the standardized file name.
LEVEL_MAP = {
    "ITGCLI": "Nível 0",
    "ITGSUP": "Nível 1",
    "ITGINF": "Nível 2",
    "ITGENG": "Nível 3",
}
def sanitize_filename(name):
    """Return *name* with characters that Windows rejects in file names removed.

    Removes the reserved punctuation (less-than, greater-than, colon, double
    quote, slash, backslash, pipe, question mark, asterisk) and the ASCII
    control characters 0x00-0x1F, then trims surrounding whitespace.

    Args:
        name: Candidate file name. It must not be a path: separators are
            removed like any other invalid character.

    Returns:
        The cleaned name. It may be empty if every character was invalid.
    """
    # Control characters (e.g. a stray tab inside a heading) are just as
    # invalid on Windows as the reserved punctuation and would make the
    # subsequent os.rename fail, so they are dropped in the same pass.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', name).strip()
def _split_heading(heading):
    """Split a heading on its separator dashes, keeping hyphenated words whole."""
    # A separator dash has whitespace on at least one side ("A - B"); a dash
    # glued to letters on both sides ("Wi-Fi") is part of a word.
    segments = re.split(r'\s+-\s*|-\s+', heading)
    if len(segments) == 1:
        # No spaced dash at all: accept the compact "A-B-C" spelling.
        segments = heading.split('-')
    return [segment.strip() for segment in segments]


def _extract_title(content):
    """Return the core title taken from the document's H1 heading, or None."""
    # (?!#) stops "##"/"###" sub-headings from being read as the H1, and
    # [ \t]* (instead of \s*) keeps the match on a single line.
    headings = [
        match.group(1).strip()
        for match in re.finditer(r'^#(?!#)[ \t]*(.+?)[ \t]*$', content, re.MULTILINE)
    ]
    headings = [heading for heading in headings if heading]
    if not headings:
        return None

    # Preferred form, wherever it appears: "MANUAL TÉCNICO - TITLE - SYSTEM".
    for heading in headings:
        if heading.upper().startswith("MANUAL TÉCNICO"):
            segments = _split_heading(heading)
            if len(segments) >= 3:
                return segments[1]

    # Otherwise use the first H1, dropping a "MANUAL TÉCNICO" prefix if any.
    first = headings[0]
    if "MANUAL TÉCNICO" in first.upper():
        segments = _split_heading(first)
        return segments[1] if len(segments) > 1 else first
    return first


def get_metadata(filepath):
    """Extract the title and support level from a markdown manual.

    The title comes from an H1 heading, either
    ``# MANUAL TÉCNICO - TITLE - SYSTEM`` (only ``TITLE`` is kept) or a plain
    ``# TITLE``. The level comes from a ``**Código:** ITGxxx ...`` field,
    translated through ``LEVEL_MAP``.

    Args:
        filepath: Path of the markdown file to read (UTF-8, BOM tolerated).

    Returns:
        A ``(title, level)`` tuple. ``title`` is ``None`` when the file has
        no H1 heading. ``level`` is ``None`` when no code field is present
        or its code is not in ``LEVEL_MAP``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # utf-8-sig reads plain UTF-8 too, but drops a leading BOM that would
    # otherwise hide a first-line "# Title" from the ^-anchored heading regex.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    title = _extract_title(content)

    # Code field example: **Código:** ITGSUP 0001/26
    level = None
    match_code = re.search(r'\*\*Código:\*\*\s*(ITG[A-Z]{3})', content)
    if match_code:
        # An unknown code yields None rather than a "Nível ?" placeholder:
        # the placeholder blocked the caller's filename-based fallback and,
        # once "?" was sanitized away, produced "[Nível ] ..." file names.
        level = LEVEL_MAP.get(match_code.group(1))
    return title, level
def infer_level_from_filename(filename):
    """
    Guess the support level from a legacy file-name prefix.

    Recognizes the short prefixes N0_, N1_, N2_ and N3_ as well as the long
    form Nivel_<digits>_, all case-insensitively. Returns the matching
    "Nível X" label, or None when the name carries no such prefix.
    """
    short_prefixes = (
        (r'^N1_', "Nível 1"),
        (r'^N2_', "Nível 2"),
        (r'^N3_', "Nível 3"),
        (r'^N0_', "Nível 0"),
    )
    for pattern, label in short_prefixes:
        if re.match(pattern, filename, re.IGNORECASE):
            return label

    # Long form: the captured number is used verbatim in the label.
    long_form = re.match(r'^Nivel_(\d+)_', filename, re.IGNORECASE)
    return f"Nível {long_form.group(1)}" if long_form else None
def _rename_companion_pdf(md_old, md_new):
    """Rename the PDF that shares *md_old*'s base name to match *md_new*."""
    # Swap only the extension: str.replace(".md", ".pdf") would also rewrite
    # any ".md" occurring earlier in the path (folder or file name).
    pdf_old = os.path.splitext(md_old)[0] + ".pdf"
    pdf_new = os.path.splitext(md_new)[0] + ".pdf"
    if not os.path.exists(pdf_old):
        return
    if os.path.exists(pdf_new):
        print(f" Collision! {os.path.basename(pdf_new)} exists. Skipping PDF rename.")
        return
    try:
        os.rename(pdf_old, pdf_new)
    except OSError as e:
        print(f" Error renaming: {e}")


def _standardize_one(theme_dir, filename):
    """Rename one markdown file to "[Nível X] Title.md"; return True if renamed."""
    filepath = os.path.join(theme_dir, filename)
    print(f" Processing: {filename}")

    try:
        title, level = get_metadata(filepath)
    except (OSError, UnicodeDecodeError) as e:
        # One unreadable or non-UTF-8 file must not abort the whole run.
        print(f" Skipping {filename}: Could not read file ({e})")
        return False

    # Fall back to the legacy file-name prefix when the document has no code.
    if not level:
        level = infer_level_from_filename(filename)
        if level:
            print(f" Inferred Level '{level}' from filename.")

    if not (title and level):
        print(f" Skipping {filename}: Could not extract Metadata (Title: {title}, Level: {level})")
        return False

    # Title Case for readability. NOTE: str.title() capitalizes after every
    # non-letter and lowercases the rest of each word (acronyms included).
    new_filename = sanitize_filename(f"[{level}] {title.title()}.md")
    new_filepath = os.path.join(theme_dir, new_filename)
    if filepath == new_filepath:
        return False

    if os.path.exists(new_filepath):
        print(f" Collision! {new_filename} exists. Skipping rename.")
        return False

    try:
        os.rename(filepath, new_filepath)
    except OSError as e:
        print(f" Error renaming: {e}")
        return False
    print(f" Renamed to: {new_filename}")

    # The markdown rename already succeeded, so a PDF problem is reported
    # by the helper but does not undo or discount it.
    _rename_companion_pdf(filepath, new_filepath)
    return True


def standardize_filenames(root_dir=None):
    """Rename manuals in every "documentacao *" folder to "[Nível X] Title.md".

    For each ``.md`` file (except ``readme.md`` and files already starting
    with ``[Nível``) the title and level are read from the document, with
    the level falling back to the file-name prefix. A same-named ``.pdf``
    is renamed alongside. Existing targets are never overwritten. Progress
    is reported on stdout.

    Args:
        root_dir: Directory containing the "documentacao *" folders.
            Defaults to ``ROOT_DIR`` (the working directory at import time).
    """
    root = ROOT_DIR if root_dir is None else root_dir
    print("Starting Filename Standardization...")
    count = 0
    for item in os.listdir(root):
        theme_dir = os.path.join(root, item)
        # We only care about "documentacao *" folders.
        if not (os.path.isdir(theme_dir) and item.startswith("documentacao ")):
            continue
        print(f"Scanning: {item}")
        for filename in os.listdir(theme_dir):
            if not filename.endswith(".md") or filename.lower() == "readme.md":
                continue
            # Already normalized on a previous run.
            if filename.startswith("[Nível"):
                continue
            if _standardize_one(theme_dir, filename):
                count += 1
    print(f"Done. Renamed {count} files.")
# Run the standardization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    standardize_filenames()