import os
import re
import shutil

# Directory scanned for "documentacao *" theme folders (the current working
# directory at import time).
ROOT_DIR = os.getcwd()

# Maps the document-code prefix found in a manual's metadata to its level label.
LEVEL_MAP = {
    "ITGCLI": "Nível 0",
    "ITGSUP": "Nível 1",
    "ITGINF": "Nível 2",
    "ITGENG": "Nível 3",
}


def sanitize_filename(name):
    """Return *name* without characters that are invalid in Windows filenames.

    Removes every occurrence of ``< > : " / \\ | ? *`` and strips leading and
    trailing whitespace from the result.
    """
    return re.sub(r'[<>:"/\\|?*]', '', name).strip()


def get_metadata(filepath):
    """Extract the title and level label from a markdown manual.

    The title comes from the first H1 heading. Two shapes are recognised:

    * ``# MANUAL TÉCNICO - <TITLE> - <SYSTEM>`` -> ``<TITLE>``
    * ``# <TITLE>`` -> ``<TITLE>`` (if the heading still mentions
      "MANUAL TÉCNICO", the segment after the first ``-`` is used)

    The level comes from a ``**Código:** ITGxxx ...`` line, translated through
    ``LEVEL_MAP``.

    Args:
        filepath: Path of a UTF-8 encoded markdown file.

    Returns:
        A ``(title, level)`` tuple. Either element is ``None`` when it could
        not be determined; ``level`` is also ``None`` for a code that is not
        in ``LEVEL_MAP`` so the caller can fall back to other sources.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    title = None
    match_title_complex = re.search(
        r'^#\s*MANUAL TÉCNICO\s*-\s*(.*?)\s*-\s*',
        content,
        re.MULTILINE | re.IGNORECASE,
    )
    if match_title_complex:
        title = match_title_complex.group(1).strip()
    else:
        # (?!#) keeps "##"/"###" sub-headings from being mistaken for the H1;
        # without it a leading "## Foo" would produce the title "# Foo".
        match_title_simple = re.search(r'^#(?!#)\s*(.+)$', content, re.MULTILINE)
        if match_title_simple:
            raw_title = match_title_simple.group(1).strip()
            title = raw_title
            if "MANUAL TÉCNICO" in raw_title.upper():
                # "MANUAL TÉCNICO - TITLE" without a trailing system segment:
                # keep only the part after the first dash when there is one.
                parts = raw_title.split('-')
                if len(parts) > 1:
                    title = parts[1].strip()

    level = None
    # Example metadata line: **Código:** ITGSUP 0001/26
    match_code = re.search(r'\*\*Código:\*\*\s*(ITG[A-Z]{3})', content)
    if match_code:
        # Unknown codes map to None rather than a "Nível ?" placeholder: the
        # "?" is stripped by sanitize_filename, which would yield a
        # "[Nível ] ..." name and also suppress the filename-based fallback.
        level = LEVEL_MAP.get(match_code.group(1).strip())

    return title, level


def infer_level_from_filename(filename):
    """Infer the level label from a filename prefix.

    Recognises ``N0_`` .. ``N3_`` and ``Nivel_<digits>_`` prefixes
    (case-insensitive).

    Args:
        filename: Bare filename (no directory part).

    Returns:
        ``"Nível <n>"`` when a prefix matches, otherwise ``None``.
    """
    short_prefix = re.match(r'^N([0-3])_', filename, re.IGNORECASE)
    if short_prefix:
        return f"Nível {short_prefix.group(1)}"

    long_prefix = re.match(r'^Nivel_(\d+)_', filename, re.IGNORECASE)
    if long_prefix:
        return f"Nível {long_prefix.group(1)}"

    return None


def _rename_companion_pdf(md_old, md_new):
    """Rename the PDF that sits next to *md_old* so it follows *md_new*.

    Only the final extension is swapped; a ".md" appearing elsewhere in the
    path (for example in a directory name) is left untouched. Nothing happens
    when the source PDF is missing or the destination already exists.
    """
    pdf_old = os.path.splitext(md_old)[0] + ".pdf"
    pdf_new = os.path.splitext(md_new)[0] + ".pdf"
    if os.path.exists(pdf_old) and not os.path.exists(pdf_new):
        os.rename(pdf_old, pdf_new)


def standardize_filenames():
    """Rename manuals under ``ROOT_DIR`` to ``[Nível X] Title.md``.

    Every ``documentacao *`` directory directly under ``ROOT_DIR`` is scanned
    for ``.md`` files. README files and files whose name already starts with
    ``[Nível`` are skipped. For the rest, title and level are read from the
    file content (level falls back to the filename prefix); when both are
    known the file, and a same-named ``.pdf`` if present, is renamed. Existing
    targets are never overwritten. Progress is reported on stdout.
    """
    print("Starting Filename Standardization...")
    count = 0

    for item in os.listdir(ROOT_DIR):
        theme_dir = os.path.join(ROOT_DIR, item)
        # Only "documentacao *" folders are of interest.
        if not (os.path.isdir(theme_dir) and item.startswith("documentacao ")):
            continue

        print(f"Scanning: {item}")
        for filename in os.listdir(theme_dir):
            if not filename.endswith(".md") or filename.lower() == "readme.md":
                continue
            # Already normalised on a previous run.
            if filename.startswith("[Nível"):
                continue

            filepath = os.path.join(theme_dir, filename)
            print(f" Processing: {filename}")
            title, level = get_metadata(filepath)

            # Fall back to the filename prefix when the content has no usable code.
            if not level:
                level = infer_level_from_filename(filename)
                if level:
                    print(f" Inferred Level '{level}' from filename.")

            if not (title and level):
                print(f" Skipping {filename}: Could not extract Metadata (Title: {title}, Level: {level})")
                continue

            # Title Case for readability, then drop characters Windows rejects.
            new_filename = sanitize_filename(f"[{level}] {title.title()}.md")
            new_filepath = os.path.join(theme_dir, new_filename)
            if filepath == new_filepath:
                continue

            if os.path.exists(new_filepath):
                print(f" Collision! {new_filename} exists. Skipping rename.")
                continue

            try:
                os.rename(filepath, new_filepath)
                print(f" Renamed to: {new_filename}")
                count += 1
                _rename_companion_pdf(filepath, new_filepath)
            except OSError as e:
                print(f" Error renaming: {e}")

    print(f"Done. Renamed {count} files.")


if __name__ == "__main__":
    standardize_filenames()