# manuais-e-documentacao-itguys/.gemini/convert_to_pdf.py

import sys
import os
import re
from datetime import datetime
import argparse

# Dependency check
try:
    from fpdf import FPDF
    from fpdf.enums import XPos, YPos
    from fpdf.fonts import FontFace
except ImportError:
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "fpdf2"])
    from fpdf import FPDF
    from fpdf.enums import XPos, YPos
    from fpdf.fonts import FontFace

# Assets: resolved relative to the directory that contains this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")

# Colors (Premium Palette), written as (R, G, B) tuples.
COLOR_PRIMARY = (20, 120, 207)    # #1478cf
COLOR_TEXT_MAIN = (50, 60, 70)    # #323C46
COLOR_BG_LIGHT = (250, 250, 252)
# Callout palette: background and left-border color per callout kind.
COLOR_INFO_BG = (235, 245, 255)
COLOR_INFO_BORDER = (20, 120, 207)
COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0)

# Regex Patterns (CommonMark-inspired for robustness)
# Header: 1-6 '#' characters, whitespace, then the title text.
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
# Unordered list item: optional indent, one of '-', '+', '*', whitespace, content.
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
# Ordered list item: optional indent, digits followed by '.' or ')', whitespace, content.
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
# Blockquote: '>' at the start of the line; group 1 is the quoted text.
RE_BLOCKQUOTE = re.compile(r'^>\s*(.*)$')
# Table separator row: a line made only of '|', whitespace, '-' and ':'.
RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
# Image: ![alt](path "optional title"); the path may not contain whitespace or ')'.
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
# Code fence: three backticks, optionally followed by a language word.
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
# Checkbox item: list marker followed by '[ ]', '[x]' or '[X]', then the label.
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')

def parse_header(line):
    """Split a Markdown header line into its level and title.

    Surrounding whitespace is ignored. Returns ``(level, text)``, where
    ``level`` is the number of leading ``#`` characters (1-6), or ``None``
    when the line is not a header.
    """
    found = RE_HEADER.match(line.strip())
    if found is None:
        return None
    hashes, title = found.groups()
    return len(hashes), title.strip()

def parse_list_item(line):
    """Classify a Markdown list line.

    Returns ``None`` when the line is not a list item, otherwise a 3-tuple
    ``(kind, content, extra)``:

    * ``('cb', label, checked)`` - checkbox item; ``checked`` is a bool.
    * ``('ul', content, None)``  - unordered item.
    * ``('ol', content, number)`` - ordered item; ``number`` is the digit
      string that preceded the ``.`` or ``)``.
    """
    # Checkbox goes first: a line such as "- [x] task" would otherwise be
    # caught by the more general unordered-list pattern.
    checkbox = RE_CHECKBOX.match(line)
    if checkbox is not None:
        state, label = checkbox.groups()
        return ('cb', label, state.lower() == 'x')

    bullet = RE_UNORDERED_LIST.match(line)
    if bullet is not None:
        return ('ul', bullet.group(1), None)

    numbered = RE_ORDERED_LIST.match(line)
    if numbered is not None:
        number, content = numbered.groups()
        return ('ol', content, number)

    return None

def parse_callout_type(content):
    """Classify blockquote text as a warning or an informational callout.

    Returns ``(kind, text)`` where ``kind`` is ``'WARN'`` or ``'INFO'``.
    For GitHub-style alerts (``[!NOTE]``, ``[!WARNING]``, ...) the marker is
    removed from the returned text; for keyword-based detection the text is
    returned unchanged. Matching is case-insensitive.
    """
    shouted = content.upper()

    # GitHub style alerts, checked in this order:
    # (kind, markers searched in the upper-cased text, pattern used to strip them)
    github_alerts = (
        ('WARN', ('[!WARNING]', '[!CAUTION]'), r'\[!(WARNING|CAUTION)\]'),
        ('WARN', ('[!IMPORTANT]',), r'\[!IMPORTANT\]'),
        ('INFO', ('[!NOTE]', '[!TIP]', '[!INFO]'), r'\[!(NOTE|TIP|INFO)\]'),
    )
    for kind, markers, strip_pattern in github_alerts:
        if any(marker in shouted for marker in markers):
            without_marker = re.sub(strip_pattern, '', content, flags=re.IGNORECASE)
            return kind, without_marker.strip()

    # Traditional format: a keyword anywhere in the text (emoji or bold around
    # it does not matter because this is a plain substring search).
    keyword_kinds = (
        ('WARN', ('IMPORTANTE', 'WARNING', 'ATENÇÃO')),
        ('INFO', ('NOTA', 'NOTE', 'DICA', 'TIP')),
    )
    for kind, keywords in keyword_kinds:
        if any(keyword in shouted for keyword in keywords):
            return kind, content

    # Any other blockquote is rendered as an informational note.
    return 'INFO', content

def normalize_image_path(md_file, img_path):
    """Resolve a Markdown image reference to an existing file on disk.

    The reference is trimmed and URL-decoded (``%20`` -> space), then looked
    up in this order:

    1. relative to the directory of ``md_file``;
    2. as given (absolute path or relative to the working directory);
    3. relative to ``md_file`` with ``/`` replaced by the OS separator.

    Args:
        md_file: Path of the Markdown file that contains the reference.
        img_path: Image target exactly as written in the Markdown.

    Returns:
        The first candidate that is an existing regular file, or ``None``
        when the reference is empty or nothing matches.
    """
    from urllib.parse import unquote

    if not img_path:
        return None

    img_path = unquote(img_path.strip())  # Decode %20 etc.
    if not img_path:
        # A blank reference would otherwise resolve to the Markdown directory.
        return None

    md_dir = os.path.dirname(md_file)
    candidates = (
        os.path.join(md_dir, img_path),
        img_path,
        os.path.join(md_dir, img_path.replace('/', os.sep)),
    )
    for candidate in candidates:
        # isfile (not exists): a directory is never a usable image.
        if os.path.isfile(candidate):
            return candidate

    return None

class UXPDF(FPDF):
    """FPDF subclass that draws the branded header and footer.

    Page 1 is the cover and receives neither header nor footer.
    """

    def header(self):
        """Draw the logo, the colored rule and the document label."""
        if self.page_no() <= 1:
            return

        rule_y = 25

        # White band behind the header area.
        self.set_fill_color(255, 255, 255)
        self.rect(0, 0, self.w, rule_y, 'F')

        # The logo is optional: skip it when the asset is missing.
        if os.path.exists(LOGO_PATH):
            self.image(LOGO_PATH, x=10, y=8, h=10)

        # Full-width rule closing the header band.
        self.set_draw_color(*COLOR_PRIMARY)
        self.set_line_width(0.5)
        self.line(0, rule_y, self.w, rule_y)

        # Right-aligned label, then move the cursor below the band.
        self.set_font('Helvetica', 'B', 10)
        self.set_text_color(*COLOR_PRIMARY)
        self.set_y(10)
        self.cell(0, 10, "MANUAL TÉCNICO", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='R')
        self.ln(20)

    def footer(self):
        """Draw the separator rule, the confidentiality note and the page counter."""
        # No footer on the cover (page 1).
        if self.page_no() == 1:
            return

        rule_y = self.h - 20

        self.set_y(-20)
        self.set_font('Helvetica', 'I', 8)
        self.set_text_color(100, 100, 100)
        self.set_draw_color(220, 220, 220)
        self.line(10, rule_y, self.w - 10, rule_y)

        self.set_y(-15)
        self.cell(0, 10, 'iT Guys Solutions - Confidencial', 0, align='L')
        # Restart from the left edge so the counter cell spans the page width.
        self.set_x(0)
        # "{nb}" is emitted literally here; presumably fpdf2 substitutes the
        # total page count on output - confirm against the fpdf2 docs.
        self.cell(0, 10, f'Página {self.page_no()}/{{nb}}', 0, align='R')

def clean_markdown(text):
    """Remove bold markers and backticks, then force the text into Latin-1.

    Characters that Latin-1 cannot represent come back as ``?``.
    """
    for marker in ('**', '`'):
        text = text.replace(marker, '')
    latin1_bytes = text.encode('latin-1', 'replace')
    return latin1_bytes.decode('latin-1')

def safe_text(text):
    """Make text safe for the Latin-1 core PDF fonts.

    Decorative symbols (information sign, warning sign, rocket) are removed
    with or without their emoji variation selector, typographic quotes and
    the ellipsis are replaced by ASCII equivalents, and anything else that
    Latin-1 cannot represent becomes ``?``.

    Args:
        text: Arbitrary Unicode text.

    Returns:
        A string containing only Latin-1 characters.
    """
    # U+FE0F is the invisible emoji variation selector; dropping it on its own
    # also covers symbols typed without it and avoids a stray "?" after
    # emoji that are not listed here.
    for symbol in ('\u2139', '\u26a0', '\U0001f680', '\ufe0f'):
        text = text.replace(symbol, '')

    ascii_equivalents = (
        ('\u201c', '"'),   # left double quote
        ('\u201d', '"'),   # right double quote
        ('\u2018', "'"),   # left single quote
        ('\u2019', "'"),   # right single quote / apostrophe
        ('\u2026', '...'),  # horizontal ellipsis
    )
    for fancy, plain in ascii_equivalents:
        text = text.replace(fancy, plain)

    return text.encode('latin-1', 'replace').decode('latin-1')

def make_links_clickable(text):
    """Rewrite backtick-quoted http(s) URLs as Markdown links to themselves.

    Example: `` `https://x.y` `` becomes ``[https://x.y](https://x.y)``.
    Other backtick spans are left untouched.
    """
    quoted_url = re.compile(r'`(https?://[^`]+)`')
    return quoted_url.sub(r'[\1](\1)', text)

def process_variables(text):
    """Expand the template placeholders found in *text*.

    ``{{DATA_ATUAL}}`` becomes today's date as ``dd/mm/YYYY`` and ``{{ANO}}``
    becomes the current year. Text without placeholders is returned as is.
    """
    today = datetime.now()
    substitutions = (
        ('{{DATA_ATUAL}}', today.strftime("%d/%m/%Y")),
        ('{{ANO}}', str(today.year)),
    )
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)
    return text

def render_callout(pdf, text, type='INFO'):
    """Draw a callout box: colored label, filled body text and a left border.

    Args:
        pdf: The PDF document being written (an ``FPDF`` instance).
        text: Callout body; backtick-quoted URLs become links and the text is
            rendered with fpdf2's inline markdown enabled.
        type: ``'WARN'`` for the warning palette and "IMPORTANTE" label; any
            other value uses the info palette and "NOTA" label. Text
            containing ``[IMPORTANTE]`` is always treated as a warning.

    Fill color, text color and line width are restored before returning so
    the callout styling does not bleed into whatever is drawn next.
    """
    pdf.ln(5)
    top_y = pdf.get_y()
    # Remember the caller's stroke width: the thick border below must not
    # leak into later rules (heading underlines, footer line, ...).
    previous_line_width = pdf.line_width

    if type == 'WARN' or '[IMPORTANTE]' in text:
        bg, border, label = COLOR_WARN_BG, COLOR_WARN_BORDER, "IMPORTANTE"
    else:
        bg, border, label = COLOR_INFO_BG, COLOR_INFO_BORDER, "NOTA"

    pdf.set_fill_color(*bg)
    pdf.set_line_width(1.5)
    pdf.set_draw_color(*border)

    # Label line, slightly indented so it clears the border.
    pdf.set_x(pdf.l_margin + 2)
    pdf.set_font('Helvetica', 'B', 9)
    pdf.set_text_color(*border)
    pdf.cell(0, 5, label, new_x=XPos.LMARGIN, new_y=YPos.NEXT)

    # Body text on the tinted background.
    pdf.set_font('Helvetica', '', 10)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.set_x(pdf.l_margin + 2)
    body = make_links_clickable(text)
    pdf.multi_cell(0, 6, safe_text(body), fill=True, markdown=True)

    # Left border spanning label and body.
    # NOTE(review): if the body triggers a page break, top_y belongs to the
    # previous page and this line is probably misplaced - confirm and handle.
    end_y = pdf.get_y()
    pdf.set_draw_color(*border)
    pdf.line(pdf.l_margin, top_y, pdf.l_margin, end_y)

    # Reset drawing state explicitly to avoid bleeding
    pdf.set_line_width(previous_line_width)
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.ln(5)

def _find_cover_title(lines, default):
    """Return the first H1 title found outside fenced code, or *default*."""
    in_fence = False
    for raw in lines:
        stripped = raw.strip()
        if stripped.startswith('```'):
            in_fence = not in_fence
            continue
        if in_fence:
            # "# comment" lines inside code are not document titles.
            continue
        header = parse_header(stripped)
        if header and header[0] == 1:
            return header[1].replace('MANUAL TÉCNICO - ', '')  # Cleanup
    return default


def _split_table_row(row_line):
    """Split one Markdown table row into stripped cells, keeping empty cells."""
    inner = row_line.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_table(pdf, table_lines):
    """Render buffered Markdown table lines (header row first) as a PDF table."""
    headers = _split_table_row(table_lines[0])
    if not any(headers):
        return

    rows = []
    for row_line in table_lines[1:]:
        if RE_TABLE_SEP.match(row_line):
            continue  # Skip separator line
        cells = _split_table_row(row_line)
        if any(cells):
            # Pad short rows so every row has at least the header's columns.
            cells += [''] * (len(headers) - len(cells))
            rows.append(cells)

    pdf.ln(5)
    pdf.set_draw_color(*COLOR_PRIMARY)
    pdf.set_line_width(0.3)

    # Two-column "Campo"/"Valor" tables get a narrow, bold first column.
    # The length check comes first so headers[1] is never read on a
    # single-column table.
    is_tech_data = len(headers) == 2 and ("Campo" in headers[0] or "Valor" in headers[1])
    col_widths = (30, 70) if is_tech_data else None

    header_style = FontFace(emphasis="BOLD", color=(255, 255, 255), fill_color=COLOR_PRIMARY)
    with pdf.table(
        text_align="LEFT",
        col_widths=col_widths,
        line_height=7
    ) as table:
        header_row = table.row()
        for title in headers:
            header_row.cell(clean_markdown(title), style=header_style)

        for cells in rows:
            row = table.row()
            for idx, cell in enumerate(cells):
                emphasis = "BOLD" if is_tech_data and idx == 0 else None
                row.cell(
                    clean_markdown(cell),
                    style=FontFace(color=COLOR_TEXT_MAIN, emphasis=emphasis, fill_color=(255, 255, 255)),
                )
    pdf.ln(5)


def convert(md_file, pdf_file):
    """Convert a Markdown manual into a branded PDF.

    The document starts with a cover page whose title is the first H1 of the
    Markdown file (that H1 is then skipped in the body). The body is rendered
    line by line and supports headers, pipe tables, fenced code blocks,
    images, blockquote callouts, list items, checkboxes and plain paragraphs.
    ``{{DATA_ATUAL}}`` / ``{{ANO}}`` placeholders are expanded on every line.

    Args:
        md_file: Path of the UTF-8 Markdown source.
        pdf_file: Path of the PDF file to write.

    Raises:
        OSError: If ``md_file`` cannot be read.
    """
    # Read the source once; it feeds both the cover title and the body.
    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    pdf = UXPDF()
    pdf.set_auto_page_break(auto=True, margin=20)
    pdf.set_title("Manual Técnico iT Guys")

    # --- Cover Page ---
    pdf.add_page()
    pdf.set_fill_color(*COLOR_PRIMARY)
    pdf.rect(0, 0, 15, pdf.h, 'F')  # full-height accent stripe

    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, x=40, y=50, w=100)

    # Title comes from the first H1; generic placeholder if not found.
    doc_title = _find_cover_title(lines, "Documentação Técnica")
    doc_subtitle = "Guia Oficial iT Guys"

    pdf.set_y(140)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 32)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.multi_cell(0, 12, safe_text(doc_title), align='L')

    pdf.set_y(180)
    pdf.set_x(30)
    pdf.set_font('Helvetica', '', 16)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.multi_cell(0, 8, safe_text(doc_subtitle), align='L')

    pdf.set_y(-30)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 10)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.cell(0, 10, "iT GUYS SOLUTIONS")

    # --- Content ---
    pdf.add_page()

    in_code_block = False
    table_buffer = []
    first_h1_skipped = False

    for raw_line in lines:
        # Keep the unstripped line around: code blocks need their indentation.
        raw_line = process_variables(raw_line.rstrip('\r\n'))
        line = raw_line.strip()

        # Robust Reset at start of line processing
        pdf.set_fill_color(255, 255, 255)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.set_font('Helvetica', '', 11)

        # --- Tables --- (a leading '|' inside a code fence is just code)
        if not in_code_block and line.startswith('|'):
            table_buffer.append(line)
            continue

        if table_buffer:
            _render_table(pdf, table_buffer)
            table_buffer = []

        # Code fences (accepts a language tag or spaces after ```)
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue

        if in_code_block:
            pdf.set_font('Courier', '', 9.5)
            pdf.set_text_color(50, 50, 50)
            pdf.set_fill_color(245, 245, 245)
            pdf.set_x(pdf.l_margin + 5)
            code_line = safe_text(raw_line.rstrip().expandtabs(4))
            # A single space keeps blank code lines as a filled empty row.
            pdf.multi_cell(0, 5, code_line or ' ', fill=True, border=0)
            continue

        if not line:
            pdf.ln(3)
            continue

        # Headers (using robust regex parser)
        header = parse_header(line)
        if header:
            level, text = header

            if level == 1:  # H1
                # The first H1 is already shown on the cover.
                if not first_h1_skipped:
                    first_h1_skipped = True
                    continue

                if pdf.page_no() > 2 or pdf.get_y() > 60:
                    pdf.add_page()

                # Start at the left margin regardless of where the previous
                # element left the cursor.
                pdf.set_x(pdf.l_margin)
                pdf.set_font('Helvetica', 'B', 20)
                pdf.set_text_color(*COLOR_PRIMARY)
                pdf.multi_cell(0, 10, safe_text(text), fill=False)
                pdf.ln(5)
                y = pdf.get_y()
                pdf.set_draw_color(*COLOR_PRIMARY)
                pdf.line(pdf.l_margin, y, pdf.w - pdf.r_margin, y)
                pdf.ln(10)

            elif level == 2:  # H2
                pdf.ln(8)
                pdf.set_font('Helvetica', 'B', 14)
                pdf.set_text_color(*COLOR_PRIMARY)
                pdf.multi_cell(0, 8, safe_text(text), fill=False)
                pdf.ln(2)

            elif level == 3:  # H3
                pdf.ln(4)
                pdf.set_font('Helvetica', 'B', 12)
                pdf.set_text_color(*COLOR_TEXT_MAIN)
                pdf.multi_cell(0, 6, safe_text(text), fill=False)

            else:  # H4+
                pdf.ln(3)
                pdf.set_font('Helvetica', 'B', 11)
                pdf.set_text_color(*COLOR_TEXT_MAIN)
                pdf.multi_cell(0, 5, safe_text(text), fill=False)

            continue

        # Images (robust path handling)
        img_match = RE_IMAGE.search(line)
        if img_match or (line.startswith('![') and '](' in line):
            if img_match:
                img_path = img_match.group(2)
            else:
                # Fallback for targets the strict pattern rejects (e.g. spaces).
                loose_match = re.search(r'\(([^)]+)\)', line)
                img_path = loose_match.group(1) if loose_match else None

            if img_path:
                full_path = normalize_image_path(md_file, img_path)
                if full_path:
                    pdf.ln(5)
                    try:
                        x = (pdf.w - 110) / 2
                        pdf.image(full_path, x=x, w=110)
                    except Exception as exc:
                        # Best effort: a broken image must not abort the
                        # document, but the failure should be visible.
                        print(f"Warning: could not embed image {full_path}: {exc}", file=sys.stderr)
                    pdf.ln(5)
                else:
                    print(f"Warning: image not found: {img_path}", file=sys.stderr)
            continue

        # Blockquotes/Callouts
        bq_match = RE_BLOCKQUOTE.match(line)
        if bq_match:
            c_type, clean_content = parse_callout_type(bq_match.group(1))
            render_callout(pdf, clean_content, c_type)
            continue

        # Lists and regular text
        pdf.set_fill_color(255, 255, 255)
        pdf.set_font('Helvetica', '', 11)
        pdf.set_text_color(*COLOR_TEXT_MAIN)

        list_item = parse_list_item(line)
        line_processed = make_links_clickable(line)

        if list_item:
            item_type, content, extra = list_item
            pdf.set_x(pdf.l_margin + 6)
            if item_type == 'cb':
                # Checkbox
                checkbox = '[x]' if extra else '[ ]'
                pdf.multi_cell(0, 7, safe_text(f"{checkbox} {content}"), markdown=True, fill=False)
            else:
                pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False)
        else:
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False)

    # A table that ends the file has no following line to trigger its flush.
    if table_buffer:
        _render_table(pdf, table_buffer)

    pdf.output(pdf_file)
    print(f"PDF Generated: {pdf_file}")

if __name__ == "__main__":
    # CLI entry point: the input Markdown path is required, the output PDF
    # path is optional.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python convert_to_pdf.py <input.md> [output.pdf]")
        sys.exit(1)

    md_in = cli_args[0]
    # Without an explicit output, write next to the input with a .pdf suffix.
    pdf_out = cli_args[1] if len(cli_args) >= 2 else os.path.splitext(md_in)[0] + ".pdf"

    convert(md_in, pdf_out)