# manuais-e-documentacao-itguys/.gemini/convert_to_pdf.py

import sys
import os
import re
from datetime import datetime

# Dependency check
try:
    from fpdf import FPDF
    from fpdf.enums import XPos, YPos
    from fpdf.fonts import FontFace
except ImportError:
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "fpdf2"])
    from fpdf import FPDF
    from fpdf.enums import XPos, YPos
    from fpdf.fonts import FontFace

# Assets
# Logo files are resolved relative to the directory containing this script,
# so the script can be invoked from any working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")
LOGO_FOOTER_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_footer.png")

# Colors (Premium Palette)
# Every color is an (R, G, B) tuple of 0-255 ints; callers unpack them with
# ``*COLOR_X`` into the fpdf set_*_color methods.
COLOR_PRIMARY = (20, 120, 207)      # #1478cf (Blue)
COLOR_SECONDARY = (0, 247, 255)     # #00f7ff (Cyan)
COLOR_ACCENT = (46, 204, 113)       # #2ecc71 (Green)
COLOR_TEXT_MAIN = (50, 60, 70)      # Dark Grey (Body)
COLOR_BG_LIGHT = (250, 250, 252)

# Specific Header/Section Colors
COLOR_HEADER_BG = (20, 120, 207)     # #1478cf (Blue)
COLOR_SECTION_BG = (235, 242, 250)   # Light Blue
COLOR_SECTION_TEXT = (20, 80, 140)   # Dark Blue

# Terminal Code Block Colors
COLOR_CODE_BG = (30, 30, 30)         # #1e1e1e (Dark Terminal)
COLOR_CODE_TEXT = (220, 220, 220)    # Off-white
COLOR_CODE_KEYWORD = (86, 156, 214)  # Blue (VSCode-like)
COLOR_CODE_STRING = (206, 145, 120)  # Orange/Red
COLOR_CODE_COMMENT = (106, 153, 85)  # Green

# Callout Colors
# INFO_* style ordinary notes, WARN_* style warning/important callouts.
COLOR_INFO_BG = (240, 248, 255)      # AliceBlue
COLOR_INFO_BORDER = (20, 120, 207)
COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0)

# Regex Patterns
# Pre-compiled matchers for individual Markdown lines.

# 1-6 '#' characters, whitespace, then the title. Groups: (hashes, title).
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
# '-', '+' or '*' bullet with optional indentation. Group 1: item text.
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
# 'N.' or 'N)' numbered item. Groups: (number, item text).
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
# '>' at column 0 (no leading indentation allowed). Group 1: quoted text.
RE_BLOCKQUOTE = re.compile(r'^>\s*(.*)$')
# A line made only of pipes, whitespace, dashes and colons, i.e. the
# '|---|:--:|' separator row of a Markdown table.
RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
# '![alt](path "optional title")'. Groups: (alt text, path).
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
# Opening/closing code fence with optional language tag.
# NOTE(review): not referenced anywhere in the visible code.
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
# Task-list item '- [ ] text' / '- [x] text'. Groups: (mark, item text).
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')
# 'Key: value' pair, optionally wrapped in '**'. Groups: (key, value).
# NOTE(review): not referenced anywhere in the visible code.
RE_METADATA = re.compile(r'(?:\*\*)?([a-zA-Z0-9çãáéíóúÁÉÍÓÚçÇ\s]+)(?:\*\*)?:\s*(.*?)(?=$|\||\*\*)')

def process_variables(text):
    """Expand the template placeholders found in *text*.

    ``{{DATA_ATUAL}}`` becomes today's date as ``dd/mm/YYYY`` and ``{{ANO}}``
    becomes the current four-digit year, both taken from the local clock.
    Text without placeholders is returned unchanged.
    """
    today = datetime.now()
    substitutions = (
        ('{{DATA_ATUAL}}', today.strftime("%d/%m/%Y")),
        ('{{ANO}}', str(today.year)),
    )
    # str.replace is a no-op when the placeholder is absent, so no
    # membership test is needed beforehand.
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)
    return text

def clean_markdown(text):
    """Strip bold (``**``) and inline-code (`````) markers from *text*.

    The result is forced into the Latin-1 repertoire: any character that
    cannot be encoded as Latin-1 is replaced by ``?``.
    """
    for marker in ('**', '`'):
        text = text.replace(marker, '')
    latin1_bytes = text.encode('latin-1', 'replace')
    return latin1_bytes.decode('latin-1')

# Single-pass translation table used by safe_text(). Keys are written as
# escapes so invisible code points (the emoji variation selector) are explicit.
_SAFE_TEXT_TABLE = str.maketrans({
    '\u2139': None,        # information-source emoji base character
    '\u26a0': None,        # warning-sign emoji base character
    '\U0001f680': None,    # rocket emoji
    '\ufe0f': None,        # variation selector-16 that trails "emoji style" glyphs
    '\u201c': '"',         # left double quotation mark
    '\u201d': '"',         # right double quotation mark
    '\u2018': "'",         # left single quotation mark
    '\u2019': "'",         # right single quotation mark
    '\u2013': '-',         # en dash
    '\u2014': '-',         # em dash
    '\u2026': '...',       # horizontal ellipsis
})


def safe_text(text):
    """Return *text* reduced to characters the Latin-1 core fonts can draw.

    Decorative emoji used in callouts are removed (with or without their
    trailing variation selector), typographic quotes, dashes and the ellipsis
    are mapped to ASCII equivalents, and any remaining character outside
    Latin-1 is replaced by ``?``.

    Args:
        text: Arbitrary unicode string.

    Returns:
        A string in which every character is encodable as Latin-1.
    """
    translated = text.translate(_SAFE_TEXT_TABLE)
    return translated.encode('latin-1', 'replace').decode('latin-1')

def make_links_clickable(text):
    """Turn back-ticked http(s) URLs into Markdown links.

    A span such as `` `https://example.com` `` is rewritten to
    ``[https://example.com](https://example.com)``; everything else in
    *text* is left untouched.
    """
    backticked_url = r'`(https?://[^`]+)`'
    markdown_link = r'[\1](\1)'
    return re.sub(backticked_url, markdown_link, text)

def parse_header(line):
    """Parse a Markdown ATX heading.

    Returns a ``(level, title)`` tuple, where *level* is the number of
    leading ``#`` characters (1-6) and *title* is the stripped heading text,
    or ``None`` when the stripped *line* is not a heading.
    """
    found = RE_HEADER.match(line.strip())
    if found is None:
        return None
    hashes, title = found.groups()
    return len(hashes), title.strip()

def parse_list_item(line):
    """Classify *line* as a Markdown list item.

    Returns a 3-tuple ``(kind, text, extra)`` or ``None`` when the line is
    not a list item:

    * ``('cb', text, checked)`` - task-list item; *checked* is a bool.
    * ``('ul', text, None)``    - bullet item.
    * ``('ol', text, number)``  - numbered item; *number* is the digit string.

    Checkboxes are tested first because they would otherwise also match the
    bullet pattern.
    """
    checkbox = RE_CHECKBOX.match(line)
    if checkbox is not None:
        mark, body = checkbox.groups()
        return ('cb', body, mark.lower() == 'x')

    bullet = RE_UNORDERED_LIST.match(line)
    if bullet is not None:
        return ('ul', bullet.group(1), None)

    numbered = RE_ORDERED_LIST.match(line)
    if numbered is not None:
        number, body = numbered.groups()
        return ('ol', body, number)

    return None

def parse_callout_type(content):
    """Decide whether blockquote *content* is a warning or an info callout.

    The text is a warning when, compared case-insensitively, it contains one
    of the warning tags or keywords listed below; otherwise it is an info
    note. Returns ``(kind, text)`` where *kind* is ``'WARN'`` or ``'INFO'``
    and *text* is *content* with the tags of that kind removed and
    surrounding whitespace stripped.
    """
    warn_markers = ('[!WARNING]', '[!CAUTION]', '[!IMPORTANT]', 'IMPORTANTE', 'WARNING', 'ATENÇÃO')
    shouted = content.upper()

    is_warning = False
    for marker in warn_markers:
        if marker in shouted:
            is_warning = True
            break

    if is_warning:
        kind = 'WARN'
        tag_pattern = r'\[!(WARNING|CAUTION|IMPORTANT)\]'
    else:
        kind = 'INFO'
        tag_pattern = r'\[!(NOTE|TIP|INFO)\]'

    stripped = re.sub(tag_pattern, '', content, flags=re.IGNORECASE).strip()
    return kind, stripped

class UXPDF(FPDF):
    """FPDF subclass that draws the branded building blocks of a manual.

    It provides the page footer plus renderers for level-1/level-2 headings,
    callout boxes and dark "terminal" code blocks with naive syntax
    colouring. All text is passed through ``safe_text`` because only the
    built-in Latin-1 fonts (Helvetica, Courier) are used.
    """

    # Words coloured as keywords inside code blocks (compared lower-cased).
    _CODE_KEYWORDS = frozenset([
        'sudo', 'apt', 'docker', 'install', 'git', 'systemctl', 'service',
        'echo', 'cat', 'grep', 'ls', 'cd', 'pwd', 'chmod', 'chown',
        'def', 'class', 'return', 'import', 'from', 'if', 'else', 'elif',
        'for', 'while', 'try', 'except', 'select', 'insert', 'update', 'delete',
        'create', 'table', 'int', 'varchar', 'bool', 'true', 'false', 'null',
    ])

    # Very basic tokenizer: whitespace runs, quoted strings, $variables and
    # word/flag-like runs are kept as separate tokens; everything in between
    # (punctuation) comes out of re.split as its own piece.
    _CODE_TOKEN_RE = re.compile(r'(\s+|"[^"]*"|\'[^\']*\'|\$\w+|[-a-zA-Z0-9_]+)')

    def __init__(self, metadata=None):
        """Create the document; *metadata* is stored as-is (``{}`` if falsy)."""
        super().__init__()
        self.metadata = metadata or {}

    def header(self):
        """Intentionally empty: headings are drawn by the body renderers."""
        pass

    def footer(self):
        """Draw the footer (rule, logo, address, page number) on every page
        except the first one, which is the cover."""
        if self.page_no() == 1:
            return

        # Horizontal rule 35 units above the bottom edge.
        self.set_y(-35)
        self.set_draw_color(0, 0, 0)
        self.set_line_width(0.5)
        self.line(10, self.get_y(), self.w - 10, self.get_y())

        self.ln(2)
        start_y = self.get_y()

        # Logo Footer (Left) - skipped silently when the asset is missing.
        if os.path.exists(LOGO_FOOTER_PATH):
            self.image(LOGO_FOOTER_PATH, x=10, y=start_y, h=12)

        # Address Block (Right)
        self.set_font('Helvetica', '', 8)
        self.set_text_color(80, 80, 80)

        address_lines = [
            "IT Guys Consultoria em Informática Ltda.",
            "Rua Tem. Ronald Santoro 183 - Sala 203",
            "CEP 23080-270 - Rio de Janeiro - RJ",
            "Fone: (21) 96634-4698",
            "www.itguys.com.br"
        ]

        self.set_y(start_y)
        for line in address_lines:
            self.cell(0, 3.5, safe_text(line), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='R')

        # Page number; "{nb}" is written literally (escaped braces) so that
        # fpdf can substitute the total page count.
        self.set_y(-10)
        self.set_font('Helvetica', 'I', 8)
        self.cell(0, 10, f'Página {self.page_no()}/{{nb}}', 0, align='R')

    def render_h1(self, text):
        """Draw a level-1 heading as a white-on-blue bar with a green accent.

        A new page is started first when the document is already past page 2
        or the cursor is below y=200.
        """
        if self.page_no() > 2 or self.get_y() > 200:
            self.add_page()
        self.ln(5)

        # Blue Bar Background
        self.set_fill_color(*COLOR_HEADER_BG)
        self.rect(10, self.get_y(), self.w - 20, 12, 'F')

        # Green Accent
        self.set_fill_color(*COLOR_ACCENT)
        self.rect(10, self.get_y(), 3, 12, 'F')

        # Text. Upper-case BEFORE sanitising: upper() can map a Latin-1
        # character to one outside Latin-1, which the core fonts cannot draw.
        self.set_xy(16, self.get_y() + 3)
        self.set_font('Helvetica', 'B', 12)
        self.set_text_color(255, 255, 255)
        self.cell(0, 6, safe_text(text.upper()), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
        self.ln(6)

    def render_h2(self, text):
        """Draw a level-2 heading as a light-blue bar with a green accent."""
        self.ln(5)

        # Light Blue Bar
        self.set_fill_color(*COLOR_SECTION_BG)
        self.rect(10, self.get_y(), self.w - 20, 8, 'F')

        # Green Accent
        self.set_fill_color(*COLOR_ACCENT)
        self.rect(10, self.get_y(), 3, 8, 'F')

        # Text (upper-cased before sanitising, see render_h1).
        self.set_xy(16, self.get_y() + 1.5)
        self.set_font('Helvetica', 'B', 11)
        self.set_text_color(*COLOR_SECTION_TEXT)
        self.cell(0, 6, safe_text(text.upper()), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
        self.ln(4)

    def render_callout_block(self, lines, type='INFO'):
        """Draw a callout box containing *lines*.

        Args:
            lines: List of text lines (callout tags already removed).
            type: ``'WARN'`` for the orange "IMPORTANTE" style; any other
                value gives the blue "NOTA" style.
        """
        self.ln(3)
        is_warn = type == 'WARN'
        bg = COLOR_WARN_BG if is_warn else COLOR_INFO_BG
        border = COLOR_WARN_BORDER if is_warn else COLOR_INFO_BORDER
        label = "IMPORTANTE" if is_warn else "NOTA"

        self.set_font('Helvetica', '', 10)
        line_height = 5

        # Box height = label row (8) + a rough wrap estimate that assumes
        # about 90 characters fit on one rendered row.
        total_height = 8
        for line in lines:
            total_height += (len(line) // 90 + 1) * line_height

        # Keep the box on one page, mirroring render_code_block: the box is
        # drawn as a single rectangle, so it must not straddle the footer.
        if self.get_y() + total_height > self.h - 40:
            self.add_page()

        start_y = self.get_y()
        self.set_draw_color(*border)
        self.set_line_width(0.5)

        # Left thick border
        self.set_fill_color(*border)
        self.rect(10, start_y, 2, total_height, 'F')

        # Background
        self.set_fill_color(*bg)
        self.rect(12, start_y, self.w - 22, total_height, 'F')

        # Label
        self.set_xy(15, start_y + 2)
        self.set_font('Helvetica', 'B', 9)
        self.set_text_color(*border)
        self.cell(0, 5, label)

        # Content
        self.set_xy(15, start_y + 8)
        self.set_font('Helvetica', '', 10)
        self.set_text_color(*COLOR_TEXT_MAIN)

        for line in lines:
            self.set_x(15)
            self.multi_cell(0, 5, safe_text(line), markdown=True)

        # Continue below the box regardless of where the text ended.
        self.set_y(start_y + total_height + 2)
        self.set_text_color(*COLOR_TEXT_MAIN)  # Reset

    def render_code_block(self, lines, lang=''):
        """Draw *lines* as a dark terminal-style code block.

        A block that does not fit in the space left on the page is moved to a
        new page; a block taller than a whole page is split into page-sized
        boxes so no line is drawn outside the printable area.

        Args:
            lines: Code lines, indentation preserved, without newlines.
            lang: Language tag from the fence; forwarded to the highlighter.
        """
        self.ln(3)
        self.set_font('Courier', '', 10)  # 10pt as requested
        line_height = 5
        padding = 4

        start_x = 10
        box_width = self.w - 20
        bottom_limit = self.h - 40  # safe zone above the footer

        # Largest number of lines a box starting at the top margin can hold.
        usable_height = bottom_limit - self.t_margin - (padding * 2)
        max_lines = max(1, int(usable_height // line_height))
        # An empty block still yields one (empty) box.
        chunks = [lines[k:k + max_lines] for k in range(0, len(lines), max_lines)] or [lines]

        for chunk in chunks:
            box_height = (len(chunk) * line_height) + (padding * 2)

            # Page break check
            if self.get_y() + box_height > bottom_limit:
                self.add_page()

            start_y = self.get_y()

            # Dark Terminal Background
            self.set_fill_color(*COLOR_CODE_BG)
            self.rect(start_x, start_y, box_width, box_height, 'F')

            # Render lines with syntax highlighting, one fixed row per line.
            current_y = start_y + padding
            for line in chunk:
                self.set_xy(start_x + padding, current_y)
                self.highlight_code_line(line, lang)
                current_y += line_height

            self.set_y(start_y + box_height + 5)

        self.set_text_color(*COLOR_TEXT_MAIN)  # Reset

    @staticmethod
    def _find_comment_start(line):
        """Return the index where a trailing comment starts in *line*, or None.

        ``#`` counts only at the start of the line or after whitespace, and
        ``//`` only when not preceded by ``:`` (so URLs are not comments).
        Markers inside a closed single- or double-quoted string are ignored;
        an unclosed quote is treated as an ordinary character, matching the
        tokenizer used by ``write_code_text``.
        """
        pos = 0
        length = len(line)
        while pos < length:
            ch = line[pos]
            if ch == '"' or ch == "'":
                closing = line.find(ch, pos + 1)
                if closing != -1:
                    pos = closing + 1
                    continue
            elif ch == '#':
                if pos == 0 or line[pos - 1].isspace():
                    return pos
            elif ch == '/' and line.startswith('//', pos):
                if pos == 0 or line[pos - 1] != ':':
                    return pos
            pos += 1
        return None

    def highlight_code_line(self, line, lang):
        """Write one code line at the current position with basic colouring.

        The part before a trailing comment is tokenised and coloured by
        ``write_code_text``; the comment itself is written in the comment
        colour.
        """
        # Default Off-White
        self.set_text_color(*COLOR_CODE_TEXT)

        comment_at = self._find_comment_start(line)
        if comment_at is None:
            self.write_code_text(line, lang)
            return

        self.write_code_text(line[:comment_at], lang)
        self.set_text_color(*COLOR_CODE_COMMENT)
        self.write(5, safe_text(line[comment_at:]))

    def write_code_text(self, text, lang):
        """Write *text* token by token, colouring strings, keywords,
        ``-flags`` and ``$variables``. *lang* is currently not used."""
        for token in self._CODE_TOKEN_RE.split(text):
            if not token:
                continue

            if token.startswith('"') or token.startswith("'"):
                # String
                color = COLOR_CODE_STRING
            elif token.lower() in self._CODE_KEYWORDS:
                # Keywords (Broad set)
                color = COLOR_CODE_KEYWORD
            elif token.startswith('-') or token.startswith('$'):
                # Flags and variables
                color = COLOR_CODE_KEYWORD
            else:
                color = COLOR_CODE_TEXT

            self.set_text_color(*color)
            self.write(5, safe_text(token))

# Number of leading lines scanned for "Key: value" document metadata.
_METADATA_SCAN_LINES = 20


def _normalize_meta_key(raw_key):
    """Lower-case *raw_key*, drop ``*`` emphasis marks and strip diacritics,
    so that e.g. ``**Código**`` compares equal to ``codigo``."""
    import unicodedata  # local import: only needed for metadata parsing

    key = raw_key.replace('*', '').strip().lower()
    decomposed = unicodedata.normalize('NFKD', key)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


def _parse_metadata(head_lines):
    """Collect code/author/class/date metadata from ``Key: value`` pairs.

    Each line may hold several pairs separated by ``|``. Later matches
    overwrite earlier ones. Returns a dict with any of the keys ``code``,
    ``author``, ``class`` and ``date``.
    """
    metadata = {}
    for raw in head_lines:
        # Expand variables first so a "{{DATA_ATUAL}}" date is captured resolved.
        for part in process_variables(raw).split('|'):
            if ':' not in part:
                continue
            k, v = part.strip().split(':', 1)
            key = _normalize_meta_key(k)
            val = v.replace('*', '').strip()

            if 'codigo' in key:
                metadata['code'] = val
            elif 'responsavel' in key or 'autor' in key:
                metadata['author'] = val
            elif 'classificacao' in key:
                metadata['class'] = val
            elif 'data' in key:
                metadata['date'] = val
    return metadata


def _extract_title(lines):
    """Return the text of the first ``# `` heading (without the
    ``MANUAL TÉCNICO - `` prefix), or a generic default title."""
    for line in lines:
        if line.startswith('# '):
            return line[2:].strip().replace('MANUAL TÉCNICO - ', '')
    return "DOCUMENTAÇÃO TÉCNICA"


def _render_cover(pdf, doc_title, metadata):
    """Add the cover page: side bar, logo, title, metadata block, branding."""
    pdf.add_page()
    pdf.set_fill_color(*COLOR_PRIMARY)
    pdf.rect(0, 0, 15, 297, 'F')

    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, x=40, y=50, w=100)

    pdf.set_y(140)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 32)
    pdf.set_text_color(*COLOR_PRIMARY)
    # Upper-case before sanitising so the result is guaranteed Latin-1.
    pdf.multi_cell(0, 12, safe_text(doc_title.upper()), align='L')

    # Metadata Block
    pdf.set_y(180)
    pdf.set_x(30)

    meta_lines = []
    if 'code' in metadata:
        meta_lines.append(f"Código: {metadata['code']}")
    if 'class' in metadata:
        meta_lines.append(f"Classificação: {metadata['class']}")
    if 'author' in metadata:
        meta_lines.append(f"Responsável: {metadata['author']}")
    if 'date' in metadata:
        meta_lines.append(f"Data: {metadata['date']}")

    if meta_lines:
        pdf.set_font('Helvetica', '', 14)
        pdf.set_text_color(80, 80, 80)
        for meta_line in meta_lines:
            pdf.set_x(30)
            pdf.cell(0, 8, safe_text(meta_line), new_x=XPos.LMARGIN, new_y=YPos.NEXT)

    # Branding
    pdf.set_y(-30)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 10)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.cell(0, 10, "iT GUYS SOLUTIONS")


def _split_table_row(row_line):
    """Split a ``| a | b |`` row into stripped cells, keeping empty cells so
    that columns stay aligned."""
    inner = row_line.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_table(pdf, table_buffer):
    """Render buffered Markdown table rows: first row is the header,
    separator rows are skipped, remaining rows are body rows."""
    headers = _split_table_row(table_buffer[0])
    data = []
    for r_line in table_buffer[1:]:
        if RE_TABLE_SEP.match(r_line):
            continue
        cols = _split_table_row(r_line)
        if any(cols):
            # Pad short rows so every row has at least the header's width.
            cols.extend([''] * (len(headers) - len(cols)))
            data.append(cols)

    pdf.ln(5)
    # Reset font and fill before the table so state left behind by a heading
    # (bold) or a code block (Courier, dark fill) does not leak into cells.
    pdf.set_font('Helvetica', '', 11)
    pdf.set_fill_color(255, 255, 255)

    with pdf.table(text_align="LEFT", line_height=7) as table:
        row = table.row()
        for h in headers:
            row.cell(clean_markdown(h), style=FontFace(emphasis="BOLD", color=(255, 255, 255), fill_color=COLOR_PRIMARY))
        for d_row in data:
            row = table.row()
            for d in d_row:
                # Explicit white background for body cells.
                row.cell(clean_markdown(d), style=FontFace(fill_color=(255, 255, 255), color=COLOR_TEXT_MAIN))
    pdf.ln(5)


def convert(md_file, pdf_file):
    """Convert the Markdown file *md_file* into a branded PDF at *pdf_file*.

    The document gets a cover page (title from the first ``# `` heading,
    metadata from ``Key: value`` pairs in the first lines) followed by the
    content, rendered line by line: fenced code blocks, ``>`` callouts, pipe
    tables, headings, images (paths relative to *md_file*), list items and
    plain paragraphs. Prints a confirmation line when done.

    Args:
        md_file: Path of the UTF-8 Markdown source.
        pdf_file: Path of the PDF to write.

    Raises:
        OSError: If *md_file* cannot be opened.
    """
    # Read once; the same lines feed metadata, title and content.
    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Slicing tolerates files shorter than the scan window.
    metadata = _parse_metadata(lines[:_METADATA_SCAN_LINES])

    pdf = UXPDF(metadata)
    pdf.set_auto_page_break(auto=False)  # Disable auto-break for manual cover positioning
    pdf.set_title("Manual Técnico iT Guys")

    # --- Cover Page ---
    _render_cover(pdf, _extract_title(lines), metadata)

    # --- Content ---
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=40)  # Enable auto-break with safe margin for content

    # Buffers
    code_buffer = []
    in_code = False
    code_lang = ''

    callout_buffer = []
    callout_type = 'INFO'
    in_callout = False

    table_buffer = []

    i = 0
    while i < len(lines):
        original_line = process_variables(lines[i])  # Preserve spaces with vars processed
        line = original_line.strip()

        # A pending table ends at the first non-table line. Flush it before
        # handling that line so blocks keep their document order. (While
        # inside a code block the buffer is always empty.)
        if table_buffer and not line.startswith('|'):
            _render_table(pdf, table_buffer)
            table_buffer = []

        # 1. Code Blocks
        if line.startswith('```'):
            if in_code:
                # Flush Code
                pdf.render_code_block(code_buffer, code_lang)
                code_buffer = []
                in_code = False
            else:
                # A callout still open ends where the code block starts.
                if in_callout:
                    pdf.render_callout_block(callout_buffer, callout_type)
                    in_callout = False
                    callout_buffer = []
                # Start Code
                in_code = True
                code_lang = line.replace('```', '').strip()
            i += 1
            continue

        if in_code:
            code_buffer.append(lines[i].rstrip())  # keep indentation, raw text
            i += 1
            continue

        # 2. Callouts
        bq_match = RE_BLOCKQUOTE.match(original_line)
        if bq_match:
            c_type, clean_content = parse_callout_type(bq_match.group(1))

            if not in_callout:
                in_callout = True
                callout_type = c_type
                callout_buffer = [clean_content]
            elif c_type == callout_type:
                callout_buffer.append(clean_content)
            else:
                # Flush previous, start new
                pdf.render_callout_block(callout_buffer, callout_type)
                callout_type = c_type
                callout_buffer = [clean_content]
            i += 1
            continue
        elif in_callout:
            if line:
                # A non-quote line ends the callout; do not advance so this
                # line is processed normally on the next pass.
                pdf.render_callout_block(callout_buffer, callout_type)
                in_callout = False
                callout_buffer = []
                continue

            # Blank line: only a gap if the next line is a quote again.
            next_is_quote = i + 1 < len(lines) and lines[i + 1].strip().startswith('>')
            if not next_is_quote:
                pdf.render_callout_block(callout_buffer, callout_type)
                in_callout = False
                callout_buffer = []
            i += 1
            continue

        # 3. Tables
        if line.startswith('|'):
            table_buffer.append(line)
            i += 1
            continue

        # 4. Headers
        if line.startswith('#'):
            parsed = parse_header(line)
            if parsed:
                level, text = parsed
                if level == 1:
                    pdf.render_h1(text)
                elif level == 2:
                    pdf.render_h2(text)
                else:
                    pdf.ln(5)
                    pdf.set_font('Helvetica', 'B', 12)
                    pdf.set_text_color(*COLOR_TEXT_MAIN)
                    pdf.cell(0, 6, safe_text(text), new_x=XPos.LMARGIN, new_y=YPos.NEXT)
                i += 1
                continue

        # 5. Images
        img_match = RE_IMAGE.search(line)
        if img_match:
            # Image paths are resolved relative to the Markdown file.
            full_path = os.path.join(os.path.dirname(md_file), img_match.group(2))
            if os.path.exists(full_path):
                pdf.ln(5)
                pdf.image(full_path, w=110, x=(pdf.w - 110) / 2)
                pdf.ln(5)
            i += 1
            continue

        # 6. Normal Text
        if line:
            pdf.set_fill_color(255, 255, 255)
            pdf.set_font('Helvetica', '', 11)
            pdf.set_text_color(*COLOR_TEXT_MAIN)

            list_match = parse_list_item(line)
            if list_match:
                type_, content, extra = list_match
                # ``extra`` means "checked" for checkboxes but holds the item
                # number for ordered lists, so branch on the type first.
                if type_ == 'cb':
                    marker = "[x] " if extra else "[ ] "
                elif type_ == 'ol':
                    marker = f"{extra}. "
                else:
                    # chr(149) is the bullet glyph in the core fonts' encoding.
                    marker = chr(149) + " "

                pdf.set_x(15)
                pdf.multi_cell(0, 6, safe_text(marker + make_links_clickable(content)), markdown=True)
            else:
                pdf.set_x(10)
                pdf.multi_cell(0, 6, safe_text(make_links_clickable(line)), markdown=True)

        i += 1

    # Flush whatever is still buffered when the file ends mid-block.
    if in_code:
        pdf.render_code_block(code_buffer, code_lang)
    if in_callout and callout_buffer:
        pdf.render_callout_block(callout_buffer, callout_type)
    if table_buffer:
        _render_table(pdf, table_buffer)

    pdf.output(pdf_file)
    print(f"PDF Generated: {pdf_file}")

if __name__ == "__main__":
    # sys.argv[0] is the script itself; an input path must follow it.
    if len(sys.argv) <= 1:
        print("Usage: python convert_to_pdf.py <input.md> [output.pdf]")
        sys.exit(1)

    md_in = sys.argv[1]
    if len(sys.argv) > 2:
        pdf_out = sys.argv[2]
    else:
        # No output given: reuse the input path with a .pdf extension.
        pdf_out = os.path.splitext(md_in)[0] + ".pdf"
    convert(md_in, pdf_out)