import sys
import os
import re
from datetime import datetime
import argparse

# Dependency check.
# NOTE(review): a missing fpdf2 is installed on the fly with pip at import
# time. This is a module-level side effect kept for backward compatibility.
try:
    from fpdf import FPDF
    from fpdf.enums import XPos, YPos
    from fpdf.fonts import FontFace
except ImportError:
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "fpdf2"])
    from fpdf import FPDF
    from fpdf.enums import XPos, YPos
    from fpdf.fonts import FontFace

# Assets
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")

# Colors (Premium Palette)
COLOR_PRIMARY = (20, 120, 207)  # #1478cf
COLOR_TEXT_MAIN = (50, 60, 70)  # #323C46
COLOR_BG_LIGHT = (250, 250, 252)
COLOR_INFO_BG = (235, 245, 255)
COLOR_INFO_BORDER = (20, 120, 207)
COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0)

# Regex Patterns (CommonMark-inspired for robustness)
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
RE_BLOCKQUOTE = re.compile(r'^>\s*(.*)$')
RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')

# Characters that cannot be encoded as latin-1 and would otherwise be
# rendered as '?' by safe_text(): decorative symbols are dropped, and
# typographic punctuation is mapped to its closest ASCII equivalent.
_LATIN1_FALLBACKS = str.maketrans({
    '\ufe0f': None,        # emoji variation selector
    '\u2139': None,        # information sign
    '\u26a0': None,        # warning sign
    '\U0001F680': None,    # rocket
    '\u201c': '"',         # left double quotation mark
    '\u201d': '"',         # right double quotation mark
    '\u2018': "'",         # left single quotation mark
    '\u2019': "'",         # right single quotation mark
    '\u2013': '-',         # en dash
    '\u2014': '-',         # em dash
    '\u2026': '...',       # horizontal ellipsis
})


def parse_header(line):
    """Parse a Markdown ATX heading.

    Returns a ``(level, text)`` tuple, where ``level`` is the number of
    leading ``#`` characters (1-6), or ``None`` when the line is not a
    heading.
    """
    match = RE_HEADER.match(line.strip())
    if match:
        return len(match.group(1)), match.group(2).strip()
    return None


def parse_list_item(line):
    """Parse a Markdown list item.

    Returns a 3-tuple ``(kind, content, extra)`` or ``None`` when the line
    is not a list item:

    * ``('cb', content, checked)`` for a checkbox item (``checked`` is bool)
    * ``('ul', content, None)`` for an unordered item
    * ``('ol', content, number)`` for an ordered item (``number`` is a str)
    """
    # Check checkbox first: it is more specific than the unordered pattern.
    cb_match = RE_CHECKBOX.match(line)
    if cb_match:
        checked = cb_match.group(1).lower() == 'x'
        return ('cb', cb_match.group(2), checked)

    ul_match = RE_UNORDERED_LIST.match(line)
    if ul_match:
        return ('ul', ul_match.group(1), None)

    ol_match = RE_ORDERED_LIST.match(line)
    if ol_match:
        return ('ol', ol_match.group(2), ol_match.group(1))

    return None


def parse_callout_type(content):
    """Classify blockquote content as a ``'WARN'`` or ``'INFO'`` callout.

    Returns ``(kind, text)``. For GitHub-style alerts (``[!NOTE]``,
    ``[!WARNING]``, ...) the marker is removed from the returned text; for
    keyword-based detection the text is returned unchanged. Anything that
    is not recognised defaults to ``'INFO'``.
    """
    content_upper = content.upper()

    # GitHub style alerts [!NOTE], [!WARNING], etc.
    if '[!WARNING]' in content_upper or '[!CAUTION]' in content_upper:
        return 'WARN', re.sub(r'\[!(WARNING|CAUTION)\]', '', content, flags=re.IGNORECASE).strip()
    if '[!IMPORTANT]' in content_upper:
        return 'WARN', re.sub(r'\[!IMPORTANT\]', '', content, flags=re.IGNORECASE).strip()
    if '[!NOTE]' in content_upper or '[!TIP]' in content_upper or '[!INFO]' in content_upper:
        return 'INFO', re.sub(r'\[!(NOTE|TIP|INFO)\]', '', content, flags=re.IGNORECASE).strip()

    # Traditional format with emoji or bold text
    if 'IMPORTANTE' in content_upper or 'WARNING' in content_upper or 'ATENÇÃO' in content_upper:
        return 'WARN', content
    if 'NOTA' in content_upper or 'NOTE' in content_upper or 'DICA' in content_upper or 'TIP' in content_upper:
        return 'INFO', content

    # Default to INFO for any blockquote
    return 'INFO', content


def normalize_image_path(md_file, img_path):
    """Resolve an image reference found in ``md_file`` to an existing path.

    The reference is percent-decoded (``%20`` etc.) and then looked up, in
    order: relative to the Markdown file's directory, as given, and
    relative to the Markdown file's directory with both slash styles
    converted to the platform separator.

    Returns the first existing path, or ``None`` when none exists.
    """
    from urllib.parse import unquote

    img_path = unquote(img_path.strip())  # Decode %20 etc.
    cwd = os.path.dirname(md_file)

    # Try path relative to the Markdown file first
    full_path = os.path.join(cwd, img_path)
    if os.path.exists(full_path):
        return full_path

    # Try the path as given (absolute, or relative to the working directory)
    if os.path.exists(img_path):
        return img_path

    # Try with separators converted, so Windows-style references also
    # resolve on POSIX and vice versa.
    converted = img_path.replace('\\', '/').replace('/', os.sep)
    alt_path = os.path.join(cwd, converted)
    if os.path.exists(alt_path):
        return alt_path

    return None


class UXPDF(FPDF):
    """FPDF subclass that draws the branded page header and footer.

    Page 1 is the cover and gets neither header nor footer.
    """

    def header(self):
        """Draw the logo, title and rule at the top of every page after the cover."""
        if self.page_no() > 1:
            self.set_fill_color(255, 255, 255)
            self.rect(0, 0, self.w, 25, 'F')
            if os.path.exists(LOGO_PATH):
                self.image(LOGO_PATH, x=10, y=8, h=10)
            self.set_draw_color(*COLOR_PRIMARY)
            self.set_line_width(0.5)
            self.line(0, 25, self.w, 25)
            self.set_font('Helvetica', 'B', 10)
            self.set_text_color(*COLOR_PRIMARY)
            self.set_y(10)
            self.cell(0, 10, "MANUAL TÉCNICO", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='R')
            self.ln(20)
        # Remember where body content starts on this page so that elements
        # spanning a page break (see render_callout) can anchor to it.
        self.content_top = self.get_y()

    def footer(self):
        """Draw the confidentiality notice and page counter below a thin rule."""
        # Skip the footer on the cover (page 1)
        if self.page_no() == 1:
            return
        self.set_y(-20)
        self.set_font('Helvetica', 'I', 8)
        self.set_text_color(100, 100, 100)
        self.set_draw_color(220, 220, 220)
        self.line(10, self.h - 20, self.w - 10, self.h - 20)
        self.set_y(-15)
        self.cell(0, 10, 'iT Guys Solutions - Confidencial', 0, align='L')
        self.set_x(0)
        self.cell(0, 10, f'Página {self.page_no()}/{{nb}}', 0, align='R')


def clean_markdown(text):
    """Strip bold and inline-code markers and coerce the text to latin-1.

    Characters that cannot be encoded as latin-1 are replaced with ``?``.
    """
    text = text.replace('**', '').replace('`', '')
    return text.encode('latin-1', 'replace').decode('latin-1')


def safe_text(text):
    """Return ``text`` restricted to characters encodable as latin-1.

    Decorative symbols are removed and typographic quotes, dashes and the
    ellipsis are mapped to ASCII first (see ``_LATIN1_FALLBACKS``); any
    remaining non-latin-1 character is replaced with ``?``.
    """
    text = text.translate(_LATIN1_FALLBACKS)
    return text.encode('latin-1', 'replace').decode('latin-1')


def make_links_clickable(text):
    """Rewrite back-ticked URLs (`` `https://...` ``) as Markdown links."""
    return re.sub(r'`(https?://[^`]+)`', r'[\1](\1)', text)


def process_variables(text):
    """Expand the ``{{DATA_ATUAL}}`` and ``{{ANO}}`` placeholders in ``text``."""
    now = datetime.now()
    replacements = {
        '{{DATA_ATUAL}}': now.strftime("%d/%m/%Y"),
        '{{ANO}}': str(now.year),
    }
    for placeholder, value in replacements.items():
        text = text.replace(placeholder, value)
    return text


def render_callout(pdf, text, type='INFO'):
    """Render a highlighted note/warning box with a coloured accent bar.

    Args:
        pdf: the FPDF instance to draw on.
        text: callout body; rendered with fpdf2's inline markdown enabled.
        type: ``'WARN'`` for the warning palette, anything else for the
            info palette. Text containing ``[IMPORTANTE]`` is always
            rendered as a warning. (The name shadows the builtin but is
            kept so keyword callers keep working.)
    """
    pdf.ln(5)
    start_page = pdf.page_no()
    bar_top = pdf.get_y()
    previous_line_width = pdf.line_width

    if type == 'WARN' or '[IMPORTANTE]' in text:
        bg = COLOR_WARN_BG
        border = COLOR_WARN_BORDER
        label = "IMPORTANTE"
    else:
        bg = COLOR_INFO_BG
        border = COLOR_INFO_BORDER
        label = "NOTA"

    pdf.set_fill_color(*bg)
    pdf.set_line_width(1.5)
    pdf.set_draw_color(*border)

    pdf.set_x(pdf.l_margin + 2)
    pdf.set_font('Helvetica', 'B', 9)
    pdf.set_text_color(*border)
    pdf.cell(0, 5, label, new_x=XPos.LMARGIN, new_y=YPos.NEXT)

    pdf.set_font('Helvetica', '', 10)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.set_x(pdf.l_margin + 2)
    text = make_links_clickable(text)
    pdf.multi_cell(0, 6, safe_text(text), fill=True, markdown=True)

    bar_bottom = pdf.get_y()
    if pdf.page_no() != start_page:
        # The callout crossed an automatic page break: the starting y
        # belongs to the previous page, so anchor the bar to the top of
        # the content area of the current page instead.
        bar_top = getattr(pdf, 'content_top', pdf.t_margin)
    pdf.set_draw_color(*border)
    pdf.line(pdf.l_margin, bar_top, pdf.l_margin, bar_bottom)

    # Reset drawing state explicitly to avoid bleeding into later elements
    # (the thick accent line width would otherwise be reused by later
    # rules that do not set their own width).
    pdf.set_line_width(previous_line_width)
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.ln(5)


def _reset_text_style(pdf):
    """Restore the default body fill colour, text colour and font."""
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.set_font('Helvetica', '', 11)


def _split_table_row(row_line):
    """Split a pipe-delimited table row into stripped cell strings.

    Only the outer pipes are removed, so an empty cell in the middle of a
    row is kept and the cells after it stay in their columns.
    """
    inner = row_line.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_cover(pdf, doc_title, doc_subtitle):
    """Draw the cover page: side band, logo, title, subtitle and brand line."""
    pdf.add_page()
    pdf.set_fill_color(*COLOR_PRIMARY)
    pdf.rect(0, 0, 15, pdf.h, 'F')

    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, x=40, y=50, w=100)

    pdf.set_y(140)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 32)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.multi_cell(0, 12, safe_text(doc_title), align='L')

    pdf.set_y(180)
    pdf.set_x(30)
    pdf.set_font('Helvetica', '', 16)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.multi_cell(0, 8, safe_text(doc_subtitle), align='L')

    pdf.set_y(-30)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 10)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.cell(0, 10, "iT GUYS SOLUTIONS")


def _render_table(pdf, table_lines):
    """Render buffered Markdown table lines (header row first) as a table."""
    if not table_lines:
        return

    headers = _split_table_row(table_lines[0])
    if not any(headers):
        return

    data = []
    for row_line in table_lines[1:]:
        if RE_TABLE_SEP.match(row_line):
            continue  # Skip separator line
        cells = _split_table_row(row_line)
        if not any(cells):
            continue
        # Pad short rows so every row has at least one cell per header.
        if len(cells) < len(headers):
            cells.extend([''] * (len(headers) - len(cells)))
        data.append(cells)

    pdf.ln(5)
    pdf.set_draw_color(*COLOR_PRIMARY)
    pdf.set_line_width(0.3)

    # "Campo"/"Valor" tables get a bold first column. The second header is
    # looked up defensively so a single-column table does not raise.
    second_header = headers[1] if len(headers) > 1 else ''
    is_tech_data = "Campo" in headers[0] or "Valor" in second_header
    # The fixed width pair only makes sense for exactly two columns.
    col_widths = (30, 70) if is_tech_data and len(headers) == 2 else None

    with pdf.table(
        text_align="LEFT",
        col_widths=col_widths,
        line_height=7
    ) as table:
        row = table.row()
        header_style = FontFace(emphasis="BOLD", color=(255, 255, 255), fill_color=COLOR_PRIMARY)
        for h in headers:
            row.cell(clean_markdown(h), style=header_style)

        for d_row in data:
            row = table.row()
            for idx, d in enumerate(d_row):
                emphasis = "BOLD" if is_tech_data and idx == 0 else None
                row.cell(
                    clean_markdown(d),
                    style=FontFace(color=COLOR_TEXT_MAIN, emphasis=emphasis, fill_color=(255, 255, 255)),
                )
    pdf.ln(5)


def _render_heading(pdf, level, text):
    """Render a heading; H1 may start a new page and gets an underline rule."""
    if level == 1:
        if pdf.page_no() > 2 or pdf.get_y() > 60:
            pdf.add_page()
        # No ln() precedes an H1 that stays on the page, so make sure the
        # cursor is back at the left margin before writing.
        pdf.set_x(pdf.l_margin)
        pdf.set_font('Helvetica', 'B', 20)
        pdf.set_text_color(*COLOR_PRIMARY)
        pdf.multi_cell(0, 10, safe_text(text), fill=False)
        pdf.ln(5)
        y = pdf.get_y()
        pdf.set_draw_color(*COLOR_PRIMARY)
        pdf.line(pdf.l_margin, y, pdf.w - pdf.r_margin, y)
        pdf.ln(10)
    elif level == 2:
        pdf.ln(8)
        pdf.set_font('Helvetica', 'B', 14)
        pdf.set_text_color(*COLOR_PRIMARY)
        pdf.multi_cell(0, 8, safe_text(text), fill=False)
        pdf.ln(2)
    elif level == 3:
        pdf.ln(4)
        pdf.set_font('Helvetica', 'B', 12)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.multi_cell(0, 6, safe_text(text), fill=False)
    else:  # H4+
        pdf.ln(3)
        pdf.set_font('Helvetica', 'B', 11)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.multi_cell(0, 5, safe_text(text), fill=False)


def _render_image(pdf, md_file, img_path):
    """Embed an image centred at a width of 110; warn when it cannot be used."""
    if not img_path:
        return
    full_path = normalize_image_path(md_file, img_path)
    if not full_path:
        print(f"Warning: image not found: {img_path}", file=sys.stderr)
        return
    pdf.ln(5)
    try:
        x = (pdf.w - 110) / 2
        pdf.image(full_path, x=x, w=110)
    except Exception as exc:
        # Best effort: a broken image must not abort the whole document,
        # but the failure is reported instead of being swallowed.
        print(f"Warning: could not embed image {full_path}: {exc}", file=sys.stderr)
    pdf.ln(5)


def convert(md_file, pdf_file):
    """Convert the Markdown file ``md_file`` into a branded PDF at ``pdf_file``.

    The document gets a cover page whose title is taken from the first
    line starting with ``# `` (falling back to a generic title), followed
    by the content: tables, fenced code blocks, headings, images,
    blockquote callouts, lists and paragraphs. The first H1 in the body is
    skipped because it is already shown on the cover.
    """
    pdf = UXPDF()
    pdf.set_auto_page_break(auto=True, margin=20)
    pdf.set_title("Manual Técnico iT Guys")

    # Read the source once; it is used for both the cover title and the body.
    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # --- Cover Page ---
    # Extract title from MD (assume first H1); generic placeholder otherwise.
    doc_title = "Documentação Técnica"
    doc_subtitle = "Guia Oficial iT Guys"
    for raw_line in lines:
        if raw_line.startswith('# '):
            doc_title = raw_line[2:].strip().replace('MANUAL TÉCNICO - ', '')  # Cleanup
            break
    _render_cover(pdf, doc_title, doc_subtitle)

    # --- Content ---
    pdf.add_page()

    in_code_block = False
    table_buffer = []
    first_h1_skipped = False

    for raw_line in lines:
        line = process_variables(raw_line.strip())

        # Robust reset at start of line processing
        _reset_text_style(pdf)

        # --- Tables ---
        # Pipe-prefixed lines inside a code fence are code, not table rows.
        if line.startswith('|') and not in_code_block:
            table_buffer.append(line)
            continue

        if table_buffer:
            _render_table(pdf, table_buffer)
            table_buffer = []
            _reset_text_style(pdf)

        if not line:
            if not in_code_block:
                pdf.ln(3)
            continue

        # Code fences (robust: accepts spaces and info strings after ```)
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue

        if in_code_block:
            pdf.set_font('Courier', '', 9.5)
            pdf.set_text_color(50, 50, 50)
            pdf.set_fill_color(245, 245, 245)
            pdf.set_x(pdf.l_margin + 5)
            pdf.multi_cell(0, 5, safe_text(line), fill=True, border=0)
            continue

        # Headers
        header = parse_header(line)
        if header:
            level, text = header
            if level == 1 and not first_h1_skipped:
                # The first H1 is already on the cover page.
                first_h1_skipped = True
                continue
            _render_heading(pdf, level, text)
            continue

        # Images: a line containing an image reference is consumed entirely.
        img_match = RE_IMAGE.search(line)
        if img_match or (line.startswith('![') and '](' in line):
            if img_match:
                img_path = img_match.group(2)
            else:
                loose_match = re.search(r'\(([^)]+)\)', line)
                img_path = loose_match.group(1) if loose_match else None
            _render_image(pdf, md_file, img_path)
            continue

        # Blockquotes/Callouts
        bq_match = RE_BLOCKQUOTE.match(line)
        if bq_match:
            c_type, clean_content = parse_callout_type(bq_match.group(1))
            render_callout(pdf, clean_content, c_type)
            continue

        # Lists and regular text
        list_item = parse_list_item(line)
        line_processed = make_links_clickable(line)

        if list_item:
            item_type, content, extra = list_item
            pdf.set_x(pdf.l_margin + 6)
            if item_type == 'cb':
                checkbox = '[x]' if extra else '[ ]'
                item_text = make_links_clickable(f"{checkbox} {content}")
                pdf.multi_cell(0, 7, safe_text(item_text), markdown=True, fill=False)
            else:
                pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False)
        else:
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False)

    # A table that ends the document has no following line to trigger its
    # rendering inside the loop, so flush it here.
    if table_buffer:
        _reset_text_style(pdf)
        _render_table(pdf, table_buffer)

    pdf.output(pdf_file)
    print(f"PDF Generated: {pdf_file}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python convert_to_pdf.py <input.md> [output.pdf]")
        sys.exit(1)

    md_in = sys.argv[1]
    if len(sys.argv) >= 3:
        pdf_out = sys.argv[2]
    else:
        pdf_out = os.path.splitext(md_in)[0] + ".pdf"

    convert(md_in, pdf_out)