manuais-e-documentacao-itguys/.gemini/convert_to_pdf.py

445 lines
15 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
import os
import re
from datetime import datetime
import argparse
# Dependency check
try:
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from fpdf.fonts import FontFace
except ImportError:
import subprocess
subprocess.check_call([sys.executable, "-m", "pip", "install", "fpdf2"])
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from fpdf.fonts import FontFace
# Assets: located relative to this script's own directory, independent of the
# process working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")
# Colors (Premium Palette): (R, G, B) tuples that are unpacked into the
# set_*_color() calls further down.
COLOR_PRIMARY = (20, 120, 207) # #1478cf
COLOR_TEXT_MAIN = (50, 60, 70) # #323C46
COLOR_BG_LIGHT = (250, 250, 252)
# Callout palette: background/border pair for INFO and for WARN callouts.
COLOR_INFO_BG = (235, 245, 255)
COLOR_INFO_BORDER = (20, 120, 207)
COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0)
# Regex Patterns (CommonMark-inspired for robustness)
# Heading: 1-6 '#' characters, whitespace, then the title text.
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
# Bullet item: optional indent, one of '-', '+', '*', whitespace, item text.
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
# Numbered item: optional indent, digits followed by '.' or ')', item text.
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
# Blockquote: leading '>' with the rest of the line captured.
RE_BLOCKQUOTE = re.compile(r'^>\s*(.*)$')
# Table separator row: only pipes, whitespace, hyphens and colons.
RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
# Image: ![alt](path "optional title"); the path may not contain whitespace
# or ')'.
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
# Code fence: three backticks, an optional word (language tag), optional
# surrounding whitespace.
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
# Checkbox item: list marker followed by '[ ]', '[x]' or '[X]' and item text.
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')
def parse_header(line):
    """Split a Markdown '#' heading into its level and title.

    Returns a ``(level, text)`` tuple, where ``level`` is the number of
    leading ``#`` characters, or ``None`` when *line* is not a heading.
    Surrounding whitespace on *line* and on the title is ignored.
    """
    found = RE_HEADER.match(line.strip())
    if found is None:
        return None
    hashes, title = found.groups()
    return len(hashes), title.strip()
def parse_list_item(line):
    """Classify a Markdown list line.

    Returns ``None`` when *line* is not a list item, otherwise a 3-tuple
    ``(kind, content, extra)``:

    * ``('cb', text, checked)`` - checkbox item; ``checked`` is a bool.
    * ``('ul', text, None)``    - unordered (bullet) item.
    * ``('ol', text, number)``  - ordered item; ``number`` is the matched
      digit string.
    """
    # Checkboxes go first: a line such as "- [x] done" also satisfies the
    # unordered-list pattern.
    checkbox = RE_CHECKBOX.match(line)
    if checkbox is not None:
        mark, text = checkbox.groups()
        return ('cb', text, mark.lower() == 'x')

    bullet = RE_UNORDERED_LIST.match(line)
    if bullet is not None:
        return ('ul', bullet.group(1), None)

    numbered = RE_ORDERED_LIST.match(line)
    if numbered is not None:
        number, text = numbered.groups()
        return ('ol', text, number)

    return None
def parse_callout_type(content):
    """Classify blockquote text as a 'WARN' or 'INFO' callout.

    Returns ``(kind, text)``. When a GitHub-style alert tag such as
    ``[!WARNING]`` is present, the tags of that group are removed from the
    returned text and it is stripped; otherwise the text is returned as-is.
    """
    upper = content.upper()

    # GitHub-style alert tags, checked in priority order.
    tag_groups = (
        ('WARN', ('WARNING', 'CAUTION'), r'\[!(WARNING|CAUTION)\]'),
        ('WARN', ('IMPORTANT',), r'\[!IMPORTANT\]'),
        ('INFO', ('NOTE', 'TIP', 'INFO'), r'\[!(NOTE|TIP|INFO)\]'),
    )
    for kind, tags, pattern in tag_groups:
        if any(f'[!{tag}]' in upper for tag in tags):
            stripped = re.sub(pattern, '', content, flags=re.IGNORECASE)
            return kind, stripped.strip()

    # Traditional format: a warning keyword anywhere in the text.
    if any(word in upper for word in ('IMPORTANTE', 'WARNING', 'ATENÇÃO')):
        return 'WARN', content

    # NOTA / NOTE / DICA / TIP keywords and any other blockquote all end up
    # as an INFO callout with the text untouched.
    return 'INFO', content
def normalize_image_path(md_file, img_path):
    """Resolve an image reference found in a Markdown file to a real file.

    Args:
        md_file: Path of the Markdown file; relative image paths are looked
            up next to it.
        img_path: Image target as written in the Markdown. Surrounding
            whitespace is ignored and percent-escapes (``%20`` ...) are
            decoded.

    Returns:
        The first candidate that is an existing regular file, tried in this
        order: relative to the Markdown file's directory, the path as given
        (absolute or relative to the working directory), and relative to the
        Markdown directory with ``/`` replaced by the OS separator.
        ``None`` when nothing matches or *img_path* is empty/``None``.
    """
    from urllib.parse import unquote

    # Guard explicitly instead of hiding the failure behind a bare except:
    # an empty reference would otherwise resolve to the Markdown directory.
    if not img_path:
        return None
    img_path = unquote(str(img_path).strip())
    if not img_path:
        return None

    base_dir = os.path.dirname(md_file)
    candidates = (
        os.path.join(base_dir, img_path),
        img_path,
        os.path.join(base_dir, img_path.replace('/', os.sep)),
    )
    for candidate in candidates:
        # isfile (not exists): a directory is never a usable image.
        if os.path.isfile(candidate):
            return candidate
    return None
class UXPDF(FPDF):
    """FPDF subclass that draws the branded page header and footer.

    Page 1 is treated as the cover: it gets neither header nor footer.
    """

    def header(self):
        """Draw the white top band, logo, rule and title on pages after the first."""
        if self.page_no() <= 1:
            return

        band_height = 25
        self.set_fill_color(255, 255, 255)
        self.rect(0, 0, self.w, band_height, 'F')
        if os.path.exists(LOGO_PATH):
            self.image(LOGO_PATH, x=10, y=8, h=10)

        # Full-width rule along the bottom edge of the band.
        self.set_draw_color(*COLOR_PRIMARY)
        self.set_line_width(0.5)
        self.line(0, band_height, self.w, band_height)

        # Right-aligned document label.
        self.set_font('Helvetica', 'B', 10)
        self.set_text_color(*COLOR_PRIMARY)
        self.set_y(10)
        self.cell(0, 10, "MANUAL TÉCNICO", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='R')
        self.ln(20)

    def footer(self):
        """Draw the footer rule, confidentiality note and page counter."""
        # Skip the footer on the cover (page 1).
        if self.page_no() == 1:
            return

        rule_y = self.h - 20
        self.set_y(-20)
        self.set_font('Helvetica', 'I', 8)
        self.set_text_color(100, 100, 100)
        self.set_draw_color(220, 220, 220)
        self.line(10, rule_y, self.w - 10, rule_y)

        self.set_y(-15)
        self.cell(0, 10, 'iT Guys Solutions - Confidencial', 0, align='L')
        # NOTE(review): "{nb}" is presumably fpdf's total-page-count alias,
        # substituted when the document is output - confirm.
        self.set_x(0)
        self.cell(0, 10, f'Página {self.page_no()}/{{nb}}', 0, align='R')
def clean_markdown(text):
    """Drop bold (``**``) and inline-code (backtick) markers from *text*.

    The result is round-tripped through Latin-1, so any character outside
    that encoding comes back as ``?``.
    """
    for marker in ('**', '`'):
        text = text.replace(marker, '')
    latin1_bytes = text.encode('latin-1', 'replace')
    return latin1_bytes.decode('latin-1')
# Translation table for safe_text(). Everything is spelled with escapes so
# that no invisible or look-alike character has to live in the source.
# NOTE(review): several search literals of the earlier implementation were
# non-printing characters that could not be read back reliably; this table
# is a superset guess (C1 controls, zero-width characters, curly quotes in
# both their Unicode and cp1252-decoded-as-Latin-1 forms) - confirm against
# the documents being converted.
_SAFE_TEXT_TABLE = str.maketrans({
    # C1 control characters never carry printable text.
    **{chr(cp): None for cp in range(0x80, 0xA0)},
    # Zero-width / formatting characters.
    '\u200b': None,      # ZERO WIDTH SPACE
    '\u200c': None,      # ZERO WIDTH NON-JOINER
    '\u200d': None,      # ZERO WIDTH JOINER
    '\ufeff': None,      # BYTE ORDER MARK
    '\ufe0f': None,      # VARIATION SELECTOR-16 (emoji presentation)
    # Emoji stripped from the output.
    '\u26a0': None,      # WARNING SIGN
    '\U0001f680': None,  # ROCKET
    # Curly quotes -> ASCII quotes.
    '\u201c': '"',
    '\u201d': '"',
    '\u2018': "'",
    '\u2019': "'",
    # The same quotes when cp1252 bytes were mis-decoded as Latin-1.
    '\x93': '"',
    '\x94': '"',
    '\x91': "'",
    '\x92': "'",
})


def safe_text(text):
    """Return *text* reduced to what a Latin-1 PDF font can show.

    Emoji and invisible characters listed in ``_SAFE_TEXT_TABLE`` are
    removed, curly quotes become straight ASCII quotes, and any remaining
    character outside Latin-1 is replaced by ``?``.

    A ``str.replace`` with an empty search string inserts the replacement
    between every character, so the table is used instead of chained
    ``replace`` calls with hard-to-see literals.
    """
    cleaned = text.translate(_SAFE_TEXT_TABLE)
    return cleaned.encode('latin-1', 'replace').decode('latin-1')
def make_links_clickable(text):
    """Turn backtick-quoted http(s) URLs into Markdown links.

    A URL wrapped in backticks becomes ``[url](url)``; all other text,
    including other inline code, is returned unchanged.
    """
    backticked_url = re.compile(r'`(https?://[^`]+)`')
    return backticked_url.sub(r'[\1](\1)', text)
def process_variables(text):
    """Expand the template placeholders supported in Markdown sources.

    ``{{DATA_ATUAL}}`` becomes the current local date as dd/mm/YYYY and
    ``{{ANO}}`` the current year. Text without placeholders is returned
    unchanged.
    """
    today = datetime.now()
    substitutions = (
        ('{{DATA_ATUAL}}', today.strftime("%d/%m/%Y")),
        ('{{ANO}}', str(today.year)),
    )
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)
    return text
def render_callout(pdf, text, type='INFO'):
    """Render a callout box: coloured label, filled text, left accent bar.

    Args:
        pdf: The FPDF document being written.
        text: Callout body; backtick-quoted URLs are turned into links and
            the text is rendered with fpdf's markdown mode.
        type: ``'WARN'`` selects the warning palette and the "IMPORTANTE"
            label; anything else gives the info palette and "NOTA", unless
            *text* contains ``[IMPORTANTE]``. (The name shadows the builtin
            but is kept because it is part of the call interface.)

    The fill colour, text colour and line width are put back afterwards so
    that the thick accent stroke does not leak into later drawing.
    """
    pdf.ln(5)
    top_y = pdf.get_y()
    # Remembered so the 1.5 accent width does not affect later strokes
    # (rules, table borders) that do not set their own width.
    previous_line_width = pdf.line_width

    if type == 'WARN' or '[IMPORTANTE]' in text:
        fill, accent, label = COLOR_WARN_BG, COLOR_WARN_BORDER, "IMPORTANTE"
    else:
        fill, accent, label = COLOR_INFO_BG, COLOR_INFO_BORDER, "NOTA"

    pdf.set_fill_color(*fill)
    pdf.set_line_width(1.5)
    pdf.set_draw_color(*accent)

    # Label line, in the accent colour.
    pdf.set_x(pdf.l_margin + 2)
    pdf.set_font('Helvetica', 'B', 9)
    pdf.set_text_color(*accent)
    pdf.cell(0, 5, label, new_x=XPos.LMARGIN, new_y=YPos.NEXT)

    # Body text on the tinted background.
    pdf.set_font('Helvetica', '', 10)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.set_x(pdf.l_margin + 2)
    text = make_links_clickable(text)
    pdf.multi_cell(0, 6, safe_text(text), fill=True, markdown=True)

    # Accent bar down the left edge, spanning label and body.
    bottom_y = pdf.get_y()
    pdf.set_draw_color(*accent)
    pdf.line(pdf.l_margin, top_y, pdf.l_margin, bottom_y)

    # Reset state explicitly to avoid bleeding into the following content.
    pdf.set_line_width(previous_line_width)
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.ln(5)
def _render_cover(pdf, lines):
    """Draw the cover page; the title is taken from the first '# ' line."""
    pdf.add_page()
    pdf.set_fill_color(*COLOR_PRIMARY)
    # Coloured strip down the full height of the left edge.
    pdf.rect(0, 0, 15, pdf.h, 'F')
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, x=40, y=50, w=100)

    # Generic placeholders, used when the document has no H1.
    doc_title = "Documentação Técnica"
    doc_subtitle = "Guia Oficial iT Guys"
    for line in lines:
        if line.startswith('# '):
            doc_title = line[2:].strip().replace('MANUAL TÉCNICO - ', '')
            break

    pdf.set_y(140)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 32)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.multi_cell(0, 12, safe_text(doc_title), align='L')

    pdf.set_y(180)
    pdf.set_x(30)
    pdf.set_font('Helvetica', '', 16)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.multi_cell(0, 8, safe_text(doc_subtitle), align='L')

    pdf.set_y(-30)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 10)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.cell(0, 10, "iT GUYS SOLUTIONS")


def _split_table_row(row_line):
    """Split a '|'-delimited table line into stripped cells, keeping empty ones."""
    inner = row_line.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_table(pdf, table_lines):
    """Render buffered Markdown table lines (header row first) as a table."""
    headers = _split_table_row(table_lines[0])
    if not any(headers):
        return

    rows = []
    for row_line in table_lines[1:]:
        if RE_TABLE_SEP.match(row_line):
            continue  # Skip the |---|---| separator line
        cells = _split_table_row(row_line)
        if not any(cells):
            continue
        # Pad short rows so every value stays under its own header.
        cells.extend([''] * (len(headers) - len(cells)))
        rows.append(cells)

    pdf.ln(5)
    pdf.set_draw_color(*COLOR_PRIMARY)
    pdf.set_line_width(0.3)

    # "Campo | Valor" style tables get a bold first column. The second
    # header is only inspected when it exists (one-column tables).
    is_tech_data = "Campo" in headers[0] or (
        len(headers) > 1 and "Valor" in headers[1]
    )
    # The fixed 30/70 split only makes sense for exactly two columns.
    col_widths = (30, 70) if is_tech_data and len(headers) == 2 else None

    header_style = FontFace(
        emphasis="BOLD", color=(255, 255, 255), fill_color=COLOR_PRIMARY
    )
    with pdf.table(
        text_align="LEFT",
        col_widths=col_widths,
        line_height=7
    ) as table:
        header_row = table.row()
        for title in headers:
            header_row.cell(clean_markdown(title), style=header_style)
        for cells in rows:
            body_row = table.row()
            for idx, cell in enumerate(cells):
                emphasis = "BOLD" if is_tech_data and idx == 0 else None
                body_row.cell(
                    clean_markdown(cell),
                    style=FontFace(
                        color=COLOR_TEXT_MAIN,
                        emphasis=emphasis,
                        fill_color=(255, 255, 255),
                    ),
                )
    pdf.ln(5)


def _render_heading(pdf, level, text):
    """Render a heading of the given level (1 = chapter title, 4+ = minor)."""
    if level == 1:
        # Chapters start on a fresh page unless we are still near the top of
        # the first content page.
        if pdf.page_no() > 2 or pdf.get_y() > 60:
            pdf.add_page()
        # Start at the left margin wherever the previous cell left the cursor.
        pdf.set_x(pdf.l_margin)
        pdf.set_font('Helvetica', 'B', 20)
        pdf.set_text_color(*COLOR_PRIMARY)
        pdf.multi_cell(0, 10, safe_text(text), fill=False)
        pdf.ln(5)
        y = pdf.get_y()
        pdf.set_draw_color(*COLOR_PRIMARY)
        # Underline across the printable width (page width, not a fixed 210).
        pdf.line(pdf.l_margin, y, pdf.w - pdf.r_margin, y)
        pdf.ln(10)
    elif level == 2:
        pdf.ln(8)
        pdf.set_font('Helvetica', 'B', 14)
        pdf.set_text_color(*COLOR_PRIMARY)
        pdf.multi_cell(0, 8, safe_text(text), fill=False)
        pdf.ln(2)
    elif level == 3:
        pdf.ln(4)
        pdf.set_font('Helvetica', 'B', 12)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.multi_cell(0, 6, safe_text(text), fill=False)
    else:  # H4 and deeper
        pdf.ln(3)
        pdf.set_font('Helvetica', 'B', 11)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.multi_cell(0, 5, safe_text(text), fill=False)


def _render_image(pdf, md_file, img_path):
    """Embed an image centred on the page; failures are reported, not fatal."""
    full_path = normalize_image_path(md_file, img_path)
    if not full_path:
        print(f"Warning: image not found: {img_path}", file=sys.stderr)
        return
    pdf.ln(5)
    try:
        x = (pdf.w - 110) / 2
        pdf.image(full_path, x=x, w=110)
    except Exception as exc:
        # Best effort: a broken image must not abort the whole document,
        # but the user should learn that it was skipped.
        print(f"Warning: could not embed image {full_path}: {exc}", file=sys.stderr)
    pdf.ln(5)


def convert(md_file, pdf_file):
    """Convert a Markdown file into a branded PDF manual.

    Args:
        md_file: Path of the UTF-8 Markdown source.
        pdf_file: Path of the PDF to write.

    The document gets a cover page (title taken from the first ``# `` line),
    then the content is rendered line by line: tables, fenced code blocks,
    headings, images, blockquote callouts, list items and paragraphs. The
    first H1 in the content is skipped because it is already on the cover.

    Raises:
        OSError: if *md_file* cannot be read.
    """
    # Read once; the same lines feed both the cover and the content pass.
    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    pdf = UXPDF()
    pdf.set_auto_page_break(auto=True, margin=20)
    pdf.set_title("Manual Técnico iT Guys")

    # --- Cover Page ---
    _render_cover(pdf, lines)

    # --- Content ---
    pdf.add_page()
    in_code_block = False
    table_buffer = []
    first_h1_skipped = False

    for raw_line in lines:
        line = process_variables(raw_line.strip())

        # Robust reset at the start of every line.
        pdf.set_fill_color(255, 255, 255)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.set_font('Helvetica', '', 11)

        # --- Tables --- (a '|' line inside a code fence is code, not a row)
        if line.startswith('|') and not in_code_block:
            table_buffer.append(line)
            continue
        if table_buffer:
            _render_table(pdf, table_buffer)
            table_buffer = []

        if not line:
            if not in_code_block:
                pdf.ln(3)
            continue

        # Code fences: any line starting with ``` toggles code mode (this
        # also covers everything RE_CODE_FENCE would match).
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            pdf.set_font('Courier', '', 9.5)
            pdf.set_text_color(50, 50, 50)
            pdf.set_fill_color(245, 245, 245)
            pdf.set_x(pdf.l_margin + 5)
            pdf.multi_cell(0, 5, safe_text(line), fill=True, border=0)
            continue

        # Headings
        header = parse_header(line)
        if header:
            level, text = header
            if level == 1 and not first_h1_skipped:
                first_h1_skipped = True  # already shown on the cover
                continue
            _render_heading(pdf, level, text)
            continue

        # Images; the fallback handles targets RE_IMAGE rejects (e.g. paths
        # containing spaces) by taking the first parenthesised group.
        img_match = RE_IMAGE.search(line)
        if img_match or (line.startswith('![') and '](' in line):
            if img_match:
                img_path = img_match.group(2)
            else:
                fallback = re.search(r'\(([^)]+)\)', line)
                img_path = fallback.group(1) if fallback else None
            if img_path:
                _render_image(pdf, md_file, img_path)
            continue

        # Blockquotes / callouts (RE_BLOCKQUOTE matches every '>' line).
        bq_match = RE_BLOCKQUOTE.match(line)
        if bq_match:
            c_type, clean_content = parse_callout_type(bq_match.group(1))
            render_callout(pdf, clean_content, c_type)
            continue

        # Lists and regular text
        pdf.set_fill_color(255, 255, 255)
        pdf.set_font('Helvetica', '', 11)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        list_item = parse_list_item(line)
        line_processed = make_links_clickable(line)
        if list_item:
            item_type, content, extra = list_item
            pdf.set_x(pdf.l_margin + 6)
            if item_type == 'cb':
                checkbox = '[x]' if extra else '[ ]'
                pdf.multi_cell(0, 7, safe_text(f"{checkbox} {content}"), markdown=True, fill=False)
            else:
                pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False)
        else:
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False)

    # A table on the last lines of the file has no following line to
    # trigger the flush above.
    if table_buffer:
        _render_table(pdf, table_buffer)

    pdf.output(pdf_file)
    print(f"PDF Generated: {pdf_file}")
if __name__ == "__main__":
    # Command-line entry point: the input Markdown path is required; the
    # output path defaults to the input path with a ".pdf" extension.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python convert_to_pdf.py <input.md> [output.pdf]")
        sys.exit(1)
    md_in = cli_args[0]
    pdf_out = cli_args[1] if len(cli_args) >= 2 else os.path.splitext(md_in)[0] + ".pdf"
    convert(md_in, pdf_out)