445 lines
15 KiB
Python
445 lines
15 KiB
Python
import sys
|
||
import os
|
||
import re
|
||
from datetime import datetime
|
||
import argparse
|
||
|
||
# Dependency check
|
||
try:
|
||
from fpdf import FPDF
|
||
from fpdf.enums import XPos, YPos
|
||
from fpdf.fonts import FontFace
|
||
except ImportError:
|
||
import subprocess
|
||
subprocess.check_call([sys.executable, "-m", "pip", "install", "fpdf2"])
|
||
from fpdf import FPDF
|
||
from fpdf.enums import XPos, YPos
|
||
from fpdf.fonts import FontFace
|
||
|
||
# Assets
# BASE_DIR is the directory containing this script; the logo is looked up
# under its "assets" sub-directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")

# Colors (Premium Palette) - each value is an (R, G, B) tuple
COLOR_PRIMARY = (20, 120, 207) # #1478cf
COLOR_TEXT_MAIN = (50, 60, 70) # #323C46
# NOTE(review): COLOR_BG_LIGHT does not appear to be referenced in this file.
COLOR_BG_LIGHT = (250, 250, 252)
# Fill / accent-bar colours for "info" callouts
COLOR_INFO_BG = (235, 245, 255)
COLOR_INFO_BORDER = (20, 120, 207)
# Fill / accent-bar colours for "warning" callouts
COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0)

# Regex Patterns (CommonMark-inspired for robustness)
# Heading: 1-6 '#' characters, whitespace, then the title (group 2).
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
# Unordered item: optional indent, one of - + *, whitespace, item text.
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
# Ordered item: optional indent, digits (group 1), '.' or ')', item text.
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
# Blockquote: a line starting with '>'; group 1 is the quoted text.
RE_BLOCKQUOTE = re.compile(r'^>\s*(.*)$')
# Table separator: a line made only of pipes, whitespace, dashes and colons.
RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
# Image: ![alt](target "optional title"); the target may not contain
# whitespace or ')'. Group 1 is the alt text, group 2 the target.
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
# Code fence: ``` optionally followed by a language word and spaces.
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
# Task-list item: a bullet followed by [ ], [x] or [X] and the item text.
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')
|
||
|
||
def parse_header(line):
    """Split a Markdown heading line into its level and title.

    The line is stripped and matched against ``RE_HEADER``. Returns
    ``(level, text)`` where ``level`` is the number of leading ``#``
    characters and ``text`` is the stripped title, or ``None`` when the
    line is not a heading.
    """
    found = RE_HEADER.match(line.strip())
    if found is None:
        return None
    hashes, title = found.groups()
    return len(hashes), title.strip()
|
||
|
||
def parse_list_item(line):
    """Classify a Markdown list line.

    Returns a 3-tuple ``(kind, content, extra)``, or ``None`` when the line
    is not a list item:

    * ``('cb', text, checked)`` - task-list item; ``checked`` is a bool.
    * ``('ul', text, None)``    - unordered item.
    * ``('ol', text, number)``  - ordered item; ``number`` is the digit
      string captured from the line.
    """
    # Checked first: a line like "- [x] item" would otherwise be consumed by
    # the unordered-list pattern.
    task = RE_CHECKBOX.match(line)
    if task is not None:
        mark, body = task.groups()
        return ('cb', body, mark.lower() == 'x')

    bullet = RE_UNORDERED_LIST.match(line)
    if bullet is not None:
        return ('ul', bullet.group(1), None)

    numbered = RE_ORDERED_LIST.match(line)
    if numbered is not None:
        number, body = numbered.groups()
        return ('ol', body, number)

    return None
|
||
|
||
def parse_callout_type(content):
    """Classify blockquote text as a warning or an informational callout.

    Returns ``(kind, text)`` where ``kind`` is ``'WARN'`` or ``'INFO'``.
    For GitHub-style alerts (``[!NOTE]``, ``[!WARNING]``, ...) the marker is
    removed from ``text`` and the result is stripped; for every other input
    the text is returned unchanged.
    """
    shouted = content.upper()

    # (markers to look for, resulting kind, pattern that strips the marker)
    alert_rules = (
        (('[!WARNING]', '[!CAUTION]'), 'WARN', r'\[!(WARNING|CAUTION)\]'),
        (('[!IMPORTANT]',), 'WARN', r'\[!IMPORTANT\]'),
        (('[!NOTE]', '[!TIP]', '[!INFO]'), 'INFO', r'\[!(NOTE|TIP|INFO)\]'),
    )
    for markers, kind, pattern in alert_rules:
        if any(marker in shouted for marker in markers):
            without_marker = re.sub(pattern, '', content, flags=re.IGNORECASE)
            return kind, without_marker.strip()

    # Traditional format: a warning keyword anywhere in the text.
    if any(word in shouted for word in ('IMPORTANTE', 'WARNING', 'ATENÇÃO')):
        return 'WARN', content

    # NOTA / NOTE / DICA / TIP and any other blockquote are informational.
    return 'INFO', content
|
||
|
||
def normalize_image_path(md_file, img_path):
    """Resolve a Markdown image reference to an existing path on disk.

    The reference is stripped and percent-decoded (``%20`` -> space), then
    these candidates are probed in order and the first one that exists is
    returned:

    1. the reference joined to the directory of ``md_file``;
    2. the reference as given (absolute or relative to the working dir);
    3. the same join with ``/`` mapped to ``os.sep``;
    4. the same join with ``\\`` mapped to ``/`` - lets documents written
       with Windows-style separators resolve on POSIX systems.

    Args:
        md_file: Path of the Markdown file that contains the reference.
        img_path: Image target exactly as written in the Markdown source.

    Returns:
        The first existing candidate path, or ``None`` if none exists.
    """
    from urllib.parse import unquote

    # unquote() leaves malformed escapes untouched instead of raising, so no
    # exception handling is needed around it.
    img_path = unquote(img_path.strip())
    base_dir = os.path.dirname(md_file)

    candidates = (
        os.path.join(base_dir, img_path),
        img_path,
        os.path.join(base_dir, img_path.replace('/', os.sep)),
        os.path.join(base_dir, img_path.replace('\\', '/')),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None
|
||
|
||
class UXPDF(FPDF):
    """FPDF subclass that draws the branded page header and footer.

    Page 1 (the cover) gets neither; later pages get a banner at the top
    and a confidentiality line plus page counter at the bottom.
    """

    def header(self):
        """Draw the top banner: white strip, optional logo, rule and title."""
        if self.page_no() <= 1:
            return

        banner_bottom = 25

        # Paint the banner area white before drawing on it.
        self.set_fill_color(255, 255, 255)
        self.rect(0, 0, self.w, banner_bottom, 'F')

        logo_available = os.path.exists(LOGO_PATH)
        if logo_available:
            self.image(LOGO_PATH, x=10, y=8, h=10)

        # Full-width rule under the banner.
        self.set_draw_color(*COLOR_PRIMARY)
        self.set_line_width(0.5)
        self.line(0, banner_bottom, self.w, banner_bottom)

        # Right-aligned document label.
        self.set_font('Helvetica', 'B', 10)
        self.set_text_color(*COLOR_PRIMARY)
        self.set_y(10)
        self.cell(0, 10, "MANUAL TÉCNICO", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='R')
        self.ln(20)

    def footer(self):
        """Draw the bottom rule, confidentiality note and page counter."""
        # No footer on the cover (page 1).
        on_cover = self.page_no() == 1
        if on_cover:
            return

        self.set_y(-20)
        self.set_font('Helvetica', 'I', 8)
        self.set_text_color(100, 100, 100)
        self.set_draw_color(220, 220, 220)
        rule_y = self.h-20
        self.line(10, rule_y, self.w-10, rule_y)

        self.set_y(-15)
        self.cell(0, 10, 'iT Guys Solutions - Confidencial', 0, align='L')
        self.set_x(0)
        # "{nb}" is left literally in the text for the total page count.
        page_label = f'Página {self.page_no()}/{{nb}}'
        self.cell(0, 10, page_label, 0, align='R')
|
||
|
||
def clean_markdown(text):
    """Return table-cell text without inline Markdown markers.

    Removes ``**`` (bold) and backtick (code) markers, then hands the result
    to ``safe_text`` so table cells get the same symbol/quote clean-up and
    Latin-1 fallback as every other piece of rendered text, instead of
    turning curly quotes and icons into ``?``.
    """
    plain = text.replace('**', '').replace('`', '')
    return safe_text(plain)
|
||
|
||
# Characters that the Latin-1 core PDF fonts cannot show, mapped to either
# nothing (pictographs) or a plain equivalent.
_LATIN1_FALLBACKS = str.maketrans({
    '\u2139': None,        # information-source icon
    '\u26a0': None,        # warning-sign icon
    '\U0001f680': None,    # rocket icon
    '\ufe0f': None,        # emoji variation selector that trails the icons
    '\u201c': '"',         # left double quote
    '\u201d': '"',         # right double quote
    '\u2018': "'",         # left single quote
    '\u2019': "'",         # right single quote
    # A single hyphen on purpose: "--" is the underline marker when the text
    # is later rendered with markdown=True.
    '\u2013': '-',         # en dash
    '\u2014': '-',         # em dash
    '\u2026': '...',       # horizontal ellipsis
})


def safe_text(text):
    """Make *text* encodable in Latin-1 for the core PDF fonts.

    Known icons are dropped, typographic quotes, dashes and the ellipsis are
    replaced by plain equivalents, and any character that still cannot be
    encoded in Latin-1 becomes ``?``.

    Args:
        text: Arbitrary Unicode text.

    Returns:
        A string containing only Latin-1 characters.
    """
    normalized = text.translate(_LATIN1_FALLBACKS)
    return normalized.encode('latin-1', 'replace').decode('latin-1')
|
||
|
||
def make_links_clickable(text):
    """Rewrite back-ticked http(s) URLs as Markdown links.

    A span such as `` `https://example.com` `` becomes
    ``[https://example.com](https://example.com)``; everything else is
    returned untouched.
    """
    def _as_markdown_link(found):
        url = found.group(1)
        return f"[{url}]({url})"

    return re.sub(r'`(https?://[^`]+)`', _as_markdown_link, text)
|
||
|
||
def process_variables(text):
    """Expand the template placeholders supported in the Markdown source.

    ``{{DATA_ATUAL}}`` becomes today's date as ``dd/mm/YYYY`` and ``{{ANO}}``
    becomes the current four-digit year, both taken from ``datetime.now()``.
    Text without placeholders is returned unchanged.
    """
    today = datetime.now()
    substitutions = (
        ('{{DATA_ATUAL}}', today.strftime("%d/%m/%Y")),
        ('{{ANO}}', str(today.year)),
    )
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)
    return text
|
||
|
||
def render_callout(pdf, text, type='INFO'):
    """Render a callout box: bold label, filled body and a left accent bar.

    Args:
        pdf: The fpdf2 document being written.
        text: Callout body; rendered with fpdf2 markdown enabled after
            back-ticked URLs are turned into links and the text is made
            Latin-1 safe.
        type: ``'WARN'`` selects the warning palette and the "IMPORTANTE"
            label; any other value selects the info palette and "NOTA".
            A literal ``[IMPORTANTE]`` tag in ``text`` also forces the
            warning style.
    """
    if type == 'WARN' or '[IMPORTANTE]' in text:
        bg, border, label = COLOR_WARN_BG, COLOR_WARN_BORDER, "IMPORTANTE"
    else:
        bg, border, label = COLOR_INFO_BG, COLOR_INFO_BORDER, "NOTA"

    # Remember the caller's stroke width: the thick accent bar must not leak
    # into rules drawn later (heading underline, footer line).
    previous_line_width = pdf.line_width

    pdf.ln(5)
    start_page = pdf.page_no()
    start_y = pdf.get_y()

    pdf.set_fill_color(*bg)
    pdf.set_line_width(1.5)
    pdf.set_draw_color(*border)

    # Label row.
    pdf.set_x(pdf.l_margin + 2)
    pdf.set_font('Helvetica', 'B', 9)
    pdf.set_text_color(*border)
    pdf.cell(0, 5, label, new_x=XPos.LMARGIN, new_y=YPos.NEXT)

    # Body.
    pdf.set_font('Helvetica', '', 10)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.set_x(pdf.l_margin + 2)
    body = safe_text(make_links_clickable(text))
    pdf.multi_cell(0, 6, body, fill=True, markdown=True)

    end_y = pdf.get_y()
    if pdf.page_no() == start_page:
        # Re-apply the colour right before stroking the bar.
        pdf.set_draw_color(*border)
        pdf.line(pdf.l_margin, start_y, pdf.l_margin, end_y)
    # else: the callout crossed a page break, so start_y refers to the
    # previous page; drawing from it would put a stray bar on this page.

    # Reset state explicitly to avoid bleeding into the following content.
    pdf.set_line_width(previous_line_width)
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.ln(5)
|
||
|
||
def _find_cover_title(lines, default):
    """Return the text of the first level-1 heading outside fenced code."""
    in_fence = False
    for raw in lines:
        stripped = raw.strip()
        if stripped.startswith('```'):
            in_fence = not in_fence
            continue
        if in_fence:
            continue
        header = parse_header(stripped)
        if header and header[0] == 1:
            # Drop the redundant prefix; the page banner already carries it.
            return header[1].replace('MANUAL TÉCNICO - ', '')
    return default


def _render_cover(pdf, doc_title, doc_subtitle):
    """Add the cover page: side bar, optional logo, title, subtitle, brand."""
    pdf.add_page()
    pdf.set_fill_color(*COLOR_PRIMARY)
    pdf.rect(0, 0, 15, pdf.h, 'F')

    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, x=40, y=50, w=100)

    pdf.set_y(140)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 32)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.multi_cell(0, 12, safe_text(doc_title), align='L')

    pdf.set_y(180)
    pdf.set_x(30)
    pdf.set_font('Helvetica', '', 16)
    pdf.set_text_color(*COLOR_TEXT_MAIN)
    pdf.multi_cell(0, 8, safe_text(doc_subtitle), align='L')

    pdf.set_y(-30)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 10)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.cell(0, 10, "iT GUYS SOLUTIONS")


def _split_table_row(row_line):
    """Split a pipe-delimited table line into stripped cell texts.

    Only the outer pipes are discarded, so an empty cell in the middle of a
    row keeps its column position.
    """
    inner = row_line.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_table(pdf, table_lines):
    """Render buffered Markdown table lines as a styled fpdf2 table."""
    headers = _split_table_row(table_lines[0])
    if not any(headers):
        return
    n_cols = len(headers)

    rows = []
    for raw_row in table_lines[1:]:
        if RE_TABLE_SEP.match(raw_row):
            continue  # Skip separator line
        cells = _split_table_row(raw_row)
        if not any(cells):
            continue
        if len(cells) > n_cols:
            # Keep surplus content by folding it into the last column.
            cells = cells[:n_cols - 1] + [' | '.join(cells[n_cols - 1:])]
        cells += [''] * (n_cols - len(cells))
        rows.append(cells)

    pdf.ln(5)
    pdf.set_draw_color(*COLOR_PRIMARY)
    pdf.set_line_width(0.3)

    # Two-column "Campo / Valor" tables get a narrow, bold first column.
    # The length test comes first so headers[1] is never read on a
    # single-column table.
    is_tech_data = n_cols == 2 and ("Campo" in headers[0] or "Valor" in headers[1])
    col_widths = (30, 70) if is_tech_data else None

    header_style = FontFace(emphasis="BOLD", color=(255, 255, 255), fill_color=COLOR_PRIMARY)
    with pdf.table(text_align="LEFT", col_widths=col_widths, line_height=7) as table:
        head = table.row()
        for title in headers:
            head.cell(clean_markdown(title), style=header_style)

        for cells in rows:
            body = table.row()
            for idx, value in enumerate(cells):
                emphasis = "BOLD" if is_tech_data and idx == 0 else None
                cell_style = FontFace(color=COLOR_TEXT_MAIN, emphasis=emphasis, fill_color=(255, 255, 255))
                body.cell(clean_markdown(value), style=cell_style)
    pdf.ln(5)


def _render_code_line(pdf, raw_line):
    """Render one line of a fenced code block, keeping its indentation."""
    pdf.set_font('Courier', '', 9.5)
    pdf.set_text_color(50, 50, 50)
    pdf.set_fill_color(245, 245, 245)
    pdf.set_x(pdf.l_margin + 5)

    code = safe_text(raw_line.rstrip().expandtabs(4))
    if code:
        pdf.multi_cell(0, 5, code, fill=True, border=0)
    else:
        # Blank line inside the block: emit an empty filled row so the code
        # keeps its vertical spacing.
        pdf.cell(0, 5, '', fill=True, new_x=XPos.LMARGIN, new_y=YPos.NEXT)


def _render_heading(pdf, level, text):
    """Render a heading of the given level (1 = chapter title)."""
    if level == 1:
        # Chapters start on a fresh page unless we are still near the top
        # of the first content page.
        if pdf.page_no() > 2 or pdf.get_y() > 60:
            pdf.add_page()

        pdf.set_x(pdf.l_margin)  # previous text may have left x at the right edge
        pdf.set_font('Helvetica', 'B', 20)
        pdf.set_text_color(*COLOR_PRIMARY)
        pdf.multi_cell(0, 10, safe_text(text), fill=False)
        pdf.ln(5)
        y = pdf.get_y()
        pdf.set_draw_color(*COLOR_PRIMARY)
        pdf.set_line_width(0.5)  # same width as the page banner rule
        pdf.line(pdf.l_margin, y, pdf.w - pdf.r_margin, y)
        pdf.ln(10)
    elif level == 2:
        pdf.ln(8)
        pdf.set_font('Helvetica', 'B', 14)
        pdf.set_text_color(*COLOR_PRIMARY)
        pdf.multi_cell(0, 8, safe_text(text), fill=False)
        pdf.ln(2)
    elif level == 3:
        pdf.ln(4)
        pdf.set_font('Helvetica', 'B', 12)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.multi_cell(0, 6, safe_text(text), fill=False)
    else:  # H4 and deeper
        pdf.ln(3)
        pdf.set_font('Helvetica', 'B', 11)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.multi_cell(0, 5, safe_text(text), fill=False)


def _render_image(pdf, md_file, img_path):
    """Embed an image centred on the page; report (not raise) on failure."""
    full_path = normalize_image_path(md_file, img_path)
    if not full_path:
        print(f"Warning: image not found, skipped: {img_path}", file=sys.stderr)
        return

    pdf.ln(5)
    try:
        pdf.image(full_path, x=(pdf.w - 110) / 2, w=110)
    except Exception as exc:
        # Best effort: a broken image must not abort the whole document,
        # but the user should know it is missing from the output.
        print(f"Warning: could not embed image {full_path}: {exc}", file=sys.stderr)
    pdf.ln(5)


def convert(md_file, pdf_file):
    """Convert a Markdown file into a branded PDF manual.

    The document gets a cover page (titled after the first level-1 heading,
    which is then not repeated in the body) followed by the content:
    headings, tables, fenced code, images, blockquote callouts, lists and
    paragraphs.

    Args:
        md_file: Path of the UTF-8 Markdown source (a BOM is tolerated).
        pdf_file: Path of the PDF to write.

    Raises:
        OSError: If ``md_file`` cannot be read.
    """
    # Read once; 'utf-8-sig' strips a BOM that would otherwise hide a
    # heading on the very first line.
    with open(md_file, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    pdf = UXPDF()
    pdf.set_auto_page_break(auto=True, margin=20)
    pdf.set_title("Manual Técnico iT Guys")

    # --- Cover Page ---
    doc_title = _find_cover_title(lines, "Documentação Técnica")
    doc_subtitle = "Guia Oficial iT Guys"
    _render_cover(pdf, doc_title, doc_subtitle)

    # --- Content ---
    pdf.add_page()

    in_code_block = False
    table_buffer = []
    first_h1_skipped = False

    for raw_line in lines:
        raw_line = process_variables(raw_line.rstrip('\r\n'))
        line = raw_line.strip()

        # Robust reset at the start of every line.
        pdf.set_fill_color(255, 255, 255)
        pdf.set_text_color(*COLOR_TEXT_MAIN)
        pdf.set_font('Helvetica', '', 11)

        is_fence = line.startswith('```')

        # Inside a fence everything is code, including '|' lines and blank
        # lines; use the unstripped text so indentation survives.
        if in_code_block and not is_fence:
            _render_code_line(pdf, raw_line)
            continue

        # --- Tables: buffer consecutive pipe lines, render on first other line
        if line.startswith('|'):
            table_buffer.append(line)
            continue
        if table_buffer:
            _render_table(pdf, table_buffer)
            table_buffer = []

        if not line:
            pdf.ln(3)
            continue

        if is_fence:
            in_code_block = not in_code_block
            continue

        # --- Headings
        header = parse_header(line)
        if header:
            level, text = header
            if level == 1 and not first_h1_skipped:
                # The first H1 is already shown on the cover.
                first_h1_skipped = True
                continue
            _render_heading(pdf, level, text)
            continue

        # --- Images
        img_match = RE_IMAGE.search(line)
        if img_match or line.startswith('!['):
            if img_match:
                img_path = img_match.group(2)
            else:
                # Fallback for targets RE_IMAGE rejects, e.g. paths that
                # contain spaces: take whatever sits in the parentheses.
                loose = re.search(r'\(([^)]+)\)', line)
                img_path = loose.group(1) if loose else None
            if img_path:
                _render_image(pdf, md_file, img_path)
            continue

        # --- Blockquotes / callouts
        bq_match = RE_BLOCKQUOTE.match(line)
        if bq_match:
            c_type, clean_content = parse_callout_type(bq_match.group(1))
            render_callout(pdf, clean_content, c_type)
            continue

        # --- Lists and regular text
        rendered = safe_text(make_links_clickable(line))
        list_item = parse_list_item(line)
        if list_item:
            item_type, content, extra = list_item
            pdf.set_x(pdf.l_margin + 6)
            if item_type == 'cb':
                checkbox = '[x]' if extra else '[ ]'
                rendered = safe_text(f"{checkbox} {content}")
        else:
            pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 7, rendered, markdown=True, fill=False)

    # A table that runs to the end of the file has no following line to
    # trigger its rendering.
    if table_buffer:
        _render_table(pdf, table_buffer)

    pdf.output(pdf_file)
    print(f"PDF Generated: {pdf_file}")
|
||
|
||
if __name__ == "__main__":
    # Command line: <input.md> [output.pdf]
    if len(sys.argv) <= 1:
        print("Usage: python convert_to_pdf.py <input.md> [output.pdf]")
        sys.exit(1)

    md_in = sys.argv[1]
    # Without an explicit output, write next to the input with a .pdf suffix.
    explicit_output = len(sys.argv) >= 3
    pdf_out = sys.argv[2] if explicit_output else os.path.splitext(md_in)[0] + ".pdf"

    convert(md_in, pdf_out)
|