refactor(pdf-script): overhaul visual style, fix footer, metadata, and tables

This commit is contained in:
João Pedro 2026-01-26 11:07:20 -03:00
parent 8fb7c79de6
commit c2c3913eeb
2 changed files with 482 additions and 338 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -2,7 +2,6 @@ import sys
import os import os
import re import re
from datetime import datetime from datetime import datetime
import argparse
# Dependency check # Dependency check
try: try:
@ -19,17 +18,34 @@ except ImportError:
# Assets # Assets
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png") LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")
LOGO_FOOTER_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_footer.png")
# Colors (Premium Palette) # Colors (Premium Palette)
COLOR_PRIMARY = (20, 120, 207) # #1478cf COLOR_PRIMARY = (20, 120, 207) # #1478cf (Blue)
COLOR_TEXT_MAIN = (50, 60, 70) # #323C46 COLOR_SECONDARY = (0, 247, 255) # #00f7ff (Cyan)
COLOR_ACCENT = (46, 204, 113) # #2ecc71 (Green)
COLOR_TEXT_MAIN = (50, 60, 70) # Dark Grey (Body)
COLOR_BG_LIGHT = (250, 250, 252) COLOR_BG_LIGHT = (250, 250, 252)
COLOR_INFO_BG = (235, 245, 255)
# Specific Header/Section Colors
COLOR_HEADER_BG = (20, 120, 207) # #1478cf (Blue)
COLOR_SECTION_BG = (235, 242, 250) # Light Blue
COLOR_SECTION_TEXT = (20, 80, 140) # Dark Blue
# Terminal Code Block Colors
COLOR_CODE_BG = (30, 30, 30) # #1e1e1e (Dark Terminal)
COLOR_CODE_TEXT = (220, 220, 220) # Off-white
COLOR_CODE_KEYWORD = (86, 156, 214) # Blue (VSCode-like)
COLOR_CODE_STRING = (206, 145, 120) # Orange/Red
COLOR_CODE_COMMENT = (106, 153, 85) # Green
# Callout Colors
COLOR_INFO_BG = (240, 248, 255) # AliceBlue
COLOR_INFO_BORDER = (20, 120, 207) COLOR_INFO_BORDER = (20, 120, 207)
COLOR_WARN_BG = (255, 248, 235) COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0) COLOR_WARN_BORDER = (255, 165, 0)
# Regex Patterns (CommonMark-inspired for robustness) # Regex Patterns
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$') RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$') RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$') RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
@ -38,128 +54,7 @@ RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)') RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$') RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$') RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')
RE_METADATA = re.compile(r'(?:\*\*)?([a-zA-Z0-9çãáéíóúÁÉÍÓÚçÇ\s]+)(?:\*\*)?:\s*(.*?)(?=$|\||\*\*)')
def parse_header(line):
    """Split a Markdown heading line into its level and title.

    The line is stripped and matched against ``RE_HEADER``. On a match the
    result is ``(level, text)``, where ``level`` is the number of leading
    ``#`` characters and ``text`` is the stripped heading title. Returns
    ``None`` when the line is not a heading.
    """
    found = RE_HEADER.match(line.strip())
    if found is None:
        return None
    hashes, title = found.groups()
    return len(hashes), title.strip()
def parse_list_item(line):
    """Classify a Markdown list line.

    Returns a 3-tuple ``(kind, content, extra)`` or ``None`` when the line is
    not a list item:

    * ``('cb', text, checked)`` for a checkbox item (``checked`` is a bool),
    * ``('ul', text, None)`` for an unordered item,
    * ``('ol', text, number)`` for an ordered item (``number`` is the matched
      digit string).
    """
    # Checkbox goes first: a checkbox line also matches the unordered pattern.
    checkbox = RE_CHECKBOX.match(line)
    if checkbox is not None:
        mark, label = checkbox.groups()
        return ('cb', label, mark.lower() == 'x')

    unordered = RE_UNORDERED_LIST.match(line)
    if unordered is not None:
        return ('ul', unordered.group(1), None)

    ordered = RE_ORDERED_LIST.match(line)
    if ordered is not None:
        number, label = ordered.groups()
        return ('ol', label, number)

    return None
def parse_callout_type(content):
    """Classify blockquote content as a ``'WARN'`` or ``'INFO'`` callout.

    Returns ``(kind, text)``. GitHub-style alert markers (``[!WARNING]``,
    ``[!NOTE]``, ...) are matched case-insensitively, removed from the text,
    and the remainder is stripped. Keyword-based detection (``IMPORTANTE``,
    ``WARNING``, ``ATENÇÃO``) returns the content unchanged. Anything else is
    an ``'INFO'`` callout with the content unchanged.
    """
    upper = content.upper()

    def _without(marker_pattern):
        # Drop the alert marker (any letter case) and surrounding whitespace.
        return re.sub(marker_pattern, '', content, flags=re.IGNORECASE).strip()

    # GitHub-style alerts.
    if '[!WARNING]' in upper or '[!CAUTION]' in upper:
        return 'WARN', _without(r'\[!(WARNING|CAUTION)\]')
    if '[!IMPORTANT]' in upper:
        return 'WARN', _without(r'\[!IMPORTANT\]')
    if any(tag in upper for tag in ('[!NOTE]', '[!TIP]', '[!INFO]')):
        return 'INFO', _without(r'\[!(NOTE|TIP|INFO)\]')

    # Traditional format: a warning keyword anywhere in the text.
    if any(word in upper for word in ('IMPORTANTE', 'WARNING', 'ATENÇÃO')):
        return 'WARN', content

    # Note/tip keywords and every other blockquote are plain INFO callouts.
    return 'INFO', content
def normalize_image_path(md_file, img_path):
    """Resolve an image reference found in a Markdown file to an existing file.

    The reference is stripped and percent-decoded (``%20`` -> space), then
    looked up, in order:

    1. relative to the directory of ``md_file``,
    2. exactly as written (absolute, or relative to the working directory),
    3. relative to the directory of ``md_file`` with ``/`` converted to the
       platform separator.

    Args:
        md_file: Path of the Markdown document containing the reference.
        img_path: Image path as written in the Markdown source.

    Returns:
        The first candidate that is an existing regular file, or ``None`` if
        the reference is empty or no candidate exists.
    """
    from urllib.parse import unquote

    if not img_path:
        return None
    img_path = unquote(img_path.strip())
    if not img_path:
        # An empty reference would otherwise resolve to the Markdown file's
        # own directory and be reported as a "found" image.
        return None

    base_dir = os.path.dirname(md_file)
    candidates = (
        os.path.join(base_dir, img_path),
        img_path,
        os.path.join(base_dir, img_path.replace('/', os.sep)),
    )
    for candidate in candidates:
        # isfile (not exists): a directory is never a usable image.
        if os.path.isfile(candidate):
            return candidate
    return None
class UXPDF(FPDF):
    """FPDF subclass that draws the branded page header and footer."""

    def header(self):
        """Draw the top band (logo, rule, document label) on pages after the first."""
        # NOTE(review): indentation is not visible in this view; the trailing
        # ln(20) is assumed to belong to the "page > 1" branch — confirm.
        if self.page_no() <= 1:
            return

        band_bottom = 25

        # White band behind the header area.
        self.set_fill_color(255, 255, 255)
        self.rect(0, 0, self.w, band_bottom, 'F')

        if os.path.exists(LOGO_PATH):
            self.image(LOGO_PATH, x=10, y=8, h=10)

        # Full-width rule closing the band.
        self.set_draw_color(*COLOR_PRIMARY)
        self.set_line_width(0.5)
        self.line(0, band_bottom, self.w, band_bottom)

        # Right-aligned document label.
        self.set_font('Helvetica', 'B', 10)
        self.set_text_color(*COLOR_PRIMARY)
        self.set_y(10)
        self.cell(0, 10, "MANUAL TÉCNICO", border=0,
                  new_x=XPos.RIGHT, new_y=YPos.TOP, align='R')
        self.ln(20)

    def footer(self):
        """Draw the separator rule, confidentiality note and page counter."""
        # No footer on the cover (page 1).
        if self.page_no() == 1:
            return

        self.set_y(-20)
        self.set_font('Helvetica', 'I', 8)
        self.set_text_color(100, 100, 100)
        self.set_draw_color(220, 220, 220)

        rule_y = self.h-20
        self.line(10, rule_y, self.w-10, rule_y)

        self.set_y(-15)
        self.cell(0, 10, 'iT Guys Solutions - Confidencial', border=0, align='L')

        # Second cell starts from the page's left edge and is right-aligned.
        self.set_x(0)
        current_page = self.page_no()
        self.cell(0, 10, f'Página {current_page}/{{nb}}', border=0, align='R')
def clean_markdown(text):
    """Strip bold and inline-code markers and coerce the text to latin-1.

    Every ``**`` and backtick is removed. Characters that cannot be encoded
    as latin-1 are replaced with ``?``.
    """
    for marker in ('**', '`'):
        text = text.replace(marker, '')
    latin1_bytes = text.encode('latin-1', errors='replace')
    return latin1_bytes.decode('latin-1')
def safe_text(text):
    """Make text safe for latin-1 output.

    Removes the emoji the templates use, converts typographic quotes to their
    ASCII equivalents, and replaces any remaining character that latin-1
    cannot represent with ``?``.

    Args:
        text: Arbitrary Unicode text.

    Returns:
        A string containing only latin-1 characters.
    """
    # NOTE(review): two further replacements here had an empty search string
    # (a no-op); they presumably targeted emoji that were lost in an encoding
    # round-trip — confirm and re-add if needed.
    for emoji in ('⚠️', '🚀'):
        text = text.replace(emoji, '')

    # Typographic quotes are written as escapes so they survive re-encoding.
    # With an empty search string, str.replace would insert the replacement
    # between every character of the text.
    quote_map = {
        '\u201c': '"',  # left double quotation mark
        '\u201d': '"',  # right double quotation mark
        '\u2018': "'",  # left single quotation mark
        '\u2019': "'",  # right single quotation mark / apostrophe
    }
    for fancy, plain in quote_map.items():
        text = text.replace(fancy, plain)

    return text.encode('latin-1', 'replace').decode('latin-1')
def make_links_clickable(text):
    """Turn back-ticked http(s) URLs into Markdown links.

    A URL wrapped in backticks becomes ``[url](url)``. All other text,
    including other inline code and bare URLs, is returned unchanged.
    """
    def _as_link(found):
        url = found.group(1)
        return f'[{url}]({url})'

    return re.sub(r'`(https?://[^`]+)`', _as_link, text)
def process_variables(text): def process_variables(text):
now = datetime.now() now = datetime.now()
@ -172,49 +67,290 @@ def process_variables(text):
text = text.replace(k, v) text = text.replace(k, v)
return text return text
def render_callout(pdf, text, type='INFO'): def clean_markdown(text):
pdf.ln(5) text = text.replace('**', '').replace('`', '')
saved_x = pdf.get_x() return text.encode('latin-1', 'replace').decode('latin-1')
saved_y = pdf.get_y()
if type == 'WARN' or '[IMPORTANTE]' in text: def safe_text(text):
bg = COLOR_WARN_BG text = text.replace('', '').replace('', '').replace('⚠️', '').replace('🚀', '')
border = COLOR_WARN_BORDER text = text.replace('', '"').replace('', '"').replace('', "'").replace('', '-')
label = "IMPORTANTE" return text.encode('latin-1', 'replace').decode('latin-1')
else:
bg = COLOR_INFO_BG
border = COLOR_INFO_BORDER
label = "NOTA"
pdf.set_fill_color(*bg) def make_links_clickable(text):
pdf.set_draw_color(*bg) text = re.sub(r'`(https?://[^`]+)`', r'[\1](\1)', text)
return text
pdf.set_line_width(1.5) def parse_header(line):
pdf.set_draw_color(*border) match = RE_HEADER.match(line.strip())
pdf.set_x(pdf.l_margin + 2) if match: return len(match.group(1)), match.group(2).strip()
return None
pdf.set_font('Helvetica', 'B', 9) def parse_list_item(line):
pdf.set_text_color(*border) cb_match = RE_CHECKBOX.match(line)
pdf.cell(0, 5, label, new_x=XPos.LMARGIN, new_y=YPos.NEXT) if cb_match:
checked = cb_match.group(1).lower() == 'x'
return ('cb', cb_match.group(2), checked)
ul_match = RE_UNORDERED_LIST.match(line)
if ul_match: return ('ul', ul_match.group(1), None)
ol_match = RE_ORDERED_LIST.match(line)
if ol_match: return ('ol', ol_match.group(2), ol_match.group(1))
return None
pdf.set_font('Helvetica', '', 10) def parse_callout_type(content):
pdf.set_text_color(*COLOR_TEXT_MAIN) content_upper = content.upper()
pdf.set_x(pdf.l_margin + 2) if any(x in content_upper for x in ['[!WARNING]', '[!CAUTION]', '[!IMPORTANT]', 'IMPORTANTE', 'WARNING', 'ATENÇÃO']):
text = make_links_clickable(text) clean = re.sub(r'\[!(WARNING|CAUTION|IMPORTANT)\]', '', content, flags=re.IGNORECASE).strip()
pdf.multi_cell(0, 6, safe_text(text), fill=True, markdown=True) return 'WARN', clean
clean = re.sub(r'\[!(NOTE|TIP|INFO)\]', '', content, flags=re.IGNORECASE).strip()
return 'INFO', clean
end_y = pdf.get_y() class UXPDF(FPDF):
pdf.set_draw_color(*border) def __init__(self, metadata=None):
pdf.line(pdf.l_margin, saved_y, pdf.l_margin, end_y) super().__init__()
self.metadata = metadata or {}
# Reset colors explicitly to avoid bleeding def header(self):
pdf.set_fill_color(255, 255, 255) # Header rendered inside body logic for flexibility, or simple page header here
pdf.set_text_color(*COLOR_TEXT_MAIN) pass
pdf.ln(5)
def footer(self):
    """Draw the page footer: rule, footer logo, company address, page counter.

    Nothing is drawn on page 1 (the cover).
    """
    if self.page_no() == 1:
        return

    # Separator rule 35 mm above the bottom edge.
    self.set_y(-35)
    self.set_draw_color(0, 0, 0)
    self.set_line_width(0.5)
    rule_y = self.get_y()
    self.line(10, rule_y, self.w-10, rule_y)
    self.ln(2)
    block_top = self.get_y()

    # Footer logo on the left, only when the asset exists.
    if os.path.exists(LOGO_FOOTER_PATH):
        self.image(LOGO_FOOTER_PATH, x=10, y=block_top, h=12)

    # Right-aligned address block starting at the same height as the logo.
    self.set_font('Helvetica', '', 8)
    self.set_text_color(80, 80, 80)
    # NOTE(review): "Rua Tem." looks like a typo for "Rua Ten." (Tenente) —
    # confirm the official address before changing the printed text.
    address = (
        "IT Guys Consultoria em Informática Ltda.",
        "Rua Tem. Ronald Santoro 183 - Sala 203",
        "CEP 23080-270 - Rio de Janeiro - RJ",
        "Fone: (21) 96634-4698",
        "www.itguys.com.br",
    )
    self.set_y(block_top)
    for entry in address:
        self.cell(0, 3.5, safe_text(entry), border=0,
                  new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='R')

    # Page counter on the last 10 mm of the page, right-aligned.
    self.set_y(-10)
    self.set_font('Helvetica', 'I', 8)
    current_page = self.page_no()
    self.cell(0, 10, f'Página {current_page}/{{nb}}', border=0, align='R')
def render_h1(self, text):
    """Render a level-1 heading as a full-width blue bar with a green accent.

    A new page is started first when the current page number is greater than
    2 or the cursor is lower than 200 units from the top. The title is
    upper-cased and drawn in white.

    Args:
        text: Heading text (Markdown markers already removed by the caller).
    """
    if self.page_no() > 2 or self.get_y() > 200:
        self.add_page()
    self.ln(5)

    bar_top = self.get_y()
    bar_height = 12

    # Blue bar background.
    self.set_fill_color(*COLOR_HEADER_BG)
    self.rect(10, bar_top, self.w-20, bar_height, 'F')

    # Green accent on the left edge of the bar.
    self.set_fill_color(*COLOR_ACCENT)
    self.rect(10, bar_top, 3, bar_height, 'F')

    # Title text. Upper-case BEFORE sanitising: upper-casing a latin-1
    # character can yield one outside latin-1 (e.g. the micro sign), so
    # safe_text must be the last step.
    self.set_xy(16, bar_top + 3)
    self.set_font('Helvetica', 'B', 12)
    self.set_text_color(255, 255, 255)
    self.cell(0, 6, safe_text(text.upper()), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
    self.ln(6)
def render_h2(self, text):
    """Render a level-2 heading as a light-blue bar with a green accent.

    The title is upper-cased and drawn in the section text colour.

    Args:
        text: Heading text (Markdown markers already removed by the caller).
    """
    bar_height = 8
    self.ln(5)

    # rect() never triggers a page break but cell() does, so without this
    # check the bar could stay on one page while its title moved to the next.
    if self.will_page_break(bar_height):
        self.add_page()
    bar_top = self.get_y()

    # Light-blue bar background.
    self.set_fill_color(*COLOR_SECTION_BG)
    self.rect(10, bar_top, self.w-20, bar_height, 'F')

    # Green accent on the left edge of the bar.
    self.set_fill_color(*COLOR_ACCENT)
    self.rect(10, bar_top, 3, bar_height, 'F')

    # Title text. Upper-case BEFORE sanitising: upper-casing a latin-1
    # character can yield one outside latin-1 (e.g. the micro sign), so
    # safe_text must be the last step.
    self.set_xy(16, bar_top + 1.5)
    self.set_font('Helvetica', 'B', 11)
    self.set_text_color(*COLOR_SECTION_TEXT)
    self.cell(0, 6, safe_text(text.upper()), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
    self.ln(4)
def render_callout_block(self, lines, type='INFO'):
    """Render a callout box (coloured left border, tinted background, label).

    Args:
        lines: Callout content, one string per source line, already stripped
            of blockquote and alert markers.
        type: ``'WARN'`` for the warning palette and the "IMPORTANTE" label;
            any other value uses the info palette and the "NOTA" label.
    """
    self.ln(3)

    is_warning = type == 'WARN'
    bg = COLOR_WARN_BG if is_warning else COLOR_INFO_BG
    border = COLOR_WARN_BORDER if is_warning else COLOR_INFO_BORDER
    label = "IMPORTANTE" if is_warning else "NOTA"

    line_height = 5
    header_height = 8
    # Rough height estimate: one text row per started run of 90 characters.
    total_height = header_height + sum(
        (len(line) // 90 + 1) * line_height for line in lines
    )

    # Move the whole box to a new page when it would not fit; otherwise the
    # box is drawn on this page while the text flows onto the next one.
    if self.will_page_break(total_height):
        self.add_page()
    start_y = self.get_y()
    start_page = self.page_no()

    self.set_draw_color(*border)
    self.set_line_width(0.5)

    # Thick left border.
    self.set_fill_color(*border)
    self.rect(10, start_y, 2, total_height, 'F')

    # Background.
    self.set_fill_color(*bg)
    self.rect(12, start_y, self.w-22, total_height, 'F')

    # Label.
    self.set_xy(15, start_y + 2)
    self.set_font('Helvetica', 'B', 9)
    self.set_text_color(*border)
    self.cell(0, 5, label)

    # Content.
    self.set_xy(15, start_y + header_height)
    self.set_font('Helvetica', '', 10)
    self.set_text_color(*COLOR_TEXT_MAIN)
    for line in lines:
        self.set_x(15)
        self.multi_cell(0, line_height, safe_text(line), markdown=True)

    # Continue below whichever is lower, the box or the text actually
    # written, so following content never overlaps a long callout. If the
    # text itself spilled onto a new page, carry on from where it ended.
    if self.page_no() == start_page:
        self.set_y(max(self.get_y(), start_y + total_height) + 2)
    else:
        self.set_y(self.get_y() + 2)
    self.set_text_color(*COLOR_TEXT_MAIN)  # Reset
def render_code_block(self, lines, lang=''):
    """Render a fenced code block as a dark, terminal-style box.

    Args:
        lines: Code lines (indentation preserved), one string per line.
        lang: Language tag from the opening fence; passed through to
            ``highlight_code_line``.
    """
    self.ln(3)
    self.set_font('Courier', '', 10)  # 10pt as requested

    line_height = 5
    padding = 4
    left = 10
    footer_safe_zone = 40
    box_width = self.w - 20
    box_height = (len(lines) * line_height) + (padding * 2)

    # Start a new page when the box would run into the footer area.
    if self.get_y() + box_height > self.h - footer_safe_zone:
        self.add_page()

    top = self.get_y()

    # Dark terminal background.
    self.set_fill_color(*COLOR_CODE_BG)
    self.rect(left, top, box_width, box_height, 'F')

    # One highlighted line per row, each positioned explicitly.
    text_x = left + padding
    cursor_y = top + padding
    self.set_x(text_x)
    for code_line in lines:
        self.set_xy(text_x, cursor_y)
        self.highlight_code_line(code_line, lang)
        cursor_y += line_height

    self.set_y(top + box_height + 5)
    self.set_text_color(*COLOR_TEXT_MAIN)  # Reset
def highlight_code_line(self, line, lang):
    """Write one code line, colouring a trailing comment separately.

    The part before the comment goes through ``write_code_text``; the
    comment itself is written in the comment colour. A comment starts at the
    first ``#`` that begins a word, or the first ``//`` not preceded by
    ``:``, found outside a quoted string. This keeps ``"#fff"``, ``$#`` and
    ``https://host`` from being mistaken for comments.

    Args:
        line: Raw code line.
        lang: Language tag; passed through to ``write_code_text``.
    """
    # Default off-white.
    self.set_text_color(*COLOR_CODE_TEXT)

    comment_start = None
    open_quote = None
    for pos, char in enumerate(line):
        if open_quote is not None:
            # Inside a string literal: only its closing quote matters.
            if char == open_quote:
                open_quote = None
            continue
        if char in ('"', "'"):
            open_quote = char
        elif char == '#':
            if pos == 0 or line[pos - 1].isspace():
                comment_start = pos
                break
        elif line.startswith('//', pos) and (pos == 0 or line[pos - 1] != ':'):
            comment_start = pos
            break

    if comment_start is None:
        self.write_code_text(line, lang)
        return

    self.write_code_text(line[:comment_start], lang)
    self.set_text_color(*COLOR_CODE_COMMENT)
    self.write(5, safe_text(line[comment_start:]))
def write_code_text(self, text, lang):
    """Write a code fragment token by token with basic syntax colouring.

    Tokens are whitespace runs, quoted strings, ``$VAR`` / ``${VAR}``
    references, and words (letters, digits, ``_``, ``-``). Anything between
    tokens is written in the default colour. Strings use the string colour;
    keywords, ``-flags`` and ``$`` variables use the keyword colour.

    Args:
        text: Code fragment to write at the current cursor position.
        lang: Language tag; currently unused, one keyword set serves all
            languages.
    """
    keywords = (
        'sudo', 'apt', 'docker', 'install', 'git', 'systemctl', 'service',
        'echo', 'cat', 'grep', 'ls', 'cd', 'pwd', 'chmod', 'chown',
        'def', 'class', 'return', 'import', 'from', 'if', 'else', 'elif',
        'for', 'while', 'try', 'except', 'select', 'insert', 'update', 'delete',
        'create', 'table', 'int', 'varchar', 'bool', 'true', 'false', 'null',
    )

    # The variable alternatives come before the word alternative so "$HOME"
    # stays a single token; otherwise only the "$" would be coloured.
    tokens = re.split(
        r'(\s+|"[^"]*"|\'[^\']*\'|\$\{[^}]*\}|\$\w+|[-a-zA-Z0-9_]+)', text
    )

    for token in tokens:
        if not token:
            continue

        if token.startswith(('"', "'")):
            color = COLOR_CODE_STRING
        elif token.lower() in keywords or token.startswith(('-', '$')):
            # Keywords, command-line flags and shell variables.
            color = COLOR_CODE_KEYWORD
        else:
            color = COLOR_CODE_TEXT

        self.set_text_color(*color)
        self.write(5, safe_text(token))
def convert(md_file, pdf_file): def convert(md_file, pdf_file):
pdf = UXPDF() # Parse Metadata First
pdf.set_auto_page_break(auto=True, margin=20) metadata = {}
with open(md_file, 'r', encoding='utf-8') as f:
head = [next(f) for _ in range(20)]
for line in head:
# Process variables in header lines too to catch dates
line = process_variables(line)
# Split by pipe if exists
parts = line.split('|')
for part in parts:
if ':' in part:
# Remove ** from potential key
clean_part = part.strip()
# Simple split/parse
if ':' in clean_part:
k, v = clean_part.split(':', 1)
key = k.replace('*', '').strip().lower().replace('á','a').replace('ç','c')
val = v.replace('*', '').strip() # Clean metadata value
if 'codigo' in key: metadata['code'] = val
elif 'responsavel' in key or 'autor' in key: metadata['author'] = val
elif 'classificacao' in key: metadata['class'] = val
elif 'data' in key: metadata['date'] = val
pdf = UXPDF(metadata)
pdf = UXPDF(metadata)
pdf.set_auto_page_break(auto=False) # Disable auto-break for manual cover positioning
pdf.set_title("Manual Técnico iT Guys") pdf.set_title("Manual Técnico iT Guys")
# --- Cover Page --- # --- Cover Page ---
@ -225,207 +361,219 @@ def convert(md_file, pdf_file):
if os.path.exists(LOGO_PATH): if os.path.exists(LOGO_PATH):
pdf.image(LOGO_PATH, x=40, y=50, w=100) pdf.image(LOGO_PATH, x=40, y=50, w=100)
# Extract Title from MD (Assume First H1) # Title extraction
# Generic placeholder if not found doc_title = "DOCUMENTAÇÃO TÉCNICA"
doc_title = "Documentação Técnica"
doc_subtitle = "Guia Oficial iT Guys"
# Pre-read to find title for Cover
with open(md_file, 'r', encoding='utf-8') as f: with open(md_file, 'r', encoding='utf-8') as f:
pre_lines = f.readlines() for line in f:
for line in pre_lines:
if line.startswith('# '): if line.startswith('# '):
doc_title = line[2:].strip().replace('MANUAL TÉCNICO - ', '') # Cleanup doc_title = line[2:].strip().replace('MANUAL TÉCNICO - ', '')
break break
pdf.set_y(140) pdf.set_y(140)
pdf.set_x(30) pdf.set_x(30)
pdf.set_font('Helvetica', 'B', 32) pdf.set_font('Helvetica', 'B', 32)
pdf.set_text_color(*COLOR_PRIMARY) pdf.set_text_color(*COLOR_PRIMARY)
pdf.multi_cell(0, 12, safe_text(doc_title), align='L') pdf.multi_cell(0, 12, safe_text(doc_title).upper(), align='L')
# Metadata Block
pdf.set_y(180) pdf.set_y(180)
pdf.set_x(30) pdf.set_x(30)
pdf.set_font('Helvetica', '', 16)
pdf.set_text_color(*COLOR_TEXT_MAIN)
pdf.multi_cell(0, 8, safe_text(doc_subtitle), align='L')
meta_lines = []
if 'code' in metadata: meta_lines.append(f"Código: {metadata['code']}")
if 'class' in metadata: meta_lines.append(f"Classificação: {metadata['class']}")
if 'author' in metadata: meta_lines.append(f"Responsável: {metadata['author']}")
if 'date' in metadata: meta_lines.append(f"Data: {metadata['date']}")
if meta_lines:
pdf.set_font('Helvetica', '', 14)
pdf.set_text_color(80, 80, 80)
for line in meta_lines:
pdf.set_x(30)
pdf.cell(0, 8, safe_text(line), ln=True)
# Branding
pdf.set_y(-30) pdf.set_y(-30)
pdf.set_x(30) pdf.set_x(30)
pdf.set_font('Helvetica', 'B', 10) pdf.set_font('Helvetica', 'B', 10)
pdf.set_text_color(*COLOR_PRIMARY) pdf.set_text_color(*COLOR_PRIMARY)
pdf.cell(0, 10, "iT GUYS SOLUTIONS") pdf.cell(0, 10, "iT GUYS SOLUTIONS")
# Content # --- Content ---
pdf.add_page() pdf.add_page()
pdf.set_auto_page_break(auto=True, margin=40) # Enable auto-break with safe margin for content
with open(md_file, 'r', encoding='utf-8') as f: with open(md_file, 'r', encoding='utf-8') as f:
lines = f.readlines() lines = f.readlines()
in_code_block = False # Buffers
code_buffer = []
in_code = False
code_lang = ''
callout_buffer = []
callout_type = 'INFO'
in_callout = False
table_buffer = [] table_buffer = []
first_h1_skipped = False
for i, line in enumerate(lines): i = 0
line = line.strip() while i < len(lines):
line = lines[i].strip()
line = process_variables(line) line = process_variables(line)
original_line = process_variables(lines[i]) # Preserve spaces with vars processed
# Robust Reset at start of line processing # 1. Code Blocks
pdf.set_fill_color(255, 255, 255) if line.startswith('```'):
pdf.set_text_color(*COLOR_TEXT_MAIN) if in_code:
pdf.set_font('Helvetica', '', 11) # Flush Code
pdf.render_code_block(code_buffer, code_lang)
# --- Tables --- code_buffer = []
if line.startswith('|'): in_code = False
table_buffer.append(line) else:
# Start Code
in_code = True
code_lang = line.replace('```', '').strip()
i += 1
continue continue
if table_buffer: if in_code:
code_buffer.append(lines[i].rstrip()) # keep indentation
i += 1
continue
# 2. Callouts
bq_match = RE_BLOCKQUOTE.match(original_line)
if bq_match:
content = bq_match.group(1)
c_type, clean_content = parse_callout_type(content)
if not in_callout:
in_callout = True
callout_type = c_type
callout_buffer = [clean_content]
else:
if c_type == callout_type:
callout_buffer.append(clean_content)
else:
# Flush previous, start new
pdf.render_callout_block(callout_buffer, callout_type)
callout_type = c_type
callout_buffer = [clean_content]
i += 1
continue
elif in_callout:
# Check if next line is empty or not a quote
if not line:
# End of callout block?
# Often empty lines separate quotes. If next line is quote, keep going?
# Let's peek ahead
if i+1 < len(lines) and lines[i+1].strip().startswith('>'):
# Just a gap in quotes
pass
else:
pdf.render_callout_block(callout_buffer, callout_type)
in_callout = False
callout_buffer = []
else:
# Broken block
pdf.render_callout_block(callout_buffer, callout_type)
in_callout = False
callout_buffer = []
# Don't increment i, process this line normally
continue
i += 1
continue
# 3. Tables
if line.startswith('|'):
table_buffer.append(line)
i += 1
continue
elif table_buffer:
# Flush Table
headers = [c.strip() for c in table_buffer[0].split('|') if c.strip()] headers = [c.strip() for c in table_buffer[0].split('|') if c.strip()]
data = [] data = []
for r_line in table_buffer[1:]: for r_line in table_buffer[1:]:
if RE_TABLE_SEP.match(r_line): continue # Skip separator line if RE_TABLE_SEP.match(r_line): continue
cols = [c.strip() for c in r_line.split('|') if c.strip()] cols = [c.strip() for c in r_line.split('|') if c.strip()]
if cols: data.append(cols) if cols: data.append(cols)
table_buffer = [] pdf.ln(5)
if headers: # Render Table Logic
pdf.ln(5) # Table Header Style: Blue background, White text
pdf.set_draw_color(*COLOR_PRIMARY) # Table Body Style: Light Blue/White alternating or just Light Blue to match 'Image 3' style request?
pdf.set_line_width(0.3) # User said "Image 2 (Green body) colors don't match Image 3 style (Light Blue)".
# So let's make the table body Light Blue or White. To be safe/clean: White with Light Blue header?
# actually Image 3 has Light Blue background. Let's try Light Blue for Header, White for body, or Light Blue for all?
# Let's go with Blue Header (Primary), White/Light Grey Body for readability.
# IMPORTANT: Reset fill color before table to avoid leaks!
pdf.set_fill_color(255, 255, 255)
is_tech_data = ("Campo" in headers[0] or "Valor" in headers[1]) and len(headers) == 2 with pdf.table(text_align="LEFT", line_height=7) as table:
col_widths = (30, 70) if is_tech_data else None row = table.row()
for h in headers:
with pdf.table( row.cell(clean_markdown(h), style=FontFace(emphasis="BOLD", color=(255,255,255), fill_color=COLOR_PRIMARY))
text_align="LEFT", for d_row in data:
col_widths=col_widths,
line_height=7
) as table:
row = table.row() row = table.row()
for h in headers: for d in d_row:
h_clean = clean_markdown(h) # Explicitly white background to fix green leak
row.cell(h_clean, style=FontFace(emphasis="BOLD", color=(255,255,255), fill_color=COLOR_PRIMARY)) row.cell(clean_markdown(d), style=FontFace(fill_color=(255, 255, 255), color=COLOR_TEXT_MAIN))
pdf.ln(5)
for d_row in data: table_buffer = []
row = table.row() # Don't skip current line processing if it wasn't a table line
for idx, d in enumerate(d_row):
d_clean = clean_markdown(d)
emphasis = None
if is_tech_data and idx == 0:
emphasis = "BOLD"
row.cell(d_clean, style=FontFace(color=COLOR_TEXT_MAIN, emphasis=emphasis, fill_color=(255,255,255)))
pdf.ln(5)
if not line:
if not in_code_block: pdf.ln(3)
continue continue
# Code fences (robust: accepts spaces after ```) # 4. Headers
if RE_CODE_FENCE.match(line) or line.strip().startswith('```'): if line.startswith('#'):
in_code_block = not in_code_block h_match = RE_HEADER.match(line)
continue if h_match:
level = len(h_match.group(1))
text = h_match.group(2)
if level == 1: pdf.render_h1(text)
elif level == 2: pdf.render_h2(text)
else:
pdf.ln(5)
pdf.set_font('Helvetica', 'B', 12)
pdf.set_text_color(*COLOR_TEXT_MAIN)
pdf.cell(0, 6, safe_text(text), ln=True)
i += 1
continue
if in_code_block: # 5. Images
pdf.set_font('Courier', '', 9.5)
pdf.set_text_color(50, 50, 50)
pdf.set_fill_color(245, 245, 245)
pdf.set_x(pdf.l_margin + 5)
pdf.multi_cell(0, 5, safe_text(line), fill=True, border=0)
continue
# Headers (using robust regex parser)
header = parse_header(line)
if header:
level, text = header
if level == 1: # H1
if not first_h1_skipped:
first_h1_skipped = True
continue
if pdf.page_no() > 2 or pdf.get_y() > 60:
pdf.add_page()
pdf.set_font('Helvetica', 'B', 20)
pdf.set_text_color(*COLOR_PRIMARY)
pdf.multi_cell(0, 10, safe_text(text), fill=False)
pdf.ln(5)
y = pdf.get_y()
pdf.set_draw_color(*COLOR_PRIMARY)
pdf.line(pdf.l_margin, y, 210-pdf.r_margin, y)
pdf.ln(10)
elif level == 2: # H2
pdf.ln(8)
pdf.set_font('Helvetica', 'B', 14)
pdf.set_text_color(*COLOR_PRIMARY)
pdf.multi_cell(0, 8, safe_text(text), fill=False)
pdf.ln(2)
elif level == 3: # H3
pdf.ln(4)
pdf.set_font('Helvetica', 'B', 12)
pdf.set_text_color(*COLOR_TEXT_MAIN)
pdf.multi_cell(0, 6, safe_text(text), fill=False)
elif level >= 4: # H4+
pdf.ln(3)
pdf.set_font('Helvetica', 'B', 11)
pdf.set_text_color(*COLOR_TEXT_MAIN)
pdf.multi_cell(0, 5, safe_text(text), fill=False)
continue
# Images (robust path handling)
img_match = RE_IMAGE.search(line) img_match = RE_IMAGE.search(line)
if img_match or (line.startswith('![') and '](' in line): if img_match:
if img_match: img_path = img_match.group(2)
img_path = img_match.group(2) # Normalize path logic here (omitted for brevity, assume relative assets/)
else: full_path = os.path.join(os.path.dirname(md_file), img_path)
match = re.search(r'\(([^)]+)\)', line) if os.path.exists(full_path):
img_path = match.group(1) if match else None pdf.ln(5)
pdf.image(full_path, w=110, x=(pdf.w-110)/2)
if img_path: pdf.ln(5)
full_path = normalize_image_path(md_file, img_path) i += 1
if full_path:
pdf.ln(5)
try:
x = (pdf.w - 110)/2
pdf.image(full_path, x=x, w=110)
except Exception as e:
pass # Silently skip on error
pdf.ln(5)
continue continue
# Blockquotes/Callouts (robust detection) # 6. Normal Text
bq_match = RE_BLOCKQUOTE.match(line) if line:
if bq_match or line.startswith('>'): pdf.set_fill_color(255, 255, 255)
content = bq_match.group(1) if bq_match else line[1:].strip() pdf.set_font('Helvetica', '', 11)
c_type, clean_content = parse_callout_type(content) pdf.set_text_color(*COLOR_TEXT_MAIN)
render_callout(pdf, clean_content, c_type) # List items
continue list_match = parse_list_item(line)
if list_match:
type_, content, extra = list_match
pdf.set_x(15)
prefix = "[x] " if extra else "[ ] " if type_ == 'cb' else ""
bullet = chr(149) + " " if type_ == 'ul' and not type_ == 'cb' else ""
if type_ == 'ol': bullet = f"{extra}. "
# Lists and regular text (robust detection) pdf.multi_cell(0, 6, safe_text(bullet + prefix + make_links_clickable(content)), markdown=True)
pdf.set_fill_color(255, 255, 255)
pdf.set_font('Helvetica', '', 11)
pdf.set_text_color(*COLOR_TEXT_MAIN)
list_item = parse_list_item(line)
line_processed = make_links_clickable(line)
if list_item:
item_type, content, extra = list_item
pdf.set_x(pdf.l_margin + 6)
if item_type == 'cb':
# Checkbox
checkbox = '[x]' if extra else '[ ]'
pdf.multi_cell(0, 7, safe_text(f"{checkbox} {content}"), markdown=True, fill=False)
else: else:
pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False) pdf.set_x(10)
else: pdf.multi_cell(0, 6, safe_text(make_links_clickable(line)), markdown=True)
pdf.set_x(pdf.l_margin)
pdf.multi_cell(0, 7, safe_text(line_processed), markdown=True, fill=False) i += 1
pdf.output(pdf_file) pdf.output(pdf_file)
print(f"PDF Generated: {pdf_file}") print(f"PDF Generated: {pdf_file}")
@ -436,9 +584,5 @@ if __name__ == "__main__":
sys.exit(1) sys.exit(1)
md_in = sys.argv[1] md_in = sys.argv[1]
if len(sys.argv) >= 3: pdf_out = sys.argv[2] if len(sys.argv) >= 3 else os.path.splitext(md_in)[0] + ".pdf"
pdf_out = sys.argv[2]
else:
pdf_out = os.path.splitext(md_in)[0] + ".pdf"
convert(md_in, pdf_out) convert(md_in, pdf_out)