manuais-e-documentacao-itguys/.gemini/convert_to_pdf.py

589 lines
21 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
import os
import re
from datetime import datetime
# Dependency check
try:
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from fpdf.fonts import FontFace
except ImportError:
import subprocess
subprocess.check_call([sys.executable, "-m", "pip", "install", "fpdf2"])
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from fpdf.fonts import FontFace
# Assets
# Logo files are resolved relative to this script's own directory, so the
# converter works regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_main.png")
LOGO_FOOTER_PATH = os.path.join(BASE_DIR, "assets", "itguys_logo_footer.png")
# Colors (Premium Palette)
# Every colour is an (R, G, B) tuple of 0-255 integers; callers unpack them
# into the fpdf colour setters (e.g. ``set_fill_color(*COLOR_PRIMARY)``).
COLOR_PRIMARY = (20, 120, 207) # #1478cf (Blue)
COLOR_SECONDARY = (0, 247, 255) # #00f7ff (Cyan)
COLOR_ACCENT = (46, 204, 113) # #2ecc71 (Green)
COLOR_TEXT_MAIN = (50, 60, 70) # Dark Grey (Body)
COLOR_BG_LIGHT = (250, 250, 252)
# Specific Header/Section Colors
COLOR_HEADER_BG = (20, 120, 207) # #1478cf (Blue)
COLOR_SECTION_BG = (235, 242, 250) # Light Blue
COLOR_SECTION_TEXT = (20, 80, 140) # Dark Blue
# Terminal Code Block Colors
COLOR_CODE_BG = (30, 30, 30) # #1e1e1e (Dark Terminal)
COLOR_CODE_TEXT = (220, 220, 220) # Off-white
COLOR_CODE_KEYWORD = (86, 156, 214) # Blue (VSCode-like)
COLOR_CODE_STRING = (206, 145, 120) # Orange/Red
COLOR_CODE_COMMENT = (106, 153, 85) # Green
# Callout Colors
# INFO is used for note-style callouts, WARN for warning-style callouts.
COLOR_INFO_BG = (240, 248, 255) # AliceBlue
COLOR_INFO_BORDER = (20, 120, 207)
COLOR_WARN_BG = (255, 248, 235)
COLOR_WARN_BORDER = (255, 165, 0)
# Regex Patterns
# Compiled once at import time; each one recognises a single markdown
# construct on one line of input.
# ATX heading: group 1 = the run of 1-6 '#', group 2 = the heading text.
RE_HEADER = re.compile(r'^(#{1,6})\s+(.*)$')
# Bullet item ('-', '+' or '*'): group 1 = the item text.
RE_UNORDERED_LIST = re.compile(r'^\s*[-+*]\s+(.+)$')
# Numbered item ('1.' or '1)'): group 1 = the number, group 2 = the item text.
RE_ORDERED_LIST = re.compile(r'^\s*(\d+)[.)]\s+(.+)$')
# Blockquote: '>' must be the first character; group 1 = the quoted text.
RE_BLOCKQUOTE = re.compile(r'^>\s*(.*)$')
# Table separator row: a line made only of '|', whitespace, '-' and ':'.
RE_TABLE_SEP = re.compile(r'^[\|\s\-:]+$')
# Image: group 1 = alt text, group 2 = path/URL; an optional "title" is ignored.
RE_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
# Code fence line: group 1 = the optional language tag.
# NOTE(review): not referenced by the code visible in this file - confirm before removing.
RE_CODE_FENCE = re.compile(r'^```\s*(\w*)\s*$')
# Task-list item: group 1 = ' ', 'x' or 'X' (the check mark), group 2 = the item text.
RE_CHECKBOX = re.compile(r'^\s*[-*+]\s*\[([ xX])\]\s+(.+)$')
# "Key: value" pair, optionally wrapped in '**': group 1 = key, group 2 = value.
# NOTE(review): not referenced by the code visible in this file - confirm before removing.
RE_METADATA = re.compile(r'(?:\*\*)?([a-zA-Z0-9çãáéíóúÁÉÍÓÚçÇ\s]+)(?:\*\*)?:\s*(.*?)(?=$|\||\*\*)')
def process_variables(text):
    """Expand the template placeholders found in *text*.

    ``{{DATA_ATUAL}}`` becomes today's date formatted as ``dd/mm/YYYY`` and
    ``{{ANO}}`` becomes the current four-digit year. Text without any
    placeholder is returned unchanged.
    """
    today = datetime.now()
    substitutions = (
        ('{{DATA_ATUAL}}', today.strftime("%d/%m/%Y")),
        ('{{ANO}}', str(today.year)),
    )
    expanded = text
    for placeholder, value in substitutions:
        # str.replace is a no-op when the placeholder is absent.
        expanded = expanded.replace(placeholder, value)
    return expanded
def clean_markdown(text):
    """Return *text* without bold/code markers, restricted to Latin-1.

    Every ``**`` and backtick is removed; any character that cannot be
    encoded as Latin-1 is replaced by ``?``.
    """
    stripped = text
    for marker in ('**', '`'):
        stripped = stripped.replace(marker, '')
    latin1_bytes = stripped.encode('latin-1', 'replace')
    return latin1_bytes.decode('latin-1')
# Translation table used by safe_text(). Every character is written as an
# explicit escape so the mapping stays reviewable: invisible or look-alike
# characters typed directly into a string literal cannot be told apart from
# an empty string.
# NOTE(review): several search strings in the previous implementation were
# invisible in the reviewed copy of this file; the set below is reconstructed
# from their replacement values (nothing, '"', "'", '-') - confirm it against
# the documents this script is fed.
_SAFE_TEXT_TABLE = str.maketrans({
    # Invisible characters: dropped.
    '\u200b': None,      # zero-width space
    '\u200c': None,      # zero-width non-joiner
    '\u200d': None,      # zero-width joiner
    '\ufeff': None,      # byte-order mark / zero-width no-break space
    '\ufe0f': None,      # emoji variation selector
    # Emoji used in the manuals: dropped.
    '\u26a0': None,      # warning sign
    '\U0001F680': None,  # rocket
    # Typographic punctuation: replaced by the ASCII equivalent.
    '\u201c': '"',
    '\u201d': '"',
    '\u2018': "'",
    '\u2019': "'",
    '\u2013': '-',
    '\u2014': '-',
    # The same punctuation when cp1252 bytes were mis-decoded as Latin-1.
    '\u0093': '"',
    '\u0094': '"',
    '\u0091': "'",
    '\u0092': "'",
    '\u0096': '-',
    '\u0097': '-',
})


def safe_text(text):
    """Return *text* reduced to characters that encode as Latin-1.

    Zero-width characters and the emoji listed in ``_SAFE_TEXT_TABLE`` are
    removed, typographic quotes and dashes become their ASCII equivalents,
    and any remaining character outside Latin-1 is replaced by ``?``.
    """
    normalized = text.translate(_SAFE_TEXT_TABLE)
    return normalized.encode('latin-1', 'replace').decode('latin-1')
def make_links_clickable(text):
    """Turn backtick-quoted http(s) URLs into markdown links.

    A span such as `` `https://example.com` `` becomes
    ``[https://example.com](https://example.com)``; everything else in
    *text* is left as it is.
    """
    def _as_markdown_link(found):
        url = found.group(1)
        return '[' + url + '](' + url + ')'

    return re.sub(r'`(https?://[^`]+)`', _as_markdown_link, text)
def parse_header(line):
    """Parse a markdown heading line.

    Returns a ``(level, title)`` tuple, where *level* is the number of
    leading ``#`` characters and *title* is the stripped heading text, or
    ``None`` when the stripped line does not match ``RE_HEADER``.
    """
    found = RE_HEADER.match(line.strip())
    if found is None:
        return None
    hashes, title = found.groups()
    return len(hashes), title.strip()
def parse_list_item(line):
    """Classify *line* as a markdown list item.

    Returns a 3-tuple ``(kind, text, extra)``:

    * ``('cb', text, checked)`` for a task-list item, *checked* being a bool;
    * ``('ul', text, None)`` for a bullet item;
    * ``('ol', text, number)`` for a numbered item, *number* being the
      digits as a string.

    Returns ``None`` when the line is not a list item. Task-list items are
    tested first because they also match the bullet pattern.
    """
    checkbox = RE_CHECKBOX.match(line)
    if checkbox is not None:
        mark, label = checkbox.groups()
        return ('cb', label, mark in ('x', 'X'))

    bullet = RE_UNORDERED_LIST.match(line)
    if bullet is not None:
        return ('ul', bullet.group(1), None)

    numbered = RE_ORDERED_LIST.match(line)
    if numbered is not None:
        number, label = numbered.groups()
        return ('ol', label, number)

    return None
def parse_callout_type(content):
    """Classify blockquote text as a warning or an informational callout.

    Returns ``(kind, text)``. *kind* is ``'WARN'`` when the upper-cased
    content contains any warning trigger (alert markers or plain keywords)
    and ``'INFO'`` otherwise. *text* is the content with the alert markers
    belonging to that kind removed and surrounding whitespace stripped.
    """
    warn_triggers = ('[!WARNING]', '[!CAUTION]', '[!IMPORTANT]', 'IMPORTANTE', 'WARNING', 'ATENÇÃO')
    shouted = content.upper()

    is_warning = False
    for trigger in warn_triggers:
        if trigger in shouted:
            is_warning = True
            break

    if is_warning:
        kind, marker_pattern = 'WARN', r'\[!(WARNING|CAUTION|IMPORTANT)\]'
    else:
        kind, marker_pattern = 'INFO', r'\[!(NOTE|TIP|INFO)\]'

    stripped = re.sub(marker_pattern, '', content, flags=re.IGNORECASE).strip()
    return kind, stripped
class UXPDF(FPDF):
    """FPDF document with the iT Guys footer and styled block renderers.

    Adds renderers for level-1/level-2 headings, callout boxes and
    terminal-style code blocks on top of the plain FPDF drawing API.
    Layout coordinates are hard-coded for a 10-unit side margin.
    """

    # Lower-cased words drawn in the keyword colour inside code blocks.
    _CODE_KEYWORDS = frozenset({
        'sudo', 'apt', 'docker', 'install', 'git', 'systemctl', 'service',
        'echo', 'cat', 'grep', 'ls', 'cd', 'pwd', 'chmod', 'chown',
        'def', 'class', 'return', 'import', 'from', 'if', 'else', 'elif',
        'for', 'while', 'try', 'except', 'select', 'insert', 'update', 'delete',
        'create', 'table', 'int', 'varchar', 'bool', 'true', 'false', 'null',
    })

    # Scanner used to locate the start of a trailing comment. Closed quoted
    # strings are matched first so a marker inside them is skipped, and '//'
    # directly after ':' is ignored so URLs (https://...) are not comments.
    _COMMENT_SCAN = re.compile(r'"[^"]*"|\'[^\']*\'|#|(?<!:)//')

    def __init__(self, metadata=None):
        """Create the document; *metadata* is kept as ``self.metadata`` (a dict)."""
        super().__init__()
        self.metadata = metadata or {}

    def header(self):
        """Draw nothing: headings are rendered as part of the body flow."""
        pass

    def footer(self):
        """Draw the page footer: rule, logo, address block and page number.

        The first page (the cover) gets no footer.
        """
        if self.page_no() == 1:
            return
        self.set_y(-35)
        self.set_draw_color(0, 0, 0)
        self.set_line_width(0.5)
        self.line(10, self.get_y(), self.w - 10, self.get_y())
        self.ln(2)
        start_y = self.get_y()
        # Logo (left), only when the asset is present on disk.
        if os.path.exists(LOGO_FOOTER_PATH):
            self.image(LOGO_FOOTER_PATH, x=10, y=start_y, h=12)
        # Address block (right-aligned), starting level with the logo.
        self.set_font('Helvetica', '', 8)
        self.set_text_color(80, 80, 80)
        address_lines = [
            "IT Guys Consultoria em Informática Ltda.",
            "Rua Tem. Ronald Santoro 183 - Sala 203",
            "CEP 23080-270 - Rio de Janeiro - RJ",
            "Fone: (21) 96634-4698",
            "www.itguys.com.br"
        ]
        self.set_y(start_y)
        for line in address_lines:
            self.cell(0, 3.5, safe_text(line), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='R')
        # Page number; '{nb}' is the total-page-count placeholder text.
        self.set_y(-10)
        self.set_font('Helvetica', 'I', 8)
        self.cell(0, 10, f'Página {self.page_no()}/{{nb}}', 0, align='R')

    def render_h1(self, text):
        """Render a level-1 heading as a white-on-blue bar with a green accent.

        A new page is started first when the current page number is above 2
        or the cursor is already below y=200.
        """
        if self.page_no() > 2 or self.get_y() > 200:
            self.add_page()
        self.ln(5)
        bar_y = self.get_y()
        # Blue bar background.
        self.set_fill_color(*COLOR_HEADER_BG)
        self.rect(10, bar_y, self.w - 20, 12, 'F')
        # Green accent on the left edge.
        self.set_fill_color(*COLOR_ACCENT)
        self.rect(10, bar_y, 3, 12, 'F')
        # Upper-cased heading text.
        self.set_xy(16, bar_y + 3)
        self.set_font('Helvetica', 'B', 12)
        self.set_text_color(255, 255, 255)
        self.cell(0, 6, safe_text(text).upper(), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
        self.ln(6)

    def render_h2(self, text):
        """Render a level-2 heading as a light-blue bar with a green accent."""
        self.ln(5)
        bar_y = self.get_y()
        # Light blue bar background.
        self.set_fill_color(*COLOR_SECTION_BG)
        self.rect(10, bar_y, self.w - 20, 8, 'F')
        # Green accent on the left edge.
        self.set_fill_color(*COLOR_ACCENT)
        self.rect(10, bar_y, 3, 8, 'F')
        # Upper-cased heading text.
        self.set_xy(16, bar_y + 1.5)
        self.set_font('Helvetica', 'B', 11)
        self.set_text_color(*COLOR_SECTION_TEXT)
        self.cell(0, 6, safe_text(text).upper(), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
        self.ln(4)

    def render_callout_block(self, lines, type='INFO'):
        """Render *lines* inside a coloured callout box.

        *type* selects the palette and label: ``'WARN'`` gives an orange
        "IMPORTANTE" box, anything else a blue "NOTA" box. Each entry of
        *lines* is drawn as its own wrapped paragraph with markdown enabled.
        """
        self.ln(3)
        is_warning = type == 'WARN'
        bg = COLOR_WARN_BG if is_warning else COLOR_INFO_BG
        border = COLOR_WARN_BORDER if is_warning else COLOR_INFO_BORDER
        label = "IMPORTANTE" if is_warning else "NOTA"

        self.set_font('Helvetica', '', 10)
        line_height = 5
        header_height = 8
        # Rough estimate: one visual row per started run of 90 characters.
        total_height = header_height + sum(
            (len(line) // 90 + 1) * line_height for line in lines
        )

        # Same guard as render_code_block: start a new page rather than
        # drawing the box over the footer area.
        if self.get_y() + total_height > self.h - 40:
            self.add_page()

        start_y = self.get_y()
        # Draw colour and line width are left set on purpose: later drawing
        # code may rely on the state this method has always left behind.
        self.set_draw_color(*border)
        self.set_line_width(0.5)
        # Thick left border.
        self.set_fill_color(*border)
        self.rect(10, start_y, 2, total_height, 'F')
        # Box background.
        self.set_fill_color(*bg)
        self.rect(12, start_y, self.w - 22, total_height, 'F')
        # Label.
        self.set_xy(15, start_y + 2)
        self.set_font('Helvetica', 'B', 9)
        self.set_text_color(*border)
        self.cell(0, 5, label)
        # Content.
        self.set_xy(15, start_y + header_height)
        self.set_font('Helvetica', '', 10)
        self.set_text_color(*COLOR_TEXT_MAIN)
        for line in lines:
            self.set_x(15)
            self.multi_cell(0, 5, safe_text(line), markdown=True)
        # Continue below the box regardless of how far the text really ran.
        self.set_y(start_y + total_height + 2)
        self.set_text_color(*COLOR_TEXT_MAIN)  # Reset

    def render_code_block(self, lines, lang=''):
        """Render *lines* as a dark terminal-style box with syntax colouring.

        The box height is computed from the number of lines; when it would
        not fit above the footer area a new page is started first. *lang* is
        passed through to the highlighter.
        """
        self.ln(3)
        self.set_font('Courier', '', 10)
        line_height = 5
        padding = 4
        box_width = self.w - 20
        box_height = (len(lines) * line_height) + (padding * 2)
        # Page break check, leaving a safe zone for the footer.
        if self.get_y() + box_height > self.h - 40:
            self.add_page()
        start_y = self.get_y()
        start_x = 10
        # Dark terminal background.
        self.set_fill_color(*COLOR_CODE_BG)
        self.rect(start_x, start_y, box_width, box_height, 'F')
        # One highlighted source line per row.
        for row, line in enumerate(lines):
            self.set_xy(start_x + padding, start_y + padding + row * line_height)
            self.highlight_code_line(line, lang)
        self.set_y(start_y + box_height + 5)
        self.set_text_color(*COLOR_TEXT_MAIN)  # Reset

    @classmethod
    def _find_comment_start(cls, line):
        """Return the index where a trailing comment starts in *line*, or None.

        A comment starts at the first ``#`` or ``//`` that is outside a
        closed quoted string; ``//`` immediately after ``:`` is not counted.
        """
        for found in cls._COMMENT_SCAN.finditer(line):
            if found.group() in ('#', '//'):
                return found.start()
        return None

    def highlight_code_line(self, line, lang):
        """Write one code line at the cursor, colouring a trailing comment green."""
        self.set_text_color(*COLOR_CODE_TEXT)
        comment_start = self._find_comment_start(line)
        if comment_start is None:
            self.write_code_text(line, lang)
            return
        self.write_code_text(line[:comment_start], lang)
        self.set_text_color(*COLOR_CODE_COMMENT)
        self.write(5, safe_text(line[comment_start:]))

    def write_code_text(self, text, lang):
        """Write *text* token by token, colouring strings, keywords and flags.

        *lang* is accepted for interface symmetry but not used: the same
        keyword set applies to every language.
        """
        # Very basic tokenizer: whitespace runs, quoted strings and words are
        # kept as separate tokens; anything else falls between them.
        tokens = re.split(r'(\s+|"[^"]*"|\'[^\']*\'|[-a-zA-Z0-9_]+)', text)
        for token in tokens:
            if not token:
                continue
            if token.startswith(('"', "'")):
                color = COLOR_CODE_STRING
            elif token.lower() in self._CODE_KEYWORDS:
                color = COLOR_CODE_KEYWORD
            elif token.startswith(('-', '$')):
                # Command-line flags and shell variables.
                color = COLOR_CODE_KEYWORD
            else:
                color = COLOR_CODE_TEXT
            self.set_text_color(*color)
            self.write(5, safe_text(token))
def _normalize_meta_key(raw_key):
    """Return *raw_key* lower-cased, without '*' markers and without accents."""
    import unicodedata

    bare = raw_key.replace('*', '').strip().lower()
    decomposed = unicodedata.normalize('NFKD', bare)
    # Dropping the combining marks turns e.g. 'código' into 'codigo'.
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


def _parse_metadata(head_lines):
    """Collect cover metadata from ``key: value`` pairs in *head_lines*.

    Each line may hold several pairs separated by '|'. Recognised keys fill
    the 'code', 'author', 'class' and 'date' entries of the returned dict;
    a later pair overrides an earlier one.
    """
    metadata = {}
    for raw_line in head_lines:
        # Expand placeholders first so template dates are captured too.
        for part in process_variables(raw_line).split('|'):
            if ':' not in part:
                continue
            raw_key, raw_value = part.strip().split(':', 1)
            key = _normalize_meta_key(raw_key)
            value = raw_value.replace('*', '').strip()
            if 'codigo' in key:
                metadata['code'] = value
            elif 'responsavel' in key or 'autor' in key:
                metadata['author'] = value
            elif 'classificacao' in key:
                metadata['class'] = value
            elif 'data' in key:
                metadata['date'] = value
    return metadata


def _extract_title(lines):
    """Return the text of the first '# ' heading, or the default title."""
    for line in lines:
        if line.startswith('# '):
            return line[2:].strip().replace('MANUAL TÉCNICO - ', '')
    return "DOCUMENTAÇÃO TÉCNICA"


def _render_cover(pdf, doc_title, metadata):
    """Draw the cover page: side band, logo, title, metadata and branding."""
    pdf.add_page()
    pdf.set_fill_color(*COLOR_PRIMARY)
    pdf.rect(0, 0, 15, 297, 'F')
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, x=40, y=50, w=100)

    pdf.set_y(140)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 32)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.multi_cell(0, 12, safe_text(doc_title).upper(), align='L')

    # Metadata block: only the entries that were actually found.
    pdf.set_y(180)
    pdf.set_x(30)
    labelled = (
        ('code', "Código"),
        ('class', "Classificação"),
        ('author', "Responsável"),
        ('date', "Data"),
    )
    meta_lines = [f"{label}: {metadata[key]}" for key, label in labelled if key in metadata]
    if meta_lines:
        pdf.set_font('Helvetica', '', 14)
        pdf.set_text_color(80, 80, 80)
        for meta_line in meta_lines:
            pdf.set_x(30)
            pdf.cell(0, 8, safe_text(meta_line), new_x=XPos.LMARGIN, new_y=YPos.NEXT)

    # Branding
    pdf.set_y(-30)
    pdf.set_x(30)
    pdf.set_font('Helvetica', 'B', 10)
    pdf.set_text_color(*COLOR_PRIMARY)
    pdf.cell(0, 10, "iT GUYS SOLUTIONS")


def _split_table_row(row_line):
    """Split a '| a | b |' row into stripped cell texts, keeping empty cells."""
    inner = row_line.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_table(pdf, table_lines):
    """Render buffered markdown table lines; the first line is the header row."""
    rows = [_split_table_row(table_lines[0])]
    for row_line in table_lines[1:]:
        if RE_TABLE_SEP.match(row_line):
            continue
        cells = _split_table_row(row_line)
        if any(cells):
            rows.append(cells)
    if not any(rows[0]) and len(rows) == 1:
        return  # nothing but empty cells: no table to draw

    # Pad short rows so every row has the same number of cells.
    column_count = max(len(row) for row in rows)
    rows = [row + [''] * (column_count - len(row)) for row in rows]

    pdf.ln(5)
    # Reset fill colour and font so the table does not pick up whatever the
    # previously rendered block left active.
    pdf.set_fill_color(255, 255, 255)
    pdf.set_font('Helvetica', '', 11)
    header_style = FontFace(emphasis="BOLD", color=(255, 255, 255), fill_color=COLOR_PRIMARY)
    body_style = FontFace(fill_color=(255, 255, 255), color=COLOR_TEXT_MAIN)
    with pdf.table(text_align="LEFT", line_height=7) as table:
        header_row = table.row()
        for cell_text in rows[0]:
            header_row.cell(clean_markdown(cell_text), style=header_style)
        for body_cells in rows[1:]:
            body_row = table.row()
            for cell_text in body_cells:
                body_row.cell(clean_markdown(cell_text), style=body_style)
    pdf.ln(5)


def _render_text_line(pdf, line):
    """Render one non-empty body line as a list item or a plain paragraph."""
    pdf.set_fill_color(255, 255, 255)
    pdf.set_font('Helvetica', '', 11)
    pdf.set_text_color(*COLOR_TEXT_MAIN)

    item = parse_list_item(line)
    if item is None:
        pdf.set_x(10)
        pdf.multi_cell(0, 6, safe_text(make_links_clickable(line)), markdown=True)
        return

    kind, content, extra = item
    if kind == 'cb':
        marker = "[x] " if extra else "[ ] "  # extra is the checked flag
    elif kind == 'ol':
        marker = f"{extra}. "                 # extra is the item number
    else:
        marker = chr(149) + " "               # byte 0x95 is drawn as a bullet
    pdf.set_x(15)
    pdf.multi_cell(0, 6, safe_text(marker + make_links_clickable(content)), markdown=True)


def convert(md_file, pdf_file):
    """Convert the markdown file *md_file* into a styled PDF at *pdf_file*.

    The document gets a cover page (title from the first '# ' heading,
    metadata from ``key: value`` pairs in the first 20 lines) followed by
    the content: code blocks, callouts, tables, headings, images, list
    items and paragraphs, recognised line by line.

    Raises whatever ``open`` raises when *md_file* cannot be read.
    """
    # The file is read once and reused for metadata, title and content.
    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    metadata = _parse_metadata(lines[:20])  # slicing tolerates short files

    pdf = UXPDF(metadata)
    pdf.set_auto_page_break(auto=False)  # manual positioning on the cover
    pdf.set_title("Manual Técnico iT Guys")

    # --- Cover Page ---
    _render_cover(pdf, _extract_title(lines), metadata)

    # --- Content ---
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=40)  # keep clear of the footer

    code_buffer = []
    in_code = False
    code_lang = ''
    callout_buffer = []
    callout_type = 'INFO'
    table_buffer = []

    def flush_callout():
        # Render and empty the pending callout, if any.
        if callout_buffer:
            pdf.render_callout_block(list(callout_buffer), callout_type)
            callout_buffer.clear()

    def flush_table():
        # Render and empty the pending table, if any.
        if table_buffer:
            _render_table(pdf, list(table_buffer))
            table_buffer.clear()

    for i, raw_line in enumerate(lines):
        original_line = process_variables(raw_line)  # keeps leading spaces
        line = original_line.strip()

        # 1. Code blocks
        if line.startswith('```'):
            if in_code:
                pdf.render_code_block(code_buffer, code_lang)
                code_buffer = []
                in_code = False
            else:
                # Anything still buffered belongs before the code block.
                flush_callout()
                flush_table()
                in_code = True
                code_lang = line.replace('```', '').strip()
            continue
        if in_code:
            code_buffer.append(raw_line.rstrip())  # keep indentation
            continue

        # 2. Callouts
        quote = RE_BLOCKQUOTE.match(original_line)
        if quote:
            flush_table()
            content = quote.group(1)
            c_type, clean_content = parse_callout_type(content)
            has_marker = re.search(r'\[!\w+\]', content) is not None
            # A plain continuation line is classified INFO only because it
            # carries no keyword; it must not split a warning box in two.
            starts_new_block = (
                bool(callout_buffer)
                and c_type != callout_type
                and (has_marker or c_type == 'WARN')
            )
            if starts_new_block:
                flush_callout()
            if not callout_buffer:
                callout_type = c_type
            callout_buffer.append(clean_content)
            continue
        if callout_buffer:
            if not line:
                # A blank line ends the callout unless the quote resumes
                # on the very next line.
                quote_resumes = i + 1 < len(lines) and lines[i + 1].strip().startswith('>')
                if not quote_resumes:
                    flush_callout()
                continue
            # Any other content ends the callout and is processed below.
            flush_callout()

        # 3. Tables
        if line.startswith('|'):
            table_buffer.append(line)
            continue
        flush_table()

        # 4. Headers
        heading = RE_HEADER.match(line) if line.startswith('#') else None
        if heading:
            level = len(heading.group(1))
            text = heading.group(2)
            if level == 1:
                pdf.render_h1(text)
            elif level == 2:
                pdf.render_h2(text)
            else:
                pdf.ln(5)
                pdf.set_font('Helvetica', 'B', 12)
                pdf.set_text_color(*COLOR_TEXT_MAIN)
                pdf.cell(0, 6, safe_text(text), new_x=XPos.LMARGIN, new_y=YPos.NEXT)
            continue
        # A '#...' line that is not a valid heading falls through and is
        # rendered as ordinary text instead of being dropped.

        # 5. Images
        img_match = RE_IMAGE.search(line)
        if img_match:
            # Image paths are resolved relative to the markdown file.
            full_path = os.path.join(os.path.dirname(md_file), img_match.group(2))
            if os.path.exists(full_path):
                pdf.ln(5)
                pdf.image(full_path, w=110, x=(pdf.w - 110) / 2)
                pdf.ln(5)
            else:
                print(f"Warning: image not found, skipped: {full_path}", file=sys.stderr)
            continue

        # 6. Normal Text
        if line:
            _render_text_line(pdf, line)

    # Flush whatever is still buffered when the file ends.
    if in_code and code_buffer:
        pdf.render_code_block(code_buffer, code_lang)  # unclosed fence
    flush_callout()
    flush_table()

    pdf.output(pdf_file)
    print(f"PDF Generated: {pdf_file}")
# Command-line entry point: convert_to_pdf.py <input.md> [output.pdf]
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python convert_to_pdf.py <input.md> [output.pdf]")
        sys.exit(1)
    md_in = cli_args[0]
    if len(cli_args) > 1:
        pdf_out = cli_args[1]
    else:
        # Default output: same path as the input with a .pdf extension.
        pdf_out = os.path.splitext(md_in)[0] + ".pdf"
    convert(md_in, pdf_out)