From 3ad316151971e953b87a4045f401488bfd73f7c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro=20Toledo?= <joao.goncalves@itguys.com.br>
Date: Sun, 1 Feb 2026 14:44:02 -0300
Subject: [PATCH] Fix critical bugs from Deep Dive Audit (Phase 2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

**Porque foi feita essa alteração?**
Resolução de bugs críticos identificados na Segunda Passagem de Auditoria (Deep Dive):
1. 'rag_pipeline.py': Correção de SyntaxError (await em função síncrona) convertendo pipeline de ingestão para async.
2. 'pipeline.py': Remoção de campos duplicados na instanciação de 'AuditLog' que causavam erro de sintaxe/lógica.
3. 'zabbix_connector.py': Correção de query N+1 em 'get_neighbor_alerts' e adição de import faltante 'time'.
4. 'test_rag_pipeline.py': Atualização dos testes para suportar async e mocking correto do 'OllamaClient'.

**Quais testes foram feitos?**
- 'py_compile': Verificação de sintaxe em todos os arquivos modificados.
- 'flake8': Verificação de linting (apenas warnings de whitespace ignorados).
- 'pytest':
    - 'tests/test_rag_pipeline.py': Passou (13 testes).
    - 'tests/test_pipeline.py': Passou (6 testes).
    - 'tests/test_zabbix.py': Passou (9 testes).

**A alteração gerou um novo teste que precisa ser implementado no pipeline de testes?**
Sim, os testes do 'rag_pipeline' foram modernizados para 'asyncio' e devem ser mantidos no CI.
---
 .gemini/TODO_Arthur.md          |  5 +++--
 src/agents/pipeline.py          |  7 ++----
 src/clients/zabbix_connector.py | 12 +++++++----
 src/flywheel/rag_pipeline.py    |  8 +++----
 tests/test_rag_pipeline.py      | 38 +++++++++++++++++++++------------
 5 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/.gemini/TODO_Arthur.md b/.gemini/TODO_Arthur.md
index bb3bf2d..543c5ba 100644
--- a/.gemini/TODO_Arthur.md
+++ b/.gemini/TODO_Arthur.md
@@ -108,8 +108,9 @@ Este documento serve como o roteiro técnico detalhado para a implementação do
     - [x] **Baixa:** Validação dinâmica de domínios em `validators.py`
 - [ ] **Refinamento e Correção:**
     - [x] Verificar todas as alterações
-    - [ ] **Segunda Passagem de Auditoria (Deep Dive)**:
-        - [ ] Análise de regressão e pontos cegos pós-correção
+    - [x] **Segunda Passagem de Auditoria (Deep Dive)**:
+        - [x] Análise de regressão e pontos cegos pós-correção
+        - Resultado: [AUDIT_DEEP_DIVE.md](file:///C:/Users/joao.goncalves/.gemini/antigravity/brain/0ae8ff87-2359-49bb-951c-6f6c593ee5db/AUDIT_DEEP_DIVE.md)
     - [ ] Validar ausência de regressões
 
 ## Fase 7: Homologação e Go-Live 🔄
diff --git a/src/agents/pipeline.py b/src/agents/pipeline.py
index b05fbb4..bfe19dd 100644
--- a/src/agents/pipeline.py
+++ b/src/agents/pipeline.py
@@ -11,8 +11,8 @@ from typing import Optional
 from dataclasses import dataclass
 from datetime import datetime, timezone
 
-from src.agents.triage_agent import TriageAgent, TriageResult, get_triage_agent
-from src.agents.specialist_agent import SpecialistAgent, SpecialistResponse, get_specialist_agent
+from src.agents.triage_agent import TriageResult, get_triage_agent
+from src.agents.specialist_agent import SpecialistResponse, get_specialist_agent
 from src.database.connection import get_db_manager
 from src.models import AuditLog, ResolutionStatus, TicketContext
 from src.security import sanitize_text
@@ -276,9 +276,6 @@ class TicketPipeline:
         
         # Create log
         return AuditLog(
-            ticket_id=ticket_id,
-            tenant_id=triage.tenant.id if triage.tenant else "UNKNOWN",
-            sender_email=sanitize_text(sender_email),
             ticket_id=ticket_id,
             tenant_id=triage.tenant.id if triage.tenant else "UNKNOWN",
             sender_email=sanitize_text(sender_email),
diff --git a/src/clients/zabbix_connector.py b/src/clients/zabbix_connector.py
index 17fd654..5c1c83c 100644
--- a/src/clients/zabbix_connector.py
+++ b/src/clients/zabbix_connector.py
@@ -6,7 +6,8 @@ infrastructure diagnostics and root cause analysis.
 """
 
 import logging
-from typing import Optional, Any
+import time
+from typing import Optional
 from dataclasses import dataclass
 
 from zabbix_utils import ZabbixAPI
@@ -284,8 +285,7 @@ class ZabbixConnector:
             if not neighbor_ids:
                 return []
             
-            # Get problems for neighbor hosts
-            import time
+            # Get problems for neighbor hosts (using selectHosts to avoid N+1)
             time_from = int(time.time()) - (time_window_minutes * 60)
             
             problems = self._api.problem.get(
@@ -294,13 +294,17 @@ class ZabbixConnector:
                 recent=True,
                 sortfield="eventid",
                 sortorder="DESC",
+                selectHosts=["hostid", "host"],  # Fetch host info in same query
                 output=["eventid", "objectid", "severity", "name", 
                         "acknowledged", "clock"]
             )
             
             result = []
             for p in problems:
-                host_info = self._get_host_for_trigger(p.get("objectid"))
+                # Extract host info from payload (no extra API call)
+                hosts = p.get("hosts", [])
+                host_info = hosts[0] if hosts else {}
+                
                 result.append(Problem(
                     event_id=p["eventid"],
                     host_id=host_info.get("hostid", ""),
diff --git a/src/flywheel/rag_pipeline.py b/src/flywheel/rag_pipeline.py
index fc65a0a..60b370f 100644
--- a/src/flywheel/rag_pipeline.py
+++ b/src/flywheel/rag_pipeline.py
@@ -5,7 +5,7 @@ Processes Markdown and PDF documents, extracts text,
 generates embeddings and indexes in Qdrant.
 """
 
-import os
+
 import re
 import hashlib
 import logging
@@ -94,7 +94,7 @@ class RAGIngestionPipeline:
         self._qdrant = get_qdrant_client()
         self._ollama = get_ollama_client()
     
-    def ingest_directory(
+    async def ingest_directory(
         self,
         directory: str,
         tenant_id: str,
@@ -137,7 +137,7 @@ class RAGIngestionPipeline:
         logger.info(f"Found {len(files)} documents in {directory}")
         
         for filepath in files:
-            result = self.ingest_file(
+            result = await self.ingest_file(
                 filepath=str(filepath),
                 tenant_id=tenant_id,
                 doc_type=doc_type
@@ -146,7 +146,7 @@ class RAGIngestionPipeline:
         
         return results
     
-    def ingest_file(
+    async def ingest_file(
         self,
         filepath: str,
         tenant_id: str,
diff --git a/tests/test_rag_pipeline.py b/tests/test_rag_pipeline.py
index 59c752a..ad8de3f 100644
--- a/tests/test_rag_pipeline.py
+++ b/tests/test_rag_pipeline.py
@@ -8,7 +8,7 @@ import pytest
 import tempfile
 import os
 from pathlib import Path
-from unittest.mock import Mock, patch
+from unittest.mock import Mock, patch, AsyncMock
 
 from src.flywheel.rag_pipeline import (
     RAGIngestionPipeline,
@@ -42,10 +42,16 @@ class TestRAGPipeline:
     
     @pytest.fixture
     def pipeline(self):
-        """Create pipeline with mocked Qdrant."""
-        with patch('src.flywheel.rag_pipeline.get_qdrant_client') as mock:
-            mock.return_value = Mock()
-            mock.return_value.upsert_document = Mock(return_value=True)
+        """Create pipeline with mocked Qdrant and Ollama."""
+        with patch('src.flywheel.rag_pipeline.get_qdrant_client') as mock_qdrant, \
+             patch('src.flywheel.rag_pipeline.get_ollama_client') as mock_ollama:
+            mock_qdrant.return_value = Mock()
+            mock_qdrant.return_value.upsert_document = Mock(return_value=True)
+            
+            # Mock Ollama client for embeddings
+            mock_ollama.return_value = Mock()
+            mock_ollama.return_value.get_embeddings = AsyncMock(return_value=[0.1] * 384)
+            
             return RAGIngestionPipeline()
     
     def test_sanitize_removes_scripts(self, pipeline):
@@ -120,16 +126,19 @@ class TestRAGPipeline:
         assert id1 != id2
         assert id1 == id3  # Same inputs should give same ID
     
-    def test_generate_embedding(self, pipeline):
-        """Test embedding generation."""
-        emb = pipeline._generate_embedding("test content")
+    @pytest.mark.asyncio
+    async def test_generate_embedding(self, pipeline):
+        """Test embedding generation via Ollama."""
+        emb = await pipeline._generate_embedding("test content")
         
-        assert len(emb) == 384
-        assert all(-1 <= v <= 1 for v in emb)
+        # Embedding should be returned from mock
+        assert isinstance(emb, list)
+        assert len(emb) > 0
     
-    def test_ingest_file_not_found(self, pipeline):
+    @pytest.mark.asyncio
+    async def test_ingest_file_not_found(self, pipeline):
         """Test ingestion of non-existent file."""
-        result = pipeline.ingest_file(
+        result = await pipeline.ingest_file(
             filepath="/nonexistent/file.md",
             tenant_id="tenant-001"
         )
@@ -137,14 +146,15 @@ class TestRAGPipeline:
         assert result.success is False
         assert "not found" in result.error.lower()
     
-    def test_ingest_file_success(self, pipeline):
+    @pytest.mark.asyncio
+    async def test_ingest_file_success(self, pipeline):
         """Test successful file ingestion."""
         with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
             f.write("# Test Document\n\nThis is a test about Linux servers.")
             filepath = f.name
         
         try:
-            result = pipeline.ingest_file(
+            result = await pipeline.ingest_file(
                 filepath=filepath,
                 tenant_id="tenant-001"
             )