""" Tests for DLP Filter (Data Loss Prevention). Validates regex patterns for sensitive data redaction. """ import pytest from src.security.dlp_filter import DLPFilter, sanitize_text class TestDLPFilter: """Tests for DLP Filter functionality.""" @pytest.fixture def dlp(self): """Create a fresh DLP filter for each test.""" return DLPFilter() def test_password_redaction_simple(self, dlp): """Test simple password redaction.""" text = "My password is: secret123" result = dlp.sanitize(text) # The sensitive value should be removed assert "secret123" not in result # Some form of redaction should be present assert "REDACTED" in result or "password" in result.lower() def test_password_redaction_various_formats(self, dlp): """Test password redaction in various formats.""" cases = [ ("password=mypass123", "mypass123"), ("senha: minhasenha456", "minhasenha456"), ("pwd: abc123def", "abc123def"), ('secret="topsecret789"', "topsecret789"), ] for text, sensitive_value in cases: result = dlp.sanitize(text) # Original sensitive value should be gone assert sensitive_value not in result, f"Value '{sensitive_value}' still in result" def test_api_key_redaction(self, dlp): """Test API key redaction.""" text = "api_key=sk-proj-1234567890abcdefghij" result = dlp.sanitize(text) assert "sk-proj-1234567890abcdefghij" not in result assert "[REDACTED]" in result def test_cpf_redaction(self, dlp): """Test Brazilian CPF redaction.""" cases = [ "CPF: 123.456.789-00", "cpf=12345678900", "O CPF 123.456.789-00 está cadastrado", ] for text in cases: result = dlp.sanitize(text) assert "[CPF_REDACTED]" in result def test_cnpj_redaction(self, dlp): """Test Brazilian CNPJ redaction.""" cases = [ "CNPJ: 12.345.678/0001-90", "cnpj=12345678000190", ] for text in cases: result = dlp.sanitize(text) assert "[CNPJ_REDACTED]" in result def test_credit_card_redaction(self, dlp): """Test credit card number redaction.""" cases = [ "Card: 4111-1111-1111-1111", "Number: 4111 1111 1111 1111", "Cartão: 4111111111111111", ] for text in cases: result = dlp.sanitize(text) assert "[CARD_REDACTED]" in result def test_email_partial_redaction(self, dlp): """Test partial email redaction (keep domain).""" text = "Contact joao.silva@empresa.com.br for help" result = dlp.sanitize(text) assert "joao.silva" not in result assert "empresa.com.br" in result # Domain kept assert "[USER]@empresa.com.br" in result def test_private_key_redaction(self, dlp): """Test SSH private key redaction.""" text = """ -----BEGIN RSA PRIVATE KEY----- MIIEpAIBAAKCAQEA0Z... -----END RSA PRIVATE KEY----- """ result = dlp.sanitize(text) assert "MIIEpAIBAAKCAQEA0Z" not in result assert "[KEY_REDACTED]" in result def test_normal_text_unchanged(self, dlp): """Test that normal text is not modified.""" text = "Hello, this is a normal support message about server performance." result = dlp.sanitize(text) assert result == text def test_mixed_content(self, dlp): """Test text with both sensitive and normal content.""" text = """ Olá, preciso de ajuda com o servidor srv-app01. senha: admin123 O CPF do responsável é 123.456.789-00 Por favor, me ajudem! """ result = dlp.sanitize(text) # Normal content preserved assert "srv-app01" in result assert "Por favor" in result # Sensitive content redacted assert "admin123" not in result assert "[CPF_REDACTED]" in result def test_statistics_tracking(self, dlp): """Test that statistics are tracked correctly.""" dlp.sanitize("password=test123") dlp.sanitize("CPF: 123.456.789-00") dlp.sanitize("Normal text") stats = dlp.get_stats() assert stats["total_processed"] == 3 assert stats["total_redacted"] >= 2 def test_convenience_function(self): """Test the sanitize_text convenience function.""" result = sanitize_text("password=secret") assert "secret" not in result assert "[REDACTED]" in result def test_empty_input(self, dlp): """Test handling of empty input.""" assert dlp.sanitize("") == "" assert dlp.sanitize(None) is None