minions-ai-agents/tests/test_dlp.py

155 lines
4.9 KiB
Python

"""
Tests for DLP Filter (Data Loss Prevention).
Validates regex patterns for sensitive data redaction.
"""
import pytest
from src.security.dlp_filter import DLPFilter, sanitize_text
class TestDLPFilter:
    """Behavioural tests for ``DLPFilter`` and the ``sanitize_text`` helper.

    Every test passes text through the filter and checks that the sensitive
    value is gone and/or that the expected placeholder shows up in the
    output. Table-driven checks are parametrized so each sample is collected
    and reported as its own test case instead of stopping at the first
    failing sample.
    """

    @pytest.fixture
    def dlp(self):
        """Return a newly constructed ``DLPFilter`` for the requesting test."""
        return DLPFilter()

    def test_password_redaction_simple(self, dlp):
        """A password written in a plain sentence must not survive sanitizing."""
        text = "My password is: secret123"
        result = dlp.sanitize(text)

        # The sensitive value should be removed.
        assert "secret123" not in result
        # Some form of redaction should be present.
        # NOTE(review): the input itself contains "password", so the second
        # alternative can hold without any placeholder being emitted. Kept
        # loose on purpose -- confirm the exact placeholder for this phrasing
        # before tightening the check.
        assert "REDACTED" in result or "password" in result.lower()

    @pytest.mark.parametrize(
        ("text", "sensitive_value"),
        [
            ("password=mypass123", "mypass123"),
            ("senha: minhasenha456", "minhasenha456"),
            ("pwd: abc123def", "abc123def"),
            ('secret="topsecret789"', "topsecret789"),
        ],
    )
    def test_password_redaction_various_formats(self, dlp, text, sensitive_value):
        """The secret value is removed for each supported keyword/separator."""
        result = dlp.sanitize(text)

        # Original sensitive value should be gone.
        assert sensitive_value not in result, f"Value '{sensitive_value}' still in result"

    def test_api_key_redaction(self, dlp):
        """An ``api_key=...`` assignment is replaced by the generic marker."""
        text = "api_key=sk-proj-1234567890abcdefghij"
        result = dlp.sanitize(text)

        assert "sk-proj-1234567890abcdefghij" not in result
        assert "[REDACTED]" in result

    @pytest.mark.parametrize(
        "text",
        [
            "CPF: 123.456.789-00",
            "cpf=12345678900",
            "O CPF 123.456.789-00 está cadastrado",
        ],
        ids=["formatted", "digits-only", "in-sentence"],
    )
    def test_cpf_redaction(self, dlp, text):
        """Brazilian CPF numbers are replaced by the CPF placeholder."""
        result = dlp.sanitize(text)

        assert "[CPF_REDACTED]" in result

    @pytest.mark.parametrize(
        "text",
        [
            "CNPJ: 12.345.678/0001-90",
            "cnpj=12345678000190",
        ],
        ids=["formatted", "digits-only"],
    )
    def test_cnpj_redaction(self, dlp, text):
        """Brazilian CNPJ numbers are replaced by the CNPJ placeholder."""
        result = dlp.sanitize(text)

        assert "[CNPJ_REDACTED]" in result

    @pytest.mark.parametrize(
        "text",
        [
            "Card: 4111-1111-1111-1111",
            "Number: 4111 1111 1111 1111",
            "Cartão: 4111111111111111",
        ],
        ids=["dashes", "spaces", "contiguous"],
    )
    def test_credit_card_redaction(self, dlp, text):
        """Card numbers are replaced by the card placeholder in each grouping."""
        result = dlp.sanitize(text)

        assert "[CARD_REDACTED]" in result

    def test_email_partial_redaction(self, dlp):
        """The local part of an e-mail is masked while the domain is kept."""
        text = "Contact joao.silva@empresa.com.br for help"
        result = dlp.sanitize(text)

        assert "joao.silva" not in result
        assert "empresa.com.br" in result  # Domain kept
        assert "[USER]@empresa.com.br" in result

    def test_private_key_redaction(self, dlp):
        """A PEM-style RSA private key block is replaced by the key marker."""
        # Spelled out line by line so code formatting cannot alter the value.
        text = (
            "\n"
            "-----BEGIN RSA PRIVATE KEY-----\n"
            "MIIEpAIBAAKCAQEA0Z...\n"
            "-----END RSA PRIVATE KEY-----\n"
        )
        result = dlp.sanitize(text)

        assert "MIIEpAIBAAKCAQEA0Z" not in result
        assert "[KEY_REDACTED]" in result

    def test_normal_text_unchanged(self, dlp):
        """Text without sensitive data must come back exactly as given."""
        text = "Hello, this is a normal support message about server performance."
        result = dlp.sanitize(text)

        assert result == text

    def test_mixed_content(self, dlp):
        """Sensitive fragments are redacted while surrounding text is kept."""
        # Spelled out line by line so code formatting cannot alter the value.
        text = (
            "\n"
            "Olá, preciso de ajuda com o servidor srv-app01.\n"
            "senha: admin123\n"
            "O CPF do responsável é 123.456.789-00\n"
            "Por favor, me ajudem!\n"
        )
        result = dlp.sanitize(text)

        # Normal content preserved.
        assert "srv-app01" in result
        assert "Por favor" in result
        # Sensitive content redacted.
        assert "admin123" not in result
        assert "[CPF_REDACTED]" in result

    def test_statistics_tracking(self, dlp):
        """``get_stats`` counts every call and at least the two redactions."""
        dlp.sanitize("password=test123")
        dlp.sanitize("CPF: 123.456.789-00")
        dlp.sanitize("Normal text")

        stats = dlp.get_stats()

        assert stats["total_processed"] == 3
        # Lower bound only: the test does not pin how many redactions each
        # sensitive sample contributes.
        assert stats["total_redacted"] >= 2

    def test_convenience_function(self):
        """The module-level ``sanitize_text`` helper redacts a password."""
        result = sanitize_text("password=secret")

        assert "secret" not in result
        assert "[REDACTED]" in result

    def test_empty_input(self, dlp):
        """Empty string and ``None`` are passed through unchanged."""
        assert dlp.sanitize("") == ""
        assert dlp.sanitize(None) is None