155 lines
4.9 KiB
Python
155 lines
4.9 KiB
Python
"""
Tests for DLP Filter (Data Loss Prevention).

Validates regex patterns for sensitive data redaction.
"""
|
|
|
|
import pytest
|
|
|
|
from src.security.dlp_filter import DLPFilter, sanitize_text
|
|
|
|
|
|
class TestDLPFilter:
    """Tests for DLP Filter functionality.

    Every test that requests the ``dlp`` fixture receives its own
    ``DLPFilter`` instance. Checks that cover several inputs are
    parametrized so each input is collected and reported as a separate
    test case instead of stopping at the first failing input.
    """

    @pytest.fixture
    def dlp(self):
        """Create a fresh DLP filter for each test."""
        return DLPFilter()

    def test_password_redaction_simple(self, dlp):
        """Test simple password redaction."""
        text = "My password is: secret123"
        result = dlp.sanitize(text)

        # The sensitive value should be removed
        assert "secret123" not in result
        # Some form of redaction should be present. Note this check is
        # loose: it also passes when only the "password" keyword survives.
        assert "REDACTED" in result or "password" in result.lower()

    @pytest.mark.parametrize(
        ("text", "sensitive_value"),
        [
            ("password=mypass123", "mypass123"),
            ("senha: minhasenha456", "minhasenha456"),
            ("pwd: abc123def", "abc123def"),
            ('secret="topsecret789"', "topsecret789"),
        ],
    )
    def test_password_redaction_various_formats(self, dlp, text, sensitive_value):
        """Test password redaction in various formats."""
        result = dlp.sanitize(text)

        # Original sensitive value should be gone
        assert sensitive_value not in result, f"Value '{sensitive_value}' still in result"

    def test_api_key_redaction(self, dlp):
        """Test API key redaction."""
        text = "api_key=sk-proj-1234567890abcdefghij"
        result = dlp.sanitize(text)

        assert "sk-proj-1234567890abcdefghij" not in result
        assert "[REDACTED]" in result

    @pytest.mark.parametrize(
        ("text", "raw_value"),
        [
            ("CPF: 123.456.789-00", "123.456.789-00"),
            ("cpf=12345678900", "12345678900"),
            ("O CPF 123.456.789-00 está cadastrado", "123.456.789-00"),
        ],
    )
    def test_cpf_redaction(self, dlp, text, raw_value):
        """Test Brazilian CPF redaction."""
        result = dlp.sanitize(text)

        assert "[CPF_REDACTED]" in result, f"No CPF marker in {result!r}"
        # A marker alone is not enough: the number itself must not leak.
        assert raw_value not in result, f"CPF '{raw_value}' still in result"

    @pytest.mark.parametrize(
        ("text", "raw_value"),
        [
            ("CNPJ: 12.345.678/0001-90", "12.345.678/0001-90"),
            ("cnpj=12345678000190", "12345678000190"),
        ],
    )
    def test_cnpj_redaction(self, dlp, text, raw_value):
        """Test Brazilian CNPJ redaction."""
        result = dlp.sanitize(text)

        assert "[CNPJ_REDACTED]" in result, f"No CNPJ marker in {result!r}"
        # A marker alone is not enough: the number itself must not leak.
        assert raw_value not in result, f"CNPJ '{raw_value}' still in result"

    @pytest.mark.parametrize(
        ("text", "raw_value"),
        [
            ("Card: 4111-1111-1111-1111", "4111-1111-1111-1111"),
            ("Number: 4111 1111 1111 1111", "4111 1111 1111 1111"),
            ("Cartão: 4111111111111111", "4111111111111111"),
        ],
    )
    def test_credit_card_redaction(self, dlp, text, raw_value):
        """Test credit card number redaction."""
        result = dlp.sanitize(text)

        assert "[CARD_REDACTED]" in result, f"No card marker in {result!r}"
        # A marker alone is not enough: the number itself must not leak.
        assert raw_value not in result, f"Card '{raw_value}' still in result"

    def test_email_partial_redaction(self, dlp):
        """Test partial email redaction (keep domain)."""
        text = "Contact joao.silva@empresa.com.br for help"
        result = dlp.sanitize(text)

        assert "joao.silva" not in result
        assert "empresa.com.br" in result  # Domain kept
        assert "[USER]@empresa.com.br" in result

    def test_private_key_redaction(self, dlp):
        """Test SSH private key redaction."""
        # Spelled out line by line so the payload does not depend on how
        # this source file is indented.
        text = (
            "\n"
            "-----BEGIN RSA PRIVATE KEY-----\n"
            "MIIEpAIBAAKCAQEA0Z...\n"
            "-----END RSA PRIVATE KEY-----\n"
        )
        result = dlp.sanitize(text)

        assert "MIIEpAIBAAKCAQEA0Z" not in result
        assert "[KEY_REDACTED]" in result

    def test_normal_text_unchanged(self, dlp):
        """Test that normal text is not modified."""
        text = "Hello, this is a normal support message about server performance."
        result = dlp.sanitize(text)

        assert result == text

    def test_mixed_content(self, dlp):
        """Test text with both sensitive and normal content."""
        # Spelled out line by line so the payload does not depend on how
        # this source file is indented.
        text = (
            "\n"
            "Olá, preciso de ajuda com o servidor srv-app01.\n"
            "senha: admin123\n"
            "O CPF do responsável é 123.456.789-00\n"
            "Por favor, me ajudem!\n"
        )
        result = dlp.sanitize(text)

        # Normal content preserved
        assert "srv-app01" in result
        assert "Por favor" in result

        # Sensitive content redacted
        assert "admin123" not in result
        assert "[CPF_REDACTED]" in result
        assert "123.456.789-00" not in result

    def test_statistics_tracking(self, dlp):
        """Test that statistics are tracked correctly."""
        # Three calls in total; the first two contain sensitive data.
        dlp.sanitize("password=test123")
        dlp.sanitize("CPF: 123.456.789-00")
        dlp.sanitize("Normal text")

        stats = dlp.get_stats()

        assert stats["total_processed"] == 3
        assert stats["total_redacted"] >= 2

    def test_convenience_function(self):
        """Test the sanitize_text convenience function."""
        result = sanitize_text("password=secret")

        assert "secret" not in result
        assert "[REDACTED]" in result

    def test_empty_input(self, dlp):
        """Test handling of empty input."""
        # Empty string comes back as an empty string; None is passed through.
        assert dlp.sanitize("") == ""
        assert dlp.sanitize(None) is None
|