# adobe-to-docusign-migrator/tests/test_security.py

"""
Tests for Phase 12: security — log sanitization and audit trail.
"""

import hashlib
import json
import logging

import pytest

from src.utils.log_sanitizer import (
    SanitizingFilter,
    install_sanitizing_filter,
    redact,
    redact_dict,
)


class TestRedact:
    """Exercise ``redact`` on plain strings: secrets are masked, ordinary text is kept."""

    def test_bearer_token_redacted(self):
        """The token following ``Bearer`` must be replaced by the redaction marker."""
        sanitized = redact(
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.abc.def"
        )
        assert "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9" not in sanitized
        assert "[REDACTED]" in sanitized

    def test_access_token_assignment_redacted(self):
        """A quoted ``access_token: "..."`` value must be masked."""
        sanitized = redact('access_token: "super_secret_value_12345"')
        assert "super_secret_value_12345" not in sanitized
        assert "[REDACTED]" in sanitized

    def test_password_redacted(self):
        """The value of a ``password=...`` assignment must not appear in the output."""
        assert "hunter2supersecure" not in redact("password=hunter2supersecure")

    def test_safe_text_unchanged(self):
        """Text containing no secrets is returned exactly as given."""
        harmless = "Template migrated successfully: NDA v2"
        assert redact(harmless) == harmless

    def test_long_base64_redacted(self):
        """A very long base64-looking run (e.g. a logged PDF payload) is masked."""
        payload = "A" * 600
        sanitized = redact(payload)
        # No 100-character slice of the payload may remain in the output.
        assert "A" * 100 not in sanitized
        assert "[REDACTED]" in sanitized

    def test_short_base64_not_redacted(self):
        """A short base64 string (e.g. an ID) passes through without masking."""
        # "dGVzdA==" is the base64 encoding of "test".
        assert "dGVzdA" in redact("dGVzdA==")


class TestRedactDict:
    """Tests for ``redact_dict``: sensitive keys are masked, other values are kept."""

    def test_token_key_redacted(self):
        """A top-level ``access_token`` value is masked; sibling keys are untouched."""
        d = {"access_token": "secret123", "name": "My Template"}
        result = redact_dict(d)
        assert result["access_token"] == "[REDACTED]"
        assert result["name"] == "My Template"

    def test_nested_dict_redacted(self):
        """Redaction is applied to dicts nested inside the input."""
        d = {"auth": {"token": "secret123", "user": "alice"}}
        result = redact_dict(d)
        assert result["auth"]["token"] == "[REDACTED]"
        assert result["auth"]["user"] == "alice"

    def test_document_base64_redacted(self):
        """The ``documentBase64`` key is masked."""
        d = {"documentBase64": "A" * 200}
        result = redact_dict(d)
        assert result["documentBase64"] == "[REDACTED]"

    def test_list_of_dicts_redacted(self):
        """Redaction is applied to dicts contained in list values."""
        d = {"items": [{"token": "abc123xyz", "id": "1"}]}
        result = redact_dict(d)
        assert result["items"][0]["token"] == "[REDACTED]"
        assert result["items"][0]["id"] == "1"

    def test_safe_dict_unchanged(self):
        """A dict with no sensitive keys comes back with its content unchanged."""
        d = {"template_name": "NDA", "status": "success", "count": 3}
        # Compare against an independent snapshot: if redact_dict mutated its
        # argument and returned that same object, `result == d` would pass
        # trivially and hide the change.
        expected = dict(d)
        result = redact_dict(d)
        assert result == expected


class TestSanitizingFilter:
    """Tests for ``SanitizingFilter`` and ``install_sanitizing_filter``."""

    def test_filter_redacts_log_message(self):
        """Filtering a record rewrites ``record.msg`` so the bearer token is gone."""
        record = logging.LogRecord(
            name="test", level=logging.INFO,
            pathname="", lineno=0,
            msg="Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
            args=(), exc_info=None,
        )
        f = SanitizingFilter()
        f.filter(record)
        assert "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9" not in record.msg

    def test_filter_redacts_args(self):
        """Filtering a record also sanitizes the %-format arguments."""
        record = logging.LogRecord(
            name="test", level=logging.INFO,
            pathname="", lineno=0,
            msg="Token: %s",
            args=("access_token=supersecretvalue123456",),
            exc_info=None,
        )
        f = SanitizingFilter()
        f.filter(record)
        assert "supersecretvalue123456" not in str(record.args)

    def test_install_sanitizing_filter_idempotent(self):
        """Installing twice leaves exactly one ``SanitizingFilter`` on the root logger."""
        root = logging.getLogger()
        try:
            install_sanitizing_filter()
            install_sanitizing_filter()  # second call should not add duplicate
            sanitizing_filters = [
                f for f in root.filters if isinstance(f, SanitizingFilter)
            ]
            assert len(sanitizing_filters) == 1
        finally:
            # Clean up even when the assertion fails, so a broken install does
            # not leave filters on the root logger for later tests.
            for f in [f for f in root.filters if isinstance(f, SanitizingFilter)]:
                root.removeFilter(f)


class TestPdfChecksum:
    """Checks the SHA-256 checksum recorded on a normalized document."""

    def test_checksum_matches_content(self):
        """The stored checksum equals the SHA-256 of the file at ``source_path``.

        Skipped when the sample download folder is not present on disk.
        """
        from pathlib import Path

        from src.services.mapping_service import adobe_folder_to_normalized

        downloads = Path(__file__).parent.parent / "downloads" / "David Tag Demo Form__CBJCHBCA"
        if not downloads.exists():
            pytest.skip("Downloads fixtures not present")

        norm, _ = adobe_folder_to_normalized(str(downloads))
        assert norm.documents, "Expected at least one document"

        doc = norm.documents[0]
        # Recompute the checksum from the source file to verify the stored one.
        pdf_bytes = Path(doc.source_path).read_bytes()
        expected_checksum = hashlib.sha256(pdf_bytes).hexdigest()
        assert doc.checksum_sha256 == expected_checksum