Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ A Python client library for Microsoft Dataverse that provides a unified interfac
- **📦 Batch Operations**: Send multiple CRUD, table metadata, and SQL query operations in a single HTTP request with optional transactional changesets
- **🔐 Azure Identity**: Built-in authentication using Azure Identity credential providers with comprehensive support
- **🛡️ Error Handling**: Structured exception hierarchy with detailed error context and retry guidance
- **📋 HTTP Diagnostics Logging**: Opt-in file-based logging of all HTTP requests and responses with automatic redaction of sensitive headers (e.g. `Authorization`)

## Getting started

Expand Down Expand Up @@ -672,6 +673,58 @@ For optimal performance in production environments:
| **Production Credentials** | Use `ClientSecretCredential` or `CertificateCredential` for unattended operations |
| **Error Handling** | Implement retry logic for transient errors (`e.is_transient`) |

### HTTP diagnostics logging

Enable file-based HTTP logging to capture all requests and responses for debugging. Sensitive headers (e.g. `Authorization`) are automatically redacted.

```python
from PowerPlatform.Dataverse.client import DataverseClient
from PowerPlatform.Dataverse.core.config import DataverseConfig
from PowerPlatform.Dataverse.core.log_config import LogConfig

log_cfg = LogConfig(
log_folder="./my_logs", # Directory for log files (created if missing)
log_file_prefix="crm_debug", # Filename prefix; timestamp appended automatically
max_body_bytes=4096, # Bytes of body to capture per entry — 0 (default) disables body capture
)
config = DataverseConfig(log_config=log_cfg)
client = DataverseClient("https://yourorg.crm.dynamics.com", credential, config=config)
```

Each log file is timestamped and rotated automatically (default 10 MB per file, 5 backups). Sample output:

```
[2026-04-11T15:27:31-0700] DEBUG >>> REQUEST POST https://yourorg.crm.dynamics.com/api/data/v9.2/accounts
Authorization: [REDACTED]
Accept: application/json
Content-Type: application/json
OData-MaxVersion: 4.0
OData-Version: 4.0
User-Agent: DataverseSvcPythonClient:0.1.0b8
x-ms-client-request-id: 7050c4d0-6bcc-48e3-a310-b4e8fa18ac69
x-ms-correlation-id: 4cace77d-e4ee-4419-8c65-fc62beed6e71
Body: {"name":"Contoso Ltd"}
[2026-04-11T15:27:31-0700] DEBUG <<< RESPONSE 204 POST https://yourorg.crm.dynamics.com/api/data/v9.2/accounts (78.0ms)
Content-Type: application/json; odata.metadata=minimal
OData-Version: 4.0
x-ms-service-request-id: a6d0b6c4-5dd1-47cb-83eb-b6fccf754216
x-ms-ratelimit-burst-remaining-xrm-requests: 7998
```

> **Security note:** This feature is intended for development and debugging only.
> Log files are **plaintext** and may contain PII, sensitive business data, and
> Dataverse record IDs — even with `max_body_bytes=0` (the default), request URLs
> can include filter values and record identifiers.
>
> - **Never enable in production.** If required for production diagnostics, keep
> `max_body_bytes=0` and treat log files as regulated data under your organization's
> data handling policy.
> - **Restrict access.** Set file system permissions so only the process user can
> read log files. Use an encrypted volume or folder in sensitive environments.
> - **Control retention.** Log rotation keeps up to 5 files by default (`backup_count`).
> Delete logs after the debugging session; use secure deletion for regulated data.
> - **Prevent source control leaks.** Add the log folder to `.gitignore` immediately.

### Limitations

- SQL queries are **read-only** and support a limited subset of SQL syntax
Expand Down
54 changes: 51 additions & 3 deletions src/PowerPlatform/Dataverse/core/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
from __future__ import annotations

import time
from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional

import requests

if TYPE_CHECKING:
from ._http_logger import _HttpLogger


class _HttpClient:
"""
Expand All @@ -34,6 +37,9 @@ class _HttpClient:
When provided, all requests use this session (enabling TCP/TLS reuse).
When ``None``, each request uses ``requests.request()`` directly.
:type session: :class:`requests.Session` | None
:param logger: Optional HTTP diagnostics logger. When provided, all requests,
responses, and transport errors are logged with automatic header redaction.
:type logger: ~PowerPlatform.Dataverse.core._http_logger._HttpLogger | None
"""

def __init__(
Expand All @@ -42,11 +48,13 @@ def __init__(
backoff: Optional[float] = None,
timeout: Optional[float] = None,
session: Optional[requests.Session] = None,
logger: Optional["_HttpLogger"] = None,
) -> None:
self.max_attempts = retries if retries is not None else 5
self.base_delay = backoff if backoff is not None else 0.5
self.default_timeout: Optional[float] = timeout
self._session = session
self._logger = logger

def _request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
"""
Expand All @@ -73,12 +81,52 @@ def _request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
m = (method or "").lower()
kwargs["timeout"] = 120 if m in ("post", "delete") else 10

# Log outbound request once (before retry loop).
# Use explicit key presence checks so falsy values (e.g. {}) are logged correctly.
if self._logger is not None:
if "json" in kwargs:
req_body = kwargs["json"]
elif "data" in kwargs:
req_body = kwargs["data"]
else:
req_body = None
self._logger.log_request(
method,
url,
headers=kwargs.get("headers"),
body=req_body,
)

# Small backoff retry on network errors only
requester = self._session.request if self._session is not None else requests.request
for attempt in range(self.max_attempts):
try:
return requester(method, url, **kwargs)
except requests.exceptions.RequestException:
t0 = time.monotonic()
resp = requester(method, url, **kwargs)
elapsed_ms = (time.monotonic() - t0) * 1000

if self._logger is not None:
# Only decode resp.text when body logging is enabled — avoids
# unnecessary overhead for large payloads when max_body_bytes == 0.
resp_body = resp.text if self._logger.body_logging_enabled else None
self._logger.log_response(
method,
url,
status_code=resp.status_code,
headers=dict(resp.headers),
body=resp_body,
elapsed_ms=elapsed_ms,
)
return resp
except requests.exceptions.RequestException as exc:
if self._logger is not None:
self._logger.log_error(
method,
url,
exc,
attempt=attempt + 1,
max_attempts=self.max_attempts,
)
if attempt == self.max_attempts - 1:
raise
delay = self.base_delay * (2**attempt)
Expand Down
169 changes: 169 additions & 0 deletions src/PowerPlatform/Dataverse/core/_http_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Internal HTTP logger that writes redacted request/response diagnostics to local files.
"""

from __future__ import annotations

import json as _json
import logging
import os
import uuid
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler
from typing import Any, Dict, Optional

from .log_config import LogConfig


class _HttpLogger:
    """Structured HTTP diagnostic logger with automatic header redaction.

    Writes redacted request/response/error entries to a timestamped,
    size-rotated log file inside ``config.log_folder``. Header names listed
    in ``config.redacted_headers`` are matched case-insensitively and their
    values replaced with ``[REDACTED]``. Bodies are captured only when
    ``config.max_body_bytes > 0`` and are truncated to that byte budget.

    :param config: Logging configuration (folder, filename prefix, log level,
        rotation limits, body-capture byte limit, redacted header names).
    """

    def __init__(self, config: "LogConfig") -> None:
        self._config = config
        # Pre-lower the redacted header names once for case-insensitive lookup.
        self._redacted = {h.lower() for h in config.redacted_headers}
        # Idempotence guard for close(): flushing an already-closed stream
        # raises ValueError, so close() must remember it already ran.
        self._closed = False

        # Ensure folder exists
        os.makedirs(config.log_folder, exist_ok=True)

        # Build timestamped filename — random suffix guarantees uniqueness even when
        # multiple clients are created within the same second (e.g. in tests).
        ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        filename = f"{config.log_file_prefix}_{ts}_{uuid.uuid4().hex[:6]}.log"
        filepath = os.path.join(config.log_folder, filename)

        # Create a dedicated named logger (not root) to avoid side effects:
        # the unique name guarantees we never attach handlers to — or inherit
        # handlers from — the application's own loggers.
        logger_name = f"PowerPlatform.Dataverse.http.{uuid.uuid4().hex[:8]}"
        self._logger = logging.getLogger(logger_name)
        # Fall back to DEBUG if config.log_level is not a known level name.
        self._logger.setLevel(getattr(logging, config.log_level.upper(), logging.DEBUG))
        self._logger.propagate = False  # don't bubble to root

        self._handler = RotatingFileHandler(
            filepath,
            maxBytes=config.max_file_bytes,
            backupCount=config.backup_count,
            encoding="utf-8",
        )
        formatter = logging.Formatter(
            "[%(asctime)s] %(levelname)s %(message)s",
            datefmt="%Y-%m-%dT%H:%M:%S%z",
        )
        self._handler.setFormatter(formatter)
        self._logger.addHandler(self._handler)

        # Write a one-time config summary so readers know what is and isn't captured.
        body_status = (
            f"{config.max_body_bytes} bytes"
            if config.max_body_bytes > 0
            else "0 (disabled — set max_body_bytes > 0 to enable)"
        )
        redacted = ", ".join(sorted(config.redacted_headers))
        self._logger.debug(
            "\n".join(
                [
                    "=== Dataverse HTTP Diagnostics ===",
                    f"  log_level: {config.log_level.upper()}",
                    f"  max_body_bytes: {body_status}",
                    f"  redacted_headers: {redacted}",
                    "==================================",
                ]
            )
        )

    def log_request(
        self,
        method: str,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        body: Any = None,
    ) -> None:
        """Log an outbound HTTP request.

        :param method: HTTP verb (any case; logged upper-cased).
        :param url: Full request URL.
        :param headers: Request headers; sensitive ones are redacted.
        :param body: Request payload (str, bytes, or JSON-serializable object);
            captured only when body logging is enabled.
        """
        safe_headers = self._redact_headers(headers or {})
        body_text = self._truncate_body(body)
        lines = [f">>> REQUEST {method.upper()} {url}"]
        lines += [f"    {k}: {v}" for k, v in safe_headers.items()]
        if body_text:
            lines.append(f"    Body: {body_text}")
        self._logger.debug("\n".join(lines))

    def log_response(
        self,
        method: str,
        url: str,
        status_code: int,
        headers: Optional[Dict[str, str]] = None,
        body: Any = None,
        elapsed_ms: Optional[float] = None,
    ) -> None:
        """Log an inbound HTTP response.

        :param method: HTTP verb of the originating request.
        :param url: Full request URL.
        :param status_code: HTTP status code of the response.
        :param headers: Response headers; sensitive ones are redacted.
        :param body: Response payload; captured only when body logging is enabled.
        :param elapsed_ms: Round-trip duration in milliseconds, if measured.
        """
        safe_headers = self._redact_headers(headers or {})
        body_text = self._truncate_body(body)
        elapsed_str = f" ({elapsed_ms:.1f}ms)" if elapsed_ms is not None else ""
        lines = [f"<<< RESPONSE {status_code} {method.upper()} {url}{elapsed_str}"]
        lines += [f"    {k}: {v}" for k, v in safe_headers.items()]
        if body_text:
            lines.append(f"    Body: {body_text}")
        elif not self.body_logging_enabled:
            # Body capture is disabled. Emit an explicit marker when the response
            # has a Content-Length > 0 so readers know content exists but was not captured.
            raw_headers = headers or {}
            content_length = raw_headers.get("Content-Length") or raw_headers.get("content-length", "0")
            try:
                has_body = int(content_length) > 0
            except (ValueError, TypeError):
                has_body = False
            if has_body:
                lines.append("    Body: [not captured — set max_body_bytes > 0 to enable]")
        self._logger.debug("\n".join(lines))

    def log_error(
        self,
        method: str,
        url: str,
        error: Exception,
        attempt: Optional[int] = None,
        max_attempts: Optional[int] = None,
    ) -> None:
        """Log an HTTP transport error.

        :param method: HTTP verb of the failed request.
        :param url: Full request URL.
        :param error: The exception raised by the transport layer.
        :param attempt: 1-based retry attempt number, if retries are in use.
        :param max_attempts: Total attempts allowed, if retries are in use.
        """
        attempt_str = f" [attempt {attempt}/{max_attempts}]" if attempt is not None and max_attempts is not None else ""
        self._logger.error(f"!!! ERROR {method.upper()} {url}{attempt_str} - {type(error).__name__}: {error}")

    @property
    def body_logging_enabled(self) -> bool:
        """Return True if body capture is enabled (max_body_bytes > 0)."""
        # Use > 0 (not != 0) so a misconfigured negative value disables
        # capture instead of enabling it with a nonsensical byte budget.
        return self._config.max_body_bytes > 0

    def close(self) -> None:
        """Flush and close the underlying file handler. Safe to call multiple times."""
        # Guard makes repeated calls a no-op: without it, a second call would
        # flush() an already-closed stream, which raises ValueError.
        if self._closed:
            return
        self._closed = True
        self._handler.flush()
        self._handler.close()
        self._logger.removeHandler(self._handler)

    def _redact_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
        # Case-insensitive match against the precomputed lowered name set.
        return {k: ("[REDACTED]" if k.lower() in self._redacted else v) for k, v in headers.items()}

    def _truncate_body(self, body: Any) -> str:
        """Serialize *body* to text and truncate it to the configured byte limit.

        Returns "" when body capture is disabled or *body* is None.
        """
        limit = self._config.max_body_bytes
        # Hoisted disabled-check: skip decode/serialize work entirely when
        # capture is off — avoids needless overhead for large payloads.
        if limit <= 0 or body is None:
            return ""
        if isinstance(body, (bytes, bytearray)):
            text = body.decode("utf-8", errors="replace")
        elif isinstance(body, str):
            text = body
        else:
            try:
                text = _json.dumps(body, default=str, ensure_ascii=False)
            except (TypeError, ValueError):
                text = str(body)

        encoded = text.encode("utf-8")
        if len(encoded) > limit:
            # Truncate on byte boundary, then decode safely to avoid splitting
            # multi-byte characters. Report the true byte length, not char count.
            truncated = encoded[:limit].decode("utf-8", errors="ignore")
            return truncated + f"... [truncated, {len(encoded)} bytes total]"
        return text
12 changes: 11 additions & 1 deletion src/PowerPlatform/Dataverse/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
from .log_config import LogConfig


@dataclass(frozen=True)
Expand All @@ -28,6 +31,10 @@ class DataverseConfig:
:type http_backoff: :class:`float` or None
:param http_timeout: Optional request timeout in seconds. Reserved for future use.
:type http_timeout: :class:`float` or None
:param log_config: Optional local HTTP diagnostics logging configuration.
When provided, all HTTP requests and responses are logged to timestamped
``.log`` files with automatic redaction of sensitive headers.
:type log_config: ~PowerPlatform.Dataverse.core.log_config.LogConfig or None
"""

language_code: int = 1033
Expand All @@ -37,6 +44,8 @@ class DataverseConfig:
http_backoff: Optional[float] = None
http_timeout: Optional[float] = None

log_config: Optional["LogConfig"] = None

@classmethod
def from_env(cls) -> "DataverseConfig":
"""
Expand All @@ -51,4 +60,5 @@ def from_env(cls) -> "DataverseConfig":
http_retries=None,
http_backoff=None,
http_timeout=None,
log_config=None,
)
Loading
Loading