diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index a2b3922cedf..5f479997638 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -1,6 +1,7 @@ import abc from collections import defaultdict from itertools import chain +import os from os import environ from threading import RLock from typing import Dict @@ -164,7 +165,10 @@ def process_trace(self, trace: List[Span]) -> Optional[List[Span]]: # only trace sample if we haven't already sampled if root_ctx and root_ctx.sampling_priority is None: + log.info("sampling trace in process_trace %s", chunk_root._pprint()) self.sampler.sample(trace[0]) + else: + log.info("trace %s already sampled with context %s", chunk_root._pprint(), root_ctx) # When stats computation is enabled in the tracer then we can # safely drop the traces. if self._compute_stats_enabled and not self.apm_opt_out: @@ -307,6 +311,8 @@ def __init__( "spans_created": defaultdict(int), "spans_finished": defaultdict(int), } + # Cache debug span name to avoid calling os.getenv on every span finish + self._debug_span_name = os.getenv("_DD_TRACE_DEBUG_SPAN", "run_job") super(SpanAggregator, self).__init__() def __repr__(self) -> str: @@ -332,6 +338,9 @@ def on_span_start(self, span: Span) -> None: self._queue_span_count_metrics("spans_created", "integration_name") def on_span_finish(self, span: Span) -> None: + if self._debug_span_name == span.name: + log.info("finishing debug span in SpanAggregator %s", span._pprint()) + with self._lock: integration_name = span._meta.get(COMPONENT, span._span_api) self._span_metrics["spans_finished"][integration_name] += 1 @@ -342,7 +351,7 @@ def on_span_finish(self, span: Span) -> None: if span.trace_id not in self._traces: log_msg = "finished span not connected to a trace" telemetry.telemetry_writer.add_log(TELEMETRY_LOG_LEVEL.ERROR, log_msg) - log.debug("%s: %s", log_msg, span) + log.info("%s: %s", log_msg, span._pprint()) return trace = self._traces[span.trace_id] @@ -399,6 +408,8 @@ def on_span_finish(self, span: Span) -> None: if span.service: # report extra service name as it may have been set after the span creation by the customer config._add_extra_service(span.service) + if self._debug_span_name == span.name: + log.info("writing debug span in SpanAggregator %s", span._pprint()) self.writer.write(spans) return diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 5c8cc270194..5405737bac0 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -632,8 +632,7 @@ def _on_span_finish(self, span: Span) -> None: dispatch("trace.span_finish", (span,)) - if log.isEnabledFor(logging.DEBUG): - log.debug("finishing span %s (enabled:%s)", span._pprint(), self.enabled) + log.info("finishing span %s (enabled:%s)", span._pprint(), self.enabled) def _log_compat(self, level, msg): """Logs a message for the given level. diff --git a/ddtrace/internal/writer/writer.py b/ddtrace/internal/writer/writer.py index 1f78a1e48e3..896489410a8 100644 --- a/ddtrace/internal/writer/writer.py +++ b/ddtrace/internal/writer/writer.py @@ -237,11 +237,11 @@ def _put(self, data: bytes, headers: Dict[str, str], client: WriterClientBase, n sw.start() with self._conn_lck: if self._conn is None: - log.debug("creating new intake connection to %s with timeout %d", self.intake_url, self._timeout) + log.info("creating new intake connection to %s with timeout %d", self.intake_url, self._timeout) self._conn = get_connection(self._intake_url(client), self._timeout) setattr(self._conn, _HTTPLIB_NO_TRACE_REQUEST, no_trace) try: - log.debug("Sending request: %s %s %s", self.HTTP_METHOD, client.ENDPOINT, headers) + log.info("Sending request: %s %s %s", self.HTTP_METHOD, client.ENDPOINT, headers) self._conn.request( self.HTTP_METHOD, client.ENDPOINT, @@ -249,13 +249,13 @@ def _put(self, data: bytes, headers: Dict[str, str], client: WriterClientBase, n headers, ) resp = self._conn.getresponse() - log.debug("Got response: %s %s", resp.status, resp.reason) + log.info("Got response: %s %s", resp.status, resp.reason) t = sw.elapsed() if t >= self.interval: log_level = logging.WARNING else: - log_level = logging.DEBUG - log.log(log_level, "sent %s in %.5fs to %s", _human_size(len(data)), t, self._intake_endpoint(client)) + log_level = logging.INFO + log.info(log_level, "sent %s in %.5fs to %s", _human_size(len(data)), t, self._intake_endpoint(client)) except Exception: # Always reset the connection when an exception occurs self._reset_connection() @@ -331,12 +331,13 @@ def _write_with_client(self, client, spans=None): self._metrics_dist("writer.accepted.traces") self._metrics["accepted_traces"] += 1 self._set_keep_rate(spans) - + encoding_successful = False try: client.encoder.put(spans) + encoding_successful = True except BufferItemTooLarge as e: payload_size = e.args[0] - log.warning( + log.error( "trace (%db) larger than payload buffer item limit (%db), dropping", payload_size, client.encoder.max_item_size, @@ -345,7 +346,7 @@ def _write_with_client(self, client, spans=None): self._metrics_dist("buffer.dropped.bytes", payload_size, tags=["reason:t_too_big"]) except BufferFull as e: payload_size = e.args[0] - log.warning( + log.error( "trace buffer (%s traces %db/%db) cannot fit trace of size %db, dropping (writer status: %s)", len(client.encoder), client.encoder.size, @@ -360,6 +361,9 @@ def _write_with_client(self, client, spans=None): else: self._metrics_dist("buffer.accepted.traces", 1) self._metrics_dist("buffer.accepted.spans", len(spans)) + if not encoding_successful: + failed_to_encode = ", ".join([span.name for span in spans]) + log.info("failed to encode spans: %s", failed_to_encode) def flush_queue(self, raise_exc: bool = False): try: diff --git a/ddtrace/settings/_config.py b/ddtrace/settings/_config.py index 38d04f1a301..e58abe64aba 100644 --- a/ddtrace/settings/_config.py +++ b/ddtrace/settings/_config.py @@ -478,7 +478,7 @@ def __init__(self): self._trace_writer_connection_reuse = _get_config( "DD_TRACE_WRITER_REUSE_CONNECTIONS", DEFAULT_REUSE_CONNECTIONS, asbool ) - self._trace_writer_log_err_payload = _get_config("_DD_TRACE_WRITER_LOG_ERROR_PAYLOADS", False, asbool) + self._trace_writer_log_err_payload = _get_config("_DD_TRACE_WRITER_LOG_ERROR_PAYLOADS", True, asbool) # TODO: Remove the configurations below. ddtrace.internal.agent.config should be used instead. self._trace_agent_url = _get_config("DD_TRACE_AGENT_URL")