Add 0.293 docs #296

Merged
merged 1 commit on Jun 17, 2025
1 change: 1 addition & 0 deletions website/siteConfig.js
@@ -168,6 +168,7 @@ const siteConfig = {
"static/sphinx_immaterial_theme.1c495b8425a51fa92.min.css",
"static/sphinx_immaterial_theme.9d9c7b7aac7cac9f3.min.css",
"static/sphinx_immaterial_theme.8bc14be58eddc636f.min.css",
"static/sphinx_immaterial_theme.d8e0294d85080e1cf.min.css",
"static/presto.css",
"static/pygments.css",
"static/fonts/material-icons.css",
Binary file not shown.
22 changes: 22 additions & 0 deletions website/static/docs/0.293/_sources/admin.rst.txt
@@ -0,0 +1,22 @@
**************
Administration
**************

.. toctree::
    :maxdepth: 1

    admin/web-interface
    admin/tuning
    admin/benchmark-driver
    admin/properties
    admin/properties-session
    admin/spill
    admin/exchange-materialization
    admin/cte-materialization
    admin/resource-groups
    admin/session-property-managers
    admin/function-namespace-managers
    admin/dist-sort
    admin/spark
    admin/verifier
    admin/grafana-cloud
113 changes: 113 additions & 0 deletions website/static/docs/0.293/_sources/admin/benchmark-driver.rst.txt
@@ -0,0 +1,113 @@
================
Benchmark Driver
================

The benchmark driver measures the performance of queries in a Presto cluster.
It is used to continuously evaluate the performance of trunk.

Installation
------------

Download :maven_download:`benchmark-driver`.

Rename the JAR file to ``presto-benchmark-driver`` with the following command
(replace ``*`` with the version number of the downloaded jar file):

.. code-block:: none

    mv presto-benchmark-driver-*-executable.jar presto-benchmark-driver

Use ``chmod +x`` to make the renamed file executable:

.. code-block:: none

    chmod +x presto-benchmark-driver

Suites
------

Create a ``suite.json`` file:

.. code-block:: json

    {
        "file_formats": {
            "query": ["single_.*", "tpch_.*"],
            "schema": [ "tpch_sf(?<scale>.*)_(?<format>.*)_(?<compression>.*?)" ],
            "session": {}
        },
        "legacy_orc": {
            "query": ["single_.*", "tpch_.*"],
            "schema": [ "tpch_sf(?<scale>.*)_(?<format>orc)_(?<compression>.*?)" ],
            "session": {
                "hive.optimized_reader_enabled": "false"
            }
        }
    }

This example contains two suites, ``file_formats`` and ``legacy_orc``. The
``file_formats`` suite will run queries with names matching the regular expression
``single_.*`` or ``tpch_.*`` in all schemas matching the regular expression
``tpch_sf.*_.*_.*?``. The ``legacy_orc`` suite adds a session property to
disable the optimized ORC reader and only runs in the ``tpch_sf.*_orc_.*?``
schema.

Queries
-------

The SQL files are contained in a directory named ``sql`` and must have the
``.sql`` file extension. The name of the query is the name of the file
without the extension.
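
For example, a directory layout along the following lines (file and query names
are illustrative) defines two queries named ``single_varchar`` and ``tpch_q1``:

.. code-block:: none

    suite.json
    sql/
        single_varchar.sql
        tpch_q1.sql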

Output
------

The benchmark driver measures the wall time, the total CPU time used by all
Presto processes, and the CPU time used by the query. For each timing, the
driver reports the median, mean, and standard deviation of the query runs. The
difference between the process and query CPU times is the query overhead, which
normally comes from garbage collection. The following is the output from the
``file_formats`` suite above:

.. code-block:: none

    suite query compression format scale wallTimeP50 wallTimeMean wallTimeStd processCpuTimeP50 processCpuTimeMean processCpuTimeStd queryCpuTimeP50 queryCpuTimeMean queryCpuTimeStd
    ============ ============== =========== ====== ===== =========== ============ =========== ================= ================== ================= =============== ================ ===============
    file_formats single_varchar none orc 100 597 642 101 100840 97180 6373 98296 94610 6628
    file_formats single_bigint none orc 100 238 242 12 33930 34050 697 32452 32417 460
    file_formats single_varchar snappy orc 100 530 525 14 99440 101320 7713 97317 99139 7682
    file_formats single_bigint snappy orc 100 218 238 35 34650 34606 83 33198 33188 83
    file_formats single_varchar zlib orc 100 547 543 38 105680 103373 4038 103029 101021 3773
    file_formats single_bigint zlib orc 100 282 269 23 38990 39030 282 37574 37496 156

Note that the above output has been reformatted for readability from the
standard TSV that the driver outputs.

The driver can add additional columns to the output by extracting values from
the schema name or SQL files. In the suite file above, the schema names
contain named regular expression capturing groups for ``compression``,
``format``, and ``scale``, so if we ran the queries in a catalog containing the
schemas ``tpch_sf100_orc_none``, ``tpch_sf100_orc_snappy``, and
``tpch_sf100_orc_zlib``, we would get the output shown above.
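
For instance, the schema name ``tpch_sf100_orc_snappy`` matches the pattern
``tpch_sf(?<scale>.*)_(?<format>.*)_(?<compression>.*?)`` as follows:

.. code-block:: none

    scale       = 100
    format      = orc
    compression = snappy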

Another way to create additional output columns is by adding tags to the
SQL files. For example, the following SQL file declares two tags,
``projection`` and ``filter``:

.. code-block:: none

    projection=true
    filter=false
    =================
    SELECT SUM(LENGTH(comment))
    FROM lineitem

This will cause the driver to output these values for each run of this query.

CLI Arguments
-------------

The ``presto-benchmark-driver`` program accepts many CLI arguments that control
which suites and queries to run, the number of warm-up runs, and the number
of measurement runs. All of the command line arguments can be listed with the
``--help`` option.
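
For example, running the driver with only the ``--help`` flag prints the full
list of options (the exact set of flags depends on the Presto version):

.. code-block:: none

    ./presto-benchmark-driver --help
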
147 changes: 147 additions & 0 deletions website/static/docs/0.293/_sources/admin/cte-materialization.rst.txt
@@ -0,0 +1,147 @@
===================
CTE Materialization
===================

Common Table Expressions (CTEs) are subqueries that appear in a WITH clause provided by the user.
Their repeated usage in a query can lead to redundant computations, excessive data retrieval, and high resource consumption.

To address this, Presto supports CTE materialization, which allows intermediate CTE results to be reused within the scope of the same query.
Materializing a CTE can improve performance when the same CTE is referenced multiple times in a query, since it avoids recomputing the CTE. However, writing to and reading from disk has its own cost, so the optimization may not be beneficial for very simple CTEs
or CTEs that are referenced only a few times in a query.
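
For example, in the following illustrative query the CTE ``big_orders`` is
referenced twice, so without materialization its aggregation would be computed
twice:

.. code-block:: sql

    WITH big_orders AS (
        SELECT custkey, SUM(totalprice) AS total
        FROM orders
        GROUP BY custkey
    )
    SELECT * FROM big_orders WHERE total > 1000000
    UNION ALL
    SELECT * FROM big_orders WHERE total < 1000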

Materialized CTEs are stored in temporary tables that are bucketed based on random hashing.
To use this feature, the connector used by the query must support the creation of temporary tables. Currently, only the :doc:`/connector/hive` offers this capability.
The query statistics exposed to the event listener (``com.facebook.presto.spi.eventlistener.QueryStatistics#writtenIntermediateBytes``) include a metric for monitoring the bytes written to intermediate storage by temporary tables.

How to use CTE Materialization
------------------------------

The following configurations and session properties enable CTE materialization and modify its settings.

``cte-materialization-strategy``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``string``
* **Allowed values:** ``ALL``, ``NONE``, ``HEURISTIC``, ``HEURISTIC_COMPLEX_QUERIES_ONLY``
* **Default value:** ``NONE``

Specifies the strategy for materializing Common Table Expressions (CTEs) in queries.

``NONE`` - no CTEs will be materialized.

``ALL`` - all CTEs in the query will be materialized.

``HEURISTIC`` - greedily materializes the earliest parent CTE that is repeated at least ``cte_heuristic_replication_threshold`` times.

``HEURISTIC_COMPLEX_QUERIES_ONLY`` - greedily materializes the earliest parent CTE that meets the ``HEURISTIC`` criteria and also contains a join or an aggregation.

Use the ``cte_materialization_strategy`` session property to set on a per-query basis.

``cte-heuristic-replication-threshold``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``integer``
* **Minimum value:** ``0``
* **Default value:** ``4``

When ``cte-materialization-strategy`` is set to ``HEURISTIC`` or ``HEURISTIC_COMPLEX_QUERIES_ONLY``, then CTEs will be materialized if they appear in a query at least ``cte-heuristic-replication-threshold`` number of times.

Use the ``cte_heuristic_replication_threshold`` session property to set on a per-query basis.

``query.cte-partitioning-provider-catalog``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``string``
* **Default value:** ``system``

The name of the catalog that provides custom partitioning for CTE materialization.
This setting specifies which catalog should be used for CTE materialization.

Use the ``cte_partitioning_provider_catalog`` session property to set on a per-query basis.

``cte-filter-and-projection-pushdown-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

Flag to enable or disable the pushdown of common filters and projects into the materialized CTE.

Use the ``cte_filter_and_projection_pushdown_enabled`` session property to set on a per-query basis.

``hive.cte-virtual-bucket-count``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``integer``
* **Default value:** ``128``

The number of buckets to be used for materializing CTEs in queries.
This setting determines how many buckets are used when materializing the CTEs, potentially affecting the performance of queries involving CTE materialization.
A higher number of buckets might improve parallelism but also increases overhead in terms of memory and network communication.

Recommended value: 4 - 10 times the size of the cluster.

Use the ``hive.cte_virtual_bucket_count`` session property to set on a per-query basis.

``hive.temporary-table-storage-format``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``string``
* **Allowed values:** ``PAGEFILE``, ``ORC``, ``DWRF``, ``ALPHA``, ``PARQUET``, ``AVRO``, ``RCBINARY``, ``RCTEXT``, ``SEQUENCEFILE``, ``JSON``, ``TEXTFILE``, ``CSV``
* **Default value:** ``ORC``

This setting determines the data format for temporary tables generated by CTE materialization. The recommended value is ``PAGEFILE`` (see :doc:`/develop/serialized-page`), as it is the most performant format:
it avoids serialization and deserialization during reads and writes, allowing Presto pages to be stored directly.

Use the ``hive.temporary_table_storage_format`` session property to set on a per-query basis.

``hive.temporary-table-compression-codec``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``string``
* **Allowed values:** ``SNAPPY``, ``NONE``, ``GZIP``, ``LZ4``, ``ZSTD``
* **Default value:** ``SNAPPY``

This property defines the compression codec to be used for temporary tables generated by CTE materialization.

Use the ``hive.temporary_table_compression_codec`` session property to set on a per-query basis.

``hive.bucket-function-type-for-cte-materialization``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``string``
* **Allowed values:** ``HIVE_COMPATIBLE``, ``PRESTO_NATIVE``
* **Default value:** ``PRESTO_NATIVE``

This setting specifies the Hash function type for CTE materialization.

Use the ``hive.bucket_function_type_for_cte_materialization`` session property to set on a per-query basis.

``query.max-written-intermediate-bytes``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``DataSize``
* **Default value:** ``2TB``

This setting defines a cap on the amount of data that can be written during CTE Materialization. If a query exceeds this limit, it will fail.

Use the ``query_max_written_intermediate_bytes`` session property to set on a per-query basis.

``enhanced-cte-scheduling-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

Flag to enable or disable enhanced CTE blocking during CTE materialization. Enhanced CTE blocking blocks only the table scan stages that read the materialized CTE, rather than blocking entire plan sections, including the main query, until materialization completes.
This approach can improve latency in scenarios where parts of the query can execute concurrently with CTE materialization writes.

Use the ``enhanced_cte_scheduling_enabled`` session property to set on a per-query basis.
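
As a rough sketch, the session properties above can be combined to turn on CTE
materialization for a single session; the catalog name ``hive`` is an
illustrative choice of a connector that supports temporary tables:

.. code-block:: sql

    -- Materialize CTEs that are referenced at least twice
    SET SESSION cte_materialization_strategy = 'HEURISTIC';
    SET SESSION cte_heuristic_replication_threshold = 2;
    -- Must name a catalog whose connector supports temporary tables
    SET SESSION cte_partitioning_provider_catalog = 'hive';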


How to Participate in Development
---------------------------------

List of issues: https://github.com/prestodb/presto/labels/cte_materialization


17 changes: 17 additions & 0 deletions website/static/docs/0.293/_sources/admin/dist-sort.rst.txt
@@ -0,0 +1,17 @@
================
Distributed sort
================

Distributed sort allows sorting of data that exceeds ``query.max-memory-per-node``.
Distributed sort is controlled via the ``distributed_sort`` session property or the
``distributed-sort`` configuration property set in
``etc/config.properties`` on the coordinator. Distributed sort is enabled by
default.

When distributed sort is enabled, the sort operator executes in parallel on multiple
nodes in the cluster. Partially sorted data from each Presto worker node is then streamed
to a single worker node for a final merge. This technique makes it possible to utilize the memory of multiple
Presto worker nodes for sorting. The primary purpose of distributed sort is to allow sorting
of data sets that do not fit into the memory of a single node. Some performance improvement
can be expected, but it will not scale linearly with the number of nodes, since the
data still needs to be merged by a single node.
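
Since distributed sort is on by default, configuration is only needed to turn it
off. A sketch of both controls:

.. code-block:: none

    # etc/config.properties on the coordinator
    distributed-sort=false

.. code-block:: sql

    -- per-session override
    SET SESSION distributed_sort = false;
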
@@ -0,0 +1,59 @@
========================
Exchange Materialization
========================

Presto allows exchange materialization to support memory-intensive queries.
This mechanism brings MapReduce-style execution to Presto's MPP architecture runtime,
and can be applied together with :doc:`/admin/spill`.

Introduction
------------

As with other MPP databases, Presto leverages RPC shuffle to achieve efficient and
low-latency query execution for join and aggregation. However, RPC shuffle
also requires all the producers and consumers to be executed concurrently until the
query is finished.

To illustrate this, consider the following aggregation query:

.. code-block:: sql

    SELECT custkey, SUM(totalprice)
    FROM orders
    GROUP BY custkey


The following figure demonstrates how this query executes in Presto classic mode:

.. figure:: ../images/rpc_shuffle_execution.png
    :align: center

With exchange materialization, the intermediate shuffle data is written to disk (currently,
it is always a temporary Hive bucketed table). This opens the opportunity for flexible scheduling policies
on the aggregation side, as only a subset of aggregation data needs to be held in memory at the
same time -- this execution strategy is called "grouped execution" in Presto.

.. figure:: ../images/materialized_shuffle_execution.png
    :align: center

Using Exchange Materialization
------------------------------

Exchange materialization can be enabled on a per-query basis by setting the following three session properties:
``exchange_materialization_strategy``, ``partitioning_provider_catalog``, and ``hash_partition_count``:

.. code-block:: sql

    SET SESSION exchange_materialization_strategy='ALL';

    -- Set partitioning_provider_catalog to the Hive connector catalog
    SET SESSION partitioning_provider_catalog='hive';

    -- We recommend setting hash_partition_count to be at least 5X-10X the cluster size
    -- when exchange materialization is enabled.
    SET SESSION hash_partition_count = 4096;

To make it easy for users to enable exchange materialization, the admin can leverage :doc:`/admin/session-property-managers`
to set the session properties automatically based on client tags. The example in :doc:`/admin/session-property-managers`
demonstrates how to automatically enable exchange materialization for queries with the ``high_mem_etl`` tag.
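
For example, a session property manager rule along these lines (a sketch of the
JSON format described in :doc:`/admin/session-property-managers`) applies the
three properties to any query submitted with the ``high_mem_etl`` client tag:

.. code-block:: json

    [
        {
            "clientTags": ["high_mem_etl"],
            "sessionProperties": {
                "exchange_materialization_strategy": "ALL",
                "partitioning_provider_catalog": "hive",
                "hash_partition_count": "4096"
            }
        }
    ]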
