From 6b599f91453fbac67d9285708b3b814c6eed3c58 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 21 May 2024 11:27:47 -0500
Subject: [PATCH 1/3] perf: decrease the threshold at which we use the BQ Storage Read API

---
 google/cloud/bigquery/table.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index ad1253195..f3e3de31c 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -108,7 +108,10 @@
 
 # How many of the total rows need to be downloaded already for us to skip
 # calling the BQ Storage API?
-ALMOST_COMPLETELY_CACHED_RATIO = 0.333
+# In microbenchmarks, the first full page will be about 10 MB. If a page is
+# about 2 MB, then it's the same speed or faster to download all the results
+# using the BigQuery Storage Read API.
+ALMOST_COMPLETELY_CACHED_RATIO = 10 / (10 + 2)
 
 
 def _reference_getter(table):

From 3e53c4cd415483c8e93f255dc2b0a2a93f185313 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 21 May 2024 12:00:10 -0500
Subject: [PATCH 2/3] fix unit test

---
 tests/unit/test_table.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index 099529f95..fcbba03aa 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -2307,9 +2307,17 @@ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self
         rows = [
             {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
             {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+            {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+            {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]},
+            {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]},
+            {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]},
+            {"f": [{"v": "Perry Masonry"}, {"v": "33"}]},
         ]
         first_page = {"pageToken": "next-page", "rows": rows}
-        iterator = self._make_one(first_page_response=first_page, total_rows=6)
+        iterator = self._make_one(
+            first_page_response=first_page, total_rows=len(rows) + 1
+        )
         self.assertTrue(iterator._is_almost_completely_cached())
 
     def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self):

From 54466434a47099cdde54f1b6015bb6a5a66d58d7 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 21 May 2024 14:41:54 -0500
Subject: [PATCH 3/3] update comment

---
 google/cloud/bigquery/table.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index f3e3de31c..6ebb0709a 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -108,10 +108,17 @@
 
 # How many of the total rows need to be downloaded already for us to skip
 # calling the BQ Storage API?
-# In microbenchmarks, the first full page will be about 10 MB. If a page is
-# about 2 MB, then it's the same speed or faster to download all the results
-# using the BigQuery Storage Read API.
-ALMOST_COMPLETELY_CACHED_RATIO = 10 / (10 + 2)
+#
+# In microbenchmarks on 2024-05-21, I (tswast@) measured that at about 2 MB of
+# remaining results, it's faster to use the BQ Storage Read API to download
+# the results than to use jobs.getQueryResults. Since we don't have a good
+# way to know the remaining bytes, we estimate by the remaining number of rows.
+#
+# Unless rows themselves are unusually large, I observe that a single page of
+# results will be around 10 MB. Therefore, the proportion of rows already
+# downloaded should be 10 (first page) / 12 (all results) or more for it to
+# be worth skipping the Read API and finishing with jobs.getQueryResults.
+ALMOST_COMPLETELY_CACHED_RATIO = 0.833333
 
 
 def _reference_getter(table):
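
For readers following the series, here is a minimal, self-contained sketch of
the heuristic these patches tune. The standalone helper and its parameters are
illustrative stand-ins for the RowIterator internals exercised by the tests
above (its _is_almost_completely_cached method), not the library's actual
signatures, and the total_rows <= 0 behavior is an assumption of the sketch.

    # Matches patch 1/3: ~10 MB first page vs. ~2 MB break-even remainder.
    ALMOST_COMPLETELY_CACHED_RATIO = 10 / (10 + 2)  # ~0.833333

    def is_almost_completely_cached(cached_rows: int, total_rows: int) -> bool:
        """Return True when enough rows are already downloaded that finishing
        via jobs.getQueryResults beats opening a BQ Storage Read API session."""
        if total_rows <= 0:
            # Assumption for this sketch: with no known total, don't bother
            # opening a Read API session for whatever remains.
            return True
        return cached_rows / total_rows >= ALMOST_COMPLETELY_CACHED_RATIO

    # Mirrors the updated unit test in patch 2/3: 8 of 9 total rows cached.
    assert is_almost_completely_cached(8, 9)      # 8/9 ~= 0.889 -> skip Read API
    # With only a third of the rows cached, the new threshold now opts for the
    # Read API, where the old 0.333 threshold would have skipped it.
    assert not is_almost_completely_cached(3, 9)  # 3/9 ~= 0.333 < 0.8333

Estimating by row count rather than bytes keeps the check cheap; the rewritten
comment in patch 3/3 acknowledges that the estimate degrades when individual
rows are unusually large.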