Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a5274d2

Browse files
committed
store: Enable using large Bytes field
1 parent cb79030 commit a5274d2

File tree

3 files changed

+220
-54
lines changed

3 files changed

+220
-54
lines changed

store/postgres/src/relational.rs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ const DELETE_OPERATION_CHUNK_SIZE: usize = 1_000;
5858
/// This also makes sure that we do not put strings into a BTree index that's
5959
/// bigger than Postgres' limit on such strings which is about 2k
6060
pub const STRING_PREFIX_SIZE: usize = 256;
61+
pub const BYTE_ARRAY_PREFIX_SIZE: usize = 64;
6162

6263
lazy_static! {
6364
/// Deprecated; use 'graphman stats account-like' instead. A list of
@@ -1163,7 +1164,9 @@ impl Column {
11631164
/// lengths. Such columns may contain very large values and need to be
11641165
/// handled specially for indexing
11651166
pub fn has_arbitrary_size(&self) -> bool {
1166-
!self.is_primary_key() && !self.is_list() && self.column_type == ColumnType::String
1167+
!self.is_primary_key()
1168+
&& !self.is_list()
1169+
&& (self.column_type == ColumnType::String || self.column_type == ColumnType::Bytes)
11671170
}
11681171

11691172
pub fn is_assignable_from(&self, source: &Self, object: &EntityType) -> Option<String> {
@@ -1486,12 +1489,23 @@ impl Table {
14861489
("gist", index_expr)
14871490
}
14881491
} else {
1489-
// Attributes that are plain strings are indexed with a BTree; but
1490-
// they can be too large for Postgres' limit on values that can go
1491-
// into a BTree. For those attributes, only index the first
1492-
// STRING_PREFIX_SIZE characters
1492+
// Attributes that are plain strings or bytes are
1493+
// indexed with a BTree; but they can be too large for
1494+
// Postgres' limit on values that can go into a BTree.
1495+
// For those attributes, only index the first
1496+
// STRING_PREFIX_SIZE or BYTE_ARRAY_PREFIX_SIZE characters
14931497
let index_expr = if column.has_arbitrary_size() {
1494-
format!("left({}, {})", column.name.quoted(), STRING_PREFIX_SIZE)
1498+
match column.column_type {
1499+
ColumnType::String => {
1500+
format!("left({}, {})", column.name.quoted(), STRING_PREFIX_SIZE)
1501+
}
1502+
ColumnType::Bytes => format!(
1503+
"substring({}, 1, {})",
1504+
column.name.quoted(),
1505+
BYTE_ARRAY_PREFIX_SIZE
1506+
),
1507+
_ => unreachable!("only String and Bytes can have arbitrary size"),
1508+
}
14951509
} else {
14961510
column.name.quoted()
14971511
};
@@ -1895,7 +1909,7 @@ create index attr_1_3_scalar_big_decimal
18951909
create index attr_1_4_scalar_string
18961910
on sgd0815.\"scalar\" using btree(left(\"string\", 256));
18971911
create index attr_1_5_scalar_bytes
1898-
on sgd0815.\"scalar\" using btree(\"bytes\");
1912+
on sgd0815.\"scalar\" using btree(substring(\"bytes\", 1, 64));
18991913
create index attr_1_6_scalar_big_int
19001914
on sgd0815.\"scalar\" using btree(\"big_int\");
19011915
create index attr_1_7_scalar_color

store/postgres/src/relational_queries.rs

Lines changed: 106 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ use std::iter::FromIterator;
3232
use std::str::FromStr;
3333

3434
use crate::relational::{
35-
Column, ColumnType, IdType, Layout, SqlName, Table, PRIMARY_KEY_COLUMN, STRING_PREFIX_SIZE,
35+
Column, ColumnType, IdType, Layout, SqlName, Table, BYTE_ARRAY_PREFIX_SIZE, PRIMARY_KEY_COLUMN,
36+
STRING_PREFIX_SIZE,
3637
};
3738
use crate::sql_value::SqlValue;
3839
use crate::{
@@ -665,40 +666,110 @@ impl Comparison {
665666
}
666667
}
667668

669+
enum PrefixType<'a> {
670+
String(&'a Column),
671+
Bytes(&'a Column),
672+
}
673+
674+
impl<'a> PrefixType<'a> {
675+
fn new(column: &'a Column) -> QueryResult<Self> {
676+
match column.column_type {
677+
ColumnType::String => Ok(PrefixType::String(column)),
678+
ColumnType::Bytes => Ok(PrefixType::Bytes(column)),
679+
_ => Err(constraint_violation!(
680+
"cannot setup prefix comparison for column {} of type {}",
681+
column.name(),
682+
column.column_type().sql_type()
683+
)),
684+
}
685+
}
686+
687+
/// Push the SQL expression for a prefix of values in our column. That
688+
/// should be the same expression that we used when creating an index
689+
/// for the column
690+
fn push_column_prefix(&self, out: &mut AstPass<Pg>) -> QueryResult<()> {
691+
match self {
692+
PrefixType::String(column) => {
693+
out.push_sql("left(");
694+
out.push_identifier(column.name.as_str())?;
695+
out.push_sql(", ");
696+
out.push_sql(&STRING_PREFIX_SIZE.to_string());
697+
out.push_sql(")");
698+
}
699+
PrefixType::Bytes(column) => {
700+
out.push_sql("substring(");
701+
out.push_identifier(column.name.as_str())?;
702+
out.push_sql(", 1, ");
703+
out.push_sql(&BYTE_ARRAY_PREFIX_SIZE.to_string());
704+
out.push_sql(")");
705+
}
706+
}
707+
Ok(())
708+
}
709+
710+
fn is_large(&self, value: &Value) -> Result<bool, ()> {
711+
match (self, value) {
712+
(PrefixType::String(_), Value::String(s)) => Ok(s.len() > STRING_PREFIX_SIZE - 1),
713+
(PrefixType::Bytes(_), Value::Bytes(b)) => Ok(b.len() > BYTE_ARRAY_PREFIX_SIZE - 1),
714+
(PrefixType::Bytes(_), Value::String(s)) => {
715+
let len = if s.starts_with("0x") {
716+
(s.len() - 2) / 2
717+
} else {
718+
s.len() / 2
719+
};
720+
Ok(len > BYTE_ARRAY_PREFIX_SIZE - 1)
721+
}
722+
_ => Err(()),
723+
}
724+
}
725+
}
726+
668727
/// Produce a comparison between the string column `column` and the string
669728
/// value `text` that makes it obvious to Postgres' optimizer that it can
670729
/// first consult the partial index on `left(column, STRING_PREFIX_SIZE)`
671730
/// instead of going straight to a sequential scan of the underlying table.
672731
/// We do this by writing the comparison `column op text` in a way that
673732
/// involves `left(column, STRING_PREFIX_SIZE)`
674-
#[derive(Constructor)]
675733
struct PrefixComparison<'a> {
676734
op: Comparison,
735+
kind: PrefixType<'a>,
677736
column: &'a Column,
678737
text: &'a Value,
679738
}
680739

681740
impl<'a> PrefixComparison<'a> {
682-
fn push_column_prefix(column: &Column, mut out: AstPass<Pg>) -> QueryResult<()> {
683-
out.push_sql("left(");
684-
out.push_identifier(column.name.as_str())?;
685-
out.push_sql(", ");
686-
out.push_sql(&STRING_PREFIX_SIZE.to_string());
687-
out.push_sql(")");
688-
Ok(())
741+
fn new(op: Comparison, column: &'a Column, text: &'a Value) -> QueryResult<Self> {
742+
let kind = PrefixType::new(column)?;
743+
Ok(Self {
744+
op,
745+
kind,
746+
column,
747+
text,
748+
})
689749
}
690750

691751
fn push_value_prefix(&self, mut out: AstPass<Pg>) -> QueryResult<()> {
692-
out.push_sql("left(");
693-
QueryValue(self.text, &self.column.column_type).walk_ast(out.reborrow())?;
694-
out.push_sql(", ");
695-
out.push_sql(&STRING_PREFIX_SIZE.to_string());
696-
out.push_sql(")");
752+
match self.kind {
753+
PrefixType::String(column) => {
754+
out.push_sql("left(");
755+
QueryValue(self.text, &column.column_type).walk_ast(out.reborrow())?;
756+
out.push_sql(", ");
757+
out.push_sql(&STRING_PREFIX_SIZE.to_string());
758+
out.push_sql(")");
759+
}
760+
PrefixType::Bytes(column) => {
761+
out.push_sql("substring(");
762+
QueryValue(self.text, &column.column_type).walk_ast(out.reborrow())?;
763+
out.push_sql(", 1, ");
764+
out.push_sql(&BYTE_ARRAY_PREFIX_SIZE.to_string());
765+
out.push_sql(")");
766+
}
767+
}
697768
Ok(())
698769
}
699770

700771
fn push_prefix_cmp(&self, op: Comparison, mut out: AstPass<Pg>) -> QueryResult<()> {
701-
Self::push_column_prefix(self.column, out.reborrow())?;
772+
self.kind.push_column_prefix(&mut out)?;
702773
out.push_sql(op.as_str());
703774
self.push_value_prefix(out.reborrow())
704775
}
@@ -749,18 +820,16 @@ impl<'a> QueryFragment<Pg> for PrefixComparison<'a> {
749820
//
750821
// For `op` either `<=` or `>=`, we can write (using '<=' as an example)
751822
// uv <= st <=> u < s || u = s && uv <= st
752-
let large = if let Value::String(s) = self.text {
753-
// We need to check the entire string
754-
s.len() > STRING_PREFIX_SIZE - 1
755-
} else {
756-
return Err(constraint_violation!(
823+
let large = self.kind.is_large(&self.text).map_err(|()| {
824+
constraint_violation!(
757825
"column {} has type {} and can't be compared with the value `{}` using {}",
758826
self.column.name(),
759827
self.column.column_type().sql_type(),
760828
self.text,
761829
self.op.as_str()
762-
));
763-
};
830+
)
831+
})?;
832+
764833
match self.op {
765834
Equal => {
766835
if large {
@@ -961,35 +1030,25 @@ impl<'a> QueryFilter<'a> {
9611030
) -> QueryResult<()> {
9621031
let column = self.column(attribute);
9631032

964-
if column.has_arbitrary_size() {
965-
PrefixComparison::new(op, column, value).walk_ast(out.reborrow())?;
1033+
if matches!(value, Value::Null) {
1034+
// Deal with nulls first since they always need special
1035+
// treatment
1036+
out.push_identifier(column.name.as_str())?;
1037+
match op {
1038+
Comparison::Equal => out.push_sql(" is null"),
1039+
Comparison::NotEqual => out.push_sql(" is not null"),
1040+
_ => unreachable!("we only call equals with '=' or '!='"),
1041+
}
1042+
} else if column.has_arbitrary_size() {
1043+
PrefixComparison::new(op, column, value)?.walk_ast(out.reborrow())?;
9661044
} else if column.is_fulltext() {
9671045
out.push_identifier(column.name.as_str())?;
9681046
out.push_sql(Comparison::Match.as_str());
9691047
QueryValue(value, &column.column_type).walk_ast(out)?;
9701048
} else {
9711049
out.push_identifier(column.name.as_str())?;
972-
973-
match value {
974-
Value::String(_)
975-
| Value::BigInt(_)
976-
| Value::Bool(_)
977-
| Value::Bytes(_)
978-
| Value::BigDecimal(_)
979-
| Value::Int(_)
980-
| Value::List(_) => {
981-
out.push_sql(op.as_str());
982-
QueryValue(value, &column.column_type).walk_ast(out)?;
983-
}
984-
Value::Null => {
985-
use Comparison as c;
986-
match op {
987-
c::Equal => out.push_sql(" is null"),
988-
c::NotEqual => out.push_sql(" is not null"),
989-
_ => unreachable!("we only call equals with '=' or '!='"),
990-
}
991-
}
992-
}
1050+
out.push_sql(op.as_str());
1051+
QueryValue(value, &column.column_type).walk_ast(out)?;
9931052
}
9941053
Ok(())
9951054
}
@@ -1004,7 +1063,7 @@ impl<'a> QueryFilter<'a> {
10041063
let column = self.column(attribute);
10051064

10061065
if column.has_arbitrary_size() {
1007-
PrefixComparison::new(op, column, value).walk_ast(out.reborrow())?;
1066+
PrefixComparison::new(op, column, value)?.walk_ast(out.reborrow())?;
10081067
} else {
10091068
out.push_identifier(column.name.as_str())?;
10101069
out.push_sql(op.as_str());
@@ -1086,7 +1145,7 @@ impl<'a> QueryFilter<'a> {
10861145
// Postgres' query optimizer
10871146
// See PrefixComparison for a more detailed discussion of what
10881147
// is happening here
1089-
PrefixComparison::push_column_prefix(column, out.reborrow())?;
1148+
PrefixType::new(column)?.push_column_prefix(&mut out)?;
10901149
} else {
10911150
out.push_identifier(column.name.as_str())?;
10921151
}

store/postgres/tests/store.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,99 @@ fn handle_large_string_with_index() {
15941594
})
15951595
}
15961596

1597+
#[test]
1598+
fn handle_large_bytea_with_index() {
1599+
const NAME: &str = "bin_name";
1600+
const ONE: &str = "large_string_one";
1601+
const TWO: &str = "large_string_two";
1602+
1603+
fn make_insert_op(id: &str, name: &[u8]) -> EntityModification {
1604+
let mut data = Entity::new();
1605+
data.set("id", id);
1606+
data.set(NAME, scalar::Bytes::from(name));
1607+
1608+
let key = EntityKey::data(TEST_SUBGRAPH_ID.clone(), USER.to_owned(), id.to_owned());
1609+
1610+
EntityModification::Insert { key, data }
1611+
}
1612+
1613+
run_test(|store, writable, deployment| async move {
1614+
// We have to produce a massive bytea (240_000 bytes) because the
1615+
// repeated text compresses so well. This leads to an error 'index
1616+
// row size 2784 exceeds btree version 4 maximum 2704' if used with
1617+
// a btree index without size limitation
1618+
let long_bytea = std::iter::repeat("Quo usque tandem")
1619+
.take(15000)
1620+
.collect::<String>()
1621+
.into_bytes();
1622+
let other_bytea = {
1623+
let mut other_bytea = long_bytea.clone();
1624+
other_bytea.push(b'X');
1625+
scalar::Bytes::from(other_bytea.as_slice())
1626+
};
1627+
let long_bytea = scalar::Bytes::from(long_bytea.as_slice());
1628+
1629+
let metrics_registry = Arc::new(MockMetricsRegistry::new());
1630+
let stopwatch_metrics = StopwatchMetrics::new(
1631+
Logger::root(slog::Discard, o!()),
1632+
deployment.hash.clone(),
1633+
metrics_registry.clone(),
1634+
);
1635+
1636+
writable
1637+
.transact_block_operations(
1638+
TEST_BLOCK_3_PTR.clone(),
1639+
None,
1640+
vec![
1641+
make_insert_op(ONE, &long_bytea),
1642+
make_insert_op(TWO, &other_bytea),
1643+
],
1644+
stopwatch_metrics,
1645+
Vec::new(),
1646+
Vec::new(),
1647+
)
1648+
.expect("Failed to insert large text");
1649+
1650+
let query = user_query()
1651+
.first(5)
1652+
.filter(EntityFilter::Equal(
1653+
NAME.to_owned(),
1654+
long_bytea.clone().into(),
1655+
))
1656+
.asc(NAME);
1657+
1658+
let ids = store
1659+
.subgraph_store()
1660+
.find(query)
1661+
.expect("Could not find entity")
1662+
.iter()
1663+
.map(|e| e.id())
1664+
.collect::<Result<Vec<_>, _>>()
1665+
.expect("Found entities without an id");
1666+
1667+
assert_eq!(vec![ONE], ids);
1668+
1669+
// Make sure we check the full string and not just a prefix
1670+
let prefix = scalar::Bytes::from(&long_bytea.as_slice()[..64]);
1671+
let query = user_query()
1672+
.first(5)
1673+
.filter(EntityFilter::LessOrEqual(NAME.to_owned(), prefix.into()))
1674+
.asc(NAME);
1675+
1676+
let ids = store
1677+
.subgraph_store()
1678+
.find(query)
1679+
.expect("Could not find entity")
1680+
.iter()
1681+
.map(|e| e.id())
1682+
.collect::<Result<Vec<_>, _>>()
1683+
.expect("Found entities without an id");
1684+
1685+
// Users with name 'Cindini' and 'Johnton'
1686+
assert_eq!(vec!["2", "1"], ids);
1687+
})
1688+
}
1689+
15971690
#[derive(Clone)]
15981691
struct WindowQuery(EntityQuery, Arc<DieselSubgraphStore>);
15991692

0 commit comments

Comments (0)