diff --git a/.bazelrc b/.bazelrc index ad3ca15d1..2521e741d 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,5 +1,18 @@ build --cxxopt=-std=c++17 +build --cxxopt=-fsized-deallocation # Enable matchers in googletest build --define absl=1 +build:asan --linkopt -ldl +build:asan --linkopt -fsanitize=address +build:asan --copt -fsanitize=address +build:asan --copt -DADDRESS_SANITIZER=1 +build:asan --copt -D__SANITIZE_ADDRESS__ +build:asan --test_env=ASAN_OPTIONS=handle_abort=1:allow_addr2line=true:check_initialization_order=true:strict_init_order=true:detect_odr_violation=1 +build:asan --test_env=ASAN_SYMBOLIZER_PATH +build:asan --copt -O1 +build:asan --copt -fno-optimize-sibling-calls +build:asan --linkopt=-fuse-ld=lld + + diff --git a/.bazelversion b/.bazelversion index 4a36342fc..0062ac971 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -3.0.0 +5.0.0 diff --git a/Dockerfile b/Dockerfile index 2561f3a82..eeae61607 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,17 @@ -FROM ubuntu:bionic +FROM gcr.io/gcp-runtimes/ubuntu_20_0_4 ENV DEBIAN_FRONTEND=noninteractive RUN rm -rf /var/lib/apt/lists/* \ && apt-get update --fix-missing -qq \ - && apt-get install -qqy --no-install-recommends ca-certificates tzdata wget git clang-10 patch \ + && apt-get install -qqy --no-install-recommends build-essential ca-certificates tzdata wget git default-jdk clang-12 lld-12 patch \ && apt-get clean && rm -rf /var/lib/apt/lists/* RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.5.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 /bin/bazel -ENV CC=clang-10 -ENV CXX=clang++-10 +ENV CC=clang-12 +ENV CXX=clang++-12 + +RUN mkdir -p /workspace ENTRYPOINT ["/bin/bazel"] diff --git a/base/BUILD b/base/BUILD index 7554034cf..7a547dd68 100644 --- a/base/BUILD +++ b/base/BUILD @@ -1,17 +1,277 @@ -licenses(["notice"]) # Apache v2.0 +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -package(default_visibility = ["//visibility:public"]) +package( + # Under active development, not yet being released. + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) + +cc_library( + name = "handle", + hdrs = ["handle.h"], + deps = [ + "//base/internal:handle", + "//internal:casts", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_library( + name = "kind", + srcs = ["kind.cc"], + hdrs = ["kind.h"], + deps = [ + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "kind_test", + srcs = ["kind_test.cc"], + deps = [ + ":kind", + "//internal:testing", + ], +) + +cc_library( + name = "memory_manager", + srcs = ["memory_manager.cc"], + hdrs = ["memory_manager.h"], + deps = [ + "//base/internal:memory_manager", + "//internal:no_destructor", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:optional", + ], +) + +cc_test( + name = "memory_manager_test", + srcs = ["memory_manager_test.cc"], + deps = [ + ":memory_manager", + "//internal:testing", + ], +) cc_library( - name = "unilib", + name = "operators", + srcs = ["operators.cc"], + hdrs = ["operators.h"], + deps = [ + "//base/internal:operators", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", 
+ "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "operators_test", + srcs = ["operators_test.cc"], + deps = [ + ":operators", + "//internal:testing", + "@com_google_absl//absl/hash:hash_testing", + "@com_google_absl//absl/status", + ], +) + +cc_library( + name = "type", srcs = [ - "unilib.cc", + "type.cc", + "type_factory.cc", + "type_manager.cc", + "type_provider.cc", ], hdrs = [ - "unilib.h", + "type.h", + "type_factory.h", + "type_manager.h", + "type_provider.h", + "type_registry.h", ], deps = [ - "@com_github_google_flatbuffers//:flatbuffers", + ":handle", + ":kind", + ":memory_manager", + "//base/internal:type", + "//internal:casts", + "//internal:no_destructor", + "//internal:rtti", + "//internal:status_macros", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:span", + "@com_google_absl//absl/types:variant", + ], +) + +cc_test( + name = "type_test", + srcs = [ + "type_factory_test.cc", + "type_test.cc", + ], + deps = [ + ":handle", + ":memory_manager", + ":type", + ":value", + "//base/internal:memory_manager_testing", + "//internal:testing", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/hash:hash_testing", + "@com_google_absl//absl/status", + ], +) + +cc_library( + name = "value", + srcs = [ + "value.cc", + "value_factory.cc", + ], + hdrs = [ + "value.h", + "value_factory.h", + ], + deps = [ + ":handle", + ":kind", + ":memory_manager", + ":type", + "//base/internal:value", + "//internal:casts", + "//internal:no_destructor", + "//internal:rtti", + "//internal:status_macros", + "//internal:strings", + "//internal:time", + "//internal:utf8", + "@com_google_absl//absl/base", + 
"@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:btree", + "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:cord", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:variant", + ], +) + +cc_test( + name = "value_test", + srcs = [ + "value_factory_test.cc", + "value_test.cc", + ], + deps = [ + ":memory_manager", + ":type", + ":value", + "//base/internal:memory_manager_testing", + "//internal:strings", + "//internal:testing", + "//internal:time", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/hash:hash_testing", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "ast", + hdrs = [ + "ast.h", + ], + deps = [ + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:variant", + ], +) + +cc_test( + name = "ast_test", + srcs = [ + "ast_test.cc", + ], + deps = [ + ":ast", + "//internal:testing", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:variant", + ], +) + +cc_library( + name = "ast_utility", + srcs = ["ast_utility.cc"], + hdrs = ["ast_utility.h"], + deps = [ + ":ast", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/time", + "@com_google_googleapis//google/api/expr/v1alpha1:checked_cc_proto", + "@com_google_googleapis//google/api/expr/v1alpha1:syntax_cc_proto", + "@com_google_protobuf//:protobuf", + ], +) + +cc_test( + name = "ast_utility_test", + srcs = [ + "ast_utility_test.cc", + ], + deps = [ + 
":ast", + ":ast_utility", + "//internal:testing", + "@com_google_absl//absl/status", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:variant", + "@com_google_googleapis//google/api/expr/v1alpha1:checked_cc_proto", + "@com_google_googleapis//google/api/expr/v1alpha1:syntax_cc_proto", + "@com_google_protobuf//:protobuf", ], ) diff --git a/base/README b/base/README deleted file mode 100644 index 26c974c82..000000000 --- a/base/README +++ /dev/null @@ -1,5 +0,0 @@ -This directory contains forked copies of google libraries not already available -in open source. Generally, these libraries should always be considered -'internal' and subject to change without notice. - -The original copy is located in https://github.com/google/zetasql/tree/master/zetasql/base diff --git a/base/ast.h b/base/ast.h new file mode 100644 index 000000000..a4fcc34ac --- /dev/null +++ b/base/ast.h @@ -0,0 +1,1008 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CEL_CPP_BASE_AST_H_ +#define THIRD_PARTY_CEL_CPP_BASE_AST_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/macros.h" +#include "absl/container/flat_hash_map.h" +#include "absl/time/time.h" +#include "absl/types/variant.h" +namespace cel::ast::internal { + +enum class NullValue { kNullValue = 0 }; + +// Represents a primitive literal. 
+// +// This is similar to the primitives supported in the well-known type +// `google.protobuf.Value`, but richer so it can represent CEL's full range of +// primitives. +// +// Lists and structs are not included as constants as these aggregate types may +// contain [Expr][] elements which require evaluation and are thus not constant. +// +// Examples of constants include: `"hello"`, `b'bytes'`, `1u`, `4.2`, `-2`, +// `true`, `null`. +// +// (-- +// TODO(issues/5): Extend or replace the constant with a canonical Value +// message that can hold any constant object representation supplied or +// produced at evaluation time. +// --) +using Constant = absl::variant; + +class Expr; + +// An identifier expression. e.g. `request`. +class Ident { + public: + explicit Ident(std::string name) : name_(std::move(name)) {} + + void set_name(std::string name) { name_ = std::move(name); } + + const std::string& name() const { return name_; } + + private: + // Required. Holds a single, unqualified identifier, possibly preceded by a + // '.'. + // + // Qualified names are represented by the [Expr.Select][] expression. + std::string name_; +}; + +// A field selection expression. e.g. `request.auth`. +class Select { + public: + Select() {} + Select(std::unique_ptr operand, std::string field, + bool test_only = false) + : operand_(std::move(operand)), + field_(std::move(field)), + test_only_(test_only) {} + + void set_operand(std::unique_ptr operand) { + operand_ = std::move(operand); + } + + void set_field(std::string field) { field_ = std::move(field); } + + void set_test_only(bool test_only) { test_only_ = test_only; } + + const Expr* operand() const { return operand_.get(); } + + Expr& mutable_operand() { + if (operand_ == nullptr) { + operand_ = std::make_unique(); + } + return *operand_; + } + + const std::string& field() const { return field_; } + + bool test_only() const { return test_only_; } + + private: + // Required. The target of the selection expression. 
+ // + // For example, in the select expression `request.auth`, the `request` + // portion of the expression is the `operand`. + std::unique_ptr operand_; + // Required. The name of the field to select. + // + // For example, in the select expression `request.auth`, the `auth` portion + // of the expression would be the `field`. + std::string field_; + // Whether the select is to be interpreted as a field presence test. + // + // This results from the macro `has(request.auth)`. + bool test_only_; +}; + +// A call expression, including calls to predefined functions and operators. +// +// For example, `value == 10`, `size(map_value)`. +// (-- TODO(issues/5): Convert built-in globals to instance methods --) +class Call { + public: + Call() {} + Call(std::unique_ptr target, std::string function, + std::vector args) + : target_(std::move(target)), + function_(std::move(function)), + args_(std::move(args)) {} + + void set_target(std::unique_ptr target) { target_ = std::move(target); } + + void set_function(std::string function) { function_ = std::move(function); } + + void set_args(std::vector args) { args_ = std::move(args); } + + const Expr* target() const { return target_.get(); } + + Expr& mutable_target() { + if (target_ == nullptr) { + target_ = std::make_unique(); + } + return *target_; + } + + const std::string& function() const { return function_; } + + const std::vector& args() const { return args_; } + + std::vector& mutable_args() { return args_; } + + private: + // The target of a method call-style expression. For example, `x` in + // `x.f()`. + std::unique_ptr target_; + // Required. The name of the function or method being called. + std::string function_; + // The arguments. + std::vector args_; +}; + +// A list creation expression. +// +// Lists may either be homogeneous, e.g. `[1, 2, 3]`, or heterogeneous, e.g. 
+// `dyn([1, 'hello', 2.0])` +// (-- +// TODO(issues/5): Determine how to disable heterogeneous types as a feature +// of type-checking rather than through the language construct 'dyn'. +// --) +class CreateList { + public: + CreateList() {} + explicit CreateList(std::vector elements) + : elements_(std::move(elements)) {} + + void set_elements(std::vector elements) { + elements_ = std::move(elements); + } + + const std::vector& elements() const { return elements_; } + + std::vector& mutable_elements() { return elements_; } + + private: + // The elements part of the list. + std::vector elements_; +}; + +// A map or message creation expression. +// +// Maps are constructed as `{'key_name': 'value'}`. Message construction is +// similar, but prefixed with a type name and composed of field ids: +// `types.MyType{field_id: 'value'}`. +class CreateStruct { + public: + // Represents an entry. + class Entry { + public: + using KeyKind = absl::variant>; + Entry() {} + Entry(int64_t id, KeyKind key_kind, std::unique_ptr value) + : id_(id), key_kind_(std::move(key_kind)), value_(std::move(value)) {} + + void set_id(int64_t id) { id_ = id; } + + void set_key_kind(KeyKind key_kind) { key_kind_ = std::move(key_kind); } + + void set_value(std::unique_ptr value) { value_ = std::move(value); } + + int64_t id() const { return id_; } + + const KeyKind& key_kind() const { return key_kind_; } + + KeyKind& mutable_key_kind() { return key_kind_; } + + const Expr* value() const { return value_.get(); } + + Expr& mutable_value() { + if (value_ == nullptr) { + value_ = std::make_unique(); + } + return *value_; + } + + private: + // Required. An id assigned to this node by the parser which is unique + // in a given expression tree. This is used to associate type + // information and other attributes to the node. + int64_t id_; + // The `Entry` key kinds. + KeyKind key_kind_; + // Required. The value assigned to the key. 
+ std::unique_ptr value_; + }; + + CreateStruct() {} + CreateStruct(std::string message_name, std::vector entries) + : message_name_(std::move(message_name)), entries_(std::move(entries)) {} + + void set_message_name(std::string message_name) { + message_name_ = std::move(message_name); + } + + void set_entries(std::vector entries) { + entries_ = std::move(entries); + } + + const std::vector& entries() const { return entries_; } + + std::vector& mutable_entries() { return entries_; } + + private: + // The type name of the message to be created, empty when creating map + // literals. + std::string message_name_; + // The entries in the creation expression. + std::vector entries_; +}; + +// A comprehension expression applied to a list or map. +// +// Comprehensions are not part of the core syntax, but enabled with macros. +// A macro matches a specific call signature within a parsed AST and replaces +// the call with an alternate AST block. Macro expansion happens at parse +// time. +// +// The following macros are supported within CEL: +// +// Aggregate type macros may be applied to all elements in a list or all keys +// in a map: +// +// * `all`, `exists`, `exists_one` - test a predicate expression against +// the inputs and return `true` if the predicate is satisfied for all, +// any, or only one value `list.all(x, x < 10)`. +// * `filter` - test a predicate expression against the inputs and return +// the subset of elements which satisfy the predicate: +// `payments.filter(p, p > 1000)`. +// * `map` - apply an expression to all elements in the input and return the +// output aggregate type: `[1, 2, 3].map(i, i * i)`. +// +// The `has(m.x)` macro tests whether the property `x` is present in struct +// `m`. The semantics of this macro depend on the type of `m`. For proto2 +// messages `has(m.x)` is defined as 'defined, but not set`. For proto3, the +// macro tests whether the property is set to its default. 
For map and struct +// types, the macro tests whether the property `x` is defined on `m`. +// +// Comprehension evaluation can be best visualized as the following +// pseudocode: +// +// ``` +// let `accu_var` = `accu_init` +// for (let `iter_var` in `iter_range`) { +// if (!`loop_condition`) { +// break +// } +// `accu_var` = `loop_step` +// } +// return `result` +// ``` +// +// (-- +// TODO(issues/5): ensure comprehensions work equally well on maps and +// messages. +// --) +class Comprehension { + public: + Comprehension() {} + Comprehension(std::string iter_var, std::unique_ptr iter_range, + std::string accu_var, std::unique_ptr accu_init, + std::unique_ptr loop_condition, + std::unique_ptr loop_step, std::unique_ptr result) + : iter_var_(std::move(iter_var)), + iter_range_(std::move(iter_range)), + accu_var_(std::move(accu_var)), + accu_init_(std::move(accu_init)), + loop_condition_(std::move(loop_condition)), + loop_step_(std::move(loop_step)), + result_(std::move(result)) {} + + void set_iter_var(std::string iter_var) { iter_var_ = std::move(iter_var); } + + void set_iter_range(std::unique_ptr iter_range) { + iter_range_ = std::move(iter_range); + } + + void set_accu_var(std::string accu_var) { accu_var_ = std::move(accu_var); } + + void set_accu_init(std::unique_ptr accu_init) { + accu_init_ = std::move(accu_init); + } + + void set_loop_condition(std::unique_ptr loop_condition) { + loop_condition_ = std::move(loop_condition); + } + + void set_loop_step(std::unique_ptr loop_step) { + loop_step_ = std::move(loop_step); + } + + void set_result(std::unique_ptr result) { result_ = std::move(result); } + + const std::string& iter_var() const { return iter_var_; } + + const Expr* iter_range() const { return iter_range_.get(); } + + Expr& mutable_iter_range() { + if (iter_range_ == nullptr) { + iter_range_ = std::make_unique(); + } + return *iter_range_; + } + + const std::string& accu_var() const { return accu_var_; } + + const Expr* accu_init() const { return 
accu_init_.get(); } + + Expr& mutable_accu_init() { + if (accu_init_ == nullptr) { + accu_init_ = std::make_unique(); + } + return *accu_init_; + } + + const Expr* loop_condition() const { return loop_condition_.get(); } + + Expr& mutable_loop_condition() { + if (loop_condition_ == nullptr) { + loop_condition_ = std::make_unique(); + } + return *loop_condition_; + } + + const Expr* loop_step() const { return loop_step_.get(); } + + Expr& mutable_loop_step() { + if (loop_step_ == nullptr) { + loop_step_ = std::make_unique(); + } + return *loop_step_; + } + + const Expr* result() const { return result_.get(); } + + Expr& mutable_result() { + if (result_ == nullptr) { + result_ = std::make_unique(); + } + return *result_; + } + + private: + // The name of the iteration variable. + std::string iter_var_; + + // The range over which var iterates. + std::unique_ptr iter_range_; + + // The name of the variable used for accumulation of the result. + std::string accu_var_; + + // The initial value of the accumulator. + std::unique_ptr accu_init_; + + // An expression which can contain iter_var and accu_var. + // + // Returns false when the result has been computed and may be used as + // a hint to short-circuit the remainder of the comprehension. + std::unique_ptr loop_condition_; + + // An expression which can contain iter_var and accu_var. + // + // Computes the next value of accu_var. + std::unique_ptr loop_step_; + + // An expression which can contain accu_var. + // + // Computes the result. + std::unique_ptr result_; +}; + +using ExprKind = absl::variant; + +// Analogous to google::api::expr::v1alpha1::Expr +// An abstract representation of a common expression. +// +// Expressions are abstractly represented as a collection of identifiers, +// select statements, function calls, literals, and comprehensions. All +// operators with the exception of the '.' operator are modelled as function +// calls. This makes it easy to represent new operators into the existing AST. 
+// +// All references within expressions must resolve to a [Decl][] provided at +// type-check for an expression to be valid. A reference may either be a bare +// identifier `name` or a qualified identifier `google.api.name`. References +// may either refer to a value or a function declaration. +// +// For example, the expression `google.api.name.startsWith('expr')` references +// the declaration `google.api.name` within a [Expr.Select][] expression, and +// the function declaration `startsWith`. +// Move-only type. +class Expr { + public: + Expr() {} + Expr(int64_t id, ExprKind expr_kind) + : id_(id), expr_kind_(std::move(expr_kind)) {} + + Expr(Expr&& rhs) = default; + Expr& operator=(Expr&& rhs) = default; + + void set_id(int64_t id) { id_ = id; } + + void set_expr_kind(ExprKind expr_kind) { expr_kind_ = std::move(expr_kind); } + + int64_t id() const { return id_; } + + const ExprKind& expr_kind() const { return expr_kind_; } + + ExprKind& mutable_expr_kind() { return expr_kind_; } + + private: + // Required. An id assigned to this node by the parser which is unique in a + // given expression tree. This is used to associate type information and other + // attributes to a node in the parse tree. + int64_t id_ = 0; + // Required. Variants of expressions. + ExprKind expr_kind_; +}; + +// Source information collected at parse time. 
+class SourceInfo { + public: + SourceInfo() {} + SourceInfo(std::string syntax_version, std::string location, + std::vector line_offsets, + absl::flat_hash_map positions, + absl::flat_hash_map macro_calls) + : syntax_version_(std::move(syntax_version)), + location_(std::move(location)), + line_offsets_(std::move(line_offsets)), + positions_(std::move(positions)), + macro_calls_(std::move(macro_calls)) {} + + void set_syntax_version(std::string syntax_version) { + syntax_version_ = std::move(syntax_version); + } + + void set_location(std::string location) { location_ = std::move(location); } + + void set_line_offsets(std::vector line_offsets) { + line_offsets_ = std::move(line_offsets); + } + + void set_positions(absl::flat_hash_map positions) { + positions_ = std::move(positions); + } + + void set_macro_calls(absl::flat_hash_map macro_calls) { + macro_calls_ = std::move(macro_calls); + } + + const std::string& syntax_version() const { return syntax_version_; } + + const std::string& location() const { return location_; } + + const std::vector& line_offsets() const { return line_offsets_; } + + std::vector& mutable_line_offsets() { return line_offsets_; } + + const absl::flat_hash_map& positions() const { + return positions_; + } + + absl::flat_hash_map& mutable_positions() { + return positions_; + } + + const absl::flat_hash_map& macro_calls() const { + return macro_calls_; + } + + absl::flat_hash_map& mutable_macro_calls() { + return macro_calls_; + } + + private: + // The syntax version of the source, e.g. `cel1`. + std::string syntax_version_; + + // The location name. All position information attached to an expression is + // relative to this location. + // + // The location could be a file, UI element, or similar. For example, + // `acme/app/AnvilPolicy.cel`. + std::string location_; + + // Monotonically increasing list of code point offsets where newlines + // `\n` appear. 
+ // + // The line number of a given position is the index `i` where for a given + // `id` the `line_offsets[i] < id_positions[id] < line_offsets[i+1]`. The + // column may be derived from `id_positions[id] - line_offsets[i]`. + // + // TODO(issues/5): clarify this documentation + std::vector line_offsets_; + + // A map from the parse node id (e.g. `Expr.id`) to the code point offset + // within source. + absl::flat_hash_map positions_; + + // A map from the parse node id where a macro replacement was made to the + // call `Expr` that resulted in a macro expansion. + // + // For example, `has(value.field)` is a function call that is replaced by a + // `test_only` field selection in the AST. Likewise, the call + // `list.exists(e, e > 10)` translates to a comprehension expression. The key + // in the map corresponds to the expression id of the expanded macro, and the + // value is the call `Expr` that was replaced. + absl::flat_hash_map macro_calls_; +}; + +// Analogous to google::api::expr::v1alpha1::ParsedExpr +// An expression together with source information as returned by the parser. +// Move-only type. +class ParsedExpr { + public: + ParsedExpr() {} + ParsedExpr(Expr expr, SourceInfo source_info) + : expr_(std::move(expr)), source_info_(std::move(source_info)) {} + + ParsedExpr(ParsedExpr&& rhs) = default; + ParsedExpr& operator=(ParsedExpr&& rhs) = default; + + void set_expr(Expr expr) { expr_ = std::move(expr); } + + void set_source_info(SourceInfo source_info) { + source_info_ = std::move(source_info); + } + + const Expr& expr() const { return expr_; } + + Expr& mutable_expr() { return expr_; } + + const SourceInfo& source_info() const { return source_info_; } + + SourceInfo& mutable_source_info() { return source_info_; } + + private: + // The parsed expression. + Expr expr_; + // The source info derived from input that generated the parsed `expr`. + SourceInfo source_info_; +}; + +// CEL primitive types. +enum class PrimitiveType { + // Unspecified type. 
+ kPrimitiveTypeUnspecified = 0, + // Boolean type. + kBool = 1, + // Int64 type. + // + // Proto-based integer values are widened to int64_t. + kInt64 = 2, + // Uint64 type. + // + // Proto-based unsigned integer values are widened to uint64_t. + kUint64 = 3, + // Double type. + // + // Proto-based float values are widened to double values. + kDouble = 4, + // String type. + kString = 5, + // Bytes type. + kBytes = 6, +}; + +// Well-known protobuf types treated with first-class support in CEL. +// +// TODO(issues/5): represent well-known via abstract types (or however) +// they will be named. +enum class WellKnownType { + // Unspecified type. + kWellKnownTypeUnspecified = 0, + // Well-known protobuf.Any type. + // + // Any types are a polymorphic message type. During type-checking they are + // treated like `DYN` types, but at runtime they are resolved to a specific + // message type specified at evaluation time. + kAny = 1, + // Well-known protobuf.Timestamp type, internally referenced as `timestamp`. + kTimestamp = 2, + // Well-known protobuf.Duration type, internally referenced as `duration`. + kDuration = 3, +}; + +class Type; + +// List type with typed elements, e.g. `list`. +class ListType { + public: + ListType() {} + explicit ListType(std::unique_ptr elem_type) + : elem_type_(std::move(elem_type)) {} + + void set_elem_type(std::unique_ptr elem_type) { + elem_type_ = std::move(elem_type); + } + + const Type* elem_type() const { return elem_type_.get(); } + + Type& mutable_elem_type() { + if (elem_type_ == nullptr) { + elem_type_ = std::make_unique(); + } + return *elem_type_; + } + + private: + std::unique_ptr elem_type_; +}; + +// Map type with parameterized key and value types, e.g. `map`. 
+class MapType { + public: + MapType() {} + MapType(std::unique_ptr key_type, std::unique_ptr value_type) + : key_type_(std::move(key_type)), value_type_(std::move(value_type)) {} + + void set_key_type(std::unique_ptr key_type) { + key_type_ = std::move(key_type); + } + + void set_value_type(std::unique_ptr value_type) { + value_type_ = std::move(value_type); + } + + const Type* key_type() const { return key_type_.get(); } + + const Type* value_type() const { return value_type_.get(); } + + Type& mutable_key_type() { + if (key_type_ == nullptr) { + key_type_ = std::make_unique(); + } + return *key_type_; + } + + Type& mutable_value_type() { + if (value_type_ == nullptr) { + value_type_ = std::make_unique(); + } + return *value_type_; + } + + private: + // The type of the key. + std::unique_ptr key_type_; + + // The type of the value. + std::unique_ptr value_type_; +}; + +// Function type with result and arg types. +// +// (-- +// NOTE: function type represents a lambda-style argument to another function. +// Supported through macros, but not yet a first-class concept in CEL. +// --) +class FunctionType { + public: + FunctionType() {} + FunctionType(std::unique_ptr result_type, std::vector arg_types) + : result_type_(std::move(result_type)), + arg_types_(std::move(arg_types)) {} + + void set_result_type(std::unique_ptr result_type) { + result_type_ = std::move(result_type); + } + + void set_arg_types(std::vector arg_types) { + arg_types_ = std::move(arg_types); + } + + const Type* result_type() const { return result_type_.get(); } + + Type& mutable_result_type() { + if (result_type_ == nullptr) { + result_type_ = std::make_unique(); + } + return *result_type_; + } + + const std::vector& arg_types() const { return arg_types_; } + + std::vector& mutable_arg_types() { return arg_types_; } + + private: + // Result type of the function. + std::unique_ptr result_type_; + + // Argument types of the function. 
+ std::vector arg_types_; +}; + +// Application defined abstract type. +// +// TODO(issues/5): decide on final naming for this. +class AbstractType { + public: + AbstractType(std::string name, std::vector parameter_types) + : name_(std::move(name)), parameter_types_(std::move(parameter_types)) {} + + void set_name(std::string name) { name_ = std::move(name); } + + void set_parameter_types(std::vector parameter_types) { + parameter_types_ = std::move(parameter_types); + } + + const std::string& name() const { return name_; } + + const std::vector& parameter_types() const { return parameter_types_; } + + std::vector& mutable_parameter_types() { return parameter_types_; } + + private: + // The fully qualified name of this abstract type. + std::string name_; + + // Parameter types for this abstract type. + std::vector parameter_types_; +}; + +// Wrapper of a primitive type, e.g. `google.protobuf.Int64Value`. +class PrimitiveTypeWrapper { + public: + explicit PrimitiveTypeWrapper(PrimitiveType type) : type_(std::move(type)) {} + + void set_type(PrimitiveType type) { type_ = std::move(type); } + + const PrimitiveType& type() const { return type_; } + + PrimitiveType& mutable_type() { return type_; } + + private: + PrimitiveType type_; +}; + +// Protocol buffer message type. +// +// The `message_type` string specifies the qualified message type name. For +// example, `google.plus.Profile`. +class MessageType { + public: + explicit MessageType(std::string type) : type_(std::move(type)) {} + + void set_type(std::string type) { type_ = std::move(type); } + + const std::string& type() const { return type_; } + + private: + std::string type_; +}; + +// Type param type. +// +// The `type_param` string specifies the type parameter name, e.g. `list<E>` +// would be a `list_type` whose element type was a `type_param` type +// named `E`. 
+class ParamType { + public: + explicit ParamType(std::string type) : type_(std::move(type)) {} + + void set_type(std::string type) { type_ = std::move(type); } + + const std::string& type() const { return type_; } + + private: + std::string type_; +}; + +// Error type. +// +// During type-checking if an expression is an error, its type is propagated +// as the `ERROR` type. This permits the type-checker to discover other +// errors present in the expression. +enum class ErrorType { kErrorTypeValue = 0 }; + +using DynamicType = absl::monostate; + +using TypeKind = + absl::variant, ErrorType, AbstractType>; + +// Analogous to google::api::expr::v1alpha1::Type. +// Represents a CEL type. +// +// TODO(issues/5): align with value.proto +class Type { + public: + Type() {} + explicit Type(TypeKind type_kind) : type_kind_(std::move(type_kind)) {} + + Type(Type&& rhs) = default; + Type& operator=(Type&& rhs) = default; + + void set_type_kind(TypeKind type_kind) { type_kind_ = std::move(type_kind); } + + const TypeKind& type_kind() const { return type_kind_; } + + TypeKind& mutable_type_kind() { return type_kind_; } + + private: + TypeKind type_kind_; +}; + +// Describes a resolved reference to a declaration. +class Reference { + public: + Reference(std::string name, std::vector overload_id, + Constant value) + : name_(std::move(name)), + overload_id_(std::move(overload_id)), + value_(std::move(value)) {} + + void set_name(std::string name) { name_ = std::move(name); } + + void set_overload_id(std::vector overload_id) { + overload_id_ = std::move(overload_id); + } + + void set_value(Constant value) { value_ = std::move(value); } + + const std::string& name() const { return name_; } + + const std::vector& overload_id() const { return overload_id_; } + + const Constant& value() const { return value_; } + + std::vector& mutable_overload_id() { return overload_id_; } + + Constant& mutable_value() { return value_; } + + private: + // The fully qualified name of the declaration. 
+ std::string name_; + // For references to functions, this is a list of `Overload.overload_id` + // values which match according to typing rules. + // + // If the list has more than one element, overload resolution among the + // presented candidates must happen at runtime because of dynamic types. The + // type checker attempts to narrow down this list as much as possible. + // + // Empty if this is not a reference to a [Decl.FunctionDecl][]. + std::vector overload_id_; + // For references to constants, this may contain the value of the + // constant if known at compile time. + Constant value_; +}; + +// Analogous to google::api::expr::v1alpha1::CheckedExpr +// A CEL expression which has been successfully type checked. +// Move-only type. +class CheckedExpr { + public: + CheckedExpr() {} + CheckedExpr(absl::flat_hash_map reference_map, + absl::flat_hash_map type_map, + SourceInfo source_info, std::string expr_version, Expr expr) + : reference_map_(std::move(reference_map)), + type_map_(std::move(type_map)), + source_info_(std::move(source_info)), + expr_version_(std::move(expr_version)), + expr_(std::move(expr)) {} + + CheckedExpr(CheckedExpr&& rhs) = default; + CheckedExpr& operator=(CheckedExpr&& rhs) = default; + + void set_reference_map( + absl::flat_hash_map reference_map) { + reference_map_ = std::move(reference_map); + } + + void set_type_map(absl::flat_hash_map type_map) { + type_map_ = std::move(type_map); + } + + void set_source_info(SourceInfo source_info) { + source_info_ = std::move(source_info); + } + + void set_expr_version(std::string expr_version) { + expr_version_ = std::move(expr_version); + } + + void set_expr(Expr expr) { expr_ = std::move(expr); } + + const absl::flat_hash_map& reference_map() const { + return reference_map_; + } + + absl::flat_hash_map& mutable_reference_map() { + return reference_map_; + } + + const absl::flat_hash_map& type_map() const { + return type_map_; + } + + absl::flat_hash_map& mutable_type_map() { return 
type_map_; } + + const SourceInfo& source_info() const { return source_info_; } + + SourceInfo& mutable_source_info() { return source_info_; } + + const std::string& expr_version() const { return expr_version_; } + + const Expr& expr() const { return expr_; } + + Expr& mutable_expr() { return expr_; } + + private: + // A map from expression ids to resolved references. + // + // The following entries are in this table: + // + // - An Ident or Select expression is represented here if it resolves to a + // declaration. For instance, if `a.b.c` is represented by + // `select(select(id(a), b), c)`, and `a.b` resolves to a declaration, + // while `c` is a field selection, then the reference is attached to the + // nested select expression (but not to the id or or the outer select). + // In turn, if `a` resolves to a declaration and `b.c` are field selections, + // the reference is attached to the ident expression. + // - Every Call expression has an entry here, identifying the function being + // called. + // - Every CreateStruct expression for a message has an entry, identifying + // the message. + absl::flat_hash_map reference_map_; + // A map from expression ids to types. + // + // Every expression node which has a type different than DYN has a mapping + // here. If an expression has type DYN, it is omitted from this map to save + // space. + absl::flat_hash_map type_map_; + // The source info derived from input that generated the parsed `expr` and + // any optimizations made during the type-checking pass. + SourceInfo source_info_; + // The expr version indicates the major / minor version number of the `expr` + // representation. + // + // The most common reason for a version change will be to indicate to the CEL + // runtimes that transformations have been performed on the expr during static + // analysis. In some cases, this will save the runtime the work of applying + // the same or similar transformations prior to evaluation. 
+ std::string expr_version_; + // The checked expression. Semantically equivalent to the parsed `expr`, but + // may have structural differences. + Expr expr_; +}; + +} // namespace cel::ast::internal + +#endif // THIRD_PARTY_CEL_CPP_BASE_AST_H_ diff --git a/base/ast_test.cc b/base/ast_test.cc new file mode 100644 index 000000000..8f1bf3bd7 --- /dev/null +++ b/base/ast_test.cc @@ -0,0 +1,254 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "base/ast.h" + +#include +#include + +#include "absl/memory/memory.h" +#include "absl/types/variant.h" +#include "internal/testing.h" + +namespace cel { +namespace ast { +namespace internal { +namespace { +TEST(AstTest, ExprConstructionConstant) { + Expr expr(1, true); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + const auto& constant = absl::get(expr.expr_kind()); + ASSERT_TRUE(absl::holds_alternative(constant)); + ASSERT_TRUE(absl::get(constant)); +} + +TEST(AstTest, ExprConstructionIdent) { + Expr expr(1, Ident("var")); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + ASSERT_EQ(absl::get(expr.expr_kind()).name(), "var"); +} + +TEST(AstTest, ExprConstructionSelect) { + Expr expr(1, Select(std::make_unique(2, Ident("var")), "field")); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind()); + ASSERT_TRUE(absl::holds_alternative(select.operand()->expr_kind())); + ASSERT_EQ(absl::get(select.operand()->expr_kind()).name(), "var"); + 
ASSERT_EQ(select.field(), "field"); +} + +TEST(AstTest, SelectMutableOperand) { + Select select; + select.mutable_operand().set_expr_kind(Ident("var")); + ASSERT_TRUE(absl::holds_alternative(select.operand()->expr_kind())); + ASSERT_EQ(absl::get(select.operand()->expr_kind()).name(), "var"); +} + +TEST(AstTest, ExprConstructionCall) { + Expr expr(1, Call(std::make_unique(2, Ident("var")), "function", {})); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + const auto& call = absl::get(expr.expr_kind()); + ASSERT_TRUE(absl::holds_alternative(call.target()->expr_kind())); + ASSERT_EQ(absl::get(call.target()->expr_kind()).name(), "var"); + ASSERT_EQ(call.function(), "function"); + ASSERT_TRUE(call.args().empty()); +} + +TEST(AstTest, CallMutableTarget) { + Call call; + call.mutable_target().set_expr_kind(Ident("var")); + ASSERT_TRUE(absl::holds_alternative(call.target()->expr_kind())); + ASSERT_EQ(absl::get(call.target()->expr_kind()).name(), "var"); +} + +TEST(AstTest, ExprConstructionCreateList) { + CreateList create_list; + create_list.mutable_elements().emplace_back(Expr(2, Ident("var1"))); + create_list.mutable_elements().emplace_back(Expr(3, Ident("var2"))); + create_list.mutable_elements().emplace_back(Expr(4, Ident("var3"))); + Expr expr(1, std::move(create_list)); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + const auto& elements = absl::get(expr.expr_kind()).elements(); + ASSERT_EQ(absl::get(elements[0].expr_kind()).name(), "var1"); + ASSERT_EQ(absl::get(elements[1].expr_kind()).name(), "var2"); + ASSERT_EQ(absl::get(elements[2].expr_kind()).name(), "var3"); +} + +TEST(AstTest, ExprConstructionCreateStruct) { + CreateStruct create_struct; + create_struct.set_message_name("name"); + create_struct.mutable_entries().emplace_back(CreateStruct::Entry( + 1, "key1", std::make_unique(2, Ident("value1")))); + create_struct.mutable_entries().emplace_back(CreateStruct::Entry( + 3, "key2", std::make_unique(4, Ident("value2")))); + 
create_struct.mutable_entries().emplace_back( + CreateStruct::Entry(5, std::make_unique(6, Ident("key3")), + std::make_unique(6, Ident("value3")))); + Expr expr(1, std::move(create_struct)); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + const auto& entries = absl::get(expr.expr_kind()).entries(); + ASSERT_EQ(absl::get(entries[0].key_kind()), "key1"); + ASSERT_EQ(absl::get(entries[0].value()->expr_kind()).name(), "value1"); + ASSERT_EQ(absl::get(entries[1].key_kind()), "key2"); + ASSERT_EQ(absl::get(entries[1].value()->expr_kind()).name(), "value2"); + ASSERT_EQ( + absl::get( + absl::get>(entries[2].key_kind())->expr_kind()) + .name(), + "key3"); + ASSERT_EQ(absl::get(entries[2].value()->expr_kind()).name(), "value3"); +} + +TEST(AstTest, CreateStructEntryMutableValue) { + CreateStruct::Entry entry; + entry.mutable_value().set_expr_kind(Ident("var")); + ASSERT_TRUE(absl::holds_alternative(entry.value()->expr_kind())); + ASSERT_EQ(absl::get(entry.value()->expr_kind()).name(), "var"); +} + +TEST(AstTest, ExprConstructionComprehension) { + Comprehension comprehension; + comprehension.set_iter_var("iter_var"); + comprehension.set_iter_range(std::make_unique(1, Ident("range"))); + comprehension.set_accu_var("accu_var"); + comprehension.set_accu_init(std::make_unique(2, Ident("init"))); + comprehension.set_loop_condition(std::make_unique(3, Ident("cond"))); + comprehension.set_loop_step(std::make_unique(4, Ident("step"))); + comprehension.set_result(std::make_unique(5, Ident("result"))); + Expr expr(6, std::move(comprehension)); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + auto& created_expr = absl::get(expr.expr_kind()); + ASSERT_EQ(created_expr.iter_var(), "iter_var"); + ASSERT_EQ(absl::get(created_expr.iter_range()->expr_kind()).name(), + "range"); + ASSERT_EQ(created_expr.accu_var(), "accu_var"); + ASSERT_EQ(absl::get(created_expr.accu_init()->expr_kind()).name(), + "init"); + 
ASSERT_EQ(absl::get(created_expr.loop_condition()->expr_kind()).name(), + "cond"); + ASSERT_EQ(absl::get(created_expr.loop_step()->expr_kind()).name(), + "step"); + ASSERT_EQ(absl::get(created_expr.result()->expr_kind()).name(), + "result"); +} + +TEST(AstTest, ComprehensionMutableConstruction) { + Comprehension comprehension; + comprehension.mutable_iter_range().set_expr_kind(Ident("var")); + ASSERT_TRUE( + absl::holds_alternative(comprehension.iter_range()->expr_kind())); + ASSERT_EQ(absl::get(comprehension.iter_range()->expr_kind()).name(), + "var"); + comprehension.mutable_accu_init().set_expr_kind(Ident("var")); + ASSERT_TRUE( + absl::holds_alternative(comprehension.accu_init()->expr_kind())); + ASSERT_EQ(absl::get(comprehension.accu_init()->expr_kind()).name(), + "var"); + comprehension.mutable_loop_condition().set_expr_kind(Ident("var")); + ASSERT_TRUE(absl::holds_alternative( + comprehension.loop_condition()->expr_kind())); + ASSERT_EQ( + absl::get(comprehension.loop_condition()->expr_kind()).name(), + "var"); + comprehension.mutable_loop_step().set_expr_kind(Ident("var")); + ASSERT_TRUE( + absl::holds_alternative(comprehension.loop_step()->expr_kind())); + ASSERT_EQ(absl::get(comprehension.loop_step()->expr_kind()).name(), + "var"); + comprehension.mutable_result().set_expr_kind(Ident("var")); + ASSERT_TRUE( + absl::holds_alternative(comprehension.result()->expr_kind())); + ASSERT_EQ(absl::get(comprehension.result()->expr_kind()).name(), + "var"); +} + +TEST(AstTest, ExprMoveTest) { + Expr expr(1, Ident("var")); + ASSERT_TRUE(absl::holds_alternative(expr.expr_kind())); + ASSERT_EQ(absl::get(expr.expr_kind()).name(), "var"); + Expr new_expr = std::move(expr); + ASSERT_TRUE(absl::holds_alternative(new_expr.expr_kind())); + ASSERT_EQ(absl::get(new_expr.expr_kind()).name(), "var"); +} + +TEST(AstTest, ParsedExpr) { + ParsedExpr parsed_expr; + parsed_expr.set_expr(Expr(1, Ident("name"))); + auto& source_info = parsed_expr.mutable_source_info(); + 
source_info.set_syntax_version("syntax_version"); + source_info.set_location("location"); + source_info.set_line_offsets({1, 2, 3}); + source_info.set_positions({{1, 1}, {2, 2}}); + ASSERT_TRUE(absl::holds_alternative(parsed_expr.expr().expr_kind())); + ASSERT_EQ(absl::get(parsed_expr.expr().expr_kind()).name(), "name"); + ASSERT_EQ(parsed_expr.source_info().syntax_version(), "syntax_version"); + ASSERT_EQ(parsed_expr.source_info().location(), "location"); + EXPECT_THAT(parsed_expr.source_info().line_offsets(), + testing::UnorderedElementsAre(1, 2, 3)); + EXPECT_THAT( + parsed_expr.source_info().positions(), + testing::UnorderedElementsAre(testing::Pair(1, 1), testing::Pair(2, 2))); +} + +TEST(AstTest, ListTypeMutableConstruction) { + ListType type; + type.mutable_elem_type() = Type(PrimitiveType::kBool); + EXPECT_EQ(absl::get(type.elem_type()->type_kind()), + PrimitiveType::kBool); +} + +TEST(AstTest, MapTypeMutableConstruction) { + MapType type; + type.mutable_key_type() = Type(PrimitiveType::kBool); + type.mutable_value_type() = Type(PrimitiveType::kBool); + EXPECT_EQ(absl::get(type.key_type()->type_kind()), + PrimitiveType::kBool); + EXPECT_EQ(absl::get(type.value_type()->type_kind()), + PrimitiveType::kBool); +} + +TEST(AstTest, FunctionTypeMutableConstruction) { + FunctionType type; + type.mutable_result_type() = Type(PrimitiveType::kBool); + EXPECT_EQ(absl::get(type.result_type()->type_kind()), + PrimitiveType::kBool); +} + +TEST(AstTest, CheckedExpr) { + CheckedExpr checked_expr; + checked_expr.set_expr(Expr(1, Ident("name"))); + auto& source_info = checked_expr.mutable_source_info(); + source_info.set_syntax_version("syntax_version"); + source_info.set_location("location"); + source_info.set_line_offsets({1, 2, 3}); + source_info.set_positions({{1, 1}, {2, 2}}); + checked_expr.set_expr_version("expr_version"); + checked_expr.mutable_type_map().insert( + {1, Type(PrimitiveType(PrimitiveType::kBool))}); + 
ASSERT_TRUE(absl::holds_alternative(checked_expr.expr().expr_kind())); + ASSERT_EQ(absl::get(checked_expr.expr().expr_kind()).name(), "name"); + ASSERT_EQ(checked_expr.source_info().syntax_version(), "syntax_version"); + ASSERT_EQ(checked_expr.source_info().location(), "location"); + EXPECT_THAT(checked_expr.source_info().line_offsets(), + testing::UnorderedElementsAre(1, 2, 3)); + EXPECT_THAT( + checked_expr.source_info().positions(), + testing::UnorderedElementsAre(testing::Pair(1, 1), testing::Pair(2, 2))); + EXPECT_EQ(checked_expr.expr_version(), "expr_version"); +} + +} // namespace +} // namespace internal +} // namespace ast +} // namespace cel diff --git a/base/ast_utility.cc b/base/ast_utility.cc new file mode 100644 index 000000000..812470d8b --- /dev/null +++ b/base/ast_utility.cc @@ -0,0 +1,506 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "base/ast_utility.h" + +#include +#include +#include +#include +#include + +#include "google/api/expr/v1alpha1/checked.pb.h" +#include "google/api/expr/v1alpha1/syntax.pb.h" +#include "google/protobuf/duration.pb.h" +#include "google/protobuf/timestamp.pb.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/time/time.h" +#include "base/ast.h" + +namespace cel::ast::internal { + +absl::StatusOr ToNative(const google::api::expr::v1alpha1::Constant& constant) { + switch (constant.constant_kind_case()) { + case google::api::expr::v1alpha1::Constant::kNullValue: + return NullValue::kNullValue; + case google::api::expr::v1alpha1::Constant::kBoolValue: + return constant.bool_value(); + case google::api::expr::v1alpha1::Constant::kInt64Value: + return constant.int64_value(); + case google::api::expr::v1alpha1::Constant::kUint64Value: + return constant.uint64_value(); + case google::api::expr::v1alpha1::Constant::kDoubleValue: + return constant.double_value(); + case google::api::expr::v1alpha1::Constant::kStringValue: + return constant.string_value(); + case google::api::expr::v1alpha1::Constant::kBytesValue: + return constant.bytes_value(); + case google::api::expr::v1alpha1::Constant::kDurationValue: + return absl::Seconds(constant.duration_value().seconds()) + + absl::Nanoseconds(constant.duration_value().nanos()); + case google::api::expr::v1alpha1::Constant::kTimestampValue: + return absl::FromUnixSeconds(constant.timestamp_value().seconds()) + + absl::Nanoseconds(constant.timestamp_value().nanos()); + default: + return absl::InvalidArgumentError( + "Illegal type supplied for google::api::expr::v1alpha1::Constant."); + } +} + +Ident ToNative(const google::api::expr::v1alpha1::Expr::Ident& ident) { + return Ident(ident.name()); +} + +absl::StatusOr(native_expr->expr_kind())); + auto& native_select = absl::get", + absl::string_view expression, const 
std::vector& macros, + absl::string_view description = "", const ParserOptions& options = ParserOptions()); absl::StatusOr Parse( - const std::string& expression, const std::string& description = "", + absl::string_view expression, absl::string_view description = "", const ParserOptions& options = ParserOptions()); absl::StatusOr ParseWithMacros( - const std::string& expression, const std::vector& macros, - const std::string& description = "", + absl::string_view expression, const std::vector& macros, + absl::string_view description = "", const ParserOptions& options = ParserOptions()); -} // namespace parser -} // namespace expr -} // namespace api -} // namespace google +} // namespace google::api::expr::parser #endif // THIRD_PARTY_CEL_CPP_PARSER_PARSER_H_ diff --git a/parser/parser_test.cc b/parser/parser_test.cc index 6bf4204a4..657fbd155 100644 --- a/parser/parser_test.cc +++ b/parser/parser_test.cc @@ -1,8 +1,23 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include "parser/parser.h" #include #include #include +#include #include #include @@ -11,15 +26,14 @@ #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "absl/types/optional.h" +#include "internal/benchmark.h" #include "internal/testing.h" #include "parser/options.h" #include "parser/source_factory.h" #include "testutil/expr_printer.h" -namespace google { -namespace api { -namespace expr { -namespace parser { +namespace google::api::expr::parser { + namespace { using ::google::api::expr::v1alpha1::Expr; @@ -30,8 +44,9 @@ using cel::internal::IsOk; struct TestInfo { TestInfo(const std::string& I, const std::string& P, const std::string& E = "", const std::string& L = "", - const std::string& R = "", const std::string& M = "") - : I(I), P(P), E(E), L(L), R(R), M(M) {} + const std::string& R = "", const std::string& M = "", + bool benchmark = true) + : I(I), P(P), E(E), L(L), R(R), M(M), benchmark(benchmark) {} // I contains the input expression to be parsed. std::string I; @@ -51,6 +66,10 @@ struct TestInfo { // M contains the expected macro call output of hte expression tree. std::string M; + + // Whether to run the test when benchmarking. Enable by default. Disabled for + // some expressions which bump up against the stack limit. + bool benchmark; }; std::vector test_cases = { @@ -687,7 +706,7 @@ std::vector test_cases = { {"\"hi\\u263A \\u263Athere\"", "\"hi☺ ☺there\"^#1:string#"}, {"\"\\U000003A8\\?\"", "\"Ψ?\"^#1:string#"}, {"\"\\a\\b\\f\\n\\r\\t\\v'\\\"\\\\\\? Legal escapes\"", - "\"\\a\\b\\f\\n\\r\\t\\v'\\\"\\? Legal escapes\"^#1:string#"}, + "\"\\x07\\x08\\x0c\\n\\r\\t\\x0b'\\\"\\\\? 
Legal escapes\"^#1:string#"}, {"\"\\xFh\"", "", "ERROR: :1:1: Syntax error: token recognition error at: '\"\\xFh'\n" " | \"\\xFh\"\n" @@ -724,6 +743,33 @@ std::vector test_cases = { " \"😦\"^#6:string#\n" " ]^#3:Expr.CreateList#\n" ")^#2:Expr.Call#"}, + {"'\u00ff' in ['\u00ff', '\u00ff', '\u00ff']", + "@in(\n" + " \"\u00ff\"^#1:string#,\n" + " [\n" + " \"\u00ff\"^#4:string#,\n" + " \"\u00ff\"^#5:string#,\n" + " \"\u00ff\"^#6:string#\n" + " ]^#3:Expr.CreateList#\n" + ")^#2:Expr.Call#"}, + {"'\u00ff' in ['\uffff', '\U00100000', '\U0010ffff']", + "@in(\n" + " \"\u00ff\"^#1:string#,\n" + " [\n" + " \"\uffff\"^#4:string#,\n" + " \"\U00100000\"^#5:string#,\n" + " \"\U0010ffff\"^#6:string#\n" + " ]^#3:Expr.CreateList#\n" + ")^#2:Expr.Call#"}, + {"'\u00ff' in ['\U00100000', '\uffff', '\U0010ffff']", + "@in(\n" + " \"\u00ff\"^#1:string#,\n" + " [\n" + " \"\U00100000\"^#4:string#,\n" + " \"\uffff\"^#5:string#,\n" + " \"\U0010ffff\"^#6:string#\n" + " ]^#3:Expr.CreateList#\n" + ")^#2:Expr.Call#"}, {"'😁' in ['😁', '😑', '😦']\n" " && in.😁", "", @@ -839,14 +885,19 @@ std::vector test_cases = { "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" "]]]]]]", - "", "Expression recursion limit exceeded. limit: 250"}, + "", "Expression recursion limit exceeded. limit: 250", "", "", "", false}, { // Note, the ANTLR parse stack may recurse much more deeply and permit // more detailed expressions than the visitor can recurse over in // practice. "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[['just fine'],[1],[2],[3],[4],[5]]]]]]]" "]]]]]]]]]]]]]]]]]]]]]]]]", - "" // parse output not validated as it is too large. + "", // parse output not validated as it is too large. 
+ "", + "", + "", + "", + false, }, { "[\n\t\r[\n\t\r[\n\t\r]\n\t\r]\n\t\r", @@ -1047,7 +1098,82 @@ std::vector test_cases = { ")^#18:exists#,\n" "has(\n" " z^#8:Expr.Ident#.a^#9:Expr.Select#\n" - ")^#10:has"}}; + ")^#10:has"}, + {"has(a.b).asList().exists(c, c)", + "__comprehension__(\n" + " // Variable\n" + " c,\n" + " // Target\n" + " a^#2:Expr.Ident#.b~test-only~^#4:Expr.Select#.asList()^#5:Expr.Call#,\n" + " // Accumulator\n" + " __result__,\n" + " // Init\n" + " false^#9:bool#,\n" + " // LoopCondition\n" + " @not_strictly_false(\n" + " !_(\n" + " __result__^#10:Expr.Ident#\n" + " )^#11:Expr.Call#\n" + " )^#12:Expr.Call#,\n" + " // LoopStep\n" + " _||_(\n" + " __result__^#13:Expr.Ident#,\n" + " c^#8:Expr.Ident#\n" + " )^#14:Expr.Call#,\n" + " // Result\n" + " __result__^#15:Expr.Ident#)^#16:Expr.Comprehension#", + "", "", "", + "^#4:has#.asList()^#5:Expr.Call#.exists(\n" + " c^#7:Expr.Ident#,\n" + " c^#8:Expr.Ident#\n" + ")^#16:exists#,\n" + "has(\n" + " a^#2:Expr.Ident#.b^#3:Expr.Select#\n" + ")^#4:has"}, + {"[has(a.b), has(c.d)].exists(e, e)", + "__comprehension__(\n" + " // Variable\n" + " e,\n" + " // Target\n" + " [\n" + " a^#3:Expr.Ident#.b~test-only~^#5:Expr.Select#,\n" + " c^#7:Expr.Ident#.d~test-only~^#9:Expr.Select#\n" + " ]^#1:Expr.CreateList#,\n" + " // Accumulator\n" + " __result__,\n" + " // Init\n" + " false^#13:bool#,\n" + " // LoopCondition\n" + " @not_strictly_false(\n" + " !_(\n" + " __result__^#14:Expr.Ident#\n" + " )^#15:Expr.Call#\n" + " )^#16:Expr.Call#,\n" + " // LoopStep\n" + " _||_(\n" + " __result__^#17:Expr.Ident#,\n" + " e^#12:Expr.Ident#\n" + " )^#18:Expr.Call#,\n" + " // Result\n" + " __result__^#19:Expr.Ident#)^#20:Expr.Comprehension#", + "", "", "", + "[\n" + " ^#5:has#,\n" + " ^#9:has#\n" + "]^#1:Expr.CreateList#.exists(\n" + " e^#11:Expr.Ident#,\n" + " e^#12:Expr.Ident#\n" + ")^#20:exists#,\n" + "has(\n" + " c^#7:Expr.Ident#.d^#8:Expr.Select#\n" + ")^#9:has#,\n" + "has(\n" + " a^#3:Expr.Ident#.b^#4:Expr.Select#\n" + 
")^#5:has"}, + {"b'\\UFFFFFFFF'", "", + "ERROR: :1:1: Invalid bytes literal: Illegal escape sequence: " + "Unicode escape sequence \\U cannot be used in bytes literals\n | " + "b'\\UFFFFFFFF'\n | ^"}}; class KindAndIdAdorner : public testutil::ExpressionAdorner { public: @@ -1218,7 +1344,7 @@ TEST_P(ExpressionTest, Parse) { EXPECT_THAT(result, IsOk()); } else { EXPECT_THAT(result, Not(IsOk())); - EXPECT_EQ(result.status().message(), test_info.E); + EXPECT_EQ(test_info.E, result.status().message()); } if (!test_info.P.empty()) { @@ -1236,14 +1362,13 @@ TEST_P(ExpressionTest, Parse) { } if (!test_info.R.empty()) { - EXPECT_EQ(ConvertEnrichedSourceInfoToString(result->enriched_source_info()), - test_info.R); + EXPECT_EQ(test_info.R, ConvertEnrichedSourceInfoToString( + result->enriched_source_info())); } if (!test_info.M.empty()) { - EXPECT_EQ( - ConvertMacroCallsToString(result.value().parsed_expr().source_info()), - test_info.M); + EXPECT_EQ(test_info.M, ConvertMacroCallsToString( + result.value().parsed_expr().source_info())); } } @@ -1327,8 +1452,18 @@ TEST(ExpressionTest, RecursionDepthExceeded) { INSTANTIATE_TEST_SUITE_P(CelParserTest, ExpressionTest, testing::ValuesIn(test_cases)); +void BM_Parse(benchmark::State& state) { + std::vector macros = Macro::AllMacros(); + for (auto s : state) { + for (const auto& test_case : test_cases) { + if (test_case.benchmark) { + benchmark::DoNotOptimize(ParseWithMacros(test_case.I, macros)); + } + } + } +} + +BENCHMARK(BM_Parse)->ThreadRange(1, std::thread::hardware_concurrency()); + } // namespace -} // namespace parser -} // namespace expr -} // namespace api -} // namespace google +} // namespace google::api::expr::parser diff --git a/parser/source_factory.cc b/parser/source_factory.cc index 0eabc6de3..dc830d3f1 100644 --- a/parser/source_factory.cc +++ b/parser/source_factory.cc @@ -1,7 +1,24 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "parser/source_factory.h" #include +#include #include +#include +#include #include "google/protobuf/struct.pb.h" #include "absl/container/flat_hash_set.h" @@ -12,10 +29,7 @@ #include "absl/strings/str_split.h" #include "common/operators.h" -namespace google { -namespace api { -namespace expr { -namespace parser { +namespace google::api::expr::parser { namespace { const int kMaxErrorsToReport = 100; @@ -29,63 +43,63 @@ int32_t PositiveOrMax(int32_t value) { } // namespace -SourceFactory::SourceFactory(const std::string& expression) +SourceFactory::SourceFactory(absl::string_view expression) : next_id_(1), num_errors_(0) { - calcLineOffsets(expression); + CalcLineOffsets(expression); } -int64_t SourceFactory::id(const antlr4::Token* token) { +int64_t SourceFactory::Id(const antlr4::Token* token) { int64_t new_id = next_id_; positions_.emplace( - new_id, - SourceLocation{static_cast(token->getLine()), - static_cast(token->getCharPositionInLine()), - static_cast(token->getStopIndex()), line_offsets_}); + new_id, SourceLocation{ + static_cast(token->getLine()), + static_cast(token->getCharPositionInLine()), + static_cast(token->getStopIndex()), line_offsets_}); next_id_ += 1; return new_id; } -const SourceFactory::SourceLocation& SourceFactory::getSourceLocation( +const SourceFactory::SourceLocation& SourceFactory::GetSourceLocation( int64_t id) const { return positions_.at(id); } -const SourceFactory::SourceLocation SourceFactory::noLocation() { +const SourceFactory::SourceLocation 
SourceFactory::NoLocation() { return SourceLocation(-1, -1, -1, {}); } -int64_t SourceFactory::id(antlr4::ParserRuleContext* ctx) { - return id(ctx->getStart()); +int64_t SourceFactory::Id(antlr4::ParserRuleContext* ctx) { + return Id(ctx->getStart()); } -int64_t SourceFactory::id(const SourceLocation& location) { +int64_t SourceFactory::Id(const SourceLocation& location) { int64_t new_id = next_id_; positions_.emplace(new_id, location); next_id_ += 1; return new_id; } -int64_t SourceFactory::nextMacroId(int64_t macro_id) { - return id(getSourceLocation(macro_id)); +int64_t SourceFactory::NextMacroId(int64_t macro_id) { + return Id(GetSourceLocation(macro_id)); } -Expr SourceFactory::newExpr(int64_t id) { +Expr SourceFactory::NewExpr(int64_t id) { Expr expr; expr.set_id(id); return expr; } -Expr SourceFactory::newExpr(antlr4::ParserRuleContext* ctx) { - return newExpr(id(ctx)); +Expr SourceFactory::NewExpr(antlr4::ParserRuleContext* ctx) { + return NewExpr(Id(ctx)); } -Expr SourceFactory::newExpr(const antlr4::Token* token) { - return newExpr(id(token)); +Expr SourceFactory::NewExpr(const antlr4::Token* token) { + return NewExpr(Id(token)); } -Expr SourceFactory::newGlobalCall(int64_t id, const std::string& function, +Expr SourceFactory::NewGlobalCall(int64_t id, const std::string& function, const std::vector& args) { - Expr expr = newExpr(id); + Expr expr = NewExpr(id); auto call_expr = expr.mutable_call_expr(); call_expr->set_function(function); std::for_each(args.begin(), args.end(), @@ -93,16 +107,16 @@ Expr SourceFactory::newGlobalCall(int64_t id, const std::string& function, return expr; } -Expr SourceFactory::newGlobalCallForMacro(int64_t macro_id, +Expr SourceFactory::NewGlobalCallForMacro(int64_t macro_id, const std::string& function, const std::vector& args) { - return newGlobalCall(nextMacroId(macro_id), function, args); + return NewGlobalCall(NextMacroId(macro_id), function, args); } -Expr SourceFactory::newReceiverCall(int64_t id, const std::string& 
function, +Expr SourceFactory::NewReceiverCall(int64_t id, const std::string& function, const Expr& target, const std::vector& args) { - Expr expr = newExpr(id); + Expr expr = NewExpr(id); auto call_expr = expr.mutable_call_expr(); call_expr->set_function(function); *call_expr->mutable_target() = target; @@ -111,33 +125,34 @@ Expr SourceFactory::newReceiverCall(int64_t id, const std::string& function, return expr; } -Expr SourceFactory::newIdent(const antlr4::Token* token, +Expr SourceFactory::NewIdent(const antlr4::Token* token, const std::string& ident_name) { - Expr expr = newExpr(token); + Expr expr = NewExpr(token); expr.mutable_ident_expr()->set_name(ident_name); return expr; } -Expr SourceFactory::newIdentForMacro(int64_t macro_id, +Expr SourceFactory::NewIdentForMacro(int64_t macro_id, const std::string& ident_name) { - Expr expr = newExpr(nextMacroId(macro_id)); + Expr expr = NewExpr(NextMacroId(macro_id)); expr.mutable_ident_expr()->set_name(ident_name); return expr; } -Expr SourceFactory::newSelect( - ::cel_grammar::CelParser::SelectOrCallContext* ctx, Expr& operand, +Expr SourceFactory::NewSelect( + ::cel_parser_internal::CelParser::SelectOrCallContext* ctx, Expr& operand, const std::string& field) { - Expr expr = newExpr(ctx->op); + Expr expr = NewExpr(ctx->op); auto select_expr = expr.mutable_select_expr(); *select_expr->mutable_operand() = operand; select_expr->set_field(field); return expr; } -Expr SourceFactory::newPresenceTestForMacro(int64_t macro_id, const Expr& operand, +Expr SourceFactory::NewPresenceTestForMacro(int64_t macro_id, + const Expr& operand, const std::string& field) { - Expr expr = newExpr(nextMacroId(macro_id)); + Expr expr = NewExpr(NextMacroId(macro_id)); auto select_expr = expr.mutable_select_expr(); *select_expr->mutable_operand() = operand; select_expr->set_field(field); @@ -145,10 +160,10 @@ Expr SourceFactory::newPresenceTestForMacro(int64_t macro_id, const Expr& operan return expr; } -Expr SourceFactory::newObject( +Expr 
SourceFactory::NewObject( int64_t obj_id, const std::string& type_name, const std::vector& entries) { - auto expr = newExpr(obj_id); + auto expr = NewExpr(obj_id); auto struct_expr = expr.mutable_struct_expr(); struct_expr->set_message_name(type_name); std::for_each(entries.begin(), entries.end(), @@ -158,7 +173,7 @@ Expr SourceFactory::newObject( return expr; } -Expr::CreateStruct::Entry SourceFactory::newObjectField( +Expr::CreateStruct::Entry SourceFactory::NewObjectField( int64_t field_id, const std::string& field, const Expr& value) { Expr::CreateStruct::Entry entry; entry.set_id(field_id); @@ -167,13 +182,13 @@ Expr::CreateStruct::Entry SourceFactory::newObjectField( return entry; } -Expr SourceFactory::newComprehension(int64_t id, const std::string& iter_var, +Expr SourceFactory::NewComprehension(int64_t id, const std::string& iter_var, const Expr& iter_range, const std::string& accu_var, const Expr& accu_init, const Expr& condition, const Expr& step, const Expr& result) { - Expr expr = newExpr(id); + Expr expr = NewExpr(id); auto comp_expr = expr.mutable_comprehension_expr(); comp_expr->set_iter_var(iter_var); *comp_expr->mutable_iter_range() = iter_range; @@ -185,32 +200,32 @@ Expr SourceFactory::newComprehension(int64_t id, const std::string& iter_var, return expr; } -Expr SourceFactory::foldForMacro(int64_t macro_id, const std::string& iter_var, +Expr SourceFactory::FoldForMacro(int64_t macro_id, const std::string& iter_var, const Expr& iter_range, const std::string& accu_var, const Expr& accu_init, const Expr& condition, const Expr& step, const Expr& result) { - return newComprehension(nextMacroId(macro_id), iter_var, iter_range, accu_var, + return NewComprehension(NextMacroId(macro_id), iter_var, iter_range, accu_var, accu_init, condition, step, result); } -Expr SourceFactory::newList(int64_t list_id, const std::vector& elems) { - auto expr = newExpr(list_id); +Expr SourceFactory::NewList(int64_t list_id, const std::vector& elems) { + auto expr = 
NewExpr(list_id); auto list_expr = expr.mutable_list_expr(); std::for_each(elems.begin(), elems.end(), [list_expr](const Expr& e) { *list_expr->add_elements() = e; }); return expr; } -Expr SourceFactory::newQuantifierExprForMacro( +Expr SourceFactory::NewQuantifierExprForMacro( SourceFactory::QuantifierKind kind, int64_t macro_id, const Expr& target, const std::vector& args) { if (args.empty()) { return Expr(); } if (!args[0].has_ident_expr()) { - auto loc = getSourceLocation(args[0].id()); - return reportError(loc, "argument must be a simple name"); + auto loc = GetSourceLocation(args[0].id()); + return ReportError(loc, "argument must be a simple name"); } std::string v = args[0].ident_expr().name(); @@ -218,7 +233,7 @@ Expr SourceFactory::newQuantifierExprForMacro( const std::string AccumulatorName = "__result__"; auto accu_ident = [this, ¯o_id, &AccumulatorName]() { - return newIdentForMacro(macro_id, AccumulatorName); + return NewIdentForMacro(macro_id, AccumulatorName); }; Expr init; @@ -227,56 +242,62 @@ Expr SourceFactory::newQuantifierExprForMacro( Expr result; switch (kind) { case QUANTIFIER_ALL: - init = newLiteralBoolForMacro(macro_id, true); - condition = newGlobalCallForMacro( + init = NewLiteralBoolForMacro(macro_id, true); + condition = NewGlobalCallForMacro( macro_id, CelOperator::NOT_STRICTLY_FALSE, {accu_ident()}); - step = newGlobalCallForMacro(macro_id, CelOperator::LOGICAL_AND, + step = NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_AND, {accu_ident(), args[1]}); result = accu_ident(); break; case QUANTIFIER_EXISTS: - init = newLiteralBoolForMacro(macro_id, false); - condition = newGlobalCallForMacro( + init = NewLiteralBoolForMacro(macro_id, false); + condition = NewGlobalCallForMacro( macro_id, CelOperator::NOT_STRICTLY_FALSE, - {newGlobalCallForMacro(macro_id, CelOperator::LOGICAL_NOT, + {NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_NOT, {accu_ident()})}); - step = newGlobalCallForMacro(macro_id, CelOperator::LOGICAL_OR, + step 
= NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_OR, {accu_ident(), args[1]}); result = accu_ident(); break; case QUANTIFIER_EXISTS_ONE: { - Expr zero_expr = newLiteralIntForMacro(macro_id, 0); - Expr one_expr = newLiteralIntForMacro(macro_id, 1); + Expr zero_expr = NewLiteralIntForMacro(macro_id, 0); + Expr one_expr = NewLiteralIntForMacro(macro_id, 1); init = zero_expr; - condition = newLiteralBoolForMacro(macro_id, true); - step = newGlobalCallForMacro( + condition = NewLiteralBoolForMacro(macro_id, true); + step = NewGlobalCallForMacro( macro_id, CelOperator::CONDITIONAL, {args[1], - newGlobalCallForMacro(macro_id, CelOperator::ADD, + NewGlobalCallForMacro(macro_id, CelOperator::ADD, {accu_ident(), one_expr}), accu_ident()}); - result = newGlobalCallForMacro(macro_id, CelOperator::EQUALS, + result = NewGlobalCallForMacro(macro_id, CelOperator::EQUALS, {accu_ident(), one_expr}); break; } } - return foldForMacro(macro_id, v, target, AccumulatorName, init, condition, + return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition, step, result); } Expr SourceFactory::BuildArgForMacroCall(const Expr& expr) { - Expr result_expr; - result_expr.set_id(expr.id()); if (macro_calls_.find(expr.id()) != macro_calls_.end()) { + Expr result_expr; + result_expr.set_id(expr.id()); return result_expr; } // Call expression could have args or sub-args that are also macros found in // macro_calls. if (expr.has_call_expr()) { + Expr result_expr; + result_expr.set_id(expr.id()); auto mutable_expr = result_expr.mutable_call_expr(); mutable_expr->set_function(expr.call_expr().function()); + if (expr.call_expr().has_target()) { + *mutable_expr->mutable_target() = + BuildArgForMacroCall(expr.call_expr().target()); + } for (const auto& arg : expr.call_expr().args()) { // Iterate the AST from `expr` recursively looking for macros. 
Because we // are at most starting from the top level macro, this recursion is @@ -287,6 +308,17 @@ Expr SourceFactory::BuildArgForMacroCall(const Expr& expr) { } return result_expr; } + if (expr.has_list_expr()) { + Expr result_expr; + result_expr.set_id(expr.id()); + const auto& list_expr = expr.list_expr(); + auto mutable_list_expr = result_expr.mutable_list_expr(); + for (const auto& elem : list_expr.elements()) { + *mutable_list_expr->mutable_elements()->Add() = + BuildArgForMacroCall(elem); + } + return result_expr; + } return expr; } @@ -304,7 +336,7 @@ void SourceFactory::AddMacroCall(int64_t macro_id, const Expr& target, if (macro_calls_.find(target.id()) != macro_calls_.end()) { expr.set_id(target.id()); } else { - expr = target; + expr = BuildArgForMacroCall(target); } *mutable_macro_call->mutable_target() = expr; } @@ -315,14 +347,14 @@ void SourceFactory::AddMacroCall(int64_t macro_id, const Expr& target, macro_calls_.emplace(macro_id, macro_call); } -Expr SourceFactory::newFilterExprForMacro(int64_t macro_id, const Expr& target, +Expr SourceFactory::NewFilterExprForMacro(int64_t macro_id, const Expr& target, const std::vector& args) { if (args.empty()) { return Expr(); } if (!args[0].has_ident_expr()) { - auto loc = getSourceLocation(args[0].id()); - return reportError(loc, "argument is not an identifier"); + auto loc = GetSourceLocation(args[0].id()); + return ReportError(loc, "argument is not an identifier"); } std::string v = args[0].ident_expr().name(); @@ -330,26 +362,26 @@ Expr SourceFactory::newFilterExprForMacro(int64_t macro_id, const Expr& target, const std::string AccumulatorName = "__result__"; Expr filter = args[1]; - Expr accu_expr = newIdentForMacro(macro_id, AccumulatorName); - Expr init = newListForMacro(macro_id, {}); - Expr condition = newLiteralBoolForMacro(macro_id, true); + Expr accu_expr = NewIdentForMacro(macro_id, AccumulatorName); + Expr init = NewListForMacro(macro_id, {}); + Expr condition = NewLiteralBoolForMacro(macro_id, 
true); Expr step = - newGlobalCallForMacro(macro_id, CelOperator::ADD, - {accu_expr, newListForMacro(macro_id, {args[0]})}); - step = newGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL, + NewGlobalCallForMacro(macro_id, CelOperator::ADD, + {accu_expr, NewListForMacro(macro_id, {args[0]})}); + step = NewGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL, {filter, step, accu_expr}); - return foldForMacro(macro_id, v, target, AccumulatorName, init, condition, + return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition, step, accu_expr); } -Expr SourceFactory::newListForMacro(int64_t macro_id, +Expr SourceFactory::NewListForMacro(int64_t macro_id, const std::vector& elems) { - return newList(nextMacroId(macro_id), elems); + return NewList(NextMacroId(macro_id), elems); } -Expr SourceFactory::newMap( +Expr SourceFactory::NewMap( int64_t map_id, const std::vector& entries) { - auto expr = newExpr(map_id); + auto expr = NewExpr(map_id); auto struct_expr = expr.mutable_struct_expr(); std::for_each(entries.begin(), entries.end(), [struct_expr](const Expr::CreateStruct::Entry& e) { @@ -358,14 +390,14 @@ Expr SourceFactory::newMap( return expr; } -Expr SourceFactory::newMapForMacro(int64_t macro_id, const Expr& target, +Expr SourceFactory::NewMapForMacro(int64_t macro_id, const Expr& target, const std::vector& args) { if (args.empty()) { return Expr(); } if (!args[0].has_ident_expr()) { - auto loc = getSourceLocation(args[0].id()); - return reportError(loc, "argument is not an identifier"); + auto loc = GetSourceLocation(args[0].id()); + return ReportError(loc, "argument is not an identifier"); } std::string v = args[0].ident_expr().name(); @@ -383,20 +415,20 @@ Expr SourceFactory::newMapForMacro(int64_t macro_id, const Expr& target, // traditional variable name assigned to the fold accumulator variable. 
const std::string AccumulatorName = "__result__"; - Expr accu_expr = newIdentForMacro(macro_id, AccumulatorName); - Expr init = newListForMacro(macro_id, {}); - Expr condition = newLiteralBoolForMacro(macro_id, true); - Expr step = newGlobalCallForMacro( - macro_id, CelOperator::ADD, {accu_expr, newListForMacro(macro_id, {fn})}); + Expr accu_expr = NewIdentForMacro(macro_id, AccumulatorName); + Expr init = NewListForMacro(macro_id, {}); + Expr condition = NewLiteralBoolForMacro(macro_id, true); + Expr step = NewGlobalCallForMacro( + macro_id, CelOperator::ADD, {accu_expr, NewListForMacro(macro_id, {fn})}); if (has_filter) { - step = newGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL, + step = NewGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL, {filter, step, accu_expr}); } - return foldForMacro(macro_id, v, target, AccumulatorName, init, condition, + return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition, step, accu_expr); } -Expr::CreateStruct::Entry SourceFactory::newMapEntry(int64_t entry_id, +Expr::CreateStruct::Entry SourceFactory::NewMapEntry(int64_t entry_id, const Expr& key, const Expr& value) { Expr::CreateStruct::Entry entry; @@ -406,94 +438,96 @@ Expr::CreateStruct::Entry SourceFactory::newMapEntry(int64_t entry_id, return entry; } -Expr SourceFactory::newLiteralInt(antlr4::ParserRuleContext* ctx, int64_t value) { - Expr expr = newExpr(ctx); +Expr SourceFactory::NewLiteralInt(antlr4::ParserRuleContext* ctx, + int64_t value) { + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_int64_value(value); return expr; } -Expr SourceFactory::newLiteralIntForMacro(int64_t macro_id, int64_t value) { - Expr expr = newExpr(nextMacroId(macro_id)); +Expr SourceFactory::NewLiteralIntForMacro(int64_t macro_id, int64_t value) { + Expr expr = NewExpr(NextMacroId(macro_id)); expr.mutable_const_expr()->set_int64_value(value); return expr; } -Expr SourceFactory::newLiteralUint(antlr4::ParserRuleContext* ctx, +Expr 
SourceFactory::NewLiteralUint(antlr4::ParserRuleContext* ctx, uint64_t value) { - Expr expr = newExpr(ctx); + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_uint64_value(value); return expr; } -Expr SourceFactory::newLiteralDouble(antlr4::ParserRuleContext* ctx, +Expr SourceFactory::NewLiteralDouble(antlr4::ParserRuleContext* ctx, double value) { - Expr expr = newExpr(ctx); + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_double_value(value); return expr; } -Expr SourceFactory::newLiteralString(antlr4::ParserRuleContext* ctx, +Expr SourceFactory::NewLiteralString(antlr4::ParserRuleContext* ctx, const std::string& s) { - Expr expr = newExpr(ctx); + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_string_value(s); return expr; } -Expr SourceFactory::newLiteralBytes(antlr4::ParserRuleContext* ctx, +Expr SourceFactory::NewLiteralBytes(antlr4::ParserRuleContext* ctx, const std::string& b) { - Expr expr = newExpr(ctx); + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_bytes_value(b); return expr; } -Expr SourceFactory::newLiteralBool(antlr4::ParserRuleContext* ctx, bool b) { - Expr expr = newExpr(ctx); +Expr SourceFactory::NewLiteralBool(antlr4::ParserRuleContext* ctx, bool b) { + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_bool_value(b); return expr; } -Expr SourceFactory::newLiteralBoolForMacro(int64_t macro_id, bool b) { - Expr expr = newExpr(nextMacroId(macro_id)); +Expr SourceFactory::NewLiteralBoolForMacro(int64_t macro_id, bool b) { + Expr expr = NewExpr(NextMacroId(macro_id)); expr.mutable_const_expr()->set_bool_value(b); return expr; } -Expr SourceFactory::newLiteralNull(antlr4::ParserRuleContext* ctx) { - Expr expr = newExpr(ctx); +Expr SourceFactory::NewLiteralNull(antlr4::ParserRuleContext* ctx) { + Expr expr = NewExpr(ctx); expr.mutable_const_expr()->set_null_value(::google::protobuf::NULL_VALUE); return expr; } -Expr SourceFactory::reportError(antlr4::ParserRuleContext* ctx, - const std::string& msg) { +Expr 
SourceFactory::ReportError(antlr4::ParserRuleContext* ctx, + absl::string_view msg) { num_errors_ += 1; - Expr expr = newExpr(ctx); + Expr expr = NewExpr(ctx); if (errors_truncated_.size() < kMaxErrorsToReport) { - errors_truncated_.emplace_back(msg, positions_.at(expr.id())); + errors_truncated_.emplace_back(std::string(msg), positions_.at(expr.id())); } return expr; } -Expr SourceFactory::reportError(int32_t line, int32_t col, const std::string& msg) { +Expr SourceFactory::ReportError(int32_t line, int32_t col, + absl::string_view msg) { num_errors_ += 1; SourceLocation loc(line, col, /*offset_end=*/-1, line_offsets_); if (errors_truncated_.size() < kMaxErrorsToReport) { - errors_truncated_.emplace_back(msg, loc); + errors_truncated_.emplace_back(std::string(msg), loc); } - return newExpr(id(loc)); + return NewExpr(Id(loc)); } -Expr SourceFactory::reportError(const SourceFactory::SourceLocation& loc, - const std::string& msg) { +Expr SourceFactory::ReportError(const SourceFactory::SourceLocation& loc, + absl::string_view msg) { num_errors_ += 1; if (errors_truncated_.size() < kMaxErrorsToReport) { - errors_truncated_.emplace_back(msg, loc); + errors_truncated_.emplace_back(std::string(msg), loc); } - return newExpr(id(loc)); + return NewExpr(Id(loc)); } -std::string SourceFactory::errorMessage(const std::string& description, - const std::string& expression) const { +std::string SourceFactory::ErrorMessage(absl::string_view description, + absl::string_view expression) const { // Errors are collected as they are encountered, not by their location within // the source. 
To have a more stable error message as implementation // details change, we sort the collected errors by their source location @@ -533,7 +567,7 @@ std::string SourceFactory::errorMessage(const std::string& description, "ERROR: %s:%zu:%zu: %s", description, error->location.line, // add one to the 0-based column error->location.col + 1, error->message); - std::string snippet = getSourceLine(error->location.line, expression); + std::string snippet = GetSourceLine(error->location.line, expression); std::string::size_type pos = 0; while ((pos = snippet.find('\t', pos)) != std::string::npos) { snippet.replace(pos, 1, " "); @@ -554,7 +588,7 @@ std::string SourceFactory::errorMessage(const std::string& description, return absl::StrJoin(messages, "\n"); } -bool SourceFactory::isReserved(const std::string& ident_name) { +bool SourceFactory::IsReserved(absl::string_view ident_name) { static const auto* reserved_words = new absl::flat_hash_set( {"as", "break", "const", "continue", "else", "false", "for", "function", "if", "import", "in", "let", "loop", "package", @@ -562,7 +596,7 @@ bool SourceFactory::isReserved(const std::string& ident_name) { return reserved_words->find(ident_name) != reserved_words->end(); } -google::api::expr::v1alpha1::SourceInfo SourceFactory::sourceInfo() const { +google::api::expr::v1alpha1::SourceInfo SourceFactory::source_info() const { google::api::expr::v1alpha1::SourceInfo source_info; source_info.set_location(""); auto positions = source_info.mutable_positions(); @@ -581,7 +615,7 @@ google::api::expr::v1alpha1::SourceInfo SourceFactory::sourceInfo() const { return source_info; } -EnrichedSourceInfo SourceFactory::enrichedSourceInfo() const { +EnrichedSourceInfo SourceFactory::enriched_source_info() const { std::map> offset; std::for_each( positions_.begin(), positions_.end(), @@ -591,7 +625,7 @@ EnrichedSourceInfo SourceFactory::enrichedSourceInfo() const { return EnrichedSourceInfo(std::move(offset)); } -void 
SourceFactory::calcLineOffsets(const std::string& expression) { +void SourceFactory::CalcLineOffsets(absl::string_view expression) { std::vector lines = absl::StrSplit(expression, '\n'); int offset = 0; line_offsets_.resize(lines.size()); @@ -601,7 +635,7 @@ void SourceFactory::calcLineOffsets(const std::string& expression) { } } -absl::optional SourceFactory::findLineOffset(int32_t line) const { +absl::optional SourceFactory::FindLineOffset(int32_t line) const { // note that err.line is 1-based, // while we need the 0-based index if (line == 1) { @@ -612,21 +646,19 @@ absl::optional SourceFactory::findLineOffset(int32_t line) const { return {}; } -std::string SourceFactory::getSourceLine(int32_t line, - const std::string& expression) const { - auto char_start = findLineOffset(line); +std::string SourceFactory::GetSourceLine(int32_t line, + absl::string_view expression) const { + auto char_start = FindLineOffset(line); if (!char_start) { return ""; } - auto char_end = findLineOffset(line + 1); + auto char_end = FindLineOffset(line + 1); if (char_end) { - return expression.substr(*char_start, *char_end - *char_end - 1); + return std::string( + expression.substr(*char_start, *char_end - *char_end - 1)); } else { - return expression.substr(*char_start); + return std::string(expression.substr(*char_start)); } } -} // namespace parser -} // namespace expr -} // namespace api -} // namespace google +} // namespace google::api::expr::parser diff --git a/parser/source_factory.h b/parser/source_factory.h index 09744f94f..a9fe01a6e 100644 --- a/parser/source_factory.h +++ b/parser/source_factory.h @@ -1,25 +1,39 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef THIRD_PARTY_CEL_CPP_PARSER_SOURCE_FACTORY_H_ #define THIRD_PARTY_CEL_CPP_PARSER_SOURCE_FACTORY_H_ +#include #include #include #include #include "google/api/expr/v1alpha1/syntax.pb.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" -#include "parser/cel_grammar.inc/cel_grammar/CelParser.h" #include "antlr4-runtime.h" +#include "parser/internal/CelParser.h" -namespace google { -namespace api { -namespace expr { -namespace parser { +namespace google::api::expr::parser { using google::api::expr::v1alpha1::Expr; class EnrichedSourceInfo { public: - EnrichedSourceInfo(std::map> offsets) + explicit EnrichedSourceInfo( + std::map> offsets) : offsets_(std::move(offsets)) {} const std::map>& offsets() const { @@ -67,90 +81,90 @@ class SourceFactory { QUANTIFIER_EXISTS_ONE }; - SourceFactory(const std::string& expression); + explicit SourceFactory(absl::string_view expression); - int64_t id(const antlr4::Token* token); - int64_t id(antlr4::ParserRuleContext* ctx); - int64_t id(const SourceLocation& location); + int64_t Id(const antlr4::Token* token); + int64_t Id(antlr4::ParserRuleContext* ctx); + int64_t Id(const SourceLocation& location); - int64_t nextMacroId(int64_t macro_id); + int64_t NextMacroId(int64_t macro_id); - const SourceLocation& getSourceLocation(int64_t id) const; + const SourceLocation& GetSourceLocation(int64_t id) const; - static const SourceLocation noLocation(); + static const SourceLocation NoLocation(); - Expr newExpr(int64_t id); - Expr newExpr(antlr4::ParserRuleContext* ctx); - Expr newExpr(const 
antlr4::Token* token); - Expr newGlobalCall(int64_t id, const std::string& function, + Expr NewExpr(int64_t id); + Expr NewExpr(antlr4::ParserRuleContext* ctx); + Expr NewExpr(const antlr4::Token* token); + Expr NewGlobalCall(int64_t id, const std::string& function, const std::vector& args); - Expr newGlobalCallForMacro(int64_t macro_id, const std::string& function, + Expr NewGlobalCallForMacro(int64_t macro_id, const std::string& function, const std::vector& args); - Expr newReceiverCall(int64_t id, const std::string& function, + Expr NewReceiverCall(int64_t id, const std::string& function, const Expr& target, const std::vector& args); - Expr newIdent(const antlr4::Token* token, const std::string& ident_name); - Expr newIdentForMacro(int64_t macro_id, const std::string& ident_name); - Expr newSelect(::cel_grammar::CelParser::SelectOrCallContext* ctx, + Expr NewIdent(const antlr4::Token* token, const std::string& ident_name); + Expr NewIdentForMacro(int64_t macro_id, const std::string& ident_name); + Expr NewSelect(::cel_parser_internal::CelParser::SelectOrCallContext* ctx, Expr& operand, const std::string& field); - Expr newPresenceTestForMacro(int64_t macro_id, const Expr& operand, + Expr NewPresenceTestForMacro(int64_t macro_id, const Expr& operand, const std::string& field); - Expr newObject(int64_t obj_id, const std::string& type_name, + Expr NewObject(int64_t obj_id, const std::string& type_name, const std::vector& entries); - Expr::CreateStruct::Entry newObjectField(int64_t field_id, + Expr::CreateStruct::Entry NewObjectField(int64_t field_id, const std::string& field, const Expr& value); - Expr newComprehension(int64_t id, const std::string& iter_var, + Expr NewComprehension(int64_t id, const std::string& iter_var, const Expr& iter_range, const std::string& accu_var, const Expr& accu_init, const Expr& condition, const Expr& step, const Expr& result); - Expr foldForMacro(int64_t macro_id, const std::string& iter_var, + Expr FoldForMacro(int64_t macro_id, 
const std::string& iter_var, const Expr& iter_range, const std::string& accu_var, const Expr& accu_init, const Expr& condition, const Expr& step, const Expr& result); - Expr newQuantifierExprForMacro(QuantifierKind kind, int64_t macro_id, + Expr NewQuantifierExprForMacro(QuantifierKind kind, int64_t macro_id, const Expr& target, const std::vector& args); - Expr newFilterExprForMacro(int64_t macro_id, const Expr& target, + Expr NewFilterExprForMacro(int64_t macro_id, const Expr& target, const std::vector& args); - Expr newList(int64_t list_id, const std::vector& elems); - Expr newListForMacro(int64_t macro_id, const std::vector& elems); - Expr newMap(int64_t map_id, + Expr NewList(int64_t list_id, const std::vector& elems); + Expr NewListForMacro(int64_t macro_id, const std::vector& elems); + Expr NewMap(int64_t map_id, const std::vector& entries); - Expr newMapForMacro(int64_t macro_id, const Expr& target, + Expr NewMapForMacro(int64_t macro_id, const Expr& target, const std::vector& args); - Expr::CreateStruct::Entry newMapEntry(int64_t entry_id, const Expr& key, + Expr::CreateStruct::Entry NewMapEntry(int64_t entry_id, const Expr& key, const Expr& value); - Expr newLiteralInt(antlr4::ParserRuleContext* ctx, int64_t value); - Expr newLiteralIntForMacro(int64_t macro_id, int64_t value); - Expr newLiteralUint(antlr4::ParserRuleContext* ctx, uint64_t value); - Expr newLiteralDouble(antlr4::ParserRuleContext* ctx, double value); - Expr newLiteralString(antlr4::ParserRuleContext* ctx, const std::string& s); - Expr newLiteralBytes(antlr4::ParserRuleContext* ctx, const std::string& b); - Expr newLiteralBool(antlr4::ParserRuleContext* ctx, bool b); - Expr newLiteralBoolForMacro(int64_t macro_id, bool b); - Expr newLiteralNull(antlr4::ParserRuleContext* ctx); - - Expr reportError(antlr4::ParserRuleContext* ctx, const std::string& msg); - Expr reportError(int32_t line, int32_t col, const std::string& msg); - Expr reportError(const SourceLocation& loc, const std::string& 
msg); - - bool isReserved(const std::string& ident_name); - google::api::expr::v1alpha1::SourceInfo sourceInfo() const; - EnrichedSourceInfo enrichedSourceInfo() const; + Expr NewLiteralInt(antlr4::ParserRuleContext* ctx, int64_t value); + Expr NewLiteralIntForMacro(int64_t macro_id, int64_t value); + Expr NewLiteralUint(antlr4::ParserRuleContext* ctx, uint64_t value); + Expr NewLiteralDouble(antlr4::ParserRuleContext* ctx, double value); + Expr NewLiteralString(antlr4::ParserRuleContext* ctx, const std::string& s); + Expr NewLiteralBytes(antlr4::ParserRuleContext* ctx, const std::string& b); + Expr NewLiteralBool(antlr4::ParserRuleContext* ctx, bool b); + Expr NewLiteralBoolForMacro(int64_t macro_id, bool b); + Expr NewLiteralNull(antlr4::ParserRuleContext* ctx); + + Expr ReportError(antlr4::ParserRuleContext* ctx, absl::string_view msg); + Expr ReportError(int32_t line, int32_t col, absl::string_view msg); + Expr ReportError(const SourceLocation& loc, absl::string_view msg); + + bool IsReserved(absl::string_view ident_name); + google::api::expr::v1alpha1::SourceInfo source_info() const; + EnrichedSourceInfo enriched_source_info() const; const std::vector& errors() const { return errors_truncated_; } - std::string errorMessage(const std::string& description, - const std::string& expression) const; + std::string ErrorMessage(absl::string_view description, + absl::string_view expression) const; Expr BuildArgForMacroCall(const Expr& expr); void AddMacroCall(int64_t macro_id, const Expr& target, const std::vector& args, std::string function); private: - void calcLineOffsets(const std::string& expression); - absl::optional findLineOffset(int32_t line) const; - std::string getSourceLine(int32_t line, const std::string& expression) const; + void CalcLineOffsets(absl::string_view expression); + absl::optional FindLineOffset(int32_t line) const; + std::string GetSourceLine(int32_t line, absl::string_view expression) const; private: int64_t next_id_; @@ -162,9 +176,6 @@ 
class SourceFactory { std::map macro_calls_; }; -} // namespace parser -} // namespace expr -} // namespace api -} // namespace google +} // namespace google::api::expr::parser #endif // THIRD_PARTY_CEL_CPP_PARSER_SOURCE_FACTORY_H_ diff --git a/parser/visitor.cc b/parser/visitor.cc deleted file mode 100644 index b793f7c8a..000000000 --- a/parser/visitor.cc +++ /dev/null @@ -1,606 +0,0 @@ -#include "parser/visitor.h" - -#include -#include - -#include "google/protobuf/struct.pb.h" -#include "absl/memory/memory.h" -#include "absl/strings/match.h" -#include "absl/strings/numbers.h" -#include "absl/strings/str_format.h" -#include "absl/strings/str_join.h" -#include "common/escaping.h" -#include "common/operators.h" -#include "parser/balancer.h" -#include "parser/source_factory.h" - -namespace google { -namespace api { -namespace expr { -namespace parser { -namespace { - -using common::CelOperator; -using common::ReverseLookupOperator; - -using ::cel_grammar::CelParser; -using google::api::expr::v1alpha1::Expr; - -// Scoped helper for incrementing the parse recursion count. -// Increments on creation, decrements on destruction (stack unwind). 
-class ScopedIncrement { - public: - explicit ScopedIncrement(int& recursion_depth) - : recursion_depth_(recursion_depth) { - ++recursion_depth_; - } - - ~ScopedIncrement() { --recursion_depth_; } - - private: - int& recursion_depth_; -}; - -} // namespace - -ParserVisitor::ParserVisitor(const std::string& description, - const std::string& expression, - const int max_recursion_depth, - const std::vector& macros, - const bool add_macro_calls) - : description_(description), - expression_(expression), - sf_(std::make_shared(expression)), - recursion_depth_(0), - max_recursion_depth_(max_recursion_depth), - add_macro_calls_(add_macro_calls) { - for (const auto& m : macros) { - macros_.emplace(m.macroKey(), m); - } -} - -ParserVisitor::~ParserVisitor() {} - -template ::value>> -T* tree_as(antlr4::tree::ParseTree* tree) { - return dynamic_cast(tree); -} - -antlrcpp::Any ParserVisitor::visit(antlr4::tree::ParseTree* tree) { - ScopedIncrement inc(recursion_depth_); - if (recursion_depth_ > max_recursion_depth_) { - return sf_->reportError( - SourceFactory::noLocation(), - absl::StrFormat("Exceeded max recursion depth of %d when parsing.", - max_recursion_depth_)); - } - if (auto* ctx = tree_as(tree)) { - return visitStart(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitExpr(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitConditionalAnd(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitConditionalOr(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitRelation(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitCalc(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitLogicalNot(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitPrimaryExpr(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitMemberExpr(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitSelectOrCall(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitMapInitializerList(ctx); - } else if (auto* ctx = 
tree_as(tree)) { - return visitNegate(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitIndex(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitUnary(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitCreateList(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitCreateMessage(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitCreateStruct(ctx); - } - - if (tree) { - return sf_->reportError(tree_as(tree), - "unknown parsetree type"); - } - return sf_->reportError(SourceFactory::noLocation(), "<> parsetree"); -} - -antlrcpp::Any ParserVisitor::visitPrimaryExpr( - CelParser::PrimaryExprContext* pctx) { - CelParser::PrimaryContext* primary = pctx->primary(); - if (auto* ctx = tree_as(primary)) { - return visitNested(ctx); - } else if (auto* ctx = - tree_as(primary)) { - return visitIdentOrGlobalCall(ctx); - } else if (auto* ctx = tree_as(primary)) { - return visitCreateList(ctx); - } else if (auto* ctx = tree_as(primary)) { - return visitCreateStruct(ctx); - } else if (auto* ctx = tree_as(primary)) { - return visitConstantLiteral(ctx); - } - return sf_->reportError(pctx, "invalid primary expression"); -} - -antlrcpp::Any ParserVisitor::visitMemberExpr( - CelParser::MemberExprContext* mctx) { - CelParser::MemberContext* member = mctx->member(); - if (auto* ctx = tree_as(member)) { - return visitPrimaryExpr(ctx); - } else if (auto* ctx = tree_as(member)) { - return visitSelectOrCall(ctx); - } else if (auto* ctx = tree_as(member)) { - return visitIndex(ctx); - } else if (auto* ctx = tree_as(member)) { - return visitCreateMessage(ctx); - } - return sf_->reportError(mctx, "unsupported simple expression"); -} - -antlrcpp::Any ParserVisitor::visitStart(CelParser::StartContext* ctx) { - return visit(ctx->expr()); -} - -antlrcpp::Any ParserVisitor::visitExpr(CelParser::ExprContext* ctx) { - auto result = visit(ctx->e); - if (!ctx->op) { - return result; - } - int64_t op_id = sf_->id(ctx->op); - Expr if_true = 
visit(ctx->e1); - Expr if_false = visit(ctx->e2); - - return globalCallOrMacro(op_id, CelOperator::CONDITIONAL, - {result, if_true, if_false}); -} - -antlrcpp::Any ParserVisitor::visitConditionalOr( - CelParser::ConditionalOrContext* ctx) { - auto result = visit(ctx->e); - if (ctx->ops.empty()) { - return result; - } - ExpressionBalancer b(sf_, CelOperator::LOGICAL_OR, result); - for (size_t i = 0; i < ctx->ops.size(); ++i) { - auto op = ctx->ops[i]; - if (i >= ctx->e1.size()) { - return sf_->reportError(ctx, "unexpected character, wanted '||'"); - } - auto next = visit(ctx->e1[i]).as(); - int64_t op_id = sf_->id(op); - b.addTerm(op_id, next); - } - return b.balance(); -} - -antlrcpp::Any ParserVisitor::visitConditionalAnd( - CelParser::ConditionalAndContext* ctx) { - auto result = visit(ctx->e); - if (ctx->ops.empty()) { - return result; - } - ExpressionBalancer b(sf_, CelOperator::LOGICAL_AND, result); - for (size_t i = 0; i < ctx->ops.size(); ++i) { - auto op = ctx->ops[i]; - if (i >= ctx->e1.size()) { - return sf_->reportError(ctx, "unexpected character, wanted '&&'"); - } - auto next = visit(ctx->e1[i]).as(); - int64_t op_id = sf_->id(op); - b.addTerm(op_id, next); - } - return b.balance(); -} - -antlrcpp::Any ParserVisitor::visitRelation(CelParser::RelationContext* ctx) { - if (ctx->calc()) { - return visit(ctx->calc()); - } - std::string op_text; - if (ctx->op) { - op_text = ctx->op->getText(); - } - auto op = ReverseLookupOperator(op_text); - if (op) { - auto lhs = visit(ctx->relation(0)).as(); - int64_t op_id = sf_->id(ctx->op); - auto rhs = visit(ctx->relation(1)).as(); - return globalCallOrMacro(op_id, *op, {lhs, rhs}); - } - return sf_->reportError(ctx, "operator not found"); -} - -antlrcpp::Any ParserVisitor::visitCalc(CelParser::CalcContext* ctx) { - if (ctx->unary()) { - return visit(ctx->unary()); - } - std::string op_text; - if (ctx->op) { - op_text = ctx->op->getText(); - } - auto op = ReverseLookupOperator(op_text); - if (op) { - auto lhs = 
visit(ctx->calc(0)).as(); - int64_t op_id = sf_->id(ctx->op); - auto rhs = visit(ctx->calc(1)).as(); - return globalCallOrMacro(op_id, *op, {lhs, rhs}); - } - return sf_->reportError(ctx, "operator not found"); -} - -antlrcpp::Any ParserVisitor::visitUnary(CelParser::UnaryContext* ctx) { - return sf_->newLiteralString(ctx, "<>"); -} - -antlrcpp::Any ParserVisitor::visitLogicalNot( - CelParser::LogicalNotContext* ctx) { - if (ctx->ops.size() % 2 == 0) { - return visit(ctx->member()); - } - int64_t op_id = sf_->id(ctx->ops[0]); - auto target = visit(ctx->member()); - return globalCallOrMacro(op_id, CelOperator::LOGICAL_NOT, {target}); -} - -antlrcpp::Any ParserVisitor::visitNegate(CelParser::NegateContext* ctx) { - if (ctx->ops.size() % 2 == 0) { - return visit(ctx->member()); - } - int64_t op_id = sf_->id(ctx->ops[0]); - auto target = visit(ctx->member()); - return globalCallOrMacro(op_id, CelOperator::NEGATE, {target}); -} - -antlrcpp::Any ParserVisitor::visitSelectOrCall( - CelParser::SelectOrCallContext* ctx) { - auto operand = visit(ctx->member()).as(); - // Handle the error case where no valid identifier is specified. 
- if (!ctx->id) { - return sf_->newExpr(ctx); - } - auto id = ctx->id->getText(); - if (ctx->open) { - int64_t op_id = sf_->id(ctx->open); - return receiverCallOrMacro(op_id, id, operand, visitList(ctx->args)); - } - return sf_->newSelect(ctx, operand, id); -} - -antlrcpp::Any ParserVisitor::visitIndex(CelParser::IndexContext* ctx) { - auto target = visit(ctx->member()).as(); - int64_t op_id = sf_->id(ctx->op); - auto index = visit(ctx->index).as(); - return globalCallOrMacro(op_id, CelOperator::INDEX, {target, index}); -} - -antlrcpp::Any ParserVisitor::visitCreateMessage( - CelParser::CreateMessageContext* ctx) { - auto target = visit(ctx->member()).as(); - int64_t obj_id = sf_->id(ctx->op); - std::string message_name = extractQualifiedName(ctx, &target); - if (!message_name.empty()) { - auto entries = visitFieldInitializerList(ctx->entries) - .as>(); - return sf_->newObject(obj_id, message_name, entries); - } else { - return sf_->newExpr(obj_id); - } -} - -antlrcpp::Any ParserVisitor::visitFieldInitializerList( - CelParser::FieldInitializerListContext* ctx) { - std::vector res; - if (!ctx || ctx->fields.empty()) { - return res; - } - - res.resize(ctx->fields.size()); - for (size_t i = 0; i < ctx->fields.size(); ++i) { - if (i >= ctx->cols.size() || i >= ctx->values.size()) { - // This is the result of a syntax error detected elsewhere. 
- return res; - } - const auto& f = ctx->fields[i]; - int64_t init_id = sf_->id(ctx->cols[i]); - auto value = visit(ctx->values[i]).as(); - auto field = sf_->newObjectField(init_id, f->getText(), value); - res[i] = field; - } - - return res; -} - -antlrcpp::Any ParserVisitor::visitIdentOrGlobalCall( - CelParser::IdentOrGlobalCallContext* ctx) { - std::string ident_name; - if (ctx->leadingDot) { - ident_name = "."; - } - if (!ctx->id) { - return sf_->newExpr(ctx); - } - if (sf_->isReserved(ctx->id->getText())) { - return sf_->reportError( - ctx, absl::StrFormat("reserved identifier: %s", ctx->id->getText())); - } - // check if ID is in reserved identifiers - ident_name += ctx->id->getText(); - if (ctx->op) { - int64_t op_id = sf_->id(ctx->op); - return globalCallOrMacro(op_id, ident_name, visitList(ctx->args)); - } - return sf_->newIdent(ctx->id, ident_name); -} - -antlrcpp::Any ParserVisitor::visitNested(CelParser::NestedContext* ctx) { - return visit(ctx->e); -} - -antlrcpp::Any ParserVisitor::visitCreateList( - CelParser::CreateListContext* ctx) { - int64_t list_id = sf_->id(ctx->op); - return sf_->newList(list_id, visitList(ctx->elems)); -} - -std::vector ParserVisitor::visitList(CelParser::ExprListContext* ctx) { - std::vector rv; - if (!ctx) return rv; - std::transform(ctx->e.begin(), ctx->e.end(), std::back_inserter(rv), - [this](CelParser::ExprContext* expr_ctx) { - return visitExpr(expr_ctx).as(); - }); - return rv; -} - -antlrcpp::Any ParserVisitor::visitCreateStruct( - CelParser::CreateStructContext* ctx) { - int64_t struct_id = sf_->id(ctx->op); - std::vector entries; - if (ctx->entries) { - entries = visitMapInitializerList(ctx->entries) - .as>(); - } - return sf_->newMap(struct_id, entries); -} - -antlrcpp::Any ParserVisitor::visitConstantLiteral( - CelParser::ConstantLiteralContext* clctx) { - CelParser::LiteralContext* literal = clctx->literal(); - if (auto* ctx = tree_as(literal)) { - return visitInt(ctx); - } else if (auto* ctx = tree_as(literal)) 
{ - return visitUint(ctx); - } else if (auto* ctx = tree_as(literal)) { - return visitDouble(ctx); - } else if (auto* ctx = tree_as(literal)) { - return visitString(ctx); - } else if (auto* ctx = tree_as(literal)) { - return visitBytes(ctx); - } else if (auto* ctx = tree_as(literal)) { - return visitBoolFalse(ctx); - } else if (auto* ctx = tree_as(literal)) { - return visitBoolTrue(ctx); - } else if (auto* ctx = tree_as(literal)) { - return visitNull(ctx); - } - return sf_->reportError(clctx, "invalid constant literal expression"); -} - -antlrcpp::Any ParserVisitor::visitMapInitializerList( - CelParser::MapInitializerListContext* ctx) { - std::vector res; - if (!ctx || ctx->keys.empty()) { - return res; - } - - res.resize(ctx->cols.size()); - for (size_t i = 0; i < ctx->cols.size(); ++i) { - int64_t col_id = sf_->id(ctx->cols[i]); - auto key = visit(ctx->keys[i]); - auto value = visit(ctx->values[i]); - res[i] = sf_->newMapEntry(col_id, key, value); - } - return res; -} - -antlrcpp::Any ParserVisitor::visitInt(CelParser::IntContext* ctx) { - std::string value; - if (ctx->sign) { - value = ctx->sign->getText(); - } - int base = 10; - if (absl::StartsWith(ctx->tok->getText(), "0x")) { - base = 16; - } - value += ctx->tok->getText(); - int64_t int_value; - if (absl::numbers_internal::safe_strto64_base(value, &int_value, base)) { - return sf_->newLiteralInt(ctx, int_value); - } else { - return sf_->reportError(ctx, "invalid int literal"); - } -} - -antlrcpp::Any ParserVisitor::visitUint(CelParser::UintContext* ctx) { - std::string value = ctx->tok->getText(); - // trim the 'u' designator included in the uint literal. 
- if (!value.empty()) { - value.resize(value.size() - 1); - } - int base = 10; - if (absl::StartsWith(ctx->tok->getText(), "0x")) { - base = 16; - } - uint64_t uint_value; - if (absl::numbers_internal::safe_strtou64_base(value, &uint_value, base)) { - return sf_->newLiteralUint(ctx, uint_value); - } else { - return sf_->reportError(ctx, "invalid uint literal"); - } -} - -antlrcpp::Any ParserVisitor::visitDouble(CelParser::DoubleContext* ctx) { - std::string value; - if (ctx->sign) { - value = ctx->sign->getText(); - } - value += ctx->tok->getText(); - double double_value; - if (absl::SimpleAtod(value, &double_value)) { - return sf_->newLiteralDouble(ctx, double_value); - } else { - return sf_->reportError(ctx, "invalid double literal"); - } -} - -antlrcpp::Any ParserVisitor::visitString(CelParser::StringContext* ctx) { - std::string value = unquote(ctx, ctx->tok->getText(), /* is bytes */ false); - return sf_->newLiteralString(ctx, value); -} - -antlrcpp::Any ParserVisitor::visitBytes(CelParser::BytesContext* ctx) { - std::string value = unquote(ctx, ctx->tok->getText().substr(1), - /* is bytes */ true); - return sf_->newLiteralBytes(ctx, value); -} - -antlrcpp::Any ParserVisitor::visitBoolTrue(CelParser::BoolTrueContext* ctx) { - return sf_->newLiteralBool(ctx, true); -} - -antlrcpp::Any ParserVisitor::visitBoolFalse(CelParser::BoolFalseContext* ctx) { - return sf_->newLiteralBool(ctx, false); -} - -antlrcpp::Any ParserVisitor::visitNull(CelParser::NullContext* ctx) { - return sf_->newLiteralNull(ctx); -} - -google::api::expr::v1alpha1::SourceInfo ParserVisitor::sourceInfo() const { - return sf_->sourceInfo(); -} - -EnrichedSourceInfo ParserVisitor::enrichedSourceInfo() const { - return sf_->enrichedSourceInfo(); -} - -void ParserVisitor::syntaxError(antlr4::Recognizer* recognizer, - antlr4::Token* offending_symbol, size_t line, - size_t col, const std::string& msg, - std::exception_ptr e) { - sf_->reportError(line, col, "Syntax error: " + msg); -} - -bool 
ParserVisitor::hasErrored() const { return !sf_->errors().empty(); } - -std::string ParserVisitor::errorMessage() const { - return sf_->errorMessage(description_, expression_); -} - -Expr ParserVisitor::globalCallOrMacro(int64_t expr_id, - const std::string& function, - const std::vector& args) { - Expr macro_expr; - if (expandMacro(expr_id, function, Expr::default_instance(), args, - ¯o_expr)) { - return macro_expr; - } - - return sf_->newGlobalCall(expr_id, function, args); -} - -Expr ParserVisitor::receiverCallOrMacro(int64_t expr_id, - const std::string& function, - const Expr& target, - const std::vector& args) { - Expr macro_expr; - if (expandMacro(expr_id, function, target, args, ¯o_expr)) { - return macro_expr; - } - - return sf_->newReceiverCall(expr_id, function, target, args); -} - -bool ParserVisitor::expandMacro(int64_t expr_id, const std::string& function, - const Expr& target, - const std::vector& args, - Expr* macro_expr) { - std::string macro_key = absl::StrFormat("%s:%d:%s", function, args.size(), - target.id() != 0 ? "true" : "false"); - auto m = macros_.find(macro_key); - if (m == macros_.end()) { - std::string var_arg_macro_key = absl::StrFormat( - "%s:*:%s", function, target.id() != 0 ? "true" : "false"); - m = macros_.find(var_arg_macro_key); - if (m == macros_.end()) { - return false; - } - } - - Expr expr = m->second.expand(sf_, expr_id, target, args); - if (expr.expr_kind_case() != Expr::EXPR_KIND_NOT_SET) { - *macro_expr = std::move(expr); - if (add_macro_calls_) { - // If the macro is nested, the full expression id is used as an argument - // id in the tree. Using this ID instead of expr_id allows argument id - // lookups in macro_calls when building the map and iterating - // the AST. 
- sf_->AddMacroCall(macro_expr->id(), target, args, function); - } - return true; - } - return false; -} - -std::string ParserVisitor::unquote(antlr4::ParserRuleContext* ctx, - const std::string& s, bool is_bytes) { - auto text = unescape(s, is_bytes); - if (!text) { - sf_->reportError(ctx, "failed to unquote"); - return s; - } - return *text; -} - -std::string ParserVisitor::extractQualifiedName(antlr4::ParserRuleContext* ctx, - const Expr* e) { - if (!e) { - return ""; - } - - switch (e->expr_kind_case()) { - case Expr::kIdentExpr: - return e->ident_expr().name(); - case Expr::kSelectExpr: { - auto& s = e->select_expr(); - std::string prefix = extractQualifiedName(ctx, &s.operand()); - if (!prefix.empty()) { - return prefix + "." + s.field(); - } - } break; - default: - break; - } - sf_->reportError(sf_->getSourceLocation(e->id()), - "expected a qualified name"); - return ""; -} - -} // namespace parser -} // namespace expr -} // namespace api -} // namespace google diff --git a/parser/visitor.h b/parser/visitor.h deleted file mode 100644 index 7df91c099..000000000 --- a/parser/visitor.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef THIRD_PARTY_CEL_CPP_PARSER_VISITOR_H_ -#define THIRD_PARTY_CEL_CPP_PARSER_VISITOR_H_ - -#include "google/api/expr/v1alpha1/syntax.pb.h" -#include "absl/types/optional.h" -#include "parser/cel_grammar.inc/cel_grammar/CelBaseVisitor.h" -#include "parser/macro.h" -#include "parser/source_factory.h" - -namespace google { -namespace api { -namespace expr { -namespace parser { - -class SourceFactory; - -class ParserVisitor : public ::cel_grammar::CelBaseVisitor, - public antlr4::BaseErrorListener { - public: - ParserVisitor(const std::string& description, const std::string& expression, - const int max_recursion_depth, - const std::vector& macros = {}, - const bool add_macro_calls = false); - virtual ~ParserVisitor(); - - antlrcpp::Any visit(antlr4::tree::ParseTree* tree) override; - - antlrcpp::Any visitStart( - 
::cel_grammar::CelParser::StartContext* ctx) override; - antlrcpp::Any visitExpr(::cel_grammar::CelParser::ExprContext* ctx) override; - antlrcpp::Any visitConditionalOr( - ::cel_grammar::CelParser::ConditionalOrContext* ctx) override; - antlrcpp::Any visitConditionalAnd( - ::cel_grammar::CelParser::ConditionalAndContext* ctx) override; - antlrcpp::Any visitRelation( - ::cel_grammar::CelParser::RelationContext* ctx) override; - antlrcpp::Any visitCalc(::cel_grammar::CelParser::CalcContext* ctx) override; - antlrcpp::Any visitUnary(::cel_grammar::CelParser::UnaryContext* ctx); - antlrcpp::Any visitLogicalNot( - ::cel_grammar::CelParser::LogicalNotContext* ctx) override; - antlrcpp::Any visitNegate( - ::cel_grammar::CelParser::NegateContext* ctx) override; - antlrcpp::Any visitSelectOrCall( - ::cel_grammar::CelParser::SelectOrCallContext* ctx) override; - antlrcpp::Any visitIndex( - ::cel_grammar::CelParser::IndexContext* ctx) override; - antlrcpp::Any visitCreateMessage( - ::cel_grammar::CelParser::CreateMessageContext* ctx) override; - antlrcpp::Any visitFieldInitializerList( - ::cel_grammar::CelParser::FieldInitializerListContext* ctx) override; - antlrcpp::Any visitIdentOrGlobalCall( - ::cel_grammar::CelParser::IdentOrGlobalCallContext* ctx) override; - antlrcpp::Any visitNested( - ::cel_grammar::CelParser::NestedContext* ctx) override; - antlrcpp::Any visitCreateList( - ::cel_grammar::CelParser::CreateListContext* ctx) override; - std::vector visitList( - ::cel_grammar::CelParser::ExprListContext* ctx); - antlrcpp::Any visitCreateStruct( - ::cel_grammar::CelParser::CreateStructContext* ctx) override; - antlrcpp::Any visitConstantLiteral( - ::cel_grammar::CelParser::ConstantLiteralContext* ctx) override; - antlrcpp::Any visitPrimaryExpr( - ::cel_grammar::CelParser::PrimaryExprContext* ctx) override; - antlrcpp::Any visitMemberExpr( - ::cel_grammar::CelParser::MemberExprContext* ctx) override; - - antlrcpp::Any visitMapInitializerList( - 
::cel_grammar::CelParser::MapInitializerListContext* ctx) override; - antlrcpp::Any visitInt(::cel_grammar::CelParser::IntContext* ctx) override; - antlrcpp::Any visitUint(::cel_grammar::CelParser::UintContext* ctx) override; - antlrcpp::Any visitDouble( - ::cel_grammar::CelParser::DoubleContext* ctx) override; - antlrcpp::Any visitString( - ::cel_grammar::CelParser::StringContext* ctx) override; - antlrcpp::Any visitBytes( - ::cel_grammar::CelParser::BytesContext* ctx) override; - antlrcpp::Any visitBoolTrue( - ::cel_grammar::CelParser::BoolTrueContext* ctx) override; - antlrcpp::Any visitBoolFalse( - ::cel_grammar::CelParser::BoolFalseContext* ctx) override; - antlrcpp::Any visitNull(::cel_grammar::CelParser::NullContext* ctx) override; - google::api::expr::v1alpha1::SourceInfo sourceInfo() const; - EnrichedSourceInfo enrichedSourceInfo() const; - void syntaxError(antlr4::Recognizer* recognizer, - antlr4::Token* offending_symbol, size_t line, size_t col, - const std::string& msg, std::exception_ptr e) override; - bool hasErrored() const; - - std::string errorMessage() const; - - private: - Expr globalCallOrMacro(int64_t expr_id, const std::string& function, - const std::vector& args); - Expr receiverCallOrMacro(int64_t expr_id, const std::string& function, - const Expr& target, const std::vector& args); - bool expandMacro(int64_t expr_id, const std::string& function, - const Expr& target, const std::vector& args, - Expr* macro_expr); - std::string unquote(antlr4::ParserRuleContext* ctx, const std::string& s, - bool is_bytes); - std::string extractQualifiedName(antlr4::ParserRuleContext* ctx, - const Expr* e); - - private: - std::string description_; - std::string expression_; - std::shared_ptr sf_; - std::map macros_; - int recursion_depth_; - const int max_recursion_depth_; - const bool add_macro_calls_; -}; - -} // namespace parser -} // namespace expr -} // namespace api -} // namespace google - -#endif // THIRD_PARTY_CEL_CPP_PARSER_VISITOR_H_ diff --git 
a/testutil/BUILD b/testutil/BUILD index c13f0f150..75763d7c8 100644 --- a/testutil/BUILD +++ b/testutil/BUILD @@ -1,7 +1,16 @@ -# Description -# Test utilities for cpp CEL. +# Copyright 2021 Google LLC # -# Uses the namespace google::api::expr::testutil. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. package(default_visibility = ["//visibility:public"]) @@ -12,7 +21,7 @@ cc_library( srcs = ["expr_printer.cc"], hdrs = ["expr_printer.h"], deps = [ - "//common:escaping", + "//internal:strings", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_googleapis//google/api/expr/v1alpha1:syntax_cc_proto", @@ -27,6 +36,7 @@ cc_library( ], deps = [ "//internal:testing", + "@com_google_absl//absl/strings", "@com_google_protobuf//:protobuf", ], ) diff --git a/testutil/expr_printer.cc b/testutil/expr_printer.cc index 8b618ede3..695b9cfa1 100644 --- a/testutil/expr_printer.cc +++ b/testutil/expr_printer.cc @@ -1,9 +1,24 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + #include "testutil/expr_printer.h" +#include #include #include "absl/strings/str_format.h" -#include "common/escaping.h" +#include "internal/strings.h" namespace google { namespace api { @@ -240,7 +255,7 @@ class Writer { case google::api::expr::v1alpha1::Constant::kBoolValue: return absl::StrFormat("%s", c.bool_value() ? "true" : "false"); case google::api::expr::v1alpha1::Constant::kBytesValue: - return absl::StrFormat("b\"%s\"", c.bytes_value()); + return cel::internal::FormatDoubleQuotedBytesLiteral(c.bytes_value()); case google::api::expr::v1alpha1::Constant::kDoubleValue: { std::string s = absl::StrFormat("%f", c.double_value()); // remove trailing zeros, i.e., convert 1.600000 to just 1.6 without @@ -254,7 +269,7 @@ class Writer { case google::api::expr::v1alpha1::Constant::kInt64Value: return absl::StrFormat("%d", c.int64_value()); case google::api::expr::v1alpha1::Constant::kStringValue: - return parser::escapeAndQuote(c.string_value()); + return cel::internal::FormatDoubleQuotedStringLiteral(c.string_value()); case google::api::expr::v1alpha1::Constant::kUint64Value: return absl::StrFormat("%uu", c.uint64_value()); case google::api::expr::v1alpha1::Constant::kNullValue: diff --git a/testutil/util.h b/testutil/util.h index 7eb62ea85..170c140b8 100644 --- a/testutil/util.h +++ b/testutil/util.h @@ -6,6 +6,7 @@ #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" #include "gmock/gmock.h" +#include "absl/strings/string_view.h" namespace google { namespace api { @@ -32,7 +33,7 @@ T CreateProto(const std::string& textual_proto); */ class ProtoStringMatcher { public: - explicit inline ProtoStringMatcher(const std::string& expected) + explicit inline ProtoStringMatcher(absl::string_view expected) : expected_(expected) {} explicit inline ProtoStringMatcher(const google::protobuf::Message& expected) @@ -57,7 +58,7 @@ class 
ProtoStringMatcher { // Polymorphic matcher to compare any two protos. inline ::testing::PolymorphicMatcher EqualsProto( - const std::string& x) { + absl::string_view x) { return ::testing::MakePolymorphicMatcher(ProtoStringMatcher(x)); } diff --git a/tools/BUILD b/tools/BUILD index d418ea720..1146add08 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -1,11 +1,6 @@ -load( - "@com_github_google_flatbuffers//:build_defs.bzl", - "flatbuffer_library_public", -) - package(default_visibility = ["//visibility:public"]) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "flatbuffers_backed_impl", @@ -25,23 +20,6 @@ cc_library( ], ) -flatbuffer_library_public( - name = "flatbuffers_test", - srcs = ["testdata/flatbuffers.fbs"], - outs = ["testdata/flatbuffers_generated.h"], - language_flag = "-c", - reflection_name = "flatbuffers_reflection", -) - -cc_library( - name = "flatbuffers_test_cc", - srcs = [":flatbuffers_test"], - hdrs = [":flatbuffers_test"], - features = ["-parse_headers"], - linkstatic = True, - deps = ["@com_github_google_flatbuffers//:runtime_cc"], -) - cc_test( name = "flatbuffers_backed_impl_test", size = "small", @@ -49,7 +27,7 @@ cc_test( "flatbuffers_backed_impl_test.cc", ], data = [ - ":flatbuffers_reflection_out", + "//tools/testdata:flatbuffers_reflection_out", ], deps = [ ":flatbuffers_backed_impl", diff --git a/tools/flatbuffers_backed_impl_test.cc b/tools/flatbuffers_backed_impl_test.cc index e12865f4e..9f55f793a 100644 --- a/tools/flatbuffers_backed_impl_test.cc +++ b/tools/flatbuffers_backed_impl_test.cc @@ -1,5 +1,7 @@ #include "tools/flatbuffers_backed_impl.h" +#include + #include "internal/status_macros.h" #include "internal/testing.h" #include "flatbuffers/idl.h" @@ -12,10 +14,9 @@ namespace runtime { namespace { -using google::protobuf::Arena; - constexpr char kReflectionBufferPath[] = - "tools/flatbuffers.bfbs"; + "tools/testdata/" + "flatbuffers.bfbs"; constexpr absl::string_view kByteField = "f_byte"; constexpr 
absl::string_view kUbyteField = "f_ubyte"; diff --git a/tools/testdata/BUILD b/tools/testdata/BUILD new file mode 100644 index 000000000..13d5aa2a1 --- /dev/null +++ b/tools/testdata/BUILD @@ -0,0 +1,41 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "@com_github_google_flatbuffers//:build_defs.bzl", + "flatbuffer_library_public", +) + +licenses(["notice"]) + +package( + default_visibility = ["//visibility:public"], +) + +flatbuffer_library_public( + name = "flatbuffers_test", + srcs = ["flatbuffers.fbs"], + outs = ["flatbuffers_generated.h"], + language_flag = "-c", + reflection_name = "flatbuffers_reflection", +) + +cc_library( + name = "flatbuffers_test_cc", + srcs = [":flatbuffers_test"], + hdrs = [":flatbuffers_test"], + features = ["-parse_headers"], + linkstatic = True, + deps = ["@com_github_google_flatbuffers//:runtime_cc"], +) diff --git a/tools/testdata/checked_expr_and.textproto b/tools/testdata/checked_expr_and.textproto new file mode 100644 index 000000000..317b4419a --- /dev/null +++ b/tools/testdata/checked_expr_and.textproto @@ -0,0 +1,73 @@ +# proto-file: google3/google/api/expr/checked.proto +# proto-message: CheckedExpr +# x && y +reference_map { + key: 1 + value { + name: "x" + } +} +reference_map { + key: 2 + value { + name: "y" + } +} +reference_map { + key: 3 + value { + overload_id: "logical_and" + } +} +type_map { + key: 1 + value { + primitive: BOOL + } +} +type_map { + key: 2 + value { + primitive: 
BOOL + } +} +type_map { + key: 3 + value { + primitive: BOOL + } +} +expr { + id: 3 + call_expr { + function: "_&&_" + args { + id: 1 + ident_expr { + name: "x" + } + } + args { + id: 2 + ident_expr { + name: "y" + } + } + } +} +source_info { + location: "" + line_offsets: 7 + positions { + key: 1 + value: 0 + } + positions { + key: 2 + value: 5 + } + positions { + key: 3 + value: 2 + } +} diff --git a/tools/testdata/const_str.textproto b/tools/testdata/const_str.textproto new file mode 100644 index 000000000..ca8a8986d --- /dev/null +++ b/tools/testdata/const_str.textproto @@ -0,0 +1,23 @@ +# proto-file: google3/google/api/expr/checked.proto +# proto-message: CheckedExpr +type_map { + key: 1 + value { + primitive: STRING + } +} +expr { + id: 1 + const_expr { + string_value: "127.0.0.1" + } +} +source_info { + location: "" + line_offsets: 12 + positions { + key: 1 + value: 0 + } +} +