From d03732764e8a5c9eaa715c81ec0ff0c7a16c7273 Mon Sep 17 00:00:00 2001 From: ocshawn Date: Tue, 18 Apr 2023 11:15:25 -0500 Subject: [PATCH 1/6] fix/countSpeed: speed up _totalCount by not pulling data only total --- src/server/es/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/es/index.js b/src/server/es/index.js index 122361dc..a82b1e2d 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -424,7 +424,7 @@ class ES { async getCount(esIndex, esType, filter) { const result = await this.filterData( { esInstance: this, esIndex, esType }, - { filter, fields: false }, + { filter, fields: false, size: 0}, ); return result.hits.total; } From 16d9a9745e8db28f5d0fc917a9c65da1d87abf71 Mon Sep 17 00:00:00 2001 From: ocshawn Date: Thu, 4 May 2023 09:55:45 -0500 Subject: [PATCH 2/6] working _cardinalityCount at lowest level --- doc/queries.md | 40 +++++++++++++++++++++++++++++ src/server/__tests__/schema.test.js | 8 ++++-- src/server/es/index.js | 20 +++++++++++++++ src/server/resolvers.js | 26 +++++++++++++++++++ src/server/schema.js | 4 ++- 5 files changed, 95 insertions(+), 3 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 1c4157fd..5f4f5f88 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -8,6 +8,7 @@ Table of Contents - [Numeric Aggregation](#aggs-numeric) - [Nested Aggregation](#aggs-nested) - [Sub-aggregations](#aggs-sub) + - [Cardinality Count Aggregation](#aggs-cardinality) - [Filters](#filter) - [Basic Filter Unit](#filter-unit) - [Text Search Unit in Filter](#filter-search) @@ -765,6 +766,45 @@ Result: } ``` + + +### 6. Cardinality Count Aggregation + By using `_cardinalityCount` keyword, return a cardinality count of a feild. 
+ + See [Elasticsearch documentation on Cardinality](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html) + + Example: + + ``` + query ($filter: JSON) { + _aggregation { + subject(filter: $filter) { + file_count { + _cardinalityCount( + precision_threshold: 1000 //optional defaults to 3000 + ) + } + } + } +} +``` + +Example result: + +``` +{ + "data": { + "_aggregation": { + "subject": { + "file_count": { + "_cardinalityCount": 98 + } + } + } + } +} +``` + ## Filters diff --git a/src/server/__tests__/schema.test.js b/src/server/__tests__/schema.test.js index 1cecb9f9..db80a2ae 100644 --- a/src/server/__tests__/schema.test.js +++ b/src/server/__tests__/schema.test.js @@ -110,7 +110,7 @@ describe('Schema', () => { const expectedIndividualAggsSchemas = ` type SubjectAggregation { - _totalCount: Int + _totalCount: Int, gen3_resource_path: HistogramForString, gender: HistogramForString, file_count: HistogramForNumber, @@ -121,7 +121,7 @@ describe('Schema', () => { visits:NestedHistogramForVisits } type FileAggregation { - _totalCount: Int + _totalCount: Int, gen3_resource_path: HistogramForString, file_id: HistogramForString, file_size: HistogramForNumber, @@ -147,12 +147,15 @@ describe('Schema', () => { const expectedHistogramSchemas = ` type HistogramForString { + _cardinalityCount(precision_threshold:Int=3000): Int, histogram: [BucketsForNestedStringAgg] } type RegularAccessHistogramForString { + _cardinalityCount(precision_threshold:Int=3000): Int, histogram: [BucketsForNestedStringAgg] } type HistogramForNumber { + _cardinalityCount(precision_threshold:Int=3000): Int, histogram( rangeStart: Int, rangeEnd: Int, @@ -162,6 +165,7 @@ describe('Schema', () => { asTextHistogram: [BucketsForNestedStringAgg] } type RegularAccessHistogramForNumber { + _cardinalityCount(precision_threshold:Int=3000): Int, histogram( rangeStart: Int, rangeEnd: Int, diff --git a/src/server/es/index.js b/src/server/es/index.js 
index a82b1e2d..68a96fd8 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -429,6 +429,26 @@ class ES { return result.hits.total; } + async getCardinalityCount(esIndex, esType, filter, field, precision_threshold) { + const queryBody = { + size: 0, + aggs: { + "cardinality_count": { + "cardinality": { + "field": field, + "precision_threshold": precision_threshold + } + } + }, + }; + if (typeof filter !== 'undefined') { + queryBody.query = getFilterObj(this, esIndex, filter); + } + + const result = await this.query(esIndex, esType, queryBody); + return result.aggregations.cardinality_count.value; + } + async getData({ esIndex, esType, fields, filter, sort, offset, size, }) { diff --git a/src/server/resolvers.js b/src/server/resolvers.js index e9d2be4b..e105fcc2 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -123,6 +123,24 @@ const textHistogramResolver = async (parent, args, context) => { }); }; +/** + * This resolver is for Cardinality. + * It inherits arguments from its parent, + * and uses "field" from parent and args "precision_threshold" to get the cardinality count + * @param {object} parent + * @param {object} args + */ +const cardinalityResolver = async (parent, args) => { + log.debug('[resolver.cardinalityResolver] args', args); + log.debug('[resolver.cardinalityResolver] parent', parent); + const { + esInstance, esIndex, esType, filter, field + } = parent; + const { precision_threshold } = args; + + return esInstance.getCardinalityCount(esIndex, esType, filter, field, precision_threshold); +}; + const getFieldAggregationResolverMappingsByField = (field) => { let isNumericField = false; if (esFieldNumericTextTypeMapping[field.type] === NumericTextTypeTypeEnum.ES_NUMERIC_TYPE) { @@ -137,6 +155,7 @@ const getFieldAggregationResolverMappingsByField = (field) => { })); }; +// this spreads all fields out into individual resolvers and adds "field", "isNumericField" and "nestedPath", to parent const 
getFieldAggregationResolverMappings = (esInstance, esIndex) => { const { fields } = esInstance.getESFields(esIndex); const fieldAggregationResolverMappings = {}; @@ -165,6 +184,9 @@ const getFieldAggregationResolverMappings = (esInstance, esIndex) => { * } * } * file_count { + * _cardinalityCount ( + * precision_threshold: 1000 //optional + * ), ---> `cardinalityResolver` * histogram (rangeStart: xx, rangeEnd: xx, rangeStep: xx, binCount: xx) * { ---> `numericHistogramResolver` * key @@ -259,17 +281,21 @@ const getResolver = (esConfig, esInstance) => { ...typeAggregationResolvers, ...typeNestedAggregationResolvers, HistogramForNumber: { + _cardinalityCount: cardinalityResolver, histogram: numericHistogramResolver, asTextHistogram: textHistogramResolver, }, HistogramForString: { + _cardinalityCount: cardinalityResolver, histogram: textHistogramResolver, }, RegularAccessHistogramForNumber: { + _cardinalityCount: cardinalityResolver, histogram: numericHistogramResolver, asTextHistogram: textHistogramResolver, }, RegularAccessHistogramForString: { + _cardinalityCount: cardinalityResolver, histogram: textHistogramResolver, }, Mapping: { diff --git a/src/server/schema.js b/src/server/schema.js index b56d4708..63b04043 100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -160,7 +160,7 @@ const getAggregationSchemaForOneIndex = (esInstance, esConfigElement) => { })); const fieldAggsNestedTypeMap = fieldGQLTypeMap.filter((f) => f.esType === 'nested'); return `type ${esTypeObjName}Aggregation { - _totalCount: Int + _totalCount: Int, ${fieldAggsTypeMap.map((entry) => `${getAggregationType(entry)}`).join('\n')} ${fieldAggsNestedTypeMap.map((entry) => `${entry.field}: NestedHistogramFor${firstLetterUpperCase(entry.field)}`).join('\n')} }`; @@ -228,6 +228,7 @@ export const getAggregationSchemaForEachNestedType = (esConfig, esInstance) => e const getNumberHistogramSchema = (isRegularAccess) => ` type ${(isRegularAccess ? 
histogramTypePrefix : '') + EnumAggsHistogramName.HISTOGRAM_FOR_NUMBER} { + _cardinalityCount(precision_threshold: Int = 3000): Int, histogram( rangeStart: Int, rangeEnd: Int, @@ -240,6 +241,7 @@ const getNumberHistogramSchema = (isRegularAccess) => ` const getTextHistogramSchema = (isRegularAccess) => ` type ${(isRegularAccess ? histogramTypePrefix : '') + EnumAggsHistogramName.HISTOGRAM_FOR_STRING} { + _cardinalityCount(precision_threshold: Int = 3000): Int, histogram: [BucketsForNestedStringAgg] } `; From ccd4eabc46b097b8245768f44e43ba5c9b896ce6 Mon Sep 17 00:00:00 2001 From: ocshawn Date: Thu, 4 May 2023 15:34:31 -0500 Subject: [PATCH 3/6] add countFields to connected filter run lint and fix errors --- src/components/ConnectedFilter/index.jsx | 5 +++ src/components/Utils/queries.js | 56 ++++++++++++++++++------ src/server/es/filter.js | 10 ++--- src/server/es/index.js | 15 ++++--- src/server/resolvers.js | 8 +++- 5 files changed, 66 insertions(+), 28 deletions(-) diff --git a/src/components/ConnectedFilter/index.jsx b/src/components/ConnectedFilter/index.jsx index 601f4e8d..a04c073f 100644 --- a/src/components/ConnectedFilter/index.jsx +++ b/src/components/ConnectedFilter/index.jsx @@ -46,6 +46,7 @@ class ConnectedFilter extends React.Component { this.state = { allFields, + countFields: this.props.extraAggsFieldsCardinalityCount, initialAggsData: {}, receivedAggsData: {}, accessibility: ENUM_ACCESSIBILITY.ALL, @@ -70,6 +71,7 @@ class ConnectedFilter extends React.Component { this.props.guppyConfig.path, this.props.guppyConfig.type, this.state.allFields, + this.state.countFields, this.state.accessibility, this.state.filter, ) @@ -126,6 +128,7 @@ class ConnectedFilter extends React.Component { this.props.guppyConfig.path, this.props.guppyConfig.type, this.state.allFields, + this.state.countFields, mergedFilterResults, this.state.accessibility, ) @@ -317,6 +320,7 @@ ConnectedFilter.propTypes = { })), }).isRequired, extraAggsFields: 
PropTypes.arrayOf(PropTypes.string), + extraAggsFieldsCardinalityCount: PropTypes.arrayOf(PropTypes.string), guppyConfig: PropTypes.shape({ path: PropTypes.string.isRequired, type: PropTypes.string.isRequired, @@ -344,6 +348,7 @@ ConnectedFilter.propTypes = { ConnectedFilter.defaultProps = { extraAggsFields: [], + extraAggsFieldsCardinalityCount: [], onFilterChange: () => {}, onReceiveNewAggsData: () => {}, className: '', diff --git a/src/components/Utils/queries.js b/src/components/Utils/queries.js index 026e8446..7430c0d6 100644 --- a/src/components/Utils/queries.js +++ b/src/components/Utils/queries.js @@ -23,31 +23,52 @@ const histogramQueryStrForEachField = (field) => { }`); }; -const queryGuppyForAggs = (path, type, fields, gqlFilter, acc) => { +const cardinalityQueryStrForEachField = (field) => { + const splittedFieldArray = field.split('.'); + const splittedField = splittedFieldArray.shift(); + + if (splittedFieldArray.length === 0) { + return (` + ${splittedField} { + _cardinalityCount + } + `); + } + return (` + ${splittedField} { + ${cardinalityQueryStrForEachField(splittedFieldArray.join('.'))} + }`); +}; + +const queryGuppyForAggs = (path, type, fields, countFields, gqlFilter, acc) => { let accessibility = acc; if (accessibility !== 'all' && accessibility !== 'accessible' && accessibility !== 'unaccessible') { accessibility = 'all'; } - const query = `query { - _aggregation { - ${type} (accessibility: ${accessibility}) { - ${fields.map((field) => histogramQueryStrForEachField(field))} - } - } - }`; - const queryBody = { query }; + const queryBody = {}; if (gqlFilter) { const queryWithFilter = `query ($filter: JSON) { _aggregation { ${type} (filter: $filter, filterSelf: false, accessibility: ${accessibility}) { - ${fields.map((field) => histogramQueryStrForEachField(field))} + ${fields.map((field) => histogramQueryStrForEachField(field))}, + ${countFields.map((field) => cardinalityQueryStrForEachField(field))} } } }`; queryBody.variables = { filter: 
gqlFilter }; queryBody.query = queryWithFilter; + } else { + queryBody.query = `query { + _aggregation { + ${type} (accessibility: ${accessibility}) { + ${fields.map((field) => histogramQueryStrForEachField(field))} + ${countFields.map((field) => cardinalityQueryStrForEachField(field))} + } + } + }`; } + return fetch(`${path}${graphqlEndpoint}`, { method: 'POST', headers: { @@ -265,18 +286,25 @@ export const getGQLFilter = (filterObj) => { }; export const askGuppyAboutAllFieldsAndOptions = ( - path, type, fields, accessibility, filter, + path, type, fields, countFields, accessibility, filter, ) => { const gqlFilter = getGQLFilter(filter); - return queryGuppyForAggs(path, type, fields, gqlFilter, accessibility); + return queryGuppyForAggs(path, type, fields, countFields, gqlFilter, accessibility); }; // eslint-disable-next-line max-len export const askGuppyAboutArrayTypes = (path) => queryGuppyForStatus(path).then((res) => res.indices); -export const askGuppyForAggregationData = (path, type, fields, filter, accessibility) => { +export const askGuppyForAggregationData = ( + path, + type, + fields, + countFields, + filter, + accessibility, +) => { const gqlFilter = getGQLFilter(filter); - return queryGuppyForAggs(path, type, fields, gqlFilter, accessibility); + return queryGuppyForAggs(path, type, fields, countFields, gqlFilter, accessibility); }; export const askGuppyForSubAggregationData = ( diff --git a/src/server/es/filter.js b/src/server/es/filter.js index d02248de..f2ca66fd 100644 --- a/src/server/es/filter.js +++ b/src/server/es/filter.js @@ -19,16 +19,16 @@ const fromPathToNode = (esInstance, esIndex, path) => { }; const mergeRangeOperations = (a, b) => { - let merged = Object.assign({}, a, b); + const merged = { ...a, ...b }; - Object.keys(merged).forEach(function(key) { + Object.keys(merged).forEach((key) => { if (typeof merged[key] === 'object' && merged[key] !== null) { merged[key] = mergeRangeOperations(a[key], b[key]); } - }) + }); return merged; -} +}; 
const getNumericTextType = ( esInstance, @@ -252,7 +252,7 @@ const getFilterObj = ( esInstance, esIndex, filterItem, aggsField, filterSelf, defaultAuthFilter, objPath, ); if (filterObj) { - if ("range" in filterObj) { + if ('range' in filterObj) { filterRange.push(filterObj); } else { boolItemsList.push(filterObj); diff --git a/src/server/es/index.js b/src/server/es/index.js index 68a96fd8..df567057 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -424,21 +424,22 @@ class ES { async getCount(esIndex, esType, filter) { const result = await this.filterData( { esInstance: this, esIndex, esType }, - { filter, fields: false, size: 0}, + { filter, fields: false, size: 0 }, ); return result.hits.total; } + // eslint-disable-next-line camelcase async getCardinalityCount(esIndex, esType, filter, field, precision_threshold) { const queryBody = { size: 0, aggs: { - "cardinality_count": { - "cardinality": { - "field": field, - "precision_threshold": precision_threshold - } - } + cardinality_count: { + cardinality: { + field, + precision_threshold, + }, + }, }, }; if (typeof filter !== 'undefined') { diff --git a/src/server/resolvers.js b/src/server/resolvers.js index e105fcc2..0390d47c 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -133,9 +133,12 @@ const textHistogramResolver = async (parent, args, context) => { const cardinalityResolver = async (parent, args) => { log.debug('[resolver.cardinalityResolver] args', args); log.debug('[resolver.cardinalityResolver] parent', parent); + // TODO make work with nested const { - esInstance, esIndex, esType, filter, field + esInstance, esIndex, esType, filter, field, } = parent; + + // eslint-disable-next-line camelcase const { precision_threshold } = args; return esInstance.getCardinalityCount(esIndex, esType, filter, field, precision_threshold); @@ -155,7 +158,8 @@ const getFieldAggregationResolverMappingsByField = (field) => { })); }; -// this spreads all fields out into individual resolvers 
and adds "field", "isNumericField" and "nestedPath", to parent +// this spreads all fields out into individual resolvers and +// adds "field", "isNumericField" and "nestedPath", to parent const getFieldAggregationResolverMappings = (esInstance, esIndex) => { const { fields } = esInstance.getESFields(esIndex); const fieldAggregationResolverMappings = {}; From 548936b86c3f1a8f2309b3c0b9f16c6b8011226f Mon Sep 17 00:00:00 2001 From: ocshawn Date: Thu, 4 May 2023 15:42:33 -0500 Subject: [PATCH 4/6] change example field in documentation fix spelling errors --- doc/queries.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 5f4f5f88..3d0b2801 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -769,7 +769,7 @@ Result: ### 6. Cardinality Count Aggregation - By using `_cardinalityCount` keyword, return a cardinality count of a feild. + By using `_cardinalityCount` keyword, return a cardinality count of a field. See [Elasticsearch documentation on Cardinality](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html) @@ -779,7 +779,7 @@ Result: query ($filter: JSON) { _aggregation { subject(filter: $filter) { - file_count { + submitter_id { _cardinalityCount( precision_threshold: 1000 //optional defaults to 3000 ) @@ -796,7 +796,7 @@ Example result: "data": { "_aggregation": { "subject": { - "file_count": { + "submitter_id": { "_cardinalityCount": 98 } } From 78d1fafa25885fcb4b906dab191066cb4cf0c60d Mon Sep 17 00:00:00 2001 From: ocshawn Date: Fri, 5 May 2023 12:13:58 -0500 Subject: [PATCH 5/6] fixes for Mingfei --- doc/queries.md | 3 +++ src/components/Utils/queries.js | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 3d0b2801..1781b96a 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -773,6 +773,9 @@ Result: See [Elasticsearch documentation on 
Cardinality](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html) +> **Note** +> Currently does not support nested fields + Example: ``` diff --git a/src/components/Utils/queries.js b/src/components/Utils/queries.js index 7430c0d6..19f29bf8 100644 --- a/src/components/Utils/queries.js +++ b/src/components/Utils/queries.js @@ -40,7 +40,7 @@ const cardinalityQueryStrForEachField = (field) => { }`); }; -const queryGuppyForAggs = (path, type, fields, countFields, gqlFilter, acc) => { +const queryGuppyForAggs = (path, type, fields, cardinalityFields = [], gqlFilter, acc) => { let accessibility = acc; if (accessibility !== 'all' && accessibility !== 'accessible' && accessibility !== 'unaccessible') { accessibility = 'all'; @@ -52,7 +52,7 @@ const queryGuppyForAggs = (path, type, fields, countFields, gqlFilter, acc) => { _aggregation { ${type} (filter: $filter, filterSelf: false, accessibility: ${accessibility}) { ${fields.map((field) => histogramQueryStrForEachField(field))}, - ${countFields.map((field) => cardinalityQueryStrForEachField(field))} + ${cardinalityFields.map((field) => cardinalityQueryStrForEachField(field))} } } }`; @@ -63,7 +63,7 @@ const queryGuppyForAggs = (path, type, fields, countFields, gqlFilter, acc) => { _aggregation { ${type} (accessibility: ${accessibility}) { ${fields.map((field) => histogramQueryStrForEachField(field))} - ${countFields.map((field) => cardinalityQueryStrForEachField(field))} + ${cardinalityFields.map((field) => cardinalityQueryStrForEachField(field))} } } }`; From c2def8837763005ae8388d8c2b9e685b38b7a666 Mon Sep 17 00:00:00 2001 From: ocshawn Date: Fri, 5 May 2023 12:16:28 -0500 Subject: [PATCH 6/6] version to 0.15.7 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index ce1bf946..60260cc8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12
@@ { "name": "@gen3/guppy", - "version": "0.15.5", + "version": "0.15.7", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@gen3/guppy", - "version": "0.15.5", + "version": "0.15.7", "license": "ISC", "dependencies": { "@elastic/elasticsearch": "~7.13.0", diff --git a/package.json b/package.json index 81dac1b7..dececc65 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@gen3/guppy", - "version": "0.15.5", + "version": "0.15.7", "description": "Server that support GraphQL queries on data from elasticsearch", "main": "src/server/server.js", "directories": {