From 317690bb6ee37bc17191ee87ed555c7e92df6e2c Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 10 Mar 2020 16:35:51 -0500 Subject: [PATCH 01/41] fix/datagen --- genData/genData.js | 57 ++++++++++++++++++++++++++++++++++++------ genData/types.js | 6 ++++- genData/valueBank.json | 33 ++++++++++++++++++++++-- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/genData/genData.js b/genData/genData.js index 52f96720..1dff5100 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -49,7 +49,26 @@ const schema = { maxItems: max, }; -const getRandomInt = (maxValue) => Math.floor(Math.random() * Math.floor(maxValue)); +const MAX_INT = (2 ** 31) - 1; +const MIN_INT = -1 * (2 ** 31); +const MAX_LONG = (2 ** 63) - 1; +const MIN_LONG = -1 * (2 ** 63); + +const getRandomNumber = ( + minValue = 0, + maxValue = 1, +) => Math.random() * (maxValue - minValue + 1) + minValue; + +const getRandomInt = ( + minValue = 0, + maxValue = 1, +) => { + min = Math.ceil(minValue); + max = Math.floor(maxValue); + return Math.floor(Math.random() * (max - min + 1)) + min; +}; + +const getRandomString = () => (Math.random() + 1).toString(36).substring(7); async function run() { const mapping = await client.indices.getMapping({ index: esIndex }); @@ -70,14 +89,38 @@ async function run() { let sample = await resolve(schema); const fieldValues = JSON.parse(readFileSync('./genData/valueBank.json').toString()); - Object.entries(fieldValues).forEach(([k, values]) => { - sample = sample.map((d) => { - const id = getRandomInt(values.length - 1); - // eslint-disable-next-line no-param-reassign - d[k] = fieldValues[k][id]; return d; + sample = sample.map((d) => { + const dCopy = { ...d }; + Object.keys(dCopy).forEach((key) => { + if (fieldValues[key]) { + const index = getRandomInt(0, fieldValues[key].length - 1); + dCopy[key] = fieldValues[key][index]; + } else { + console.log('vtype: ', schema.items.properties[key].rawType); + switch (schema.items.properties[key].rawType) { + case 
'integer': + dCopy[key] = getRandomInt(MIN_INT, MAX_INT); + break; + case 'long': + dCopy[key] = getRandomInt(MIN_LONG, MAX_LONG); + break; + case 'float': + dCopy[key] = getRandomNumber(MIN_INT, MAX_INT); + break; + case 'text': + case 'keyword': + dCopy[key] = getRandomString(); + break; + default: + break; + } + } }); + return dCopy; }); + console.log('sample: ', sample); + const body = sample.flatMap((d) => [{ index: { _index: esIndex, @@ -89,7 +132,7 @@ async function run() { chunks.forEach((c) => { client.bulk({ refresh: true, body: c }).then((res) => { res.body.items.forEach((item) => console.log(item)); - console.log(`Successfully insert ${c.length} items`); + console.log(`Successfully insert ${c.length / 2} items`); }).catch((res) => { if (res.body.errors) { const erroredDocuments = []; diff --git a/genData/types.js b/genData/types.js index 288bc5ff..f669d649 100644 --- a/genData/types.js +++ b/genData/types.js @@ -12,6 +12,7 @@ function fakerType(value) { case 'text': fieldType = { type: 'string', faker: 'name.findName' }; break; + case 'float': case 'double': fieldType = { type: 'number' }; break; @@ -32,7 +33,10 @@ function fakerType(value) { // console.log(value); break; } - return fieldType; + return { + ...fieldType, + rawType: value.type, + }; } module.exports = { diff --git a/genData/valueBank.json b/genData/valueBank.json index 15360203..c246b240 100644 --- a/genData/valueBank.json +++ b/genData/valueBank.json @@ -2,10 +2,39 @@ "gender": ["male", "female", "unknown"], "ethnicity": ["American Indian", "Pacific Islander", "Black", "Multi-racial", "White", "Haspanic" ], "race": ["white", "black", "hispanic", "asian", "mixed", "not reported" ], - "vital": ["Alive", "Dead", "no data" ], + "vital_status": ["Alive", "Dead", "no data" ], "file_type": ["mRNA Array", "Unaligned Reads", "Lipdomic MS", "Protionic MS", "1Gs Ribosomes", "Unknown" ], "file_format": ["BEM", "BAM", "BED", "CSV", "FASTQ", "RAW", "TAR", "TSV", "TXT", "IDAT" ], "auth_resource_path": 
["/programs/jnkns/projects/jenkins", "/programs/DEV/projects/test", "/programs/external/projects/test"], "sensitive": [ "true", "false" ], - "project": ["jnkns-jenkins", "DEV-test", "external-test" ] + "study": ["study_1", "study_2", "study_3"], + "file_id": ["file_id_1", "file_id_2", "file_id_3"], + "subject_id": ["subject_id_1", "subject_id_2", "subject_id_3"], + "project": ["jnkns-jenkins", "DEV-test", "external-test" ], + "visits": [ + { + "days_to_visit": 1, + "visit_label": "vst_lbl_1", + "follow_ups": { + "days_to_follow_up": 1, + "follow_up_label": "flup_lbl_1" + } + }, + { + "days_to_visit": 2, + "visit_label": "vst_lbl_2", + "follow_ups": { + "days_to_follow_up": 2, + "follow_up_label": "flup_lbl_2" + } + }, + { + "days_to_visit": 3, + "visit_label": "vst_lbl_3", + "follow_ups": { + "days_to_follow_up": 3, + "follow_up_label": "flup_lbl_3" + } + } + ] } From 2fe2910d5ad2a7faa65c0d219474fc8ef69ed15e Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 10 Mar 2020 16:36:01 -0500 Subject: [PATCH 02/41] fix/dep --- package-lock.json | 1 - package.json | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/package-lock.json b/package-lock.json index 21f9fa6d..d292a511 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4008,7 +4008,6 @@ "version": "1.2.3", "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.2.3.tgz", "integrity": "sha512-OOEk+lkePcg+ODXIpvuU9PAryCikCJyo7GlDG1upleEpQRx6mzL9puEBkozQ5iAx20KV0l3DbyQwqciJtqe5Pg==", - "dev": true, "requires": { "define-properties": "^1.1.3", "es-abstract": "^1.17.0-next.1", diff --git a/package.json b/package.json index 1015f368..65bdf2e2 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "apollo-server": "^2.4.8", "apollo-server-express": "^2.4.8", "array.prototype.flat": "^1.2.2", + "array.prototype.flatmap": "^1.2.3", "body-parser": "^1.18.3", "cors": "^2.8.5", "express": "^4.16.4", From c11d057351271aebef9c3c80b7e7a1699d0e83ad Mon Sep 17 
00:00:00 2001 From: Mingfei Shao Date: Wed, 18 Mar 2020 17:10:00 -0500 Subject: [PATCH 03/41] fix/gen data --- genData/genData.js | 1 - generate_data.sh | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/genData/genData.js b/genData/genData.js index 1dff5100..5fcbca05 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -96,7 +96,6 @@ async function run() { const index = getRandomInt(0, fieldValues[key].length - 1); dCopy[key] = fieldValues[key][index]; } else { - console.log('vtype: ', schema.items.properties[key].rawType); switch (schema.items.properties[key].rawType) { case 'integer': dCopy[key] = getRandomInt(MIN_INT, MAX_INT); diff --git a/generate_data.sh b/generate_data.sh index 35f9f4bf..d3ff6ee9 100755 --- a/generate_data.sh +++ b/generate_data.sh @@ -10,8 +10,8 @@ es_delete_all $CASE_INDEX_NAME es_delete_all $FILE_INDEX_NAME es_delete_all $CONFIG_INDEX_NAME es_setup_index $CASE_INDEX_NAME $FILE_INDEX_NAME $CONFIG_INDEX_NAME -npm run gendata -- -i $CASE_INDEX_NAME -d subject -npm run gendata -- -i $FILE_INDEX_NAME -d file -npm run gendata -- -i $CONFIG_INDEX_NAME -d config +npm run gendata -- -i $CASE_INDEX_NAME -d subject -n $DATA_COUNT +npm run gendata -- -i $FILE_INDEX_NAME -d file -n $DATA_COUNT +npm run gendata -- -i $CONFIG_INDEX_NAME -d config -n $DATA_COUNT -echo "successfully generate ${DATA_COUNT} data records" +echo "Successfully generate ${DATA_COUNT} data records" From 6759eca4e1f39470362dc37661a509ff53814976 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 18 Mar 2020 17:10:06 -0500 Subject: [PATCH 04/41] lint --- stories/connectedTable.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stories/connectedTable.jsx b/stories/connectedTable.jsx index 2f1d3681..309080fc 100644 --- a/stories/connectedTable.jsx +++ b/stories/connectedTable.jsx @@ -13,7 +13,7 @@ storiesOf('Guppy Wrapper', module) e.field)} + rawDataFields={tableConfig.map((e) => e.field)} onFilterChange={action('wrapper receive 
filter change')} onReceiveNewAggsData={action('wrapper receive aggs data')} > From d4c9ce98c511e398c605aef3cd323872bfdffaf0 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 7 Apr 2020 14:06:34 -0500 Subject: [PATCH 05/41] feat/schema --- src/server/schema.js | 56 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/src/server/schema.js b/src/server/schema.js index 4be9d320..54a8e95c 100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -27,7 +27,7 @@ const getGQLType = (esInstance, esIndex, field, esFieldType) => { return `[${gqlType}]`; } if (esFieldType === 'nested') { - return `[${field}]`; + return `[Nested${firstLetterUpperCase(field)}]`; } return gqlType; }; @@ -65,10 +65,12 @@ const getQuerySchemaForType = (esType) => { const getFieldGQLTypeMapForProperties = (esInstance, esIndex, properties) => { const result = Object.keys(properties).map((field) => { - const esFieldType = properties[field].type; + const esFieldType = (properties[field].esType) + ? 
properties[field].esType : properties[field].type; const gqlType = getGQLType(esInstance, esIndex, field, esFieldType); + return { - field, type: gqlType, esType: esFieldType, props: properties[field].properties, + field, type: gqlType, esType: esFieldType, properties: properties[field].properties, }; }); return result; @@ -101,10 +103,9 @@ const getTypeSchemaForOneIndex = (esInstance, esIndex, esType) => { Object.keys(fieldESTypeMap).forEach((fieldKey) => { const esFieldType = fieldESTypeMap[fieldKey].type; if (esFieldType === 'nested' && !existingFields.has(fieldKey)) { - const props = fieldESTypeMap[fieldKey].properties; - queueTypes.push({ type: fieldKey, props }); + const { properties } = fieldESTypeMap[fieldKey]; + queueTypes.push({ type: `Nested${firstLetterUpperCase(fieldKey)}`, properties }); existingFields.add(fieldKey); - // fieldToArgs[fieldKey] = getArgsByField(fieldKey, props); } }); @@ -117,12 +118,12 @@ const getTypeSchemaForOneIndex = (esInstance, esIndex, esType) => { while (queueTypes.length > 0) { const t = queueTypes.shift(); - const gqlTypes = getFieldGQLTypeMapForProperties(esInstance, esIndex, t.props); + const gqlTypes = getFieldGQLTypeMapForProperties(esInstance, esIndex, t.properties); gqlTypes.forEach((entry) => { if (entry.esType === 'nested' && !existingFields.has(entry.field)) { - queueTypes.push({ type: entry.field, props: entry.props }); + queueTypes.push({ type: `Nested${firstLetterUpperCase(entry.field)}`, properties: entry.properties }); existingFields.add(entry.field); - fieldToArgs[entry.field] = getArgsByField(entry.field, entry.props); + fieldToArgs[entry.field] = getArgsByField(entry.field, entry.properties); } }); sTypeSchema += ` @@ -148,9 +149,11 @@ const getAggregationSchemaForOneIndex = (esInstance, esIndex, esType) => { field: entry.field, aggType: getAggsHistogramName(entry.type), })); + const fieldAggsNestedTypeMap = fieldGQLTypeMap.filter((f) => f.esType === 'nested'); return `type ${esTypeObjName}Aggregation { 
_totalCount: Int ${fieldAggsTypeMap.map((entry) => `${getAggregationType(entry)}`).join('\n')} + ${fieldAggsNestedTypeMap.map((entry) => `${entry.field}: NestedHistogramFor${firstLetterUpperCase(entry.field)}`).join('\n')} }`; }; @@ -176,8 +179,39 @@ export const getAggregationSchema = (esConfig) => ` } `; +const getAggregationSchemaForOneNestedIndex = (esInstance, esIndex) => { + const fieldGQLTypeMap = getFieldGQLTypeMapForOneIndex(esInstance, esIndex); + const fieldAggsNestedTypeMap = fieldGQLTypeMap.filter((f) => f.esType === 'nested'); + + let AggsNestedTypeSchema = ''; + while (fieldAggsNestedTypeMap.length > 0) { + const entry = fieldAggsNestedTypeMap.shift(); + if (entry.field && entry.properties) { + AggsNestedTypeSchema += `type NestedHistogramFor${firstLetterUpperCase(entry.field)} {${Object.keys(entry.properties).map((propsKey) => { + const entryType = entry.properties[propsKey].type; + if (entryType === 'nested') { + fieldAggsNestedTypeMap.push({ + field: propsKey, + properties: entry.properties[propsKey].properties, + }); + return ` + ${propsKey}: NestedHistogramFor${firstLetterUpperCase(propsKey)}`; + } + return ` + ${propsKey}: ${getAggsHistogramName(esgqlTypeMapping[entryType])}`; + })} +} +`; + } + } + log.debug('[SCHEMA] AggsNestedTypeSchema: ', AggsNestedTypeSchema); + return AggsNestedTypeSchema; +}; + export const getAggregationSchemaForEachType = (esConfig, esInstance) => esConfig.indices.map((cfg) => getAggregationSchemaForOneIndex(esInstance, cfg.index, cfg.type)).join('\n'); +export const getAggregationSchemaForEachNestedType = (esConfig, esInstance) => esConfig.indices.map((cfg) => getAggregationSchemaForOneNestedIndex(esInstance, cfg.index)).join('\n'); + export const getMappingSchema = (esConfig) => ` type Mapping { ${esConfig.indices.map((cfg) => `${cfg.type}: [String]`).join('\n')} @@ -208,6 +242,9 @@ export const buildSchemaString = (esConfig, esInstance) => { const aggregationSchemasForEachType = 
getAggregationSchemaForEachType(esConfig, esInstance); + const aggregationSchemasForEachNestedType = getAggregationSchemaForEachNestedType(esConfig, + esInstance); + const textHistogramSchema = ` type ${EnumAggsHistogramName.HISTOGRAM_FOR_STRING} { histogram: [BucketsForNestedStringAgg] @@ -280,6 +317,7 @@ export const buildSchemaString = (esConfig, esInstance) => { ${typesSchemas} ${aggregationSchema} ${aggregationSchemasForEachType} + ${aggregationSchemasForEachNestedType} ${textHistogramSchema} ${numberHistogramSchema} ${textHistogramBucketSchema} From d54aba8e83fbf008bf365ef8e40e4a013fcb909f Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 7 Apr 2020 14:06:41 -0500 Subject: [PATCH 06/41] feat/resolver --- src/server/resolvers.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/server/resolvers.js b/src/server/resolvers.js index a78e9fc4..de942b50 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -122,9 +122,9 @@ const getFieldAggregationResolverMappings = (esInstance, esIndex) => { const fieldAggregationResolverMappings = {}; const { fields } = esInstance.getESFields(esIndex); fields.forEach((field) => { - if (field.type !== 'nested') { - fieldAggregationResolverMappings[`${field.name}`] = ((parent) => ({ ...parent, field: field.name })); - } + // if (field.type !== 'nested') { + fieldAggregationResolverMappings[`${field.name}`] = ((parent) => ({ ...parent, field: field.name })); + // } }); return fieldAggregationResolverMappings; }; From 06d8791c5a874cc466e85461f8cb12d79a93a111 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Fri, 10 Apr 2020 18:18:10 -0500 Subject: [PATCH 07/41] feat/resolver --- src/server/es/index.js | 10 ++++++++-- src/server/resolvers.js | 37 +++++++++++++++++++++++++++++++++---- src/server/schema.js | 3 +-- src/server/utils/utils.js | 22 ++++++++++++++++++++++ 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/server/es/index.js b/src/server/es/index.js index 
6585be36..5e2a0186 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -8,7 +8,7 @@ import * as esAggregator from './aggs'; import log from '../logger'; import { SCROLL_PAGE_SIZE } from './const'; import CodedError from '../utils/error'; -import { fromFieldsToSource } from '../utils/utils'; +import { fromFieldsToSource, buildNestedField } from '../utils/utils'; class ES { constructor(esConfig = config.esConfig) { @@ -283,7 +283,12 @@ class ES { index: cfg.index, type: cfg.type, fields: Object.entries(this.fieldTypes[cfg.index]).map(([key, value]) => { - const r = { name: key, type: value.type }; + let r; + if (value.type !== 'nested') { + r = { name: key, type: value.type }; + } else { + r = buildNestedField(key, value, r); + } return r; }), }; @@ -324,6 +329,7 @@ class ES { const queryBody = { from: offset }; if (typeof filter !== 'undefined') { queryBody.query = getFilterObj(this, esIndex, filter); + log.debug('[ES] filterObj: ', queryBody.query); } queryBody.sort = getESSortBody(sort, this, esIndex); if (typeof size !== 'undefined') { diff --git a/src/server/resolvers.js b/src/server/resolvers.js index de942b50..e145f5c3 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -118,17 +118,23 @@ const textHistogramResolver = async (parent, args, context) => { }); }; +const getFieldAggregationResolverMappingsByField = (field) => { + if (field.type !== 'nested') { + return ((parent) => ({ ...parent, field: field.name })); + } + return ((parent) => ({ ...parent, field: field.name, path: (parent.path) ? 
`${parent.path}.${field.name}` : `${field.name}` })); +}; + const getFieldAggregationResolverMappings = (esInstance, esIndex) => { - const fieldAggregationResolverMappings = {}; const { fields } = esInstance.getESFields(esIndex); + const fieldAggregationResolverMappings = {}; fields.forEach((field) => { - // if (field.type !== 'nested') { - fieldAggregationResolverMappings[`${field.name}`] = ((parent) => ({ ...parent, field: field.name })); - // } + fieldAggregationResolverMappings[`${field.name}`] = getFieldAggregationResolverMappingsByField(field); }); return fieldAggregationResolverMappings; }; + /** * Tree-structured resolvers pass down arguments. * For better understanding, following is an example query, and related resolvers for each level: @@ -188,6 +194,28 @@ const getResolver = (esConfig, esInstance) => { return acc; }, {}); + const typeNestedAggregationResolvers = esConfig.indices.reduce((acc, cfg) => { + const { fields } = esInstance.getESFields(cfg.index); + const nestedFieldsArray = fields.filter((entry) => entry.type === 'nested'); + log.debug('[resolver.typeNestedAggregationResolvers] nestedFieldsArray', nestedFieldsArray); + + while (nestedFieldsArray.length > 0) { + const nestedFields = nestedFieldsArray.shift(); + log.debug('[resolver.typeNestedAggregationResolvers] nestedFields', nestedFields); + const typeNestedAggsName = `NestedHistogramFor${firstLetterUpperCase(nestedFields.name)}`; + acc[typeNestedAggsName] = {}; + if (nestedFields.type === 'nested' && nestedFields.nestedProps) { + nestedFields.nestedProps.forEach((props) => { + if (props.type === 'nested') { + nestedFieldsArray.push(props); + } + acc[typeNestedAggsName][props.name] = getFieldAggregationResolverMappingsByField(props); + }); + } + } + return acc; + }, {}); + const mappingResolvers = esConfig.indices.reduce((acc, cfg) => { acc[cfg.type] = () => (esInstance.getESFields(cfg.index).fields.map((f) => f.name)); return acc; @@ -204,6 +232,7 @@ const getResolver = (esConfig, 
esInstance) => { ...typeAggregationResolverMappings, }, ...typeAggregationResolvers, + ...typeNestedAggregationResolvers, HistogramForNumber: { histogram: numericHistogramResolver, asTextHistogram: textHistogramResolver, diff --git a/src/server/schema.js b/src/server/schema.js index 54a8e95c..c1600f07 100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -200,8 +200,7 @@ const getAggregationSchemaForOneNestedIndex = (esInstance, esIndex) => { return ` ${propsKey}: ${getAggsHistogramName(esgqlTypeMapping[entryType])}`; })} -} -`; +}`; } } log.debug('[SCHEMA] AggsNestedTypeSchema: ', AggsNestedTypeSchema); diff --git a/src/server/utils/utils.js b/src/server/utils/utils.js index 74c6cc99..63e02616 100644 --- a/src/server/utils/utils.js +++ b/src/server/utils/utils.js @@ -76,3 +76,25 @@ export const fromFieldsToSource = (parsedInfo) => { } return fields; }; + +export const buildNestedField = (key, value) => { + let builtObj = {}; + if (value.type === 'nested') { + const nestedProps = []; + Object.keys(value.properties).forEach((propsKey) => { + nestedProps.push(buildNestedField(propsKey, value.properties[propsKey])); + }); + builtObj = { + name: key, + type: value.type, + nestedProps, + }; + } else { + builtObj = { + name: key, + type: value.type, + }; + } + + return builtObj; +}; From 8f6fc4f915b4f09833b395310197365dc9966d78 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Fri, 10 Apr 2020 18:18:29 -0500 Subject: [PATCH 08/41] fix/disable no console for genData --- genData/genData.js | 1 + 1 file changed, 1 insertion(+) diff --git a/genData/genData.js b/genData/genData.js index 5fcbca05..8a55f7ca 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -1,3 +1,4 @@ +/* eslint-disable no-console */ require('array.prototype.flatmap').shim(); const program = require('commander'); From 9092d452f5b947f453b7541912ae0c2ea8f6426d Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sun, 12 Apr 2020 19:22:57 -0500 Subject: [PATCH 09/41] feat/update es version --- 
devHelper/docker/esearch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devHelper/docker/esearch.yml b/devHelper/docker/esearch.yml index bde930e4..61db7fee 100644 --- a/devHelper/docker/esearch.yml +++ b/devHelper/docker/esearch.yml @@ -3,7 +3,7 @@ version: "3.3" services: # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-cli-run-prod-mode elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.5.4 + image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.7.0 ports: - "9200:9200" - "9300:9300" From 40a1f4af02542ccd500431c513c48d42dc51e11c Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 11:33:31 -0500 Subject: [PATCH 10/41] feat/update resolver --- src/server/resolvers.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/server/resolvers.js b/src/server/resolvers.js index e145f5c3..908902ce 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -102,9 +102,10 @@ const textHistogramResolver = async (parent, args, context) => { log.debug('[resolver.textHistogramResolver] args', args); const { esInstance, esIndex, esType, - filter, field, nestedAggFields, filterSelf, accessibility, + filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, } = parent; - log.debug('[resolver.textHistogramResolver] parent', parent); + // log.debug('[resolver.textHistogramResolver] parent', parent); + log.debug('[resolver.textHistogramResolver] nestedPath', nestedPath); const { authHelper } = context; const defaultAuthFilter = await authHelper.getDefaultFilter(accessibility); return esInstance.textAggregation({ @@ -115,6 +116,7 @@ const textHistogramResolver = async (parent, args, context) => { filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }); }; @@ -122,7 +124,7 @@ const getFieldAggregationResolverMappingsByField = (field) => { if (field.type !== 'nested') { return ((parent) => ({ ...parent, field: 
field.name })); } - return ((parent) => ({ ...parent, field: field.name, path: (parent.path) ? `${parent.path}.${field.name}` : `${field.name}` })); + return ((parent) => ({ ...parent, field: field.name, nestedPath: (parent.nestedPath) ? `${parent.nestedPath}.${field.name}` : `${field.name}` })); }; const getFieldAggregationResolverMappings = (esInstance, esIndex) => { @@ -197,11 +199,11 @@ const getResolver = (esConfig, esInstance) => { const typeNestedAggregationResolvers = esConfig.indices.reduce((acc, cfg) => { const { fields } = esInstance.getESFields(cfg.index); const nestedFieldsArray = fields.filter((entry) => entry.type === 'nested'); - log.debug('[resolver.typeNestedAggregationResolvers] nestedFieldsArray', nestedFieldsArray); + // log.debug('[resolver.typeNestedAggregationResolvers] nestedFieldsArray', nestedFieldsArray); while (nestedFieldsArray.length > 0) { const nestedFields = nestedFieldsArray.shift(); - log.debug('[resolver.typeNestedAggregationResolvers] nestedFields', nestedFields); + // log.debug('[resolver.typeNestedAggregationResolvers] nestedFields', nestedFields); const typeNestedAggsName = `NestedHistogramFor${firstLetterUpperCase(nestedFields.name)}`; acc[typeNestedAggsName] = {}; if (nestedFields.type === 'nested' && nestedFields.nestedProps) { From 65b8e29ee2665560184800056dc58ff151e7ff37 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 12:43:10 -0500 Subject: [PATCH 11/41] feat/nested test agg --- src/server/es/aggs.js | 83 ++++++++++++++++++++++++++++++++---------- src/server/es/index.js | 2 + 2 files changed, 65 insertions(+), 20 deletions(-) diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index d179dfd5..b7758791 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -5,6 +5,7 @@ import { AGGS_ITEM_STATS_NAME, AGGS_QUERY_NAME, } from './const'; +import log from '../logger'; import config from '../config'; const PAGE_SIZE = 10000; @@ -472,6 +473,7 @@ export const textAggregation = async ( 
filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ) => { const queryBody = { size: 0 }; @@ -492,6 +494,14 @@ export const textAggregation = async ( } const aggsName = `${field}Aggs`; const aggsObj = {}; + let aggsNestedName; + let fieldNestedName; + // log.debug('[textAggregation] nestedPath', nestedPath); + if (nestedPath) { + aggsNestedName = `${field}NestedAggs`; + fieldNestedName = `${nestedPath}.${field}`; + } + if (nestedAggFields && nestedAggFields.termsFields) { missingAlias = {}; aggsObj.aggs = updateAggObjectForTermsFields(nestedAggFields.termsFields, aggsObj.aggs); @@ -502,43 +512,76 @@ export const textAggregation = async ( aggsObj.aggs = updateAggObjectForMissingFields(nestedAggFields.missingFields, aggsObj.aggs); } - queryBody.aggs = { - [aggsName]: { - composite: { - sources: [ - { - [field]: { - terms: { - field, - ...missingAlias, - }, + if (aggsNestedName) { + queryBody.aggs = { + [aggsNestedName]: { + nested: { + path: nestedPath, + }, + aggs: { + [aggsName]: { + composite: { + sources: [ + { + [fieldNestedName]: { + terms: { + field: fieldNestedName, + ...missingAlias, + }, + }, + }, + ], + size: PAGE_SIZE, }, + ...aggsObj, }, - ], - size: PAGE_SIZE, + }, }, - ...aggsObj, - }, - }; + }; + } else { + queryBody.aggs = { + [aggsName]: { + composite: { + sources: [ + { + [field]: { + terms: { + field, + ...missingAlias, + }, + }, + }, + ], + size: PAGE_SIZE, + }, + ...aggsObj, + }, + }; + } + log.debug('[textAggregation] queryBody', queryBody); let resultSize; let finalResults = []; /* eslint-disable */ do { const result = await esInstance.query(esIndex, esType, queryBody); + log.debug('[textAggregation] result', result); resultSize = 0; - - result.aggregations[aggsName].buckets.forEach((item) => { + + const resultBuckets = (aggsNestedName) ? 
result.aggregations[aggsNestedName][aggsName].buckets : result.aggregations[aggsName].buckets; + + resultBuckets.forEach((item) => { + log.debug('[textAggregation] item', item); const resultObj = processResultsForNestedAgg (nestedAggFields, item, {}) finalResults.push({ - key: item.key[field], + key: (fieldNestedName)? item.key[fieldNestedName] : item.key[field], count: item.doc_count, ...resultObj }); resultSize += 1; }); - const afterKey = result.aggregations[aggsName].after_key; + const afterKey = (aggsNestedName) ? result.aggregations[aggsNestedName][aggsName].after_key : result.aggregations[aggsName].after_key; if (typeof afterKey === 'undefined') break; - queryBody.aggs[aggsName].composite.after = afterKey; + (aggsNestedName) ? queryBody.aggs[aggsNestedName].aggs[aggsName].composite.after = afterKey : queryBody.aggs[aggsName].composite.after = afterKey; } while (resultSize === PAGE_SIZE); /* eslint-enable */ diff --git a/src/server/es/index.js b/src/server/es/index.js index 5e2a0186..c594987f 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -446,6 +446,7 @@ class ES { filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }) { return esAggregator.textAggregation( { @@ -459,6 +460,7 @@ class ES { filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); } From fc264e2768fd113c6fbb1cde2c9b84424a404ab3 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 15:18:34 -0500 Subject: [PATCH 12/41] feat/nested numeric aggs --- src/server/es/aggs.js | 70 +++++++++++++++++++++++++++++++++-------- src/server/es/const.js | 1 + src/server/es/index.js | 2 ++ src/server/resolvers.js | 6 ++-- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index b7758791..0aa927b2 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -3,9 +3,9 @@ import getFilterObj from './filter'; import { AGGS_GLOBAL_STATS_NAME, AGGS_ITEM_STATS_NAME, + AGGS_NESTED_QUERY_NAME, 
AGGS_QUERY_NAME, } from './const'; -import log from '../logger'; import config from '../config'; const PAGE_SIZE = 10000; @@ -150,6 +150,7 @@ export const numericGlobalStats = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }) => { const queryBody = { size: 0 }; if (!!filter || !!defaultAuthFilter) { @@ -160,7 +161,9 @@ export const numericGlobalStats = async ( queryBody.query = appendAdditionalRangeQuery(field, queryBody.query, rangeStart, rangeEnd); let aggsObj = { [AGGS_GLOBAL_STATS_NAME]: { - stats: { field }, + stats: { + field: (nestedPath) ? `${nestedPath}.${field}` : `${field}`, + }, }, }; if (nestedAggFields && nestedAggFields.termsFields) { @@ -169,9 +172,25 @@ export const numericGlobalStats = async ( if (nestedAggFields && nestedAggFields.missingFields) { aggsObj = updateAggObjectForMissingFields(nestedAggFields.missingFields, aggsObj); } - queryBody.aggs = aggsObj; + if (nestedPath) { + queryBody.aggs = { + [AGGS_NESTED_QUERY_NAME]: { + nested: { + path: nestedPath, + }, + aggs: { + ...aggsObj, + }, + }, + }; + } else { + queryBody.aggs = aggsObj; + } + const result = await esInstance.query(esIndex, esType, queryBody); - let resultStats = result.aggregations[AGGS_GLOBAL_STATS_NAME]; + let resultStats = (nestedPath) + ? result.aggregations[AGGS_NESTED_QUERY_NAME][AGGS_GLOBAL_STATS_NAME] + : result.aggregations[AGGS_GLOBAL_STATS_NAME]; const range = [ typeof rangeStart === 'undefined' ? resultStats.min : rangeStart, typeof rangeEnd === 'undefined' ? 
resultStats.max : rangeEnd, @@ -213,6 +232,7 @@ export const numericHistogramWithFixedRangeStep = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }) => { const queryBody = { size: 0 }; if (!!filter || !!defaultAuthFilter) { @@ -229,18 +249,20 @@ export const numericHistogramWithFixedRangeStep = async ( queryBody.query = appendAdditionalRangeQuery(field, queryBody.query, rangeStart, rangeEnd); const aggsObj = { [AGGS_GLOBAL_STATS_NAME]: { - stats: { field }, + stats: { + field: (nestedPath) ? `${nestedPath}.${field}` : `${field}`, + }, }, }; aggsObj[AGGS_QUERY_NAME] = { histogram: { - field, + field: (nestedPath) ? `${nestedPath}.${field}` : `${field}`, interval: rangeStep, }, aggs: { [AGGS_ITEM_STATS_NAME]: { stats: { - field, + field: (nestedPath) ? `${nestedPath}.${field}` : `${field}`, }, }, }, @@ -264,11 +286,29 @@ export const numericHistogramWithFixedRangeStep = async ( aggsObj[AGGS_QUERY_NAME].aggs, ); } - queryBody.aggs = aggsObj; + + if (nestedPath) { + queryBody.aggs = { + [AGGS_NESTED_QUERY_NAME]: { + nested: { + path: nestedPath, + }, + aggs: { + ...aggsObj, + }, + }, + }; + } else { + queryBody.aggs = aggsObj; + } + const result = await esInstance.query(esIndex, esType, queryBody); const finalResults = []; let resultObj; - result.aggregations[AGGS_QUERY_NAME].buckets.forEach((item) => { + const resultBuckets = (nestedPath) + ? 
result.aggregations[AGGS_NESTED_QUERY_NAME][AGGS_QUERY_NAME].buckets + : result.aggregations[AGGS_QUERY_NAME].buckets; + resultBuckets.forEach((item) => { resultObj = processResultsForNestedAgg(nestedAggFields, item, resultObj); finalResults.push({ key: [item.key, item.key + rangeStep], @@ -308,6 +348,7 @@ export const numericHistogramWithFixedBinCount = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }) => { const globalStats = await numericGlobalStats( { @@ -323,6 +364,7 @@ export const numericHistogramWithFixedBinCount = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); const { min, max } = globalStats; @@ -344,6 +386,7 @@ export const numericHistogramWithFixedBinCount = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); }; @@ -378,6 +421,7 @@ export const numericAggregation = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ) => { if (rangeStep <= 0) { @@ -410,6 +454,7 @@ export const numericAggregation = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); } @@ -429,6 +474,7 @@ export const numericAggregation = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); } @@ -446,6 +492,7 @@ export const numericAggregation = async ( filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); return [result]; @@ -496,7 +543,6 @@ export const textAggregation = async ( const aggsObj = {}; let aggsNestedName; let fieldNestedName; - // log.debug('[textAggregation] nestedPath', nestedPath); if (nestedPath) { aggsNestedName = `${field}NestedAggs`; fieldNestedName = `${nestedPath}.${field}`; @@ -558,19 +604,17 @@ export const textAggregation = async ( }, }; } - log.debug('[textAggregation] queryBody', queryBody); + // log.debug('[textAggregation] queryBody', queryBody); let resultSize; let finalResults = []; /* eslint-disable */ do { const result = await esInstance.query(esIndex, esType, queryBody); - 
log.debug('[textAggregation] result', result); resultSize = 0; const resultBuckets = (aggsNestedName) ? result.aggregations[aggsNestedName][aggsName].buckets : result.aggregations[aggsName].buckets; resultBuckets.forEach((item) => { - log.debug('[textAggregation] item', item); const resultObj = processResultsForNestedAgg (nestedAggFields, item, {}) finalResults.push({ key: (fieldNestedName)? item.key[fieldNestedName] : item.key[field], diff --git a/src/server/es/const.js b/src/server/es/const.js index 69e04a9c..c97993c0 100644 --- a/src/server/es/const.js +++ b/src/server/es/const.js @@ -1,4 +1,5 @@ export const AGGS_QUERY_NAME = 'numeric_aggs'; +export const AGGS_NESTED_QUERY_NAME = 'numeric_nested_aggs'; export const AGGS_GLOBAL_STATS_NAME = 'numeric_aggs_stats'; export const AGGS_ITEM_STATS_NAME = 'numeric_item_aggs_stats'; diff --git a/src/server/es/index.js b/src/server/es/index.js index c594987f..020c23ae 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -415,6 +415,7 @@ class ES { filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }) { return esAggregator.numericAggregation( { @@ -434,6 +435,7 @@ class ES { filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }, ); } diff --git a/src/server/resolvers.js b/src/server/resolvers.js index 908902ce..041b04ee 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -65,7 +65,7 @@ const aggsTotalQueryResolver = (parent) => { const numericHistogramResolver = async (parent, args, context) => { const { esInstance, esIndex, esType, - filter, field, nestedAggFields, filterSelf, accessibility, + filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, } = parent; log.debug('[resolver.numericHistogramResolver] parent', parent); const { @@ -87,6 +87,7 @@ const numericHistogramResolver = async (parent, args, context) => { filterSelf, defaultAuthFilter, nestedAggFields, + nestedPath, }); }; @@ -104,8 +105,7 @@ const textHistogramResolver = async (parent, args, 
context) => { esInstance, esIndex, esType, filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, } = parent; - // log.debug('[resolver.textHistogramResolver] parent', parent); - log.debug('[resolver.textHistogramResolver] nestedPath', nestedPath); + log.debug('[resolver.textHistogramResolver] parent', parent); const { authHelper } = context; const defaultAuthFilter = await authHelper.getDefaultFilter(accessibility); return esInstance.textAggregation({ From 6bb5315ad7dd255fe7e2f25f38f7fb7e58478042 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 15:23:29 -0500 Subject: [PATCH 13/41] fix/tests --- src/server/__tests__/schema.test.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/server/__tests__/schema.test.js b/src/server/__tests__/schema.test.js index f539a25e..ea04620e 100644 --- a/src/server/__tests__/schema.test.js +++ b/src/server/__tests__/schema.test.js @@ -52,7 +52,7 @@ describe('Schema', () => { const expectedTypesSchemas = ` type Subject { gen3_resource_path: String, - visits:[visits], + visits:[NestedVisits], gender: String, file_count: Int, name: String, @@ -61,12 +61,12 @@ describe('Schema', () => { whatever_lab_result_value: Float, _matched:[MatchedItem] } - type visits { + type NestedVisits { days_to_visit:Int, visit_label:String, - follow_ups:[follow_ups], + follow_ups:[NestedFollow_ups], } - type follow_ups { + type NestedFollow_ups { days_to_follow_up:Int, follow_up_label:String, } @@ -115,6 +115,7 @@ describe('Schema', () => { some_array_integer_field: HistogramForNumber, some_array_string_field: HistogramForString, whatever_lab_result_value: HistogramForNumber, + visits:NestedHistogramForVisits } type FileAggregation { _totalCount: Int From 0adc76225ad7a2b828ae6ec733b18512aed41607 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 15:31:39 -0500 Subject: [PATCH 14/41] chore/rename test --- src/server/__mocks__/mockDataFromES.js | 4 ++-- .../{mockNestedAggs.js => 
mockNestedTermsAndMissingAggs.js} | 4 ++-- src/server/es/__tests__/aggs.test.js | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename src/server/__mocks__/mockESData/{mockNestedAggs.js => mockNestedTermsAndMissingAggs.js} (98%) diff --git a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index 064320d2..dc420341 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -4,7 +4,7 @@ import mockTextAggs from './mockESData/mockTextAggs'; import mockNumericAggsGlobalStats from './mockESData/mockNumericAggsGlobalStats'; import mockHistogramFixWidth from './mockESData/mockNumericHistogramFixWidth'; import mockHistogramFixBinCount from './mockESData/mockNumericHistogramFixBinCount'; -import mockNestedAggs from './mockESData/mockNestedAggs'; +import mockNestedTermsAndMissingAggs from './mockESData/mockNestedTermsAndMissingAggs'; const mockPing = () => { nock(config.esConfig.host) @@ -350,7 +350,7 @@ const setup = () => { mockNumericAggsGlobalStats(); mockHistogramFixWidth(); mockHistogramFixBinCount(); - mockNestedAggs(); + mockNestedTermsAndMissingAggs(); }; export default setup; diff --git a/src/server/__mocks__/mockESData/mockNestedAggs.js b/src/server/__mocks__/mockESData/mockNestedTermsAndMissingAggs.js similarity index 98% rename from src/server/__mocks__/mockESData/mockNestedAggs.js rename to src/server/__mocks__/mockESData/mockNestedTermsAndMissingAggs.js index 5660ccf2..6195ab8b 100644 --- a/src/server/__mocks__/mockESData/mockNestedAggs.js +++ b/src/server/__mocks__/mockESData/mockNestedTermsAndMissingAggs.js @@ -1,6 +1,6 @@ import mockSearchEndpoint from './utils'; -const mockNestedAggs = () => { +const mockNestedTermsAndMissingAggs = () => { // only missing fields in nestedAggFields variables const missingAggsQuery = { size: 0, @@ -290,4 +290,4 @@ const mockNestedAggs = () => { mockSearchEndpoint(combinedAggsQuery, combinedTermsAggs); }; -export default mockNestedAggs; +export 
default mockNestedTermsAndMissingAggs; diff --git a/src/server/es/__tests__/aggs.test.js b/src/server/es/__tests__/aggs.test.js index dca959b0..076a4279 100644 --- a/src/server/es/__tests__/aggs.test.js +++ b/src/server/es/__tests__/aggs.test.js @@ -863,7 +863,7 @@ describe('could aggregate for numeric fields, fixed bin count', () => { }); }); -// see /src/server/__mocks__/mockESData/mockNestedAggs.js for mock results +// see /src/server/__mocks__/mockESData/mockNestedTermsAndMissingAggs.js for mock results describe('could only aggregate to find missing fields (both existing and non-existing fields)', () => { test('nested missing-only aggregation', async () => { await esInstance.initialize(); From 90502bc6f42b3368e258b43de3b41c0f0980416d Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 16:11:43 -0500 Subject: [PATCH 15/41] feat/unit tests --- src/server/__mocks__/mockDataFromES.js | 2 + .../__mocks__/mockESData/mockNestedAggs.js | 191 ++++++++++++++++++ src/server/es/__tests__/aggs.test.js | 100 +++++++++ 3 files changed, 293 insertions(+) create mode 100644 src/server/__mocks__/mockESData/mockNestedAggs.js diff --git a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index dc420341..0ff0bf4e 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -5,6 +5,7 @@ import mockNumericAggsGlobalStats from './mockESData/mockNumericAggsGlobalStats' import mockHistogramFixWidth from './mockESData/mockNumericHistogramFixWidth'; import mockHistogramFixBinCount from './mockESData/mockNumericHistogramFixBinCount'; import mockNestedTermsAndMissingAggs from './mockESData/mockNestedTermsAndMissingAggs'; +import mockNestedAggs from './mockESData/mockNestedAggs'; const mockPing = () => { nock(config.esConfig.host) @@ -351,6 +352,7 @@ const setup = () => { mockHistogramFixWidth(); mockHistogramFixBinCount(); mockNestedTermsAndMissingAggs(); + mockNestedAggs(); }; export default setup; diff 
--git a/src/server/__mocks__/mockESData/mockNestedAggs.js b/src/server/__mocks__/mockESData/mockNestedAggs.js new file mode 100644 index 00000000..23b1aa17 --- /dev/null +++ b/src/server/__mocks__/mockESData/mockNestedAggs.js @@ -0,0 +1,191 @@ +import mockSearchEndpoint from './utils'; + +const mockNestedAggs = () => { + // one-level text + const nestedAggsQuery1 = { + size: 0, + aggs: { + visit_labelNestedAggs: { + nested: { + path: 'visits', + }, + aggs: { + visit_labelAggs: { + composite: { + sources: [ + { + 'visits.visit_label': { + terms: { + field: 'visits.visit_label', + missing: 'no data', + }, + }, + }, + ], + size: 10000, + }, + }, + }, + }, + }, + }; + const fakeNestedAggs1 = { + aggregations: { + visit_labelNestedAggs: { + doc_count: 69, + visit_labelAggs: { + after_key: { + 'visits.visit_label': 'vst_lbl_3', + }, + buckets: [ + { + key: { + 'visits.visit_label': 'vst_lbl_1', + }, + doc_count: 21, + }, + { + key: { + 'visits.visit_label': 'vst_lbl_2', + }, + doc_count: 19, + }, + { + key: { + 'visits.visit_label': 'vst_lbl_3', + }, + doc_count: 29, + }, + { + key: { + 'visits.visit_label': 'no data', + }, + doc_count: 40, + }, + ], + }, + }, + }, + }; + mockSearchEndpoint(nestedAggsQuery1, fakeNestedAggs1); + + // two-level numeric global stats + const nestedAggsQuery2 = { + size: 0, + aggs: { + numeric_nested_aggs: { + nested: { + path: 'visits.follow_ups', + }, + aggs: { + numeric_aggs_stats: { + stats: { + field: 'visits.follow_ups.days_to_follow_up', + }, + }, + }, + }, + }, + }; + const fakeNestedAggs2 = { + aggregations: { + numeric_nested_aggs: { + doc_count: 69, + numeric_aggs_stats: { + count: 69, + min: 1.0, + max: 3.0, + avg: 2.1159420289855073, + sum: 146.0, + }, + }, + }, + }; + mockSearchEndpoint(nestedAggsQuery2, fakeNestedAggs2); + + // two-level numeric fixed bin width + const nestedAggsQuery3 = { + size: 0, + aggs: { + numeric_nested_aggs: { + nested: { + path: 'visits.follow_ups', + }, + aggs: { + numeric_aggs_stats: { + stats: { + 
field: 'visits.follow_ups.days_to_follow_up', + }, + }, + numeric_aggs: { + histogram: { + field: 'visits.follow_ups.days_to_follow_up', + interval: 1, + }, + aggs: { + numeric_item_aggs_stats: { + stats: { + field: 'visits.follow_ups.days_to_follow_up', + }, + }, + }, + }, + }, + }, + }, + }; + const fakeNestedAggs3 = { + aggregations: { + numeric_nested_aggs: { + doc_count: 69, + numeric_aggs: { + buckets: [ + { + key: 1.0, + doc_count: 21, + numeric_item_aggs_stats: { + count: 21, + min: 1.0, + max: 1.0, + avg: 1.0, + sum: 21.0, + }, + }, + { + key: 2.0, + doc_count: 19, + numeric_item_aggs_stats: { + count: 19, + min: 2.0, + max: 2.0, + avg: 2.0, + sum: 38.0, + }, + }, + { + key: 3.0, + doc_count: 29, + numeric_item_aggs_stats: { + count: 29, + min: 3.0, + max: 3.0, + avg: 3.0, + sum: 87.0, + }, + }, + ], + }, + numeric_aggs_stats: { + count: 69, + min: 1.0, + max: 3.0, + avg: 2.1159420289855073, + sum: 146.0, + }, + }, + }, + }; + mockSearchEndpoint(nestedAggsQuery3, fakeNestedAggs3); +}; + +export default mockNestedAggs; diff --git a/src/server/es/__tests__/aggs.test.js b/src/server/es/__tests__/aggs.test.js index 076a4279..d8773f1f 100644 --- a/src/server/es/__tests__/aggs.test.js +++ b/src/server/es/__tests__/aggs.test.js @@ -1099,3 +1099,103 @@ describe('could only aggregate to find missing fields (both existing and non-exi expect(result).toEqual(expectedResults); }); }); + +// see /src/server/__mocks__/mockESData/mockNestedAggs.js for mock results +describe('could aggregate for one-level nested text fields', () => { + test('one-level nested text aggregation', async () => { + await esInstance.initialize(); + const field = 'visit_label'; + const nestedPath = 'visits'; + const result = await textAggregation( + { esInstance, esIndex, esType }, + { field, nestedPath }, + ); + const expectedResults = [ + { + key: 'vst_lbl_3', + count: 29, + }, + { + key: 'vst_lbl_1', + count: 21, + }, + { + key: 'vst_lbl_2', + count: 19, + }, + { + key: 'no data', + count: 40, 
+ }, // missing data always at end + ]; + expect(result).toEqual(expectedResults); + }); + + test('two-level nested numeric aggregation -- global stats', async () => { + await esInstance.initialize(); + const field = 'days_to_follow_up'; + const nestedPath = 'visits.follow_ups'; + const result = await numericGlobalStats( + { esInstance, esIndex, esType }, + { field, nestedPath }, + ); + const expectedResults = { + key: [ + 1, + 3, + ], + count: 69, + min: 1.0, + max: 3.0, + avg: 2.1159420289855073, + sum: 146.0, + }; + expect(result).toEqual(expectedResults); + }); + + test('two-level nested numeric aggregation -- fixed bin width', async () => { + await esInstance.initialize(); + const field = 'days_to_follow_up'; + const nestedPath = 'visits.follow_ups'; + const result = await numericHistogramWithFixedRangeStep( + { esInstance, esIndex, esType }, + { field, rangeStep: 1, nestedPath }, + ); + const expectedResults = [ + { + key: [ + 1, + 2, + ], + count: 21, + max: 1, + min: 1, + sum: 21, + avg: 1, + }, + { + key: [ + 2, + 3, + ], + count: 19, + max: 2, + min: 2, + sum: 38, + avg: 2, + }, + { + key: [ + 3, + 4, + ], + count: 29, + max: 3, + min: 3, + sum: 87, + avg: 3, + }, + ]; + expect(result).toEqual(expectedResults); + }); +}); From 6f194241fd68c2491113f985258cc2de2d6c5b2c Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 17:15:01 -0500 Subject: [PATCH 16/41] feat/new doc --- README.md | 11 ++- devHelper/README.md | 18 ++++- doc/queries.md | 187 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 205 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 88ef1be2..177dce62 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,9 @@ Please see [this doc](https://github.com/uc-cdis/guppy/blob/master/doc/queries.m Run `npm start` to start server at port 80. 
+### Local Deployment and Development: +Guppy has helper scripts that help a developer set up a local ES service using Docker, generate some example ES indices for testing, and populate these example ES indices with mock data. Please refer to [the DEV Helper doc](https://github.com/uc-cdis/guppy/blob/master/devHelper/README.md) for more information. + ### Configurations: Before launch, we need to write config and tell Guppy which elasticsearch indices and which auth control field to use. You could put following as your config files: @@ -40,7 +43,7 @@ export GUPPY_CONFIG_FILEPATH=./example_config.json npm start ``` -#### Authorization +### Authorization: Guppy connects Arborist for authorization. The `auth_filter_field` item in your config file is the field used for authorization. You could set the endpoint by: @@ -54,7 +57,7 @@ skip all authorization steps. But if you just want to mock your own authorizatio behavior for local test without Arborist, just set `INTERNAL_LOCAL_TEST=true`. Please look into `/src/server/auth/utils.js` for more details. -#### Tier access +### Tiered Access: Guppy also support 3 different levels of tier access, by setting `TIER_ACCESS_LEVEL`: - `private` by default: only allows access to authorized resources - `regular`: allows all kind of aggregation (with limitation for unauthorized resources), but forbid access to raw data without authorization @@ -94,7 +97,7 @@ export TIER_ACCESS_LIMIT=100 npm start ``` -> ##### Tier Access Sensitive Record Exclusion +> #### Tier Access Sensitive Record Exclusion > It is possible to configure Guppy to hide some records from being returned in `_aggregation` queries when Tiered Access is enabled (tierAccessLevel: "regular"). > The purpose of this is to "hide" information about certain sensitive resources, essentially making this an escape hatch from Tiered Access. > Crucially, Sensitive Record Exclusion only applies to records which the user does not have access to.
If the user has access to a record, it will @@ -104,5 +107,5 @@ npm start > > (E.g., `"tier_access_sensitive_record_exclusion_field": "sensitive"` in the Guppy config tells Guppy to look for a field in the ES index called `sensitive`, and to exclude records in the ES index which have `sensitive: "true"`) -#### Download endpoint +### Download Endpoint: Guppy has another special endpoint `/download` for just fetching raw data from elasticsearch. please see [here](https://github.com/uc-cdis/guppy/blob/master/doc/download.md) for more details. diff --git a/devHelper/README.md b/devHelper/README.md index 91f05d2f..8417af9f 100644 --- a/devHelper/README.md +++ b/devHelper/README.md @@ -7,12 +7,24 @@ docker-compose -f ./esearch.yml up -d ``` ## Step.2 import mock data into elasticsearch index -Go to the repository's root directory and run the following command. - +Guppy has a helper function to generate mock data for a specific ES index. For example, to generate data for an ES index called `gen3-dev-subject` with document type `subject`, run the following command: ``` -sh ./generate_data.sh +npm run gendata -- -i gen3-dev-subject -d subject ``` +Here is the complete list of arguments that `npm run gendata` accepts: +| argument | description | default | +|------------------------------|--------------------------------------------------------|-------------------| +| -v, --verbose | verbose output | false | +| -h, --hostname `` | elasticsearch hostname | http://localhost | +| -p, --port `` | elasticsearch port | 9200 | +| -i, --index `` | elasticsearch index | undefined | +| -d, --doc_type `` | document type | undefined | +| -n, --number `` | number of documents to generate | 500 | +| -r, --random | generate a random number of documents, up to `number` | false | + +Also, there are some predefined values in `/genData/valueBank.json`.
+ ## Step.3 start server for developing server side code Go to repo root directory, and run diff --git a/doc/queries.md b/doc/queries.md index 9d72d7ff..8c61e60b 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -7,6 +7,7 @@ Table of Contents - [Text Aggregation](#aggs-text) - [Numeric Aggregation](#aggs-numeric) - [Nested Aggregation](#aggs-nested) + - [Sub-aggregations](#aggs-sub) - [Filters](#filter) - [Basic Filter Unit](#filter-unit) - [Text Search Unit in Filter](#filter-search) @@ -395,11 +396,189 @@ Result: ### 4. Nested Aggregation -Guppy supports nested aggregations (sub-aggregations) for fields. Currently Guppy only supports two-level-sub-aggregations. +:heavy_exclamation_mark: This section is for performing aggregations on document which contains nested fields. For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub) -There are two types of nested aggregations that is supported by Guppy: terms aggregation and missing aggregation, user can mix-and-match the using of both aggregations. +Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. 
+> Suppose the ES index has a mapping as the following: +>``` +> "mappings": { +> "subject": { +> "properties": { +> "subject_id": { "type": "keyword" }, +> "visits": { +> "type": "nested", +> "properties": { +> "days_to_visit": { "type": "integer" }, +> "visit_label": { "type": "keyword" }, +> "follow_ups": { +> "type": "nested", +> "properties": { +> "days_to_follow_up": { "type": "integer" }, +> "follow_up_label": { "type": "keyword" }, +> } +> } +> } +> }, +> } +> } +> } +>``` + +An example nested query that Guppy can perform with respect to that ESS index could be: +``` +query: { + _aggregation: { + subject: { + subject_id: { --> normal non-nested aggregation + histogram: { + key + count + } + } + visits: { + visit_label: { --> one-level nested text aggregation + histogram: { + key + count + } + } + follow_ups: { + days_to_follow_up: { --> two-level nested numeric aggregation + histogram(rangeStep: 1) { + key + count + } + } + } + } + } + } +} +``` + +Result: +``` +{ + "data": { + "_aggregation": { + "subject": { + "subject_id": { + "histogram": [ + { + "key": "subject_id_1", + "count": 24 + }, + { + "key": "subject_id_2", + "count": 24 + }, + { + "key": "subject_id_3", + "count": 21 + } + ] + }, + "visits": { + "visit_label": { + "histogram": [ + { + "key": "vst_lbl_3", + "count": 29 + }, + { + "key": "vst_lbl_1", + "count": 21 + }, + { + "key": "vst_lbl_2", + "count": 19 + } + ] + }, + "days_to_visit": { + "histogram": [ + { + "key": [ + 1, + 2 + ], + "count": 21 + }, + { + "key": [ + 2, + 3 + ], + "count": 19 + }, + { + "key": [ + 3, + 4 + ], + "count": 29 + } + ] + }, + "follow_ups": { + "follow_up_label": { + "histogram": [ + { + "key": "flup_lbl_3", + "count": 29 + }, + { + "key": "flup_lbl_1", + "count": 21 + }, + { + "key": "flup_lbl_2", + "count": 19 + } + ] + }, + "days_to_follow_up": { + "histogram": [ + { + "key": [ + 1, + 2 + ], + "count": 21 + }, + { + "key": [ + 2, + 3 + ], + "count": 19 + }, + { + "key": [ + 3, + 4 + ], + "count": 29 + } + ] + 
} + } + } + } + } + } +} +``` + + + + +### 5. Sub-aggregations +Guppy supports sub-aggregations for fields. Currently Guppy only supports two-level-sub-aggregations. + +Guppy supports two types of sub-aggregations: terms aggregation and missing aggregation. Users can mix and match the two. -#### 4.1. Terms Aggregation +#### 5.1. Terms Aggregation Terms aggregation requires a single `field` for parent aggregation and an array of fields for the nested sub-aggregations. The sub-aggregations will be computed for the buckets which their parent aggregation generates. It is intended to show for each of the `key` of the single `field` in the parent aggregation, what is the distribution of each element from the array of fields in the sub-aggregations. Results are wrapped by keywords `field` and also `key` and `count` for that `field`, example: @@ -521,7 +700,7 @@ Result: } ``` -#### 4.2. Missing Aggregation +#### 5.2. Missing Aggregation Missing aggregation also requires a single `field` for parent aggregation and an array of fields for the nested sub-aggregations. The sub-aggregations will be computed for the buckets which their parent aggregation generates. It is intended to show for each of the `key` of the single `field` in the parent aggregation, how many elements from the array of fields in the sub-aggregation are missing from it. Results are wrapped by keywords `field` and `count`, example: From 191c2f5bcbda2c46e85b873bae98a39ac50806cc Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 17:37:11 -0500 Subject: [PATCH 17/41] chore/comments --- doc/queries.md | 2 +- src/server/es/aggs.js | 18 +++++++++++++++++- src/server/resolvers.js | 14 +++++++++++--- src/server/schema.js | 5 +++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 8c61e60b..e54c007c 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -396,7 +396,7 @@ Result: ### 4.
Nested Aggregation -:heavy_exclamation_mark: This section is for performing aggregations on document which contains nested fields. For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub) +:bangbang: **This section is for performing aggregations on document which contains nested fields. For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub)** Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. > Suppose the ES index has a mapping as the following: diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index 0aa927b2..594a84e8 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -134,6 +134,9 @@ export const appendAdditionalRangeQuery = (field, oldQuery, rangeStart, rangeEnd * @param {object} param1.filterSelf - only valid if to avoid filtering the same aggregation field * @param {object} param1.defaultAuthFilter - once param1.filter is empty, * use this auth related filter instead + * @param {object} param1.nestedAggFields - fields for sub-aggregations + * (terms and/or missing aggregation) + * @param {object} param1.nestedPath - path info used by nested aggregation * @returns {min, max, sum, count, avg, key} */ export const numericGlobalStats = async ( @@ -216,6 +219,9 @@ export const numericGlobalStats = async ( * @param {object} param1.filterSelf - only valid if to avoid filtering the same aggregation field * @param {object} param1.defaultAuthFilter - once param1.filter is empty, * use this auth related filter instead + * @param {object} param1.nestedAggFields - fields for sub-aggregations + * (terms and/or missing aggregation) + * @param {object} param1.nestedPath - path info used by nested aggregation */ export const numericHistogramWithFixedRangeStep = async ( { @@ -332,6 +338,9 @@ export const 
numericHistogramWithFixedRangeStep = async ( * @param {object} param1.filterSelf - only valid if to avoid filtering the same aggregation field * @param {object} param1.defaultAuthFilter - once param1.filter is empty, * use this auth related filter instead + * @param {object} param1.nestedAggFields - fields for sub-aggregations + * (terms and/or missing aggregation) + * @param {object} param1.nestedPath - path info used by nested aggregation */ export const numericHistogramWithFixedBinCount = async ( { @@ -404,6 +413,9 @@ export const numericHistogramWithFixedBinCount = async ( * @param {object} param1.filterSelf - only valid if to avoid filtering the same aggregation field * @param {object} param1.defaultAuthFilter - once param1.filter is empty, * use this auth related filter instead + * @param {object} param1.nestedAggFields - fields for sub-aggregations + * (terms and/or missing aggregation) + * @param {object} param1.nestedPath - path info used by nested aggregation */ export const numericAggregation = async ( { @@ -507,6 +519,9 @@ export const numericAggregation = async ( * @param {object} param1.filterSelf - only valid if to avoid filtering the same aggregation field * @param {object} param1.defaultAuthFilter - once param1.filter is empty, * use this auth related filter instead + * @param {object} param1.nestedAggFields - fields for sub-aggregations + * (terms and/or missing aggregation) + * @param {object} param1.nestedPath - path info used by nested aggregation */ export const textAggregation = async ( { @@ -558,6 +573,7 @@ export const textAggregation = async ( aggsObj.aggs = updateAggObjectForMissingFields(nestedAggFields.missingFields, aggsObj.aggs); } + // build up ES query if is nested aggregation if (aggsNestedName) { queryBody.aggs = { [aggsNestedName]: { @@ -604,11 +620,11 @@ export const textAggregation = async ( }, }; } - // log.debug('[textAggregation] queryBody', queryBody); let resultSize; let finalResults = []; /* eslint-disable */ do { + // 
parse ES query result based on whether is doing nested aggregation or not (if `aggsNestedName` is defined) const result = await esInstance.query(esIndex, esType, queryBody); resultSize = 0; diff --git a/src/server/resolvers.js b/src/server/resolvers.js index 041b04ee..10365749 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -124,6 +124,7 @@ const getFieldAggregationResolverMappingsByField = (field) => { if (field.type !== 'nested') { return ((parent) => ({ ...parent, field: field.name })); } + // if field is nested type, update nestedPath info with parent's nestedPath and pass down return ((parent) => ({ ...parent, field: field.name, nestedPath: (parent.nestedPath) ? `${parent.nestedPath}.${field.name}` : `${field.name}` })); }; @@ -147,7 +148,7 @@ const getFieldAggregationResolverMappings = (esInstance, esIndex) => { * race * } * _aggregation { - * subject (filter: xx, filterSelf: xx} { ---> `typeAggsQueryResolver` + * subject (filter: xx, filterSelf: xx} { ---> `typeAggsQueryResolver` * _totalCount ---> `aggsTotalQueryResolver` * gender { * histogram { ---> `textHistogramResolver` @@ -162,6 +163,14 @@ const getFieldAggregationResolverMappings = (esInstance, esIndex) => { * count * } * } + * visits { ---> `typeNestedAggregationResolver` (fall-through) + * visit_label { + * histogram { ---> `textHistogramResolver` + * key + * count + * } + * } + * } * } * } * _mapping { @@ -199,11 +208,10 @@ const getResolver = (esConfig, esInstance) => { const typeNestedAggregationResolvers = esConfig.indices.reduce((acc, cfg) => { const { fields } = esInstance.getESFields(cfg.index); const nestedFieldsArray = fields.filter((entry) => entry.type === 'nested'); - // log.debug('[resolver.typeNestedAggregationResolvers] nestedFieldsArray', nestedFieldsArray); + // similar level by level "flatten" logic as for schema while (nestedFieldsArray.length > 0) { const nestedFields = nestedFieldsArray.shift(); - // log.debug('[resolver.typeNestedAggregationResolvers] 
nestedFields', nestedFields); const typeNestedAggsName = `NestedHistogramFor${firstLetterUpperCase(nestedFields.name)}`; acc[typeNestedAggsName] = {}; if (nestedFields.type === 'nested' && nestedFields.nestedProps) { diff --git a/src/server/schema.js b/src/server/schema.js index c1600f07..0ef80a8b 100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -179,6 +179,11 @@ export const getAggregationSchema = (esConfig) => ` } `; +/** + * This is the function for getting schemas for a single nested index. + * Multi-level nested fields are "flattened" level by level. + * For each level of nested field a new type in schema is created. + */ const getAggregationSchemaForOneNestedIndex = (esInstance, esIndex) => { const fieldGQLTypeMap = getFieldGQLTypeMapForOneIndex(esInstance, esIndex); const fieldAggsNestedTypeMap = fieldGQLTypeMap.filter((f) => f.esType === 'nested'); From 13ebe53fae1ec6a248fc6d4aea88c9c8d3ca803b Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 17:38:32 -0500 Subject: [PATCH 18/41] update package --- package-lock.json | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index d292a511..95513d92 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "@gen3/guppy", - "version": "0.4.0", + "version": "0.5.0", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 65bdf2e2..bb3d0b6c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@gen3/guppy", - "version": "0.4.0", + "version": "0.5.0", "description": "Server that support GraphQL queries on data from elasticsearch", "main": "src/server/server.js", "directories": { From bb22c6ae5be3d1c4e81cd753cc13086c17707f3e Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 17:45:40 -0500 Subject: [PATCH 19/41] fix/doc --- doc/queries.md | 41 ----------------------------------------- 1 file changed, 41 
deletions(-) diff --git a/doc/queries.md b/doc/queries.md index e54c007c..2c0bf9bf 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -495,48 +495,7 @@ Result: } ] }, - "days_to_visit": { - "histogram": [ - { - "key": [ - 1, - 2 - ], - "count": 21 - }, - { - "key": [ - 2, - 3 - ], - "count": 19 - }, - { - "key": [ - 3, - 4 - ], - "count": 29 - } - ] - }, "follow_ups": { - "follow_up_label": { - "histogram": [ - { - "key": "flup_lbl_3", - "count": 29 - }, - { - "key": "flup_lbl_1", - "count": 21 - }, - { - "key": "flup_lbl_2", - "count": 19 - } - ] - }, "days_to_follow_up": { "histogram": [ { From fd1c2b2afea81a744267f104f6c5ccee3e3f1df5 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 17:56:24 -0500 Subject: [PATCH 20/41] fix/bot alert --- src/server/es/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/es/index.js b/src/server/es/index.js index 020c23ae..71dd9201 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -287,7 +287,7 @@ class ES { if (value.type !== 'nested') { r = { name: key, type: value.type }; } else { - r = buildNestedField(key, value, r); + r = buildNestedField(key, value); } return r; }), From af2877c435f88020cd720af6048bbbbc45043786 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Apr 2020 18:51:54 -0500 Subject: [PATCH 21/41] chore/doc update --- doc/queries.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 2c0bf9bf..53d4f6c5 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -12,6 +12,7 @@ Table of Contents - [Basic Filter Unit](#filter-unit) - [Text Search Unit in Filter](#filter-search) - [Combined Filters](#filter-comb) + - [Nested Filter](#filter-nested) - [Some other queries and arguments](#other) @@ -398,7 +399,7 @@ Result: ### 4. Nested Aggregation :bangbang: **This section is for performing aggregations on document which contains nested fields. 
For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub)** -Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. +Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. For information about using nested fields inside filters, see [Nested Filter](#filter-nested) > Suppose the ES index has a mapping as the following: >``` > "mappings": { @@ -927,7 +928,7 @@ In future Guppy will support `SQL` like syntax for filter, like ` {"filter": "(race = 'hispanic' OR race='asian') AND (file_count >= 15 AND file_count <= 75) AND project = 'Proj-1' AND gender = 'female'"} `. - + ### Nested filter Guppy now supports query on nested ElasticSearch schema. The way to query and filter the nested index is similar to the ES query. @@ -971,6 +972,7 @@ Assuming that there is `File` node nested inside `subject`. The nested query wil ElasticSearch only support the nested filter on the level of document for returning data. It means that the filter `file_count >=15` and `file_count<=75` will return the whole document having a `file_count` in the range of `[15, 75]`. The returned data will not filter the nested `file_count`(s) that are out of that range for that document. 
+ ## Some other queries and arguments ### Mapping query From 84975f4010e5a8f8ffb2d2a03f121d17f040d171 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Apr 2020 07:45:58 -0500 Subject: [PATCH 22/41] fix/typo --- doc/queries.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 53d4f6c5..65600df3 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -425,12 +425,12 @@ Guppy supports performing aggregations (both text and numeric aggregations) on n > } >``` -An example nested query that Guppy can perform with respect to that ESS index could be: +An example nested query that Guppy can perform with respect to that ES index could be: ``` query: { _aggregation: { subject: { - subject_id: { --> normal non-nested aggregation + subject_id: { --> regular non-nested aggregation histogram: { key count From 2fa9965d1f2ca15043ece9a74a0053d03fdb6286 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Apr 2020 09:14:57 -0500 Subject: [PATCH 23/41] fix/nested query --- src/server/__tests__/schema.test.js | 8 ++++---- src/server/schema.js | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/server/__tests__/schema.test.js b/src/server/__tests__/schema.test.js index ea04620e..5ae12551 100644 --- a/src/server/__tests__/schema.test.js +++ b/src/server/__tests__/schema.test.js @@ -52,7 +52,7 @@ describe('Schema', () => { const expectedTypesSchemas = ` type Subject { gen3_resource_path: String, - visits:[NestedVisits], + visits:visits, gender: String, file_count: Int, name: String, @@ -61,12 +61,12 @@ describe('Schema', () => { whatever_lab_result_value: Float, _matched:[MatchedItem] } - type NestedVisits { + type visits { days_to_visit:Int, visit_label:String, - follow_ups:[NestedFollow_ups], + follow_ups:follow_ups, } - type NestedFollow_ups { + type follow_ups { days_to_follow_up:Int, follow_up_label:String, } diff --git a/src/server/schema.js b/src/server/schema.js index 0ef80a8b..81eef29f 
100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -27,7 +27,7 @@ const getGQLType = (esInstance, esIndex, field, esFieldType) => { return `[${gqlType}]`; } if (esFieldType === 'nested') { - return `[Nested${firstLetterUpperCase(field)}]`; + return `${field}`; } return gqlType; }; @@ -104,7 +104,7 @@ const getTypeSchemaForOneIndex = (esInstance, esIndex, esType) => { const esFieldType = fieldESTypeMap[fieldKey].type; if (esFieldType === 'nested' && !existingFields.has(fieldKey)) { const { properties } = fieldESTypeMap[fieldKey]; - queueTypes.push({ type: `Nested${firstLetterUpperCase(fieldKey)}`, properties }); + queueTypes.push({ type: `${fieldKey}`, properties }); existingFields.add(fieldKey); } }); @@ -121,7 +121,7 @@ const getTypeSchemaForOneIndex = (esInstance, esIndex, esType) => { const gqlTypes = getFieldGQLTypeMapForProperties(esInstance, esIndex, t.properties); gqlTypes.forEach((entry) => { if (entry.esType === 'nested' && !existingFields.has(entry.field)) { - queueTypes.push({ type: `Nested${firstLetterUpperCase(entry.field)}`, properties: entry.properties }); + queueTypes.push({ type: `${entry.field}`, properties: entry.properties }); existingFields.add(entry.field); fieldToArgs[entry.field] = getArgsByField(entry.field, entry.properties); } From 8c1194f70020e136a7a7aaf383219911e4aad220 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Apr 2020 09:27:54 -0500 Subject: [PATCH 24/41] fix/doc --- devHelper/README.md | 11 ++++++++++- generate_data.sh | 8 ++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/devHelper/README.md b/devHelper/README.md index 8417af9f..31fe7e10 100644 --- a/devHelper/README.md +++ b/devHelper/README.md @@ -7,7 +7,16 @@ docker-compose -f ./esearch.yml up -d ``` ## Step.2 import mock data into elasticsearch index -Guppy has a helper function to generate mock data for a specific ES index. 
For example, to generate data for an ES index called `gen3-dev-subject` with document type `subject`, run the following command: +Go to the repository's root directory and run the following command. + +``` +sh ./generate_data.sh +``` + +Doing so will automatically generate 3 ES indices (1 for `subject`, 1 for `file`, and 1 for `config`) and populate 100 records into each index. + +### Manually generate more mock data for a specific elasticsearch index (optional) +In case we want more mock data, Guppy has a helper function to generate mock data for a specific ES index. For example, to generate data for an ES index called `gen3-dev-subject` with document type `subject`, run the following command: ``` npm run gendata -- -i gen3-dev-subject -d subject ``` diff --git a/generate_data.sh b/generate_data.sh index d3ff6ee9..61aa39d7 100755 --- a/generate_data.sh +++ b/generate_data.sh @@ -2,15 +2,15 @@ source ./devHelper/scripts/commands.sh -CASE_INDEX_NAME=gen3-dev-subject +SUBJECT_INDEX_NAME=gen3-dev-subject FILE_INDEX_NAME=gen3-dev-file CONFIG_INDEX_NAME=gen3-dev-config DATA_COUNT=100 -es_delete_all $CASE_INDEX_NAME +es_delete_all $SUBJECT_INDEX_NAME es_delete_all $FILE_INDEX_NAME es_delete_all $CONFIG_INDEX_NAME -es_setup_index $CASE_INDEX_NAME $FILE_INDEX_NAME $CONFIG_INDEX_NAME -npm run gendata -- -i $CASE_INDEX_NAME -d subject -n $DATA_COUNT +es_setup_index $SUBJECT_INDEX_NAME $FILE_INDEX_NAME $CONFIG_INDEX_NAME +npm run gendata -- -i $SUBJECT_INDEX_NAME -d subject -n $DATA_COUNT npm run gendata -- -i $FILE_INDEX_NAME -d file -n $DATA_COUNT npm run gendata -- -i $CONFIG_INDEX_NAME -d config -n $DATA_COUNT From a1ee95cbdd4afc402ea8ced797abd82a94a1cb59 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Apr 2020 09:29:57 -0500 Subject: [PATCH 25/41] fix/doc --- devHelper/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devHelper/README.md b/devHelper/README.md index 31fe7e10..cac00c09 100644 --- a/devHelper/README.md +++ 
b/devHelper/README.md @@ -1,7 +1,7 @@ # How to generate mock data and start developing in your local ## Step.1 start elasticsearch - +In this directory `(/devHelper)`, do: ``` docker-compose -f ./esearch.yml up -d ``` From dbe782c9947cbe528bf5bd3ba26087c3237456ef Mon Sep 17 00:00:00 2001 From: Mingfei Shao <2475897+mfshao@users.noreply.github.com> Date: Tue, 14 Apr 2020 17:04:27 -0500 Subject: [PATCH 26/41] Update doc/queries.md Co-Authored-By: Zakir Gowani --- doc/queries.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 65600df3..e472030d 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -397,7 +397,7 @@ Result: ### 4. Nested Aggregation -:bangbang: **This section is for performing aggregations on document which contains nested fields. For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub)** +:bangbang: **This section is for performing aggregations on documents which contain nested fields. For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub)** Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. 
For information about using nested fields inside filters, see [Nested Filter](#filter-nested) > Suppose the ES index has a mapping as the following: @@ -1226,4 +1226,3 @@ Result: } } ``` - From 828a57a42c2576006ba584a187f5482a1a092406 Mon Sep 17 00:00:00 2001 From: Mingfei Shao <2475897+mfshao@users.noreply.github.com> Date: Tue, 14 Apr 2020 17:04:34 -0500 Subject: [PATCH 27/41] Update genData/genData.js Co-Authored-By: Zakir Gowani --- genData/genData.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genData/genData.js b/genData/genData.js index 8a55f7ca..856ec391 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -132,7 +132,7 @@ async function run() { chunks.forEach((c) => { client.bulk({ refresh: true, body: c }).then((res) => { res.body.items.forEach((item) => console.log(item)); - console.log(`Successfully insert ${c.length / 2} items`); + console.log(`Successfully inserted ${c.length / 2} items`); }).catch((res) => { if (res.body.errors) { const erroredDocuments = []; From 6e2db0a7b49418e834dab654caedd5acfaade862 Mon Sep 17 00:00:00 2001 From: Mingfei Shao <2475897+mfshao@users.noreply.github.com> Date: Tue, 14 Apr 2020 17:04:39 -0500 Subject: [PATCH 28/41] Update generate_data.sh Co-Authored-By: Zakir Gowani --- generate_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generate_data.sh b/generate_data.sh index 61aa39d7..e9abf0c1 100755 --- a/generate_data.sh +++ b/generate_data.sh @@ -14,4 +14,4 @@ npm run gendata -- -i $SUBJECT_INDEX_NAME -d subject -n $DATA_COUNT npm run gendata -- -i $FILE_INDEX_NAME -d file -n $DATA_COUNT npm run gendata -- -i $CONFIG_INDEX_NAME -d config -n $DATA_COUNT -echo "Successfully generate ${DATA_COUNT} data records" +echo "Successfully generated ${DATA_COUNT} data records" From bd2099194259bddc475c63cb8a8f62e932f229d1 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 15 Apr 2020 15:12:47 -0500 Subject: [PATCH 29/41] chore/remove unnecessary printouts --- 
genData/genData.js | 4 ---- src/server/es/index.js | 1 - 2 files changed, 5 deletions(-) diff --git a/genData/genData.js b/genData/genData.js index 856ec391..d2e5a035 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -20,8 +20,6 @@ program program.parse(process.argv); -// console.log(program); - const esHost = `${program.hostname}:${program.port}`; const esIndex = program.index; @@ -119,8 +117,6 @@ async function run() { return dCopy; }); - console.log('sample: ', sample); - const body = sample.flatMap((d) => [{ index: { _index: esIndex, diff --git a/src/server/es/index.js b/src/server/es/index.js index 71dd9201..9ccbda43 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -329,7 +329,6 @@ class ES { const queryBody = { from: offset }; if (typeof filter !== 'undefined') { queryBody.query = getFilterObj(this, esIndex, filter); - log.debug('[ES] filterObj: ', queryBody.query); } queryBody.sort = getESSortBody(sort, this, esIndex); if (typeof size !== 'undefined') { From 4923f7d8b4edae5a44b43e0037ad2869e896de06 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 15 Apr 2020 17:27:39 -0500 Subject: [PATCH 30/41] chore/explaination for nested vs sub aggs --- doc/queries.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/queries.md b/doc/queries.md index e472030d..914eaf5c 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -534,9 +534,26 @@ Result: ### 5. Sub-aggregations +:warning: **This section is for performing sub-aggregations (terms and missing aggregations) on documents. This section was incorrectly named as "Nested Aggregation" before Guppy 0.5.0 and has been corrected since then.** + +>The difference between Nested Aggregations and Sub-aggregations is that Nested Aggregations are performed on multi-level nested fields, while the sub-aggregations are preformed on different fields within a same level. + Guppy supports sub-aggregations for fields. Currently Guppy only supports two-level-sub-aggregations. 
There are two types of sub-aggregations that is supported by Guppy: terms aggregation and missing aggregation, user can mix-and-match the using of both aggregations. +For more information about ES terms aggregation and missing aggregation, please read: [Terms Aggregation](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html) and [Missing Aggregation](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-missing-aggregation.html) + +> For examples in the following sections, assume the ES index has a mapping as the following: +>``` +> "mappings": { +> "subject": { +> "properties": { +> "project": { "type": "keyword" }, +> "gender": { "type": "keyword" }, +> }, +> } +> } +>``` #### 5.1. Terms Aggregation Terms aggregation requires a single `field` for parent aggregation and an array of fields for the nested sub-aggregations. The sub-aggregations will be computed for the buckets which their parent aggregation generates. It is intended to show for each of the `key` of the single `field` in the parent aggregation, what is the distribution of each element from the array of fields in the sub-aggregations. From 4e319fc3a252d908c718f9b71aa5d135397982a0 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 15 Apr 2020 17:30:14 -0500 Subject: [PATCH 31/41] chore/move chapt --- doc/queries.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/queries.md b/doc/queries.md index 914eaf5c..6511a75d 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -399,6 +399,8 @@ Result: ### 4. Nested Aggregation :bangbang: **This section is for performing aggregations on documents which contain nested fields. 
For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub)** +>The difference between Nested Aggregations and Sub-aggregations is that Nested Aggregations are performed on multi-level nested fields, while the sub-aggregations are preformed on different fields within a same level. + Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. For information about using nested fields inside filters, see [Nested Filter](#filter-nested) > Suppose the ES index has a mapping as the following: >``` @@ -536,8 +538,6 @@ Result: ### 5. Sub-aggregations :warning: **This section is for performing sub-aggregations (terms and missing aggregations) on documents. This section was incorrectly named as "Nested Aggregation" before Guppy 0.5.0 and has been corrected since then.** ->The difference between Nested Aggregations and Sub-aggregations is that Nested Aggregations are performed on multi-level nested fields, while the sub-aggregations are preformed on different fields within a same level. - Guppy supports sub-aggregations for fields. Currently Guppy only supports two-level-sub-aggregations. There are two types of sub-aggregations that is supported by Guppy: terms aggregation and missing aggregation, user can mix-and-match the using of both aggregations. From 9f897e06b9a6d9d04d45655759e923d3b025dabd Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 15 Apr 2020 17:31:22 -0500 Subject: [PATCH 32/41] chore/missing line break --- doc/queries.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/queries.md b/doc/queries.md index 6511a75d..12c9bb7e 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -541,6 +541,7 @@ Result: Guppy supports sub-aggregations for fields. Currently Guppy only supports two-level-sub-aggregations. 
There are two types of sub-aggregations that is supported by Guppy: terms aggregation and missing aggregation, user can mix-and-match the using of both aggregations. + For more information about ES terms aggregation and missing aggregation, please read: [Terms Aggregation](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html) and [Missing Aggregation](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-missing-aggregation.html) > For examples in the following sections, assume the ES index has a mapping as the following: From 2f7e5a5a04def46c26f5cb255a74d8ab24af7dbd Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Thu, 16 Apr 2020 10:59:21 -0500 Subject: [PATCH 33/41] chore/no dir jumpings in doc --- devHelper/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/devHelper/README.md b/devHelper/README.md index cac00c09..1f13e587 100644 --- a/devHelper/README.md +++ b/devHelper/README.md @@ -1,13 +1,13 @@ # How to generate mock data and start developing in your local ## Step.1 start elasticsearch -In this directory `(/devHelper)`, do: +Go to the repository's root directory, do: ``` -docker-compose -f ./esearch.yml up -d +docker-compose -f ./devHelper/docker/esearch.yml up -d ``` ## Step.2 import mock data into elasticsearch index -Go to the repository's root directory and run the following command. +In the root directory of this repo, run the following command: ``` sh ./generate_data.sh @@ -35,7 +35,7 @@ Here is a complete list of arguments that `npm run gendata` would take Also, there are some predefined values in `/genData/valueBank.json`. 
## Step.3 start server for developing server side code -Go to repo root directory, and run +In the root directory of this repo, run: ``` GUPPY_PORT=3000 INTERNAL_LOCAL_TEST=true npm start @@ -45,11 +45,10 @@ The Guppy server will be hosted at [localhost:3000/graphql](http://localhost:300 We use nodemon to start the server, so all code change will be hot applied to the running server in realtime. ## Step.4 start storybook for developing front-end components -Go to repo root directory, and run +In the root directory of this repo, run: ``` npm run storybook ``` [Storybook](https://storybook.js.org/) will be hosted at [localhost:6006](http://localhost:6006). - From 7e1915ee6e1c3c19017845c277d493d517c43283 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Fri, 17 Apr 2020 16:55:09 -0500 Subject: [PATCH 34/41] fix/dont add missing alias to numeric field by default --- src/server/es/aggs.js | 5 ++++- src/server/es/index.js | 2 ++ src/server/resolvers.js | 14 +++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index 594a84e8..3cb7dc03 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -536,6 +536,7 @@ export const textAggregation = async ( defaultAuthFilter, nestedAggFields, nestedPath, + numericField, }, ) => { const queryBody = { size: 0 }; @@ -551,7 +552,9 @@ export const textAggregation = async ( } let missingAlias = {}; - if (config.esConfig.aggregationIncludeMissingData) { + // don't add missing alias to numeric field by default + // since the value of missing alias is a string + if (config.esConfig.aggregationIncludeMissingData && !numericField) { missingAlias = { missing: config.esConfig.missingDataAlias }; } const aggsName = `${field}Aggs`; diff --git a/src/server/es/index.js b/src/server/es/index.js index 9ccbda43..9370eb28 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -448,6 +448,7 @@ class ES { defaultAuthFilter, nestedAggFields, nestedPath, + 
numericField, }) { return esAggregator.textAggregation( { @@ -462,6 +463,7 @@ class ES { defaultAuthFilter, nestedAggFields, nestedPath, + numericField, }, ); } diff --git a/src/server/resolvers.js b/src/server/resolvers.js index 10365749..cdd3aa4f 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -2,6 +2,7 @@ import GraphQLJSON from 'graphql-type-json'; import { parseResolveInfo } from 'graphql-parse-resolve-info'; import log from './logger'; import { firstLetterUpperCase } from './utils/utils'; +import { esFieldNumericTextTypeMapping, NumericTextTypeTypeEnum } from './es/const'; /** * This is for getting raw data, by specific es index and es type @@ -103,7 +104,7 @@ const textHistogramResolver = async (parent, args, context) => { log.debug('[resolver.textHistogramResolver] args', args); const { esInstance, esIndex, esType, - filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, + filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, numericField, } = parent; log.debug('[resolver.textHistogramResolver] parent', parent); const { authHelper } = context; @@ -117,15 +118,22 @@ const textHistogramResolver = async (parent, args, context) => { defaultAuthFilter, nestedAggFields, nestedPath, + numericField, }); }; const getFieldAggregationResolverMappingsByField = (field) => { + let numericField = false; + if (esFieldNumericTextTypeMapping[field.type] === NumericTextTypeTypeEnum.ES_NUMERIC_TYPE) { + numericField = true; + } if (field.type !== 'nested') { - return ((parent) => ({ ...parent, field: field.name })); + return ((parent) => ({ ...parent, field: field.name, numericField })); } // if field is nested type, update nestedPath info with parent's nestedPath and pass down - return ((parent) => ({ ...parent, field: field.name, nestedPath: (parent.nestedPath) ? `${parent.nestedPath}.${field.name}` : `${field.name}` })); + return ((parent) => ({ + ...parent, field: field.name, nestedPath: (parent.nestedPath) ? 
`${parent.nestedPath}.${field.name}` : `${field.name}`, numericField, + })); }; const getFieldAggregationResolverMappings = (esInstance, esIndex) => { From 44ec2353708754b9ddc4cbcb40c63db91a939da7 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sat, 25 Apr 2020 17:29:16 -0500 Subject: [PATCH 35/41] feat/generate mock array and update config index --- devHelper/scripts/commands.sh | 16 ++++++++-- genData/genData.js | 25 ++++++++++++++- genData/types.js | 57 ++++++++++++++++++++++++++++------- genData/valueBank.json | 32 ++++++++++++++++++++ generate_data.sh | 5 ++- 5 files changed, 118 insertions(+), 17 deletions(-) diff --git a/devHelper/scripts/commands.sh b/devHelper/scripts/commands.sh index 569e60ec..d73932ef 100755 --- a/devHelper/scripts/commands.sh +++ b/devHelper/scripts/commands.sh @@ -93,7 +93,13 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \ "auth_resource_path": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, "file_count": { "type": "integer" }, "whatever_lab_result_value": { "type": "float" }, - "some_string_field": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "some_nested_array_field": { + "type": "nested", + "properties": { + "some_integer_inside_nested": { "type": "integer" }, + "some_string_inside_nested": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } + } + }, "some_integer_field": { "type": "integer" }, "some_long_field": { "type": "long" }, "sensitive": { "type": "keyword" } @@ -133,7 +139,13 @@ curl -iv -X PUT "${ESHOST}/${configIndexName}" \ "number_of_shards" : 1, "number_of_replicas" : 0 } - } + }, + "mappings": { + "_doc": { + "properties": { + "array": { 
"type": "keyword" } + } + } } } ' diff --git a/genData/genData.js b/genData/genData.js index d2e5a035..6e5319e9 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -15,6 +15,7 @@ program .option('-p, --port ', 'elasticsearch port', '9200') .option('-i, --index ', 'elasticsearch index') .option('-d, --doc_type ', 'document type', null) + .option('-c, --config_index ', 'array config index') .option('-n, --number ', 'number of documents to generate', 500) .option('-r, --random', 'generate random number of document up to "number"', false); @@ -22,6 +23,7 @@ program.parse(process.argv); const esHost = `${program.hostname}:${program.port}`; const esIndex = program.index; +const configIndex = program.config_index; const client = new Client({ node: esHost }); @@ -48,6 +50,8 @@ const schema = { maxItems: max, }; +const arrayFields = []; + const MAX_INT = (2 ** 31) - 1; const MIN_INT = -1 * (2 ** 31); const MAX_LONG = (2 ** 63) - 1; @@ -80,7 +84,7 @@ async function run() { if (m !== undefined) { Object.entries(m.properties).forEach(([key, value]) => { - schema.items.properties[key] = fakerType(value); + schema.items.properties[key] = fakerType(key, value, arrayFields); schema.items.required.push(key); }); } @@ -155,6 +159,25 @@ async function run() { }); const { body: count } = await client.count({ index: esIndex }); console.log(count); + + if (configIndex) { + const data = [ + { + index: { + _index: configIndex, + _type: '_doc', + _id: esIndex, + }, + }, + { + array: arrayFields, + }, + ]; + client.bulk({ refresh: true, body: data }).then((res) => { + res.body.items.forEach((item) => console.log(item)); + console.log('Successfully updated config index'); + }); + } } run().catch((error) => { diff --git a/genData/types.js b/genData/types.js index f669d649..16b1a26c 100644 --- a/genData/types.js +++ b/genData/types.js @@ -1,33 +1,68 @@ -function fakerType(value) { +function fakerType(key, value, arrayFields) { let fieldType; const properties = {}; const required = []; 
+ switch (value.type) { case 'boolean': - fieldType = { type: 'boolean' }; + if (key.includes('array')) { + fieldType = { + type: 'array', items: { type: 'boolean', properties, required }, minItems: 0, maxItems: 10, + }; + arrayFields.push(key); + } else { + fieldType = { type: 'boolean' }; + } break; case 'keyword': - fieldType = { type: 'string', faker: 'name.findName' }; - break; case 'text': - fieldType = { type: 'string', faker: 'name.findName' }; + if (key.includes('array')) { + fieldType = { + type: 'array', + items: { + type: 'string', faker: 'name.findName', properties, required, + }, + minItems: 0, + maxItems: 10, + }; + arrayFields.push(key); + } else { + fieldType = { type: 'string', faker: 'name.findName' }; + } break; case 'float': case 'double': - fieldType = { type: 'number' }; + if (key.includes('array')) { + fieldType = { + type: 'array', items: { type: 'number', properties, required }, minItems: 0, maxItems: 10, + }; + arrayFields.push(key); + } else { + fieldType = { type: 'number' }; + } break; case 'long': case 'integer': - fieldType = { type: 'integer' }; + if (key.includes('array')) { + fieldType = { + type: 'array', items: { type: 'integer', properties, required }, minItems: 0, maxItems: 10, + }; + arrayFields.push(key); + } else { + fieldType = { type: 'integer' }; + } break; case 'nested': - Object.entries(value.properties).forEach(([key, v]) => { - properties[key] = fakerType(v); - required.push(key); + Object.entries(value.properties).forEach(([k, v]) => { + properties[k] = fakerType(k, v); + required.push(k); }); fieldType = { - type: 'array', items: { type: 'object', properties, required }, minItems: 10, maxItems: 10, + type: 'array', items: { type: 'object', properties, required }, minItems: 0, maxItems: 10, }; + if (key.includes('array')) { + arrayFields.push(key); + } break; default: // console.log(value); diff --git a/genData/valueBank.json b/genData/valueBank.json index c246b240..ef73dda1 100644 --- a/genData/valueBank.json +++ 
b/genData/valueBank.json @@ -36,5 +36,37 @@ "follow_up_label": "flup_lbl_3" } } + ], + "some_nested_array_field": [ + [ + { + "some_integer_inside_nested": 1, + "some_string_inside_nested": "first" + }, + { + "some_integer_inside_nested": 2, + "some_string_inside_nested": "second" + } + ], + [ + { + "some_integer_inside_nested": 3, + "some_string_inside_nested": "third" + }, + { + "some_integer_inside_nested": 4, + "some_string_inside_nested": "forth" + } + ], + [ + { + "some_integer_inside_nested": 5, + "some_string_inside_nested": "fifth" + }, + { + "some_integer_inside_nested": 6, + "some_string_inside_nested": "sixth" + } + ] ] } diff --git a/generate_data.sh b/generate_data.sh index e9abf0c1..88425bd1 100755 --- a/generate_data.sh +++ b/generate_data.sh @@ -10,8 +10,7 @@ es_delete_all $SUBJECT_INDEX_NAME es_delete_all $FILE_INDEX_NAME es_delete_all $CONFIG_INDEX_NAME es_setup_index $SUBJECT_INDEX_NAME $FILE_INDEX_NAME $CONFIG_INDEX_NAME -npm run gendata -- -i $SUBJECT_INDEX_NAME -d subject -n $DATA_COUNT -npm run gendata -- -i $FILE_INDEX_NAME -d file -n $DATA_COUNT -npm run gendata -- -i $CONFIG_INDEX_NAME -d config -n $DATA_COUNT +npm run gendata -- -i $SUBJECT_INDEX_NAME -d subject -n $DATA_COUNT -c $CONFIG_INDEX_NAME +npm run gendata -- -i $FILE_INDEX_NAME -d file -n $DATA_COUNT -c $CONFIG_INDEX_NAME echo "Successfully generated ${DATA_COUNT} data records" From 562d00558b4667fa18d6ce759b82188575e6c616 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sat, 25 Apr 2020 17:29:31 -0500 Subject: [PATCH 36/41] feat/handle array as nested field --- src/server/config.js | 2 +- src/server/schema.js | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/server/config.js b/src/server/config.js index e7f304ba..19f7db3d 100644 --- a/src/server/config.js +++ b/src/server/config.js @@ -21,7 +21,7 @@ const config = { type: 'file', }, ], - configIndex: inputConfig.config_index, + configIndex: inputConfig.config_index || 'gen3-dev-config', 
authFilterField: inputConfig.auth_filter_field || 'auth_resource_path', aggregationIncludeMissingData: typeof inputConfig.aggs_include_missing_data === 'undefined' ? true : inputConfig.aggs_include_missing_data, missingDataAlias: inputConfig.missing_data_alias || 'no data', diff --git a/src/server/schema.js b/src/server/schema.js index 81eef29f..e144a8e4 100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -23,10 +23,13 @@ const getGQLType = (esInstance, esIndex, field, esFieldType) => { throw new Error(`Invalid type ${esFieldType} for field ${field} in index ${esIndex}`); } const isArrayField = esInstance.isArrayField(esIndex, field); - if (isArrayField) { + if (isArrayField && esFieldType !== 'nested') { return `[${gqlType}]`; } if (esFieldType === 'nested') { + if (isArrayField) { + return `[${field}]`; + } return `${field}`; } return gqlType; From 455f4041b92819cbb7500286ee73ba683001d0b3 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sat, 25 Apr 2020 17:33:39 -0500 Subject: [PATCH 37/41] comment --- genData/types.js | 1 + 1 file changed, 1 insertion(+) diff --git a/genData/types.js b/genData/types.js index 16b1a26c..0557934d 100644 --- a/genData/types.js +++ b/genData/types.js @@ -5,6 +5,7 @@ function fakerType(key, value, arrayFields) { switch (value.type) { case 'boolean': + // if a field is an array, say it explicit in the name, since ES does not know if (key.includes('array')) { fieldType = { type: 'array', items: { type: 'boolean', properties, required }, minItems: 0, maxItems: 10, From 12f2acd40803fb1706c762e2adc53620ecb0ad82 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sun, 26 Apr 2020 15:17:29 -0500 Subject: [PATCH 38/41] chore/config index default --- genData/genData.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genData/genData.js b/genData/genData.js index 6e5319e9..4a60e257 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -23,7 +23,7 @@ program.parse(process.argv); const esHost = 
`${program.hostname}:${program.port}`; const esIndex = program.index; -const configIndex = program.config_index; +const configIndex = program.config_index || 'gen3-dev-config'; const client = new Client({ node: esHost }); From b0c9b74f133c90e24c4e321d6c91ec0bcd4ed449 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sun, 26 Apr 2020 15:17:35 -0500 Subject: [PATCH 39/41] chore/doc update --- devHelper/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/devHelper/README.md b/devHelper/README.md index 1f13e587..60e06ece 100644 --- a/devHelper/README.md +++ b/devHelper/README.md @@ -29,11 +29,20 @@ Here is a complete list of arguments that `npm run gendata` would take | -p, --port `` | elasticsearch port | 9200 | | -i, --index `` | elasticsearch index | undefined | | -d, --doc_type `` | document type | undefined | +| -c, --config_index `` | array config index | gen3-dev-config | | -n, --number `` | number of documents to generate | 500 | | -r, --random | generate random number of document up to `number` | false | Also, there are some predefined values in `/genData/valueBank.json`. +:information_source: **Special handling for generating mock data for array type fields** + +In Elasticsearch, arrays do not require a dedicated field datatype. In other words, when defining ES fields in the mapping object, array fields are syntactically no different from other regular fields. But GQL does distinguish arrays from other data types. + +So in order to add an array field to mock data, we require that field to explicitly contain the word `array` in its field name. And it is also required to put some predefined values for that array field in `/genData/valueBank.json`. + +Doing so will ensure the array config index is updated with names of all the array fields in an ES index. If you have array fields in any of your ES indices, then it is necessary to have a correct array config index in order to successfully generate corresponding GQL schemas and resolvers. 
To specify the name of the array config index, pass a `-c` or `--config_index` argument to the `npm run gendata` command. The default name of the test array config index is `gen3-dev-config`. + ## Step.3 start server for developing server side code In the root directory of this repo, run: From 5422c18e428986c0bb372981537a14f11d4c2289 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 27 Apr 2020 11:47:31 -0500 Subject: [PATCH 40/41] chore/rename var --- src/server/es/aggs.js | 4 ++-- src/server/es/index.js | 4 ++-- src/server/resolvers.js | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index 3cb7dc03..9f91b899 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -536,7 +536,7 @@ export const textAggregation = async ( defaultAuthFilter, nestedAggFields, nestedPath, - numericField, + isNumericField, }, ) => { const queryBody = { size: 0 }; @@ -554,7 +554,7 @@ export const textAggregation = async ( let missingAlias = {}; // don't add missing alias to numeric field by default // since the value of missing alias is a string - if (config.esConfig.aggregationIncludeMissingData && !numericField) { + if (config.esConfig.aggregationIncludeMissingData && !isNumericField) { missingAlias = { missing: config.esConfig.missingDataAlias }; } const aggsName = `${field}Aggs`; diff --git a/src/server/es/index.js b/src/server/es/index.js index 9370eb28..f651b9b7 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -448,7 +448,7 @@ class ES { defaultAuthFilter, nestedAggFields, nestedPath, - numericField, + isNumericField, }) { return esAggregator.textAggregation( { @@ -463,7 +463,7 @@ class ES { defaultAuthFilter, nestedAggFields, nestedPath, - numericField, + isNumericField, }, ); } diff --git a/src/server/resolvers.js b/src/server/resolvers.js index cdd3aa4f..03f81bb5 100644 --- a/src/server/resolvers.js +++ b/src/server/resolvers.js @@ -104,7 +104,7 @@ const
textHistogramResolver = async (parent, args, context) => { log.debug('[resolver.textHistogramResolver] args', args); const { esInstance, esIndex, esType, - filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, numericField, + filter, field, nestedAggFields, filterSelf, accessibility, nestedPath, isNumericField, } = parent; log.debug('[resolver.textHistogramResolver] parent', parent); const { authHelper } = context; @@ -118,21 +118,21 @@ const textHistogramResolver = async (parent, args, context) => { defaultAuthFilter, nestedAggFields, nestedPath, - numericField, + isNumericField, }); }; const getFieldAggregationResolverMappingsByField = (field) => { - let numericField = false; + let isNumericField = false; if (esFieldNumericTextTypeMapping[field.type] === NumericTextTypeTypeEnum.ES_NUMERIC_TYPE) { - numericField = true; + isNumericField = true; } if (field.type !== 'nested') { - return ((parent) => ({ ...parent, field: field.name, numericField })); + return ((parent) => ({ ...parent, field: field.name, isNumericField })); } // if field is nested type, update nestedPath info with parent's nestedPath and pass down return ((parent) => ({ - ...parent, field: field.name, nestedPath: (parent.nestedPath) ? `${parent.nestedPath}.${field.name}` : `${field.name}`, numericField, + ...parent, field: field.name, nestedPath: (parent.nestedPath) ? `${parent.nestedPath}.${field.name}` : `${field.name}`, isNumericField, })); }; From 48364a15bac8a548cedf9ac2db69bcdc6cf16965 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 27 Apr 2020 12:26:37 -0500 Subject: [PATCH 41/41] fix/typo --- doc/queries.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/queries.md b/doc/queries.md index 12c9bb7e..82a2e337 100644 --- a/doc/queries.md +++ b/doc/queries.md @@ -399,7 +399,7 @@ Result: ### 4. Nested Aggregation :bangbang: **This section is for performing aggregations on documents which contain nested fields. 
For information about Guppy supporting nested sub-aggregations such as terms aggregation and missing aggregation, please refer to [Sub-aggregations](#aggs-sub)** ->The difference between Nested Aggregations and Sub-aggregations is that Nested Aggregations are performed on multi-level nested fields, while the sub-aggregations are preformed on different fields within a same level. +>The difference between Nested Aggregations and Sub-aggregations is that Nested Aggregations are performed on multi-level nested fields, while the sub-aggregations are performed on different fields within a same level. Guppy supports performing aggregations (both text and numeric aggregations) on nested fields. For information about using nested fields inside filters, see [Nested Filter](#filter-nested) > Suppose the ES index has a mapping as the following: