From d81a675962adbff1791cb7a174fbac1da014b381 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Fri, 21 Apr 2023 14:22:31 -0500 Subject: [PATCH 01/13] feat(es7): make guppy compatible with es7 --- src/server/es/index.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/server/es/index.js b/src/server/es/index.js index 7059dcaa..460d3095 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -55,7 +55,6 @@ class ES { log.info('[ES.query] index, type, query body: ', esIndex, esType, JSON.stringify(validatedQueryBody)); return this.client.search({ index: esIndex, - type: esType, body: validatedQueryBody, }).then((resp) => resp.body, (err) => { log.error(`[ES.query] error during querying: ${err.message}`); @@ -111,7 +110,6 @@ class ES { if (typeof scrollID === 'undefined') { // first batch const res = await this.client.search({ // eslint-disable-line no-await-in-loop index: esIndex, - type: esType, body: validatedQueryBody, scroll: '1m', size: SCROLL_PAGE_SIZE, @@ -159,7 +157,6 @@ class ES { const errMsg = `[ES.initialize] error getting mapping from ES index "${esIndex}"`; return this.client.indices.getMapping({ index: esIndex, - type: esType, }).then((resp) => { try { const esIndexAlias = Object.keys(resp.body)[0]; From b672558a828d85511b4b3802264cad1e521e9530 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Thu, 22 Jun 2023 07:17:34 -0500 Subject: [PATCH 02/13] fix(type): remove type param from mapping --- src/server/es/index.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/server/es/index.js b/src/server/es/index.js index 460d3095..b308c255 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -151,16 +151,16 @@ class ES { * Return a Promise of an Object: { : } * If error, print error stack * @param {string} esIndex - * @param {string} esType */ - async _getESFieldsTypes(esIndex, esType) { + async _getESFieldsTypes(esIndex) { const errMsg = `[ES.initialize] error getting mapping from ES index 
"${esIndex}"`; return this.client.indices.getMapping({ index: esIndex, }).then((resp) => { try { const esIndexAlias = Object.keys(resp.body)[0]; - return resp.body[esIndexAlias].mappings[esType].properties; + log.info('Mapping response from ES: ', resp.body[esIndexAlias]); + return resp.body[esIndexAlias].mappings.properties; } catch (err) { throw new Error(`${errMsg}: ${err}`); } @@ -177,7 +177,7 @@ class ES { const fieldTypes = {}; log.info('[ES.initialize] getting mapping from elasticsearch...'); const promiseList = this.config.indices - .map((cfg) => this._getESFieldsTypes(cfg.index, cfg.type) + .map((cfg) => this._getESFieldsTypes(cfg.index) .then((res) => ({ index: cfg.index, fieldTypes: res }))); const resultList = await Promise.all(promiseList); log.info('[ES.initialize] got mapping from elasticsearch'); From 14bc2b004b8c698ddb8fb2a1abcd08dc07d9bc09 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Thu, 22 Jun 2023 08:38:46 -0500 Subject: [PATCH 03/13] fix(test): fix mock data --- src/server/__mocks__/mockDataFromES.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index f2daac30..d24edbff 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -343,7 +343,7 @@ const mockESMapping = () => { }; nock(config.esConfig.host) .persist() - .get(/_mapping\/subject/) + .get(/subject\/_mapping/) .reply(200, fakeSubjectMapping); const fakeFileMapping = { 'gen3-dev-file': { @@ -369,7 +369,7 @@ const mockESMapping = () => { }; nock(config.esConfig.host) .persist() - .get(/_mapping\/file/) + .get(/file\/_mapping/) .reply(200, fakeFileMapping); }; From 60416b4ac5d520a842124e17645c16f0b147d4f3 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Thu, 22 Jun 2023 09:24:46 -0500 Subject: [PATCH 04/13] fix(test): fix array-config test --- src/server/__mocks__/mockDataFromES.js | 1 - 1 file changed, 1 deletion(-) diff --git 
a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index d24edbff..c4e01f25 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -382,7 +382,6 @@ const mockArrayConfig = () => { hits: [ { _index: 'gen3-dev-config', - _type: '_doc', _id: 'gen3-dev-subject', _score: 1.0, _source: { From 87f408fe83303d464174e17f4aaa4dc59b794097 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Thu, 22 Jun 2023 09:30:22 -0500 Subject: [PATCH 05/13] fix(log): add debug info --- src/server/es/index.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server/es/index.js b/src/server/es/index.js index b308c255..33629631 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -216,6 +216,7 @@ class ES { try { resp.body.hits.hits.forEach((doc) => { const index = doc._id; + log.debug(this.fieldTypes) if (!this.fieldTypes[index]) { const errMsg = `[ES.initialize] wrong array entry from config index: index "${index}" not found, skipped.`; log.error(errMsg); From f5367db1753efd2312a17e2482de93d2fa2fbee0 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Mon, 26 Jun 2023 13:31:42 -0500 Subject: [PATCH 06/13] fix(missing): change to use missing_bucket --- src/server/es/aggs.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index d798b826..b09d86ea 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -581,7 +581,7 @@ export const textAggregation = async ( // don't add missing alias to numeric field by default // since the value of missing alias is a string if (config.esConfig.aggregationIncludeMissingData && !isNumericField) { - missingAlias = { missing: config.esConfig.missingDataAlias }; + missingAlias = { missing_bucket: config.esConfig.missingDataAlias }; } const aggsName = `${field}Aggs`; const aggsObj = {}; From 8c0b90452452a89d64a1aba37e726667eb3e224c Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Tue, 27 Jun 2023 
10:10:54 -0500 Subject: [PATCH 07/13] fix(debug): add more debug info --- src/server/es/aggs.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index b09d86ea..568bb636 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -581,7 +581,7 @@ export const textAggregation = async ( // don't add missing alias to numeric field by default // since the value of missing alias is a string if (config.esConfig.aggregationIncludeMissingData && !isNumericField) { - missingAlias = { missing_bucket: config.esConfig.missingDataAlias }; + missingAlias = { missing: config.esConfig.missingDataAlias }; } const aggsName = `${field}Aggs`; const aggsObj = {}; @@ -649,6 +649,8 @@ export const textAggregation = async ( }, }; } + console.log("Query body:") + console.log(queryBody); let resultSize; let finalResults = []; /* eslint-disable */ From 017fd15b47f526c8c953ccf2e4c1a8fa18217bb6 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Wed, 28 Jun 2023 09:50:29 -0500 Subject: [PATCH 08/13] fix(no_data): using missing_bucket and replace null with no_data --- src/server/__mocks__/mockDataFromES.js | 9 +++-- .../__mocks__/mockESData/mockNestedAggs.js | 5 +-- .../__mocks__/mockESData/mockTextAggs.js | 15 ++++---- src/server/es/aggs.js | 34 +++++++++++++------ 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index c4e01f25..c844c40d 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -24,7 +24,8 @@ const mockResourcePath = () => { gen3_resource_path: { terms: { field: 'gen3_resource_path', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, @@ -98,7 +99,8 @@ const mockResourcePath = () => { gen3_resource_path: { terms: { field: 'gen3_resource_path', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, @@ -157,7 +159,8 @@ const mockResourcePath 
= () => { gen3_resource_path: { terms: { field: 'gen3_resource_path', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, diff --git a/src/server/__mocks__/mockESData/mockNestedAggs.js b/src/server/__mocks__/mockESData/mockNestedAggs.js index 23b1aa17..d5af2569 100644 --- a/src/server/__mocks__/mockESData/mockNestedAggs.js +++ b/src/server/__mocks__/mockESData/mockNestedAggs.js @@ -17,7 +17,8 @@ const mockNestedAggs = () => { 'visits.visit_label': { terms: { field: 'visits.visit_label', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, @@ -58,7 +59,7 @@ const mockNestedAggs = () => { }, { key: { - 'visits.visit_label': 'no data', + 'visits.visit_label': null, }, doc_count: 40, }, diff --git a/src/server/__mocks__/mockESData/mockTextAggs.js b/src/server/__mocks__/mockESData/mockTextAggs.js index 1fb30d20..1d892aaa 100644 --- a/src/server/__mocks__/mockESData/mockTextAggs.js +++ b/src/server/__mocks__/mockESData/mockTextAggs.js @@ -12,7 +12,8 @@ const mockTextAggs = () => { gender: { terms: { field: 'gender', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, @@ -31,7 +32,7 @@ const mockTextAggs = () => { buckets: [ { key: { - gender: 'no data', + gender: null, }, doc_count: 40, }, @@ -78,7 +79,8 @@ const mockTextAggs = () => { gender: { terms: { field: 'gender', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, @@ -97,7 +99,7 @@ const mockTextAggs = () => { buckets: [ { key: { - gender: 'no data', + gender: null, }, doc_count: 40, }, @@ -138,7 +140,8 @@ const mockTextAggs = () => { gender: { terms: { field: 'gender', - missing: 'no data', + missing_bucket: true, + order: "desc" }, }, }, @@ -157,7 +160,7 @@ const mockTextAggs = () => { buckets: [ { key: { - gender: 'no data', + gender: null, }, doc_count: 20, }, diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index 568bb636..b4c48f2c 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -581,7 +581,7 @@ export 
const textAggregation = async ( // don't add missing alias to numeric field by default // since the value of missing alias is a string if (config.esConfig.aggregationIncludeMissingData && !isNumericField) { - missingAlias = { missing: config.esConfig.missingDataAlias }; + missingAlias = { missing_bucket: true, order: "desc" }; } const aggsName = `${field}Aggs`; const aggsObj = {}; @@ -649,8 +649,6 @@ export const textAggregation = async ( }, }; } - console.log("Query body:") - console.log(queryBody); let resultSize; let finalResults = []; /* eslint-disable */ @@ -660,6 +658,8 @@ export const textAggregation = async ( resultSize = 0; const resultBuckets = (aggsNestedName) ? result.aggregations[aggsNestedName][aggsName].buckets : result.aggregations[aggsName].buckets; + console.log("results in bucket:"); + console.log(resultBuckets); resultBuckets.forEach((item) => { const resultObj = processResultsForNestedAgg (nestedAggFields, item, {}) @@ -677,17 +677,31 @@ export const textAggregation = async ( /* eslint-enable */ // order aggregations by doc count - finalResults = finalResults.sort((e1, e2) => e2.count - e1.count); + console.log("Final results before replace:"); + console.log(finalResults); + finalResults = finalResults.sort((e1, e2) => { + if (e1.key === null) + return 1; + if (e2.key === null) + return -1; + return e2.count - e1.count; + }); // make the missing data bucket to the bottom of the list + console.log("Final result length:"); + console.log(finalResults.length); + let lastIndex = finalResults.length - 1; if (config.esConfig.aggregationIncludeMissingData) { - const missingDataIndex = finalResults - .findIndex((b) => b.key === config.esConfig.missingDataAlias); - const missingDataItem = finalResults.find((b) => b.key === config.esConfig.missingDataAlias); - if (missingDataItem) { - finalResults.splice(missingDataIndex, 1); - finalResults.splice(finalResults.length, 0, missingDataItem); + const missingDataItem = finalResults[lastIndex]; + 
console.log("Right before replacing key"); + console.log(lastIndex); + console.log(missingDataItem) + if (missingDataItem.key === null) { + missingDataItem.key = config.esConfig.missingDataAlias; + finalResults[lastIndex] = missingDataItem; } } + console.log("Final results:"); + console.log(finalResults); return finalResults; }; From 78beb867a12c8e9ab8c01a6aeb46e8f645f3fd21 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Thu, 29 Jun 2023 03:06:23 -0500 Subject: [PATCH 09/13] fix(totalCount): fix totalCount agg --- src/server/__mocks__/mockDataFromES.js | 132 ++++++++++++------------- src/server/es/aggs.js | 22 +---- src/server/es/filter.js | 10 +- src/server/es/index.js | 5 +- 4 files changed, 75 insertions(+), 94 deletions(-) diff --git a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index c844c40d..335bf9bb 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -25,7 +25,7 @@ const mockResourcePath = () => { terms: { field: 'gen3_resource_path', missing_bucket: true, - order: "desc" + order: 'desc', }, }, }, @@ -100,7 +100,7 @@ const mockResourcePath = () => { terms: { field: 'gen3_resource_path', missing_bucket: true, - order: "desc" + order: 'desc', }, }, }, @@ -160,7 +160,7 @@ const mockResourcePath = () => { terms: { field: 'gen3_resource_path', missing_bucket: true, - order: "desc" + order: 'desc', }, }, }, @@ -280,65 +280,63 @@ const mockESMapping = () => { const fakeSubjectMapping = { 'gen3-dev-subject': { mappings: { - subject: { - properties: { - gen3_resource_path: { - type: 'keyword', - }, - visits: { - type: 'nested', - properties: { - days_to_visit: { type: 'integer' }, - visit_label: { - type: 'keyword', - fields: { - analyzed: { - type: 'text', - analyzer: 'ngram_analyzer', - search_analyzer: 'search_analyzer', - term_vector: 'with_positions_offsets', - }, + properties: { + gen3_resource_path: { + type: 'keyword', + }, + visits: { + type: 'nested', + properties: { + 
days_to_visit: { type: 'integer' }, + visit_label: { + type: 'keyword', + fields: { + analyzed: { + type: 'text', + analyzer: 'ngram_analyzer', + search_analyzer: 'search_analyzer', + term_vector: 'with_positions_offsets', }, }, - follow_ups: { - type: 'nested', - properties: { - days_to_follow_up: { - type: 'integer', - }, - follow_up_label: { - type: 'keyword', - fields: { - analyzed: { - type: 'text', - analyzer: 'ngram_analyzer', - search_analyzer: 'search_analyzer', - term_vector: 'with_positions_offsets', - }, + }, + follow_ups: { + type: 'nested', + properties: { + days_to_follow_up: { + type: 'integer', + }, + follow_up_label: { + type: 'keyword', + fields: { + analyzed: { + type: 'text', + analyzer: 'ngram_analyzer', + search_analyzer: 'search_analyzer', + term_vector: 'with_positions_offsets', }, }, }, }, }, }, - gender: { - type: 'keyword', - }, - file_count: { - type: 'integer', - }, - name: { - type: 'text', - }, - some_array_integer_field: { - type: 'integer', - }, - some_array_string_field: { - type: 'keyword', - }, - whatever_lab_result_value: { - type: 'float', - }, + }, + gender: { + type: 'keyword', + }, + file_count: { + type: 'integer', + }, + name: { + type: 'text', + }, + some_array_integer_field: { + type: 'integer', + }, + some_array_string_field: { + type: 'keyword', + }, + whatever_lab_result_value: { + type: 'float', }, }, }, @@ -351,20 +349,18 @@ const mockESMapping = () => { const fakeFileMapping = { 'gen3-dev-file': { mappings: { - file: { - properties: { - gen3_resource_path: { - type: 'keyword', - }, - file_id: { - type: 'keyword', - }, - file_size: { - type: 'long', - }, - subject_id: { - type: 'keyword', - }, + properties: { + gen3_resource_path: { + type: 'keyword', + }, + file_id: { + type: 'keyword', + }, + file_size: { + type: 'long', + }, + subject_id: { + type: 'keyword', }, }, }, diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index b4c48f2c..847b73f5 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js 
@@ -581,7 +581,7 @@ export const textAggregation = async ( // don't add missing alias to numeric field by default // since the value of missing alias is a string if (config.esConfig.aggregationIncludeMissingData && !isNumericField) { - missingAlias = { missing_bucket: true, order: "desc" }; + missingAlias = { missing_bucket: true, order: 'desc' }; } const aggsName = `${field}Aggs`; const aggsObj = {}; @@ -658,9 +658,6 @@ export const textAggregation = async ( resultSize = 0; const resultBuckets = (aggsNestedName) ? result.aggregations[aggsNestedName][aggsName].buckets : result.aggregations[aggsName].buckets; - console.log("results in bucket:"); - console.log(resultBuckets); - resultBuckets.forEach((item) => { const resultObj = processResultsForNestedAgg (nestedAggFields, item, {}) finalResults.push({ @@ -677,31 +674,20 @@ export const textAggregation = async ( /* eslint-enable */ // order aggregations by doc count - console.log("Final results before replace:"); - console.log(finalResults); finalResults = finalResults.sort((e1, e2) => { - if (e1.key === null) - return 1; - if (e2.key === null) - return -1; + if (e1.key === null) return 1; + if (e2.key === null) return -1; return e2.count - e1.count; }); // make the missing data bucket to the bottom of the list - console.log("Final result length:"); - console.log(finalResults.length); - let lastIndex = finalResults.length - 1; + const lastIndex = finalResults.length - 1; if (config.esConfig.aggregationIncludeMissingData) { const missingDataItem = finalResults[lastIndex]; - console.log("Right before replacing key"); - console.log(lastIndex); - console.log(missingDataItem) if (missingDataItem.key === null) { missingDataItem.key = config.esConfig.missingDataAlias; finalResults[lastIndex] = missingDataItem; } } - console.log("Final results:"); - console.log(finalResults); return finalResults; }; diff --git a/src/server/es/filter.js b/src/server/es/filter.js index e92b5ac8..8c4fc6b6 100644 --- a/src/server/es/filter.js 
+++ b/src/server/es/filter.js @@ -241,10 +241,10 @@ const getESSearchFilterFragment = (esInstance, esIndex, fields, keyword) => { * It first parse graphql filter object recursively from top to down, * until reach the bottom level, it translate gql filter unit to ES filter unit. * And finally combines all filter units from down to top. - * @param {string} esInstance + * @param {ES} esInstance * @param {string} esIndex * @param {object} graphqlFilterObj - * @param {string[]} aggsField - target agg field, only need for agg queries + * @param {string} aggsField - target agg field, only need for agg queries * @param {boolean} filterSelf - whether we want to filter this field or not, * only need for agg queries * @param {object} defaultAuthFilter - once graphqlFilterObj is empty, @@ -255,9 +255,9 @@ const getFilterObj = ( esInstance, esIndex, graphqlFilterObj, - aggsField, - filterSelf = true, // eslint-disable-line default-param-last - defaultAuthFilter, + aggsField = null, + filterSelf = true, + defaultAuthFilter = null, objPath = null, ) => { if (!graphqlFilterObj diff --git a/src/server/es/index.js b/src/server/es/index.js index 33629631..6ec5c702 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -216,7 +216,6 @@ class ES { try { resp.body.hits.hits.forEach((doc) => { const index = doc._id; - log.debug(this.fieldTypes) if (!this.fieldTypes[index]) { const errMsg = `[ES.initialize] wrong array entry from config index: index "${index}" not found, skipped.`; log.error(errMsg); @@ -426,7 +425,7 @@ class ES { { esInstance: this, esIndex, esType }, { filter, fields: false, size: 0 }, ); - return result.hits.total; + return result.hits.total.value; } async getFieldCount(esIndex, esType, filter, field) { @@ -441,7 +440,7 @@ class ES { }, }; if (typeof filter !== 'undefined') { - queryBody.query = getFilterObj(this, esIndex, filter); + queryBody.query = getFilterObj(this, esIndex, filter, field); } const result = await this.query(esIndex, esType, queryBody); 
From d53ae0f5ef90e90986ca5227ba7389a15994c69d Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Fri, 30 Jun 2023 09:37:36 -0500 Subject: [PATCH 10/13] fix(docker): correct version in docker files --- devHelper/docker/esearch.yml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/devHelper/docker/esearch.yml b/devHelper/docker/esearch.yml index 4cd7d9ae..d998f243 100644 --- a/devHelper/docker/esearch.yml +++ b/devHelper/docker/esearch.yml @@ -3,7 +3,7 @@ version: "3.3" services: # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-cli-run-prod-mode elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.8.12 + image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.0 ports: - "9200:9200" - "9300:9300" @@ -12,9 +12,12 @@ services: volumes: - "esdata:/usr/share/elasticsearch/data" environment: - "cluster.name": "localdev" - "discovery.type": "single-node" - ES_JAVA_OPTS: "-Xms1g -Xmx1g" + - cluster.name=localdev + - bootstrap.memory_lock=false + - ES_JAVA_OPTS=-Xms1g -Xmx1g + - discovery.type=single-node + - network.host=0.0.0.0 + - http.port=9200 deploy: replicas: 1 restart_policy: @@ -24,7 +27,7 @@ services: window: 120s kibana: - image: docker.elastic.co/kibana/kibana-oss:6.5.4 + image: docker.elastic.co/kibana/kibana-oss:7.10.0 ports: - "5601:5601" logging: @@ -38,6 +41,9 @@ services: - elasticsearch environment: LOGGING_QUIET: "true" + SERVER_NAME: elasticsearch + ELASTICSEARCH_URL: http://elasticsearch:9200 + network.host: 0.0.0.0 volumes: esdata: From 4f3afcd236d3dc544291086ddb2c8f81e3412570 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Fri, 30 Jun 2023 10:06:41 -0500 Subject: [PATCH 11/13] fix(command): correct payload to ES --- devHelper/scripts/commands.sh | 94 ++++++++++++++++------------------- 1 file changed, 44 insertions(+), 50 deletions(-) diff --git a/devHelper/scripts/commands.sh b/devHelper/scripts/commands.sh index d73932ef..2182c519 100755 --- 
a/devHelper/scripts/commands.sh +++ b/devHelper/scripts/commands.sh @@ -42,7 +42,7 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \ "ngram_tokenizer": { "type": "ngram", "min_gram": 2, - "max_gram": 20, + "max_gram": 3, "token_chars": [ "letter", "digit" ] } }, @@ -64,46 +64,44 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \ } }, "mappings": { - "subject": { - "properties": { - "subject_id": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "name": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "project": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "study": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "visits": { - "type": "nested", - "properties": { - "days_to_visit": { "type": "integer" }, - "visit_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "follow_ups": { - "type": "nested", - "properties": { - "days_to_follow_up": { "type": "integer" }, - "follow_up_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } - } + "properties": { + "subject_id": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "name": { "type": "keyword", "fields": { "analyzed": {"type": "text", 
"analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "project": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "study": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "visits": { + "type": "nested", + "properties": { + "days_to_visit": { "type": "integer" }, + "visit_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "follow_ups": { + "type": "nested", + "properties": { + "days_to_follow_up": { "type": "integer" }, + "follow_up_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } } } - }, - "gender": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "race": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "ethnicity": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "vital_status": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "file_type": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", 
"term_vector": "with_positions_offsets"} } }, - "file_format": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "auth_resource_path": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, - "file_count": { "type": "integer" }, - "whatever_lab_result_value": { "type": "float" }, - "some_nested_array_field": { - "type": "nested", - "properties": { - "some_integer_inside_nested": { "type": "integer" }, - "some_string_inside_nested": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } - } - }, - "some_integer_field": { "type": "integer" }, - "some_long_field": { "type": "long" }, - "sensitive": { "type": "keyword" } - } + } + }, + "gender": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "race": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "ethnicity": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "vital_status": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "file_type": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "file_format": { "type": "keyword", 
"fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "auth_resource_path": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, + "file_count": { "type": "integer" }, + "whatever_lab_result_value": { "type": "float" }, + "some_nested_array_field": { + "type": "nested", + "properties": { + "some_integer_inside_nested": { "type": "integer" }, + "some_string_inside_nested": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } + } + }, + "some_integer_field": { "type": "integer" }, + "some_long_field": { "type": "long" }, + "sensitive": { "type": "keyword" } } } } @@ -119,13 +117,11 @@ curl -iv -X PUT "${ESHOST}/${fileIndexName}" \ } }, "mappings": { - "file": { - "properties": { - "file_id": { "type": "keyword" }, - "auth_resource_path": { "type": "keyword" }, - "subject_id": { "type": "keyword" }, - "sensitive": { "type": "keyword" } - } + "properties": { + "file_id": { "type": "keyword" }, + "auth_resource_path": { "type": "keyword" }, + "subject_id": { "type": "keyword" }, + "sensitive": { "type": "keyword" } } } } @@ -141,10 +137,8 @@ curl -iv -X PUT "${ESHOST}/${configIndexName}" \ } }, "mappings": { - "_doc": { - "properties": { - "array": { "type": "keyword" } - } + "properties": { + "array": { "type": "keyword" } } } } From 790b1a87cade7d17b5d6962d6a1e1217bb6f3c48 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Tue, 11 Jul 2023 09:46:32 -0500 Subject: [PATCH 12/13] fix(test): correct test case --- guppy_config.json | 1 + src/server/es/aggs.js | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 guppy_config.json diff --git a/guppy_config.json b/guppy_config.json new file mode 100644 index 
00000000..ceb12a5e --- /dev/null +++ b/guppy_config.json @@ -0,0 +1 @@ +{ "indices": [ { "index": "tb", "type": "subject" }, { "index": "tb_file", "type": "file" } ], "config_index": "tb-array-config", "auth_filter_field": "auth_resource_path" } \ No newline at end of file diff --git a/src/server/es/aggs.js b/src/server/es/aggs.js index 847b73f5..b02d27df 100644 --- a/src/server/es/aggs.js +++ b/src/server/es/aggs.js @@ -684,7 +684,7 @@ export const textAggregation = async ( const lastIndex = finalResults.length - 1; if (config.esConfig.aggregationIncludeMissingData) { const missingDataItem = finalResults[lastIndex]; - if (missingDataItem.key === null) { + if (missingDataItem !== undefined && missingDataItem.key === null) { missingDataItem.key = config.esConfig.missingDataAlias; finalResults[lastIndex] = missingDataItem; } From 549991604b26bef1f6daabf69c86c4f304d3c176 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Wed, 8 Nov 2023 10:29:43 -0600 Subject: [PATCH 13/13] remove es6.Dockerfile --- devHelper/docker/es6.Dockerfile | 51 --------------------------------- devHelper/docker/esearch.yml | 1 + 2 files changed, 1 insertion(+), 51 deletions(-) delete mode 100644 devHelper/docker/es6.Dockerfile diff --git a/devHelper/docker/es6.Dockerfile b/devHelper/docker/es6.Dockerfile deleted file mode 100644 index 95fda820..00000000 --- a/devHelper/docker/es6.Dockerfile +++ /dev/null @@ -1,51 +0,0 @@ -# From https://gist.github.com/rluvaton/3a8d5953e1ad8236e8953c2e7691e5de - -FROM ubuntu:bionic-20220531 - -# Must be root to install the packages -USER root - -# Install required deps -RUN apt update -RUN apt -y install gnupg wget apt-transport-https coreutils java-common - -# Import Elasticsearch GPG Key -RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | apt-key add - - -# Add Elasticsearch 6.x APT repository -# setting CPU architecture to be amd64 explicity as in case this is being built from ARM (which it should) it would find the elasticsearch package 
(elasticsearch 6.x doesn't have ARM binary) -RUN echo "deb [arch=amd64] https://artifacts.elastic.co/packages/6.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-6.x.list - -# update after elastic-search repo added -RUN apt-get update - -# Install ARM Amazon JDK -RUN wget https://corretto.aws/downloads/latest/amazon-corretto-8-aarch64-linux-jdk.deb -O amazon-jdk.deb -RUN dpkg --skip-same-version -i amazon-jdk.deb -RUN rm amazon-jdk.deb - -# Install Elasticsearch 6.x -RUN apt-get -y install elasticsearch - -# the user was created when installed the elasticsearch -# Must not be root: -# org.elasticsearch.bootstrap.StartupException: java.lang.RuntimeException: can not run elasticsearch as root -USER elasticsearch - -WORKDIR /usr/share/elasticsearch - -# Append the custom conf - -RUN echo "# ---------------------------------- CUSTOM -----------------------------------" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "# Added because of the following error (TL;DR: X-Pack features are not supported in ARM):" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "# > org.elasticsearch.bootstrap.StartupException:" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "# > ElasticsearchException[X-Pack is not supported and Machine Learning is not available for [linux-aarch64];" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "# > you can use the other X-Pack features (unsupported) by setting xpack.ml.enabled: false in elasticsearch.yml]" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "xpack.ml.enabled: false" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "# Added because we want to listen to requests coming from computers in the network" >> /etc/elasticsearch/elasticsearch.yml -RUN echo "network.host: 0.0.0.0" >> /etc/elasticsearch/elasticsearch.yml - - -ENTRYPOINT [ "./bin/elasticsearch" ] diff --git a/devHelper/docker/esearch.yml 
b/devHelper/docker/esearch.yml index d998f243..4bf6ba75 100644 --- a/devHelper/docker/esearch.yml +++ b/devHelper/docker/esearch.yml @@ -3,6 +3,7 @@ version: "3.3" services: # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-cli-run-prod-mode elasticsearch: + # use elasticsearch/elasticsearch:7.10.0-arm64 for ARM architecture image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.0 ports: - "9200:9200"