diff --git a/src/server/__mocks__/config.js b/src/server/__mocks__/config.js
index 4b66f9da4..12c1be473 100644
--- a/src/server/__mocks__/config.js
+++ b/src/server/__mocks__/config.js
@@ -24,6 +24,8 @@ const config = {
   arboristEndpoint: 'http://mock-arborist',
   analyzedTextFieldSuffix: '.analyzed',
   matchedTextHighlightTagName: 'em',
+  ignoredFields: ['@version'],
+  doubleUnderscorePrefix: 'x__',
 };
 
 export default config;
diff --git a/src/server/__tests__/config.test.js b/src/server/__tests__/config.test.js
index 4da4c9340..d40d633cd 100644
--- a/src/server/__tests__/config.test.js
+++ b/src/server/__tests__/config.test.js
@@ -110,4 +110,14 @@ describe('config', () => {
     const config = require('../config').default;
     expect(config.allowRefresh).toBe(false);
   });
+
+  test('could ignoredFields fields from mapping', async () => {
+    const config = require('../config').default;
+    expect(config.ignoredFields).toEqual(['@version']);
+  });
+
+  test('could remap "__field" from to "prefix__field"', async () => {
+    const config = require('../config').default;
+    expect(config.doubleUnderscorePrefix).toEqual('x__');
+  });
 });
diff --git a/src/server/config.js b/src/server/config.js
index 3aceeb5e2..47beba674 100644
--- a/src/server/config.js
+++ b/src/server/config.js
@@ -56,6 +56,8 @@ const config = {
   analyzedTextFieldSuffix: '.analyzed',
   matchedTextHighlightTagName: 'em',
   allowedMinimumSearchLen: 2,
+  ignoredFields: ['@version'],
+  doubleUnderscorePrefix: 'x__',
   allowRefresh: inputConfig.allowRefresh || false,
 };
 
@@ -74,6 +76,20 @@ if (process.env.GUPPY_PORT) {
   config.port = process.env.GUPPY_PORT;
 }
 
+if (process.env.DOUBLE_UNDERSCORE) {
+  config.doubleUnderscorePrefix = process.env.DOUBLE_UNDERSCORE;
+}
+
+// Comma separated list of mapping fields to ignore, e.g. "@version,foo".
+// Environment variables are always strings, so no type check is needed; entries are
+// trimmed and empty ones dropped so that "a, b," yields ['a', 'b'].
+if (process.env.IGNORED_FIELDS) {
+  config.ignoredFields = process.env.IGNORED_FIELDS
+    .split(',')
+    .map((fieldName) => fieldName.trim())
+    .filter((fieldName) => fieldName.length > 0);
+}
+
 const allowedTierAccessLevels = ['private', 'regular', 'libre'];
 
 if (process.env.TIER_ACCESS_LEVEL) {
diff --git a/src/server/es/index.js b/src/server/es/index.js
index 815c6c08c..9cd698bf2 100644
--- a/src/server/es/index.js
+++ b/src/server/es/index.js
@@ -10,6 +10,56 @@ import { SCROLL_PAGE_SIZE } from './const';
 import CodedError from '../utils/error';
 import { fromFieldsToSource, buildNestedField, processNestedFieldNames } from '../utils/utils';
 
+/**
+ * Returns the field name with a leading "__" replaced by config.doubleUnderscorePrefix;
+ * names that do not start with "__" are returned unchanged.
+ * (Presumably needed because GraphQL reserves names starting with "__" - TODO confirm.)
+ * @param {string} fieldName - The original field name.
+ * @returns {string} The remapped field name.
+ */
+function remapFieldName(fieldName) {
+  return fieldName.startsWith('__')
+    ? fieldName.replace('__', config.doubleUnderscorePrefix)
+    : fieldName;
+}
+
+/**
+ * Cleans up a field mapping object in place.
+ * The object is mutated (not copied), so the caller sees the changes on the object it passed.
+ * For every field it:
+ *  - deletes the field when its mapping has `enabled: false`,
+ *  - deletes the field when its name is listed in config.ignoredFields,
+ *  - otherwise renames the field when its name starts with "__" (see remapFieldName).
+ * @param {object} root - The mapping object (field name -> field mapping) to be modified.
+ */
+function modifyIndexRootProperties(root) {
+  if (!root) {
+    return;
+  }
+  // Object.keys() returns a snapshot, so keys can be deleted/added while iterating
+  Object.keys(root).forEach((fieldName) => {
+    const fieldMapping = root[fieldName];
+    if (fieldMapping.enabled === false) {
+      // eslint-disable-next-line no-param-reassign
+      delete root[fieldName];
+      return;
+    }
+    if (config.ignoredFields.includes(fieldName)) {
+      log.info(`[ES] deleting field ${fieldName} because it should be ignored.`);
+      // eslint-disable-next-line no-param-reassign
+      delete root[fieldName];
+      return;
+    }
+    const remappedName = remapFieldName(fieldName);
+    if (remappedName !== fieldName) {
+      // eslint-disable-next-line no-param-reassign
+      root[remappedName] = fieldMapping;
+      // eslint-disable-next-line no-param-reassign
+      delete root[fieldName];
+    }
+  });
+}
+
 class ES {
   constructor(esConfig = config.esConfig) {
     this.config = esConfig;
@@ -176,6 +226,11 @@ class ES {
     });
   }
 
+  /**
+   * Gets the mappings for all indices from Elasticsearch.
+   * @returns {Promise} A promise that resolves to an object containing the field types for each index.
+   * @throws {Error} Throws an error if the "config.indices" block is empty.
+   */
   async _getMappingsForAllIndices() {
     if (!this.config.indices || this.config.indices === 0) {
       const errMsg = '[ES.initialize] Error when initializing: empty "config.indices" block';
@@ -183,9 +238,9 @@ class ES {
     }
     const fieldTypes = {};
     log.info('[ES.initialize] getting mapping from elasticsearch...');
-    const promiseList = this.config.indices
-      .map((cfg) => this._getESFieldsTypes(cfg.index)
-        .then((res) => ({ index: cfg.index, fieldTypes: res })));
+
+    const promiseList = this.config.indices.map((indexConfig) => this._processEachIndex(indexConfig));
+
     const resultList = await Promise.all(promiseList);
     log.info('[ES.initialize] got mapping from elasticsearch');
     resultList.forEach((res) => {
@@ -195,6 +250,31 @@ class ES {
     return fieldTypes;
   }
 
+  /**
+   * Retrieves the field types of one index and cleans them up with modifyIndexRootProperties:
+   * first the top-level fields, then the properties of every remaining field of type "nested".
+   *
+   * @param {object} indexConfig - The index configuration object.
+   * @param {string} indexConfig.index - The index name.
+   * @returns {Promise} - A promise that resolves to an object containing the index name and field types.
+   */
+  async _processEachIndex(indexConfig) {
+    const res = await this._getESFieldsTypes(indexConfig.index);
+    // Clean the top level once, before iterating: it deletes and renames keys, so iterating
+    // over the names captured beforehand would skip nested fields that were renamed.
+    modifyIndexRootProperties(res);
+    Object.values(res).forEach((fieldMapping) => {
+      if (fieldMapping && 'properties' in fieldMapping && fieldMapping.type === 'nested') {
+        modifyIndexRootProperties(fieldMapping.properties);
+      }
+    });
+
+    return {
+      index: indexConfig.index,
+      fieldTypes: res,
+    };
+  }
+
   /**
    * Read array config and check if there's any array fields for each index.
    * Array fields are grouped and stored by index as a doc in array config,
@@ -231,6 +311,10 @@ class ES {
         const fields = doc._source.array;
         fields.forEach((field) => {
           const fieldArr = field.split('.');
+          // NOTE(review): the existence check below looks up the original name; if
+          // this.fieldTypes holds the mapping after "__" fields were renamed, array fields
+          // starting with "__" would be rejected there - confirm.
+          const remappedField = remapFieldName(field);
           if (!(this.fieldTypes[index][field]
             || (
               fieldArr.length > 1
@@ -245,7 +329,7 @@ class ES {
             return;
           }
           if (!arrayFields[index]) arrayFields[index] = [];
-          arrayFields[index].push(field);
+          arrayFields[index].push(remappedField);
         });
       });
       log.info('[ES.initialize] got array fields from es config index:', JSON.stringify(arrayFields, null, 4));
@@ -418,7 +502,7 @@ class ES {
     }
     if (fields !== undefined) {
       if (fields) {
-        const esFields = fromFieldsToSource(fields);
+        const esFields = fromFieldsToSource(fields, config.doubleUnderscorePrefix);
         if (esFields.length > 0) queryBody._source = esFields;
       } else {
         queryBody._source = false;
@@ -499,8 +583,18 @@ class ES {
     );
     const { hits } = result.hits;
     const hitsWithMatchedResults = hits.map((h) => {
+      // Rename "__"-prefixed fields of the hit the same way the mapping is renamed.
+      // `|| {}` keeps hits without _source from throwing here.
+      const source = h._source || {};
+      Object.keys(source).forEach((fieldName) => {
+        const remappedName = remapFieldName(fieldName);
+        if (remappedName !== fieldName) {
+          source[remappedName] = source[fieldName];
+          delete source[fieldName];
+        }
+      });
       if (!('highlight' in h)) {
-        // ES doesn't returns "highlight"
+        // ES doesn't return "highlight"
         return h._source;
       }
       // ES returns highlight, transfer them into "_matched" schema
diff --git a/src/server/schema.js b/src/server/schema.js
index 5ca381413..61b2e2a26 100644
--- a/src/server/schema.js
+++ b/src/server/schema.js
@@ -3,6 +3,7 @@ import { firstLetterUpperCase } from './utils/utils';
 
 const esgqlTypeMapping = {
   text: 'String',
+  date: 'String',
   keyword: 'String',
   integer: 'Int',
   long: 'Float',
diff --git a/src/server/utils/__test__/utils.test.js b/src/server/utils/__test__/utils.test.js
index f996d7c41..9932a42bd 100644
--- a/src/server/utils/__test__/utils.test.js
+++ b/src/server/utils/__test__/utils.test.js
@@ -3,6 +3,8 @@ import UtilsData from '../__mockData__/utils.data';
 
 describe('Parse fields from GraphQL query to fields in ES query', () => {
   test('could parse fields in GraphQL query correctly', async () => {
-    expect(fromFieldsToSource(UtilsData.parsedInfo)).toEqual(UtilsData.fields);
+    expect(fromFieldsToSource(UtilsData.parsedInfo, 'x__')).toEqual(
+      UtilsData.fields,
+    );
   });
 });
diff --git a/src/server/utils/utils.js b/src/server/utils/utils.js
index 65a36f8f1..1f7fa124c 100644
--- a/src/server/utils/utils.js
+++ b/src/server/utils/utils.js
@@ -43,10 +43,11 @@ export const isWhitelisted = (key) => {
 /**
  * Convert from fields of graphql query produced by graphql library to list of querying fields
  * This list will be put to _source fields of the ES query
- * @param parsedInfo: parsing information from graphql library
+ * @param {object} parsedInfo - parsing information from graphql library
+ * @param {string} underscorePrefix - prefix that is turned back into a leading "__" in field names
  * @returns: list of selected fields.
  */
-export const fromFieldsToSource = (parsedInfo) => {
+export const fromFieldsToSource = (parsedInfo, underscorePrefix) => {
   let stack = Object.values(parsedInfo.fieldsByTypeName[firstLetterUpperCase(parsedInfo.name)]);
   const levels = { 0: stack.length };
   const fields = [];
@@ -60,6 +61,10 @@ export const fromFieldsToSource = (parsedInfo) => {
       curNodeName = curNodeName.slice(0, (lastPeriod !== -1) ? lastPeriod : 0);
     } else {
       const cur = stack.pop();
+      // Turn a leading prefix back into "__"; skipped when no (or an empty) prefix is
+      // given, so names are left untouched instead of being matched against "undefined".
+      cur.name = (underscorePrefix && cur.name.startsWith(underscorePrefix))
+        ? cur.name.replace(underscorePrefix, '__')
+        : cur.name;
       const newTypeName = cur.name;
       const fieldName = [curNodeName, newTypeName].filter((s) => s.length > 0).join('.');
       if (newTypeName in cur.fieldsByTypeName) {