From b9be4754807819d0f05d41c5f711ea6eecf2496f Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Fri, 25 Jul 2025 11:58:03 -0500 Subject: [PATCH 1/6] update gentestdata --- devHelper/scripts/commands.sh | 7 +++++++ genData/types.js | 2 +- genData/valueBank.json | 24 +++++++++++++++++++++--- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/devHelper/scripts/commands.sh b/devHelper/scripts/commands.sh index 1ba6122c..ccb9a0fd 100755 --- a/devHelper/scripts/commands.sh +++ b/devHelper/scripts/commands.sh @@ -80,6 +80,13 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \ "days_to_follow_up": { "type": "integer" }, "follow_up_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } } + }, + "treatments_array_under_subjects": { + "type": "nested", + "properties": { + "dose_amount": { "type": "integer" }, + "submitter_id": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } + } } } }, diff --git a/genData/types.js b/genData/types.js index 0557934d..4a4b8eaf 100644 --- a/genData/types.js +++ b/genData/types.js @@ -55,7 +55,7 @@ function fakerType(key, value, arrayFields) { break; case 'nested': Object.entries(value.properties).forEach(([k, v]) => { - properties[k] = fakerType(k, v); + properties[k] = fakerType(k, v, arrayFields); required.push(k); }); fieldType = { diff --git a/genData/valueBank.json b/genData/valueBank.json index ef73dda1..f61eca4c 100644 --- a/genData/valueBank.json +++ b/genData/valueBank.json @@ -18,7 +18,13 @@ "follow_ups": { "days_to_follow_up": 1, "follow_up_label": "flup_lbl_1" - } + }, + "treatments_array_under_subjects": [ + { + "submitter_id": "treatment_1", + "dose_amount": 1 + } + ] }, { "days_to_visit": 2, @@ -26,7 +32,13 @@ "follow_ups": { "days_to_follow_up": 2, "follow_up_label": "flup_lbl_2" - } + }, + "treatments_array_under_subjects": [ + { + "submitter_id": "treatment_2", + "dose_amount": 2 + } + ] }, { "days_to_visit": 3, @@ -34,7 +46,13 @@ "follow_ups": { "days_to_follow_up": 3, "follow_up_label": "flup_lbl_3" - } + }, + "treatments_array_under_subjects": [ + { + "submitter_id": "treatment_3", + "dose_amount": 3 + } + ] } ], "some_nested_array_field": [ From 3b89e3da4bb09f56cb9188e0d8fd64df6fd7fc15 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 12 Aug 2025 16:17:14 -0500 Subject: [PATCH 2/6] fix handle deeply nested arrays --- src/server/es/index.js | 2 +- src/server/schema.js | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/server/es/index.js b/src/server/es/index.js index 815c6c08..91691c61 100644 --- a/src/server/es/index.js +++ b/src/server/es/index.js @@ -508,7 +508,7 @@ class ES { let field = f; if (f.endsWith(config.analyzedTextFieldSuffix)) { // remove ".analyzed" suffix from field name - field = f.substr(0, f.length - config.analyzedTextFieldSuffix.length); + field = f.substring(0, f.length - config.analyzedTextFieldSuffix.length); } return { field, diff --git a/src/server/schema.js b/src/server/schema.js index 5ca38141..92abc37e 100644 --- a/src/server/schema.js +++ b/src/server/schema.js @@ -18,12 +18,16 @@ const esgqlTypeMapping = { const histogramTypePrefix = 'RegularAccess'; -const getGQLType = (esInstance, esIndex, field, esFieldType) => { +const getGQLType = (esInstance, esIndex, field, esFieldType, nestedFieldKeys = []) => { const gqlType = esgqlTypeMapping[esFieldType]; if (!gqlType) { throw new Error(`Invalid type ${esFieldType} for field ${field} in index ${esIndex}`); } - const isArrayField = esInstance.isArrayField(esIndex, field); + let fieldForArrayCheck = field; + if (nestedFieldKeys.length) { + fieldForArrayCheck = `${nestedFieldKeys.join('.')}.${field}`; + } + const isArrayField = esInstance.isArrayField(esIndex, fieldForArrayCheck); if (isArrayField && esFieldType !== 'nested') { return `[${gqlType}]`; } @@ -68,11 +72,11 @@ const getQuerySchemaForType = (esType) => { ): [${esTypeObjName}]`; }; -const getFieldGQLTypeMapForProperties = (esInstance, esIndex, properties) => { +const getFieldGQLTypeMapForProperties = (esInstance, esIndex, properties, nestedFieldKeys = []) => { const result = Object.keys(properties).map((field) => { const esFieldType = (properties[field].esType) ? properties[field].esType : properties[field].type; - const gqlType = getGQLType(esInstance, esIndex, field, esFieldType); + const gqlType = getGQLType(esInstance, esIndex, field, esFieldType, nestedFieldKeys); return { field, type: gqlType, esType: esFieldType, properties: properties[field].properties, @@ -109,7 +113,7 @@ const getTypeSchemaForOneIndex = (esInstance, esIndex, esType) => { const esFieldType = fieldESTypeMap[fieldKey].type; if (esFieldType === 'nested' && !existingFields.has(fieldKey)) { const { properties } = fieldESTypeMap[fieldKey]; - queueTypes.push({ type: `${fieldKey}`, properties }); + queueTypes.push({ type: `${fieldKey}`, properties, nestedFieldKeys: [fieldKey] }); existingFields.add(fieldKey); } }); @@ -123,10 +127,10 @@ const getTypeSchemaForOneIndex = (esInstance, esIndex, esType) => { while (queueTypes.length > 0) { const t = queueTypes.shift(); - const gqlTypes = getFieldGQLTypeMapForProperties(esInstance, esIndex, t.properties); + const gqlTypes = getFieldGQLTypeMapForProperties(esInstance, esIndex, t.properties, t.nestedFieldKeys); gqlTypes.forEach((entry) => { if (entry.esType === 'nested' && !existingFields.has(entry.field)) { - queueTypes.push({ type: `${entry.field}`, properties: entry.properties }); + queueTypes.push({ type: `${entry.field}`, properties: entry.properties, nestedFieldKeys: [...t.nestedFieldKeys, entry.field] }); existingFields.add(entry.field); fieldToArgs[entry.field] = getArgsByField(entry.field, entry.properties); } From 326f4aedb9ddc878df5ca208e9ea5cf81b5cef4d Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 12 Aug 2025 16:17:25 -0500 Subject: [PATCH 3/6] fix gendata --- genData/genData.js | 2 ++ genData/types.js | 35 ++++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/genData/genData.js b/genData/genData.js index 4b63e050..6c50b474 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -154,6 +154,8 @@ async function run() { const { body: count } = await client.count({ index: esIndex }); console.log(count); + console.log(arrayFields); + if (configIndex) { const data = [ { diff --git a/genData/types.js b/genData/types.js index 4a4b8eaf..cfeb2918 100644 --- a/genData/types.js +++ b/genData/types.js @@ -1,4 +1,4 @@ -function fakerType(key, value, arrayFields) { +function fakerType(key, value, arrayFields, nestedFieldKeys = []) { let fieldType; const properties = {}; const required = []; @@ -10,7 +10,11 @@ function fakerType(key, value, arrayFields) { fieldType = { type: 'array', items: { type: 'boolean', properties, required }, minItems: 0, maxItems: 10, }; - arrayFields.push(key); + if (nestedFieldKeys.length) { + arrayFields.push(nestedFieldKeys.join('.')); + } else { + arrayFields.push(key); + } } else { fieldType = { type: 'boolean' }; } @@ -26,7 +30,11 @@ function fakerType(key, value, arrayFields) { minItems: 0, maxItems: 10, }; - arrayFields.push(key); + if (nestedFieldKeys.length) { + arrayFields.push(nestedFieldKeys.join('.')); + } else { + arrayFields.push(key); + } } else { fieldType = { type: 'string', faker: 'name.findName' }; } @@ -37,7 +45,11 @@ function fakerType(key, value, arrayFields) { fieldType = { type: 'array', items: { type: 'number', properties, required }, minItems: 0, maxItems: 10, }; - arrayFields.push(key); + if (nestedFieldKeys.length) { + arrayFields.push(nestedFieldKeys.join('.')); + } else { + arrayFields.push(key); + } } else { fieldType = { type: 'number' }; } @@ -48,21 +60,30 @@ function fakerType(key, value, arrayFields) { fieldType = { type: 'array', items: { type: 'integer', properties, required }, minItems: 0, maxItems: 10, }; - arrayFields.push(key); + if (nestedFieldKeys.length) { + arrayFields.push(nestedFieldKeys.join('.')); + } else { + arrayFields.push(key); + } } else { fieldType = { type: 'integer' }; } break; case 'nested': + nestedFieldKeys.push(key); Object.entries(value.properties).forEach(([k, v]) => { - properties[k] = fakerType(k, v, arrayFields); + properties[k] = fakerType(k, v, arrayFields, [...nestedFieldKeys]); required.push(k); }); fieldType = { type: 'array', items: { type: 'object', properties, required }, minItems: 0, maxItems: 10, }; if (key.includes('array')) { - arrayFields.push(key); + if (nestedFieldKeys.length) { + arrayFields.push(nestedFieldKeys.join('.')); + } else { + arrayFields.push(key); + } } break; default: From 62fd75f702fecea9008ff38953438a7cd18fcaa3 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 12 Aug 2025 16:22:18 -0500 Subject: [PATCH 4/6] clean up --- genData/genData.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/genData/genData.js b/genData/genData.js index 6c50b474..4b63e050 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -154,8 +154,6 @@ async function run() { const { body: count } = await client.count({ index: esIndex }); console.log(count); - console.log(arrayFields); - if (configIndex) { const data = [ { From 3497b4efa1ebec7d6da17e13fa333f44d664b48a Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 13 Aug 2025 16:21:24 -0500 Subject: [PATCH 5/6] add multiple level nested array example --- devHelper/scripts/commands.sh | 1 + genData/genData.js | 1 + genData/types.js | 25 ++---- genData/valueBank.json | 138 +++++++++++++++++++++++++--------- 4 files changed, 109 insertions(+), 56 deletions(-) diff --git a/devHelper/scripts/commands.sh b/devHelper/scripts/commands.sh index ccb9a0fd..694f8ebc 100755 --- a/devHelper/scripts/commands.sh +++ b/devHelper/scripts/commands.sh @@ -85,6 +85,7 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \ "type": "nested", "properties": { "dose_amount": { "type": "integer" }, + "test_article_name_array_under_treatments": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }, "submitter_id": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } } } } diff --git a/genData/genData.js b/genData/genData.js index 4b63e050..cff665d5 100644 --- a/genData/genData.js +++ b/genData/genData.js @@ -153,6 +153,7 @@ async function run() { }); const { body: count } = await client.count({ index: esIndex }); console.log(count); + console.log(arrayFields); if (configIndex) { const data = [ diff --git a/genData/types.js b/genData/types.js index cfeb2918..31b33567 100644 --- a/genData/types.js +++ b/genData/types.js @@ -10,11 +10,7 @@ function fakerType(key, value, arrayFields, nestedFieldKeys = []) { fieldType = { type: 'array', items: { type: 'boolean', properties, required }, minItems: 0, maxItems: 10, }; - if (nestedFieldKeys.length) { - arrayFields.push(nestedFieldKeys.join('.')); - } else { - arrayFields.push(key); - } + arrayFields.push(`${nestedFieldKeys.join('.')}.${key}`); } else { fieldType = { type: 'boolean' }; } @@ -30,11 +26,7 @@ function fakerType(key, value, arrayFields, nestedFieldKeys = []) { minItems: 0, maxItems: 10, }; - if (nestedFieldKeys.length) { - arrayFields.push(nestedFieldKeys.join('.')); - } else { - arrayFields.push(key); - } + arrayFields.push(`${nestedFieldKeys.join('.')}.${key}`); } else { fieldType = { type: 'string', faker: 'name.findName' }; } @@ -45,11 +37,7 @@ function fakerType(key, value, arrayFields, nestedFieldKeys = []) { fieldType = { type: 'array', items: { type: 'number', properties, required }, minItems: 0, maxItems: 10, }; - if (nestedFieldKeys.length) { - arrayFields.push(nestedFieldKeys.join('.')); - } else { - arrayFields.push(key); - } + arrayFields.push(`${nestedFieldKeys.join('.')}.${key}`); } else { fieldType = { type: 'number' }; } @@ -60,11 +48,7 @@ function fakerType(key, value, arrayFields, nestedFieldKeys = []) { fieldType = { type: 'array', items: { type: 'integer', properties, required }, minItems: 0, maxItems: 10, }; - if (nestedFieldKeys.length) { - arrayFields.push(nestedFieldKeys.join('.')); - } else { - arrayFields.push(key); - } + arrayFields.push(`${nestedFieldKeys.join('.')}.${key}`); } else { fieldType = { type: 'integer' }; } @@ -79,6 +63,7 @@ function fakerType(key, value, arrayFields, nestedFieldKeys = []) { type: 'array', items: { type: 'object', properties, required }, minItems: 0, maxItems: 10, }; if (key.includes('array')) { + // since we already pushed the current key into this array if (nestedFieldKeys.length) { arrayFields.push(nestedFieldKeys.join('.')); } else { diff --git a/genData/valueBank.json b/genData/valueBank.json index f61eca4c..dbcf96bc 100644 --- a/genData/valueBank.json +++ b/genData/valueBank.json @@ -1,16 +1,79 @@ { - "gender": ["male", "female", "unknown"], - "ethnicity": ["American Indian", "Pacific Islander", "Black", "Multi-racial", "White", "Haspanic" ], - "race": ["white", "black", "hispanic", "asian", "mixed", "not reported" ], - "vital_status": ["Alive", "Dead", "no data" ], - "file_type": ["mRNA Array", "Unaligned Reads", "Lipdomic MS", "Protionic MS", "1Gs Ribosomes", "Unknown" ], - "file_format": ["BEM", "BAM", "BED", "CSV", "FASTQ", "RAW", "TAR", "TSV", "TXT", "IDAT" ], - "auth_resource_path": ["/programs/jnkns/projects/jenkins", "/programs/DEV/projects/test", "/programs/external/projects/test"], - "sensitive": [ "true", "false" ], - "study": ["study_1", "study_2", "study_3"], - "file_id": ["file_id_1", "file_id_2", "file_id_3"], - "subject_id": ["subject_id_1", "subject_id_2", "subject_id_3"], - "project": ["jnkns-jenkins", "DEV-test", "external-test" ], + "gender": [ + "male", + "female", + "unknown" + ], + "ethnicity": [ + "American Indian", + "Pacific Islander", + "Black", + "Multi-racial", + "White", + "Haspanic" + ], + "race": [ + "white", + "black", + "hispanic", + "asian", + "mixed", + "not reported" + ], + "vital_status": [ + "Alive", + "Dead", + "no data" + ], + "file_type": [ + "mRNA Array", + "Unaligned Reads", + "Lipdomic MS", + "Protionic MS", + "1Gs Ribosomes", + "Unknown" + ], + "file_format": [ + "BEM", + "BAM", + "BED", + "CSV", + "FASTQ", + "RAW", + "TAR", + "TSV", + "TXT", + "IDAT" + ], + "auth_resource_path": [ + "/programs/jnkns/projects/jenkins", + "/programs/DEV/projects/test", + "/programs/external/projects/test" + ], + "sensitive": [ + "true", + "false" + ], + "study": [ + "study_1", + "study_2", + "study_3" + ], + "file_id": [ + "file_id_1", + "file_id_2", + "file_id_3" + ], + "subject_id": [ + "subject_id_1", + "subject_id_2", + "subject_id_3" + ], + "project": [ + "jnkns-jenkins", + "DEV-test", + "external-test" + ], "visits": [ { "days_to_visit": 1, @@ -20,11 +83,12 @@ "follow_up_label": "flup_lbl_1" }, "treatments_array_under_subjects": [ - { - "submitter_id": "treatment_1", - "dose_amount": 1 - } - ] + { + "submitter_id": "treatment_1", + "dose_amount": 1, + "test_article_name_array_under_treatments": ["article_1a", "article_1b"] + } + ] }, { "days_to_visit": 2, @@ -34,11 +98,12 @@ "follow_up_label": "flup_lbl_2" }, "treatments_array_under_subjects": [ - { - "submitter_id": "treatment_2", - "dose_amount": 2 - } - ] + { + "submitter_id": "treatment_2", + "dose_amount": 2, + "test_article_name_array_under_treatments": ["article_2a", "article_2b"] + } + ] }, { "days_to_visit": 3, @@ -48,43 +113,44 @@ "follow_up_label": "flup_lbl_3" }, "treatments_array_under_subjects": [ - { - "submitter_id": "treatment_3", - "dose_amount": 3 - } - ] + { + "submitter_id": "treatment_3", + "dose_amount": 3, + "test_article_name_array_under_treatments": ["article_3a", "article_3b"] + } + ] } ], "some_nested_array_field": [ [ { - "some_integer_inside_nested": 1, - "some_string_inside_nested": "first" + "some_integer_inside_nested": 1, + "some_string_inside_nested": "first" }, { "some_integer_inside_nested": 2, "some_string_inside_nested": "second" - } + } ], [ { - "some_integer_inside_nested": 3, - "some_string_inside_nested": "third" + "some_integer_inside_nested": 3, + "some_string_inside_nested": "third" }, { "some_integer_inside_nested": 4, "some_string_inside_nested": "forth" - } + } ], [ { - "some_integer_inside_nested": 5, - "some_string_inside_nested": "fifth" + "some_integer_inside_nested": 5, + "some_string_inside_nested": "fifth" }, { "some_integer_inside_nested": 6, "some_string_inside_nested": "sixth" - } + } ] ] } From b201efed88cf1a7139c62d64350bde8fc5bfdf67 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Wed, 13 Aug 2025 16:29:07 -0500 Subject: [PATCH 6/6] update schema unit test for multi level nested array --- src/server/__mocks__/mockDataFromES.js | 24 +++++++++++++++++++++++- src/server/__tests__/schema.test.js | 16 +++++++++++----- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/server/__mocks__/mockDataFromES.js b/src/server/__mocks__/mockDataFromES.js index 98d484e9..e947403a 100644 --- a/src/server/__mocks__/mockDataFromES.js +++ b/src/server/__mocks__/mockDataFromES.js @@ -325,6 +325,28 @@ const mockESMapping = () => { }, }, }, + treatments_array_under_subjects: { + type: 'nested', + properties: { + dose_amount: { type: 'integer' }, + test_article_name_array_under_treatments: { + type: 'keyword', + fields: { + analyzed: { + type: 'text', analyzer: 'ngram_analyzer', search_analyzer: 'search_analyzer', term_vector: 'with_positions_offsets', + }, + }, + }, + submitter_id: { + type: 'keyword', + fields: { + analyzed: { + type: 'text', analyzer: 'ngram_analyzer', search_analyzer: 'search_analyzer', term_vector: 'with_positions_offsets', + }, + }, + }, + }, + }, }, }, gender: { @@ -393,7 +415,7 @@ const mockArrayConfig = () => { _id: 'gen3-dev-subject', _score: 1.0, _source: { - array: ['some_array_integer_field', 'some_array_string_field'], + array: ['some_array_integer_field', 'some_array_string_field', 'visits.treatments_array_under_subjects', 'visits.treatments_array_under_subjects.test_article_name_array_under_treatments'], }, }, ], diff --git a/src/server/__tests__/schema.test.js b/src/server/__tests__/schema.test.js index 431911b2..fe1f8378 100644 --- a/src/server/__tests__/schema.test.js +++ b/src/server/__tests__/schema.test.js @@ -65,13 +65,19 @@ describe('Schema', () => { _matched:[MatchedItem] } type visits { - days_to_visit:Int, - visit_label:String, - follow_ups:follow_ups, + days_to_visit: Int, + visit_label: String, + follow_ups: follow_ups, + treatments_array_under_subjects: [treatments_array_under_subjects], } type follow_ups { - days_to_follow_up:Int, - follow_up_label:String, + days_to_follow_up: Int, + follow_up_label: String, + } + type treatments_array_under_subjects { + dose_amount: Int, + test_article_name_array_under_treatments: [String], + submitter_id: String, } type File { gen3_resource_path: String,