1 change: 1 addition & 0 deletions .eslintrc.js
@@ -23,6 +23,7 @@ module.exports = {
'no-underscore-dangle': 'off',
'react/destructuring-assignment': 'off',
'react/no-array-index-key': 'off',
"import/no-extraneous-dependencies": ["error", {"devDependencies": true}]
},
overrides: [
{
3 changes: 2 additions & 1 deletion devHelper/README.md
@@ -7,9 +7,10 @@ docker-compose -f ./esearch.yml up -d
```

## Step.2 import mock data into elasticsearch index
Go to the repository's root directory and run the following command.

```
cd scripts/ && sh ./generate_data.sh
sh ./generate_data.sh
```
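
To confirm the import worked, a quick check (assuming Elasticsearch on localhost:9200 and the default `gen3-dev-subject` index name used by the scripts) is to ask for the document count:

```
curl -X GET "http://localhost:9200/gen3-dev-subject/_count?pretty"
```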

## Step.3 start server for developing server side code
2 changes: 1 addition & 1 deletion devHelper/docker/esearch.env
@@ -1,3 +1,3 @@
cluster.name=localdev
discovery.type=single-node
"ES_JAVA_OPTS=-Xms512m -Xmx512m"
"ES_JAVA_OPTS=-Xms1g -Xmx1g"
4 changes: 2 additions & 2 deletions devHelper/docker/esearch.yml
@@ -3,7 +3,7 @@ version: "3.3"
services:
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-cli-run-prod-mode
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:6.3.1
image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.5.4
ports:
- "9200:9200"
- "9300:9300"
@@ -23,7 +23,7 @@ services:
window: 120s

kibana:
image: docker.elastic.co/kibana/kibana:6.2.1
image: docker.elastic.co/kibana/kibana-oss:6.5.4
ports:
- "5601:5601"
logging:
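
After `docker-compose -f ./esearch.yml up -d`, the image swap can be sanity-checked by asking Elasticsearch for its version (assuming the default port mapping above):

```
# Should report "number" : "6.5.4"
curl -s "http://localhost:9200" | grep '"number"'
```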
126 changes: 16 additions & 110 deletions devHelper/scripts/commands.sh
100644 → 100755
@@ -70,13 +70,27 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \
"name": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"project": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"study": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"visits": {
"type": "nested",
"properties": {
"days_to_visit": { "type": "integer" },
"visit_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"follow_ups": {
"type": "nested",
"properties": {
"days_to_follow_up": { "type": "integer" },
"follow_up_label": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } }
}
}
}
},
"gender": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"race": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"ethnicity": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"vital_status": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"file_type": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"file_format": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"gen3_resource_path": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"auth_resource_path": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"file_count": { "type": "integer" },
"whatever_lab_result_value": { "type": "float" },
"some_string_field": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
@@ -102,7 +116,7 @@ curl -iv -X PUT "${ESHOST}/${fileIndexName}" \
"file": {
"properties": {
"file_id": { "type": "keyword" },
"gen3_resource_path": { "type": "keyword" },
"auth_resource_path": { "type": "keyword" },
"subject_id": { "type": "keyword" },
"sensitive": { "type": "keyword" }
}
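
A file document matching the renamed field would look like the following (hypothetical values, mirroring what `es_gen_data` used to emit):

```
curl -X PUT "${ESHOST}/${fileIndexName}/file/0?pretty" \
  -H 'Content-Type: application/json' -d '
{
  "file_id": "file_id_42",
  "auth_resource_path": "/programs/DEV/projects/test",
  "subject_id": "0",
  "sensitive": "false"
}'
```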
@@ -134,111 +148,3 @@ function es_indices() {
curl -X GET "${ESHOST}/_cat/indices?v"
}

#
# Generate test data in index
#
function es_gen_data() {
local startIndex
local endIndex
local COUNT
local tmpName
local indexName
startIndex="${1:-0}"
endIndex="${2:-0}"
indexName="${3:-gen3-dev-subject}"
fileIndexName="${4:-gen3-dev-file}"
configIndexName="${5:-gen3-dev-config}"

declare -a genderList
declare -a ethnicityList
declare -a raceList
declare -a vitalList
declare -a fileTypeList
declare -a fileFormatList

genderList=( "male" "female" "unknown")
ethnicityList=( "American Indian" "Pacific Islander" "Black" "Multi-racial" "White" "Haspanic" "__missing__" )
raceList=( "white" "black" "hispanic" "asian" "mixed" "not reported" )
vitalList=( "Alive" "Dead" "no data" )
fileTypeList=( "mRNA Array" "Unaligned Reads" "Lipdomic MS" "Protionic MS" "1Gs Ribosomes" "Unknown" )
fileFormatList=( "BEM" "BAM" "BED" "CSV" "FASTQ" "RAW" "TAR" "TSV" "TXT" "IDAT" "__missing__" )
resourceList=( "/programs/jnkns/projects/jenkins" "/programs/DEV/projects/test" "/programs/external/projects/test")
projectList=( "jnkns-jenkins" "DEV-test" "external-test" )
sensitiveList=( "true" "false" )

COUNT=$startIndex
XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/tmp}"
tmpName="$(mktemp $XDG_RUNTIME_DIR/es.json.XXXXXX)"
while [[ $COUNT -lt $endIndex ]]; do
projectIndex=$(( $RANDOM % ${#projectList[@]} ))
projectName="${projectList[$projectIndex]}"
resourceName="${resourceList[$projectIndex]}"
studyIndex=$(( $RANDOM % 10 ))
gender="${genderList[$(( $RANDOM % ${#genderList[@]} ))]}"
ethnicity="${ethnicityList[$(( $RANDOM % ${#ethnicityList[@]} ))]}"
race="${raceList[$(( $RANDOM % ${#raceList[@]} ))]}"
vital="${vitalList[$(( $RANDOM % ${#vitalList[@]} ))]}"
fileType="${fileTypeList[$(( $RANDOM % ${#fileTypeList[@]} ))]}"
fileFormat="${fileFormatList[$(( $RANDOM % ${#fileFormatList[@]} ))]}"
fileCounts=$(( $RANDOM % 100 ))
randomFloatNumber="$(( $RANDOM % 100 )).$(( $RANDOM % 100 ))"
stringArray='["1", "2"]'
intArray='[1, 2]'
longNumber="10737418240"
sensitive="${sensitiveList[$(( $RANDOM % ${#sensitiveList[@]} ))]}"

cat - > "$tmpName" <<EOM
{
"subject_id": "$COUNT",
"name": "Subject-$COUNT",
"project": "${projectName}",
"study": "${projectName}-Study-${studyIndex}",
"gender": "${gender}",
"ethnicity": "${ethnicity}",
"race": "${race}",
"vital_status": "${vital}",
"file_type": "${fileType}",
"file_format": "${fileFormat}",
"gen3_resource_path": "${resourceName}",
"file_count": $fileCounts,
"whatever_lab_result_value": $randomFloatNumber,
"some_string_field": $stringArray,
"some_integer_field": $intArray,
"some_long_field": $longNumber,
"sensitive": $sensitive

}
EOM
cat - $tmpName <<EOM
Loading record:
EOM
curl -X PUT "${ESHOST}/${indexName}/subject/${COUNT}?pretty" \
-H 'Content-Type: application/json' "-d@$tmpName"


cat - > "$tmpName" <<EOM
{
"subject_id": "$COUNT",
"gen3_resource_path": "${resourceName}",
"file_id": "file_id_$(( $RANDOM % 1000 ))",
"sensitive": $sensitive
}
EOM
cat - $tmpName <<EOM
Loading record:
EOM
curl -X PUT "${ESHOST}/${fileIndexName}/file/${COUNT}?pretty" \
-H 'Content-Type: application/json' "-d@$tmpName"
let COUNT+=1
done

curl -X PUT "${ESHOST}/${configIndexName}/_doc/gen3-dev-subject?pretty" \
-H 'Content-Type: application/json' -d '
{
"array": [
"some_string_field",
"some_integer_field"
]
}
'
}
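
With `es_gen_data` removed, index inspection still goes through the surviving `es_indices` helper, which simply wraps a `_cat/indices` call (sketch below, assuming a local cluster):

```
export ESHOST="http://localhost:9200"
# Equivalent to calling es_indices after loading the helpers
curl -X GET "${ESHOST}/_cat/indices?v"
```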
98 changes: 94 additions & 4 deletions doc/queries.md
@@ -35,6 +35,12 @@ Example query:
ethnicity
vital_status
file_count
experiments {
experimental_description
number_experimental_group
type_of_sample
type_of_data
}
}
}
```
@@ -50,21 +56,63 @@ Example result:
"gender": "female",
"ethnicity": "American Indian",
"vital_status": "no data",
"file_count": 13
"file_count": 13,
"experiments": [
{
"experimental_description": "experiment for fun",
"number_experimental_group": 1,
"type_of_sample": "story",
"type_of_data": "text"
},
{
"experimental_description": "experiment for horror",
"number_experimental_group": 2,
"type_of_sample": "mv",
"type_of_data": "text"
}
]
},
{
"subject_id": "12",
"gender": "male",
"ethnicity": "Pacific Islander",
"vital_status": "Alive",
"file_count": 60
"file_count": 60,
"experiments": [
{
"experimental_description": "experiment for fun",
"number_experimental_group": 1,
"type_of_sample": "story",
"type_of_data": "text"
},
{
"experimental_description": "experiment for horror",
"number_experimental_group": 2,
"type_of_sample": "mv",
"type_of_data": "text"
}
]
},
{
"subject_id": "13",
"gender": "male",
"ethnicity": "__missing__",
"vital_status": "Dead",
"file_count": 88
"file_count": 88,
"experiments": [
{
"experimental_description": "experiment for fun",
"number_experimental_group": 1,
"type_of_sample": "story",
"type_of_data": "text"
},
{
"experimental_description": "experiment for horror",
"number_experimental_group": 2,
"type_of_sample": "mv",
"type_of_data": "text"
}
]
},
...
]
@@ -743,7 +791,49 @@ In future Guppy will support `SQL` like syntax for filter, like `

<a name="other"></a>

### Nested filter
Guppy now supports queries on a nested Elasticsearch schema. Querying and filtering a nested index works much like a native ES nested query.
Assuming a `File` node is nested inside `subject`, the nested query would be written as below:
```
{
"filter": {
"AND": [
{
"OR": [
{
"=": {
"race": "hispanic"
}
},
{
"=": {
"race": "asian"
}
}
]
},
{
"nested": {
"path": "File",
"AND": [
{
">=": {"file_count": 15}
},
{
"<=": {"file_count": 75}
}
]
}
}
]
}
}
```

Elasticsearch only supports nested filters at the level of whole documents when returning data. This means the filter `file_count >= 15` and `file_count <= 75` returns each whole document that contains at least one nested `file_count` in the range `[15, 75]`; the nested `file_count` values outside that range are not filtered out of the returned document.
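
As an illustration, the nested filter above could be sent to Guppy as a GraphQL variable; this is only a sketch, and the endpoint URL and the `$filter` variable convention are assumptions here:

```
curl -X POST "http://localhost:3000/graphql" \
  -H 'Content-Type: application/json' -d '
{
  "query": "query ($filter: JSON) { subject(filter: $filter) { subject_id file_count } }",
  "variables": {
    "filter": {
      "nested": {
        "path": "File",
        "AND": [ { ">=": { "file_count": 15 } }, { "<=": { "file_count": 75 } } ]
      }
    }
  }
}'
```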

## Some other queries and arguments

### Mapping query
Mapping query simply returns all fields under a doc type. Example:
2 changes: 1 addition & 1 deletion example_config.json
@@ -10,5 +10,5 @@
}
],
"config_index": "gen3-dev-config",
"auth_filter_field": "gen3_resource_path"
"auth_filter_field": "auth_resource_path"
}