Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 39 additions & 15 deletions devHelper/scripts/commands.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,28 +34,52 @@ curl -iv -X PUT "${ESHOST}/${indexName}" \
-H 'Content-Type: application/json' -d'
{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0,
"analysis": {
"tokenizer": {
"ngram_tokenizer": {
"type": "ngram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [ "letter", "digit" ]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": [
"lowercase"
]
},
"search_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
}
},
"mappings": {
"subject": {
"properties": {
"subject_id": { "type": "keyword" },
"name": { "type": "text" },
"project": { "type": "keyword" },
"study": { "type": "keyword" },
"gender": { "type": "keyword" },
"race": { "type": "keyword" },
"ethnicity": { "type": "keyword" },
"vital_status": { "type": "keyword" },
"file_type": { "type": "keyword" },
"file_format": { "type": "keyword" },
"gen3_resource_path": { "type": "keyword" },
"subject_id": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"name": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"project": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"study": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"gender": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"race": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"ethnicity": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"vital_status": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"file_type": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"file_format": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"gen3_resource_path": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"file_count": { "type": "integer" },
"whatever_lab_result_value": { "type": "float" },
"some_string_field": { "type": "keyword" },
"some_string_field": { "type": "keyword", "fields": { "analyzed": {"type": "text", "analyzer": "ngram_analyzer", "search_analyzer": "search_analyzer", "term_vector": "with_positions_offsets"} } },
"some_integer_field": { "type": "integer" },
"some_long_field": { "type": "long" }
}
Expand Down
130 changes: 127 additions & 3 deletions doc/queries.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Table of Contents
- [Numeric Aggregation](#aggs-numeric)
- [Nested Aggregation](#aggs-nested)
- [Filters](#filter)
- [Basic Filter Unit](#filter-unit)
- [Text Search Unit in Filter](#filter-search)
- [Combined Filters](#filter-comb)
- [Some other queries and arguments](#other)

<a name="query"></a>
Expand Down Expand Up @@ -556,15 +559,136 @@ Result:
<a name="filter"></a>

## Filters
Currently Guppy uses `JSON`-based syntax for filters. The JSON object key could be an operation like `=`, `>`. One simple example could be:

<a name="filter-unit"></a>

### Basic filter unit
Currently Guppy uses `JSON`-based syntax for filters.
The JSON object key could be an operation like `=`, `>`.
A very basic filter unit would look like: `{<operator>: {<field_name> : <value_expression>}}`.
One simple example could look like:

```
{
"filter": {
"=": {
"subject_id": "sbj_69"
}
}
}
```


Currently we support the following operators:


| operator | meaning | support field type | example |
|--------------|--------------------------|--------------------|------------------------------------------------------------------|
| eq, EQ, = | equal | string, number | {"eq": {"gender": "female"}} |
| in, IN | inside | string, number | {"in": {"gender": ["female", "F"]}} |
| != | is not | string, number | {"!=": {"gender": "male"}} |
| gt, GT, > | greater than | number | {">": {"age": 50}} |
| gte, GTE, >= | greater than or equal to | number | {">=": {"age": 50}} |
| lt, LT, < | less than | number | {"<": {"age": 50}} |
| lte, LTE, <= | less than or equal to | number | {"<=": {"age": 50}} |
| search | [search text](#filter-search) | text | {"search": {"keyword": "asian","fields": ["race", "ethnicity"]}} |



<a name="filter-search"></a>
Copy link
Contributor

@m0nhawk m0nhawk Nov 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor comment: you don't need to put the anchors in Markdown manually, all heading on Github already have them. See more here. So for "Basic filter unit" it would be: https://github.com/uc-cdis/guppy/blob/feat/search/doc/queries.md#basic-filter-unit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah good to know! thanks!


### A search unit in filter
You can add a search unit to your filter. The syntax looks like:

```
{
"search": {
"keyword": <any text to search>,
"fields": <a list of fields for search>
}
}
```

Note that `keyword` is required, while `fields` is optional.
If `fields` is not set, Guppy will search through all analyzed text fields that match the suffix pattern set in `ANALYZED_TEXT_FIELD_SUFFIX` (by default `.analyzed`, which means searching through all `*.analyzed` fields).

#### Matched results and highlights
Guppy returns the matched fields and their highlights in the `_matched` field:
each entry contains the matched field name and the highlighted fragments, with the matched part wrapped inside `<em>` tags.
An example search filter:

```
query ($filter: JSON) {
subject (filter: $filter, first: 20) {
gender
race
ethnicity
_matched {
field
highlights
}
}
}
```

with variable:

```
{
"filter": {"=": {"subject_id": "69"}}
"filter": {
"search": {
"keyword": "asia",
"fields": "race"
}
}
}
```

Or you could use binary combination (`AND` or `OR`)to combine simple filter units into more complicated big filters. Example:
example result:

```
{
"data": {
"subject": [
{
"gender": "female",
"race": "asian",
"ethnicity": "__missing__",
"_matched": [
{
"field": "race",
"highlights": [
"<em>asia</em>n"
]
}
]
},
{
"gender": "male",
"race": "asian",
"ethnicity": "White",
"_matched": [
{
"field": "race",
"highlights": [
"<em>asia</em>n"
]
}
]
},
...
]
}
}
```




<a name="filter-comb"></a>

### Combine into advanced filters
You could use binary combination (`AND` or `OR`) to combine simple filter units into more complicated big filters. Example:

```
{
Expand Down
2 changes: 2 additions & 0 deletions src/server/__mocks__/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const config = {
tierAccessLevel: 'regular',
tierAccessLimit: 20,
arboristEndpoint: 'http://mock-arborist',
analyzedTextFieldSuffix: '.analyzed',
matchedTextHighlightTagName: 'em',
};

export default config;
33 changes: 33 additions & 0 deletions src/server/__mocks__/mockDataFromES.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ const mockResourcePath = () => {
},
},
},
highlight: {
pre_tags: [
'<em>',
],
post_tags: [
'</em>',
],
fields: {
'*.analyzed': {},
},
},
};
const fakeResource = {
aggregations: {
Expand Down Expand Up @@ -101,6 +112,17 @@ const mockResourcePath = () => {
},
},
},
highlight: {
pre_tags: [
'<em>',
],
post_tags: [
'</em>',
],
fields: {
'*.analyzed': {},
},
},
};
const fakeResourceWithFilter1 = {
aggregations: {
Expand Down Expand Up @@ -155,6 +177,17 @@ const mockResourcePath = () => {
},
},
},
highlight: {
pre_tags: [
'<em>',
],
post_tags: [
'</em>',
],
fields: {
'*.analyzed': {},
},
},
};
const fakeResourceWithFilter2 = {
aggregations: {
Expand Down
16 changes: 15 additions & 1 deletion src/server/__mocks__/mockESData/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,23 @@ import nock from 'nock';
import config from '../config';

/**
 * Registers a persistent nock interceptor for POST requests whose path ends
 * with `_search` on the configured Elasticsearch host.
 *
 * The request body matcher is `mockRequest` with a `highlight` section added
 * (`<em>` pre/post tags on all `*.analyzed` fields), so callers pass only the
 * rest of the expected body.
 *
 * @param {object} mockRequest - expected request body, without `highlight`
 * @param {object} mockResult - body returned with HTTP status 200
 */
const mockSearchEndpoint = (mockRequest, mockResult) => {
  const mockRequestPatched = {
    ...mockRequest,
    highlight: {
      pre_tags: [
        '<em>',
      ],
      post_tags: [
        '</em>',
      ],
      fields: {
        '*.analyzed': {},
      },
    },
  };
  nock(config.esConfig.host)
    .persist()
    // Exactly one `.post()` per interceptor: match on the patched body only.
    // The previous extra `.post(/_search$/, mockRequest)` call in this chain
    // was a stale leftover and has been removed.
    .post(/_search$/, mockRequestPatched)
    .reply(200, mockResult);
};

Expand Down
2 changes: 2 additions & 0 deletions src/server/__tests__/schema.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ describe('Schema', () => {
some_array_integer_field: [Int],
some_array_string_field: [String],
whatever_lab_result_value: Float,
_matched:[MatchedItem]
}
type File {
gen3_resource_path: String,
file_id: String,
file_size: Float,
subject_id: String,
_matched:[MatchedItem]
}`;
test('could create type schemas', async () => {
await esInstance.initialize();
Expand Down
7 changes: 7 additions & 0 deletions src/server/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ const config = {
logLevel: 'INFO',
enableEncryptWhiteList: typeof inputConfig.enable_encrypt_whitelist === 'undefined' ? false : inputConfig.enable_encrypt_whitelist,
encryptWhitelist: inputConfig.encrypt_whitelist || ['__missing__', 'unknown', 'not reported', 'no data'],
analyzedTextFieldSuffix: '.analyzed',
matchedTextHighlightTagName: 'em',
allowedMinimumSearchLen: 2,
};

if (process.env.GEN3_ES_ENDPOINT) {
Expand Down Expand Up @@ -64,6 +67,10 @@ if (process.env.LOG_LEVEL) {
config.logLevel = process.env.LOG_LEVEL;
}

// Optional override of config.analyzedTextFieldSuffix (default '.analyzed')
// from the ANALYZED_TEXT_FIELD_SUFFIX environment variable; an unset or empty
// variable keeps the default.
if (process.env.ANALYZED_TEXT_FIELD_SUFFIX) {
config.analyzedTextFieldSuffix = process.env.ANALYZED_TEXT_FIELD_SUFFIX;
}

// only three options for tier access level: 'private' (default), 'regular', and 'libre'
if (process.env.TIER_ACCESS_LEVEL) {
if (process.env.TIER_ACCESS_LEVEL !== 'private'
Expand Down
23 changes: 23 additions & 0 deletions src/server/es/__tests__/filter.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { UserInputError } from 'apollo-server';
import getFilterObj from '../filter';
import esInstance from '../index';
import setupMockDataEndpoint from '../../__mocks__/mockDataFromES';
import config from '../../config';

jest.mock('../../config');
jest.mock('../../logger');
Expand Down Expand Up @@ -181,6 +182,28 @@ describe('Transfer GraphQL filter to ES filter, filter unit', () => {
expect(resultESFilter3).toEqual(expectedESFilter);
});

// Checks that a GraphQL "search" filter unit becomes an ES `multi_match`
// query whose fields carry the configured analyzed-text suffix.
test('could transfer graphql filter to ES filter object, "search" operator', async () => {
  await esInstance.initialize();
  const keyword = 'male';
  const suffix = config.analyzedTextFieldSuffix;
  const toESFilter = (gqlFilter) => getFilterObj(esInstance, esIndex, esType, gqlFilter);

  // No `fields`: expect a wildcard over every field ending with the suffix.
  expect(toESFilter({ search: { keyword } })).toEqual({
    multi_match: { query: keyword, fields: [`*${suffix}`] },
  });

  // A list of fields: expect each one to get the suffix appended.
  const fieldList = ['gender', 'name'];
  expect(toESFilter({ search: { keyword, fields: fieldList } })).toEqual({
    multi_match: {
      query: keyword,
      fields: fieldList.map((fieldName) => `${fieldName}${suffix}`),
    },
  });

  // A single field given as a plain string: expect a one-element list.
  const singleField = 'gender';
  expect(toESFilter({ search: { keyword, fields: singleField } })).toEqual({
    multi_match: { query: keyword, fields: [`${singleField}${suffix}`] },
  });
});

test('could throw err for invalid operator', async () => {
await esInstance.initialize();

Expand Down
Loading