Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c2f4a60

Browse files
committed
NLPchina#114 - csv aggregations - stats/extended_stats/percentiles
1 parent 2928429 commit c2f4a60

File tree

4 files changed

+164
-25
lines changed

4 files changed

+164
-25
lines changed

src/main/java/org/elasticsearch/plugin/nlpcn/executors/CSVResultsExtractor.java

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@
1313
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGrid;
1414
import org.elasticsearch.search.aggregations.metrics.MetricsAggregator;
1515
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
16+
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile;
17+
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
1618
import org.elasticsearch.search.aggregations.metrics.scripted.ScriptedMetric;
19+
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
1720
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStats;
21+
import org.nlpcn.es4sql.Util;
1822

1923
import java.util.*;
2024

@@ -28,7 +32,7 @@ public CSVResultsExtractor() {
2832
this.currentLineIndex = 0;
2933
}
3034

31-
public CSVResult extractResults(Object queryResult, boolean flat, String separator) {
35+
public CSVResult extractResults(Object queryResult, boolean flat, String separator) throws CsvExtractorException {
3236
if(queryResult instanceof SearchHits){
3337
SearchHit[] hits = ((SearchHits) queryResult).getHits();
3438
List<Map<String,Object>> docsAsMap = new ArrayList<>();
@@ -48,7 +52,6 @@ public CSVResult extractResults(Object queryResult, boolean flat, String separat
4852
}
4953

5054
//todo: need to handle more options for aggregations:
51-
//NumericMetricsAggregation.Multi : ExtendedStats,Stats,Percentiles
5255
//Aggregations that inherit from base
5356
//ScriptedMetric
5457
//TopHits
@@ -60,15 +63,15 @@ public CSVResult extractResults(Object queryResult, boolean flat, String separat
6063
return null;
6164
}
6265

63-
private void handleAggregations(Aggregations aggregations, List<String> headers, List<List<String>> lines) {
66+
private void handleAggregations(Aggregations aggregations, List<String> headers, List<List<String>> lines) throws CsvExtractorException {
6467
if(allNumericAggregations(aggregations)){
6568
lines.get(this.currentLineIndex).addAll(fillHeaderAndCreateLineForNumericAggregations(aggregations, headers));
6669
return;
6770
}
6871
//aggregations with size one only supported when not metrics.
6972
List<Aggregation> aggregationList = aggregations.asList();
7073
if(aggregationList.size() > 1){
71-
//todo: throw exception
74+
throw new CsvExtractorException("currently support only one aggregation at same level (Except for numeric metrics)");
7275
}
7376
Aggregation aggregation = aggregationList.get(0);
7477
//we want to skip singleBucketAggregations (nested,reverse_nested,filters)
@@ -115,7 +118,7 @@ private void handleAggregations(Aggregations aggregations, List<String> headers
115118

116119
}
117120

118-
private List<String> fillHeaderAndCreateLineForNumericAggregations(Aggregations aggregations, List<String> header) {
121+
private List<String> fillHeaderAndCreateLineForNumericAggregations(Aggregations aggregations, List<String> header) throws CsvExtractorException {
119122
List<String> line = new ArrayList<>();
120123
List<Aggregation> aggregationList = aggregations.asList();
121124
for(Aggregation aggregation : aggregationList){
@@ -124,17 +127,69 @@ private List<String> fillHeaderAndCreateLineForNumericAggregations(Aggregations
124127
return line;
125128
}
126129

127-
private void handleNumericMetricAggregation(List<String> header, List<String> line, Aggregation aggregation) {
130+
private void handleNumericMetricAggregation(List<String> header, List<String> line, Aggregation aggregation) throws CsvExtractorException {
128131
String name = aggregation.getName();
129-
if(!header.contains(name)){
130-
header.add(aggregation.getName());
131-
}
132+
132133
if(aggregation instanceof NumericMetricsAggregation.SingleValue){
134+
if(!header.contains(name)){
135+
header.add(name);
136+
}
133137
line.add(((NumericMetricsAggregation.SingleValue) aggregation).getValueAsString());
134138
}
135139
//todo:Numeric MultiValue - Stats,ExtendedStats,Percentile...
140+
else if(aggregation instanceof NumericMetricsAggregation.MultiValue){
141+
if(aggregation instanceof Stats) {
142+
String[] statsHeaders = new String[]{"count", "sum", "avg", "min", "max"};
143+
boolean isExtendedStats = aggregation instanceof ExtendedStats;
144+
if(isExtendedStats){
145+
String[] extendedHeaders = new String[]{"sumOfSquares", "variance", "stdDeviation"};
146+
statsHeaders = Util.concatStringsArrays(statsHeaders,extendedHeaders);
147+
}
148+
mergeHeadersWithPrefix(header, name, statsHeaders);
149+
Stats stats = (Stats) aggregation;
150+
line.add(stats.getCountAsString());
151+
line.add(stats.getSumAsString());
152+
line.add(stats.getAvgAsString());
153+
line.add(stats.getMinAsString());
154+
line.add(stats.getMaxAsString());
155+
if(isExtendedStats){
156+
ExtendedStats extendedStats = (ExtendedStats) aggregation;
157+
line.add(extendedStats.getSumOfSquaresAsString());
158+
line.add(extendedStats.getVarianceAsString());
159+
line.add(extendedStats.getStdDeviationAsString());
160+
}
161+
}
162+
else if( aggregation instanceof Percentiles){
163+
String[] percentileHeaders = new String[]{"1.0", "5.0", "25.0", "50.0", "75.0", "95.0", "99.0"};
164+
mergeHeadersWithPrefix(header, name, percentileHeaders);
165+
Percentiles percentiles = (Percentiles) aggregation;
166+
line.add(percentiles.percentileAsString(1.0));
167+
line.add(percentiles.percentileAsString(5.0));
168+
line.add(percentiles.percentileAsString(25.0));
169+
line.add(percentiles.percentileAsString(50.0));
170+
line.add(percentiles.percentileAsString(75));
171+
line.add(percentiles.percentileAsString(95.0));
172+
line.add(percentiles.percentileAsString(99.0));
173+
}
174+
else {
175+
throw new CsvExtractorException("unknown NumericMetricsAggregation.MultiValue:" + aggregation.getClass());
176+
}
177+
178+
}
136179
else {
180+
throw new CsvExtractorException("unknown NumericMetricsAggregation" + aggregation.getClass());
181+
}
182+
}
137183

184+
private void mergeHeadersWithPrefix(List<String> header, String prefix, String[] newHeaders) {
185+
for (int i = 0; i < newHeaders.length; i++) {
186+
String newHeader = newHeaders[i];
187+
if(prefix != null && !prefix.equals("")) {
188+
newHeader = prefix + "." + newHeader;
189+
}
190+
if (!header.contains(newHeader)) {
191+
header.add(newHeader);
192+
}
138193
}
139194
}
140195

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package org.elasticsearch.plugin.nlpcn.executors;
2+
3+
/**
 * Checked exception thrown when a query result cannot be converted to CSV.
 *
 * Created by Eliran on 29/12/2015.
 */
public class CsvExtractorException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message description of why the result could not be extracted
     */
    public CsvExtractorException(String message) {
        super(message);
    }

    /**
     * @param message description of why the result could not be extracted
     * @param cause   the underlying failure, kept so the original stack trace is not lost
     */
    public CsvExtractorException(String message, Throwable cause) {
        super(message, cause);
    }
}

src/main/java/org/nlpcn/es4sql/Util.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,15 @@ public static String extendedToString(SQLExpr sqlExpr) {
7979
}
8080
return sqlExpr.toString();
8181
}
82+
83+
public static String[] concatStringsArrays(String[] a1,String[] a2){
84+
String[] strings = new String[a1.length + a2.length];
85+
for(int i=0;i<a1.length;i++){
86+
strings[i] = a1[i];
87+
}
88+
for(int i = 0;i<a2.length;i++){
89+
strings[a1.length+i] = a2[i];
90+
}
91+
return strings;
92+
}
8293
}

src/test/java/org/nlpcn/es4sql/CSVResultsExtractorTests.java

Lines changed: 79 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import org.elasticsearch.plugin.nlpcn.QueryActionElasticExecutor;
55
import org.elasticsearch.plugin.nlpcn.executors.CSVResult;
66
import org.elasticsearch.plugin.nlpcn.executors.CSVResultsExtractor;
7+
import org.elasticsearch.plugin.nlpcn.executors.CsvExtractorException;
78
import org.elasticsearch.search.SearchHits;
89
import org.elasticsearch.search.aggregations.Aggregations;
910
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
@@ -14,10 +15,11 @@
1415
import org.nlpcn.es4sql.query.QueryAction;
1516
import org.nlpcn.es4sql.query.SqlElasticSearchRequestBuilder;
1617

17-
import java.io.IOException;
1818
import java.sql.SQLFeatureNotSupportedException;
1919
import java.util.List;
2020

21+
import static org.hamcrest.MatcherAssert.assertThat;
22+
import static org.hamcrest.Matchers.equalTo;
2123
import static org.nlpcn.es4sql.TestsConstants.TEST_INDEX;
2224

2325
/**
@@ -27,7 +29,7 @@ public class CSVResultsExtractorTests {
2729

2830

2931
@Test
30-
public void simpleSearchResultNotNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
32+
public void simpleSearchResultNotNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
3133
String query = String.format("select name,age from %s/dog order by age",TEST_INDEX);
3234
CSVResult csvResult = getCsvResult(false, query);
3335

@@ -45,7 +47,7 @@ public void simpleSearchResultNotNestedNotFlatNoAggs() throws SqlParseException,
4547

4648

4749
@Test
48-
public void simpleSearchResultWithNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
50+
public void simpleSearchResultWithNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
4951
String query = String.format("select name,house from %s/gotCharacters",TEST_INDEX);
5052
CSVResult csvResult = getCsvResult(false, query);
5153

@@ -68,7 +70,7 @@ public void simpleSearchResultWithNestedNotFlatNoAggs() throws SqlParseException
6870

6971

7072
@Test
71-
public void simpleSearchResultWithNestedOneFieldNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
73+
public void simpleSearchResultWithNestedOneFieldNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
7274
String query = String.format("select name.firstname,house from %s/gotCharacters",TEST_INDEX);
7375
CSVResult csvResult = getCsvResult(false, query);
7476

@@ -87,7 +89,7 @@ public void simpleSearchResultWithNestedOneFieldNotFlatNoAggs() throws SqlParseE
8789
}
8890

8991
@Test
90-
public void simpleSearchResultWithNestedTwoFieldsFromSameNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
92+
public void simpleSearchResultWithNestedTwoFieldsFromSameNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
9193
String query = String.format("select name.firstname,name.lastname,house from %s/gotCharacters", TEST_INDEX);
9294
CSVResult csvResult = getCsvResult(false, query);
9395

@@ -110,7 +112,7 @@ public void simpleSearchResultWithNestedTwoFieldsFromSameNestedNotFlatNoAggs() t
110112
}
111113

112114
@Test
113-
public void simpleSearchResultWithNestedWithFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
115+
public void simpleSearchResultWithNestedWithFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
114116
String query = String.format("select name.firstname,house from %s/gotCharacters",TEST_INDEX);
115117
CSVResult csvResult = getCsvResult(true, query);
116118

@@ -128,7 +130,7 @@ public void simpleSearchResultWithNestedWithFlatNoAggs() throws SqlParseExceptio
128130

129131
}
130132
@Test
131-
public void joinSearchResultNotNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
133+
public void joinSearchResultNotNestedNotFlatNoAggs() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
132134
String query = String.format("select c.gender , h.name,h.words from %s/gotCharacters c " +
133135
"JOIN %s/gotHouses h " +
134136
"on h.name = c.house ",TEST_INDEX,TEST_INDEX);
@@ -154,7 +156,7 @@ public void joinSearchResultNotNestedNotFlatNoAggs() throws SqlParseException, S
154156
}
155157

156158
@Test
157-
public void simpleNumericValueAgg() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
159+
public void simpleNumericValueAgg() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
158160
String query = String.format("select count(*) from %s/dog ",TEST_INDEX);
159161
CSVResult csvResult = getCsvResult(false, query);
160162

@@ -169,7 +171,7 @@ public void simpleNumericValueAgg() throws SqlParseException, SQLFeatureNotSuppo
169171

170172
}
171173
@Test
172-
public void simpleNumericValueAggWithAlias() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
174+
public void simpleNumericValueAggWithAlias() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
173175
String query = String.format("select avg(age) as myAlias from %s/dog ",TEST_INDEX);
174176
CSVResult csvResult = getCsvResult(false, query);
175177

@@ -185,7 +187,7 @@ public void simpleNumericValueAggWithAlias() throws SqlParseException, SQLFeatur
185187
}
186188

187189
@Test
188-
public void twoNumericAggWithAlias() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
190+
public void twoNumericAggWithAlias() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
189191
String query = String.format("select count(*) as count, avg(age) as myAlias from %s/dog ",TEST_INDEX);
190192
CSVResult csvResult = getCsvResult(false, query);
191193

@@ -208,7 +210,7 @@ public void twoNumericAggWithAlias() throws SqlParseException, SQLFeatureNotSupp
208210
}
209211

210212
@Test
211-
public void aggAfterTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
213+
public void aggAfterTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
212214
String query = String.format("SELECT COUNT(*) FROM %s/account GROUP BY gender",TEST_INDEX);
213215
CSVResult csvResult = getCsvResult(false, query);
214216
List<String> headers = csvResult.getHeaders();
@@ -223,7 +225,7 @@ public void aggAfterTermsGroupBy() throws SqlParseException, SQLFeatureNotSuppor
223225

224226
}
225227
@Test
226-
public void aggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
228+
public void aggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
227229
String query = String.format("SELECT COUNT(*) FROM %s/account where age in (35,36) GROUP BY gender,age",TEST_INDEX);
228230
CSVResult csvResult = getCsvResult(false, query);
229231
List<String> headers = csvResult.getHeaders();
@@ -241,7 +243,7 @@ public void aggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSup
241243

242244
}
243245
@Test
244-
public void multipleAggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
246+
public void multipleAggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
245247
String query = String.format("SELECT COUNT(*) , sum(balance) FROM %s/account where age in (35,36) GROUP BY gender,age",TEST_INDEX);
246248
CSVResult csvResult = getCsvResult(false, query);
247249
List<String> headers = csvResult.getHeaders();
@@ -261,7 +263,7 @@ public void multipleAggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatu
261263
}
262264

263265
@Test
264-
public void dateHistogramTest() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
266+
public void dateHistogramTest() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
265267
String query = String.format("select count(*) from %s/online" +
266268
" group by date_histogram('field'='insert_time','interval'='4d','alias'='days')",TEST_INDEX);
267269
CSVResult csvResult = getCsvResult(false, query);
@@ -278,16 +280,77 @@ public void dateHistogramTest() throws SqlParseException, SQLFeatureNotSupported
278280

279281
}
280282

283+
@Test
284+
public void statsAggregationTest() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
285+
String query = String.format("SELECT STATS(age) FROM %s/account", TEST_INDEX);
286+
CSVResult csvResult = getCsvResult(false, query);
287+
List<String> headers = csvResult.getHeaders();
288+
Assert.assertEquals(5, headers.size());
289+
Assert.assertEquals("STATS(age).count", headers.get(0));
290+
Assert.assertEquals("STATS(age).sum", headers.get(1));
291+
Assert.assertEquals("STATS(age).avg", headers.get(2));
292+
Assert.assertEquals("STATS(age).min", headers.get(3));
293+
Assert.assertEquals("STATS(age).max", headers.get(4));
294+
295+
List<String> lines = csvResult.getLines();
296+
Assert.assertEquals(1, lines.size());
297+
Assert.assertEquals("1000.0,30171.0,30.171,20.0,40.0", lines.get(0));
298+
299+
}
300+
301+
@Test
302+
public void extendedStatsAggregationTest() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
303+
String query = String.format("SELECT EXTENDED_STATS(age) FROM %s/account", TEST_INDEX);
304+
CSVResult csvResult = getCsvResult(false, query);
305+
List<String> headers = csvResult.getHeaders();
306+
Assert.assertEquals(8, headers.size());
307+
Assert.assertEquals("EXTENDED_STATS(age).count", headers.get(0));
308+
Assert.assertEquals("EXTENDED_STATS(age).sum", headers.get(1));
309+
Assert.assertEquals("EXTENDED_STATS(age).avg", headers.get(2));
310+
Assert.assertEquals("EXTENDED_STATS(age).min", headers.get(3));
311+
Assert.assertEquals("EXTENDED_STATS(age).max", headers.get(4));
312+
Assert.assertEquals("EXTENDED_STATS(age).sumOfSquares", headers.get(5));
313+
Assert.assertEquals("EXTENDED_STATS(age).variance", headers.get(6));
314+
Assert.assertEquals("EXTENDED_STATS(age).stdDeviation", headers.get(7));
315+
316+
List<String> lines = csvResult.getLines();
317+
Assert.assertEquals(1, lines.size());
318+
String line = lines.get(0);
319+
Assert.assertTrue(line.startsWith("1000.0,30171.0,30.171,20.0,40.0,946393.0"));
320+
Assert.assertTrue(line.contains(",6.008"));
321+
Assert.assertTrue(line.contains(",36.103"));
322+
}
323+
324+
@Test
325+
public void percentileAggregationTest() throws SqlParseException, SQLFeatureNotSupportedException, Exception {
326+
String query = String.format("select percentiles(age) as per from %s/account where age > 31", TEST_INDEX);
327+
CSVResult csvResult = getCsvResult(false, query);
328+
List<String> headers = csvResult.getHeaders();
329+
Assert.assertEquals(7, headers.size());
330+
Assert.assertEquals("per.1.0", headers.get(0));
331+
Assert.assertEquals("per.5.0", headers.get(1));
332+
Assert.assertEquals("per.25.0", headers.get(2));
333+
Assert.assertEquals("per.50.0", headers.get(3));
334+
Assert.assertEquals("per.75.0", headers.get(4));
335+
Assert.assertEquals("per.95.0", headers.get(5));
336+
Assert.assertEquals("per.99.0", headers.get(6));
337+
338+
339+
List<String> lines = csvResult.getLines();
340+
Assert.assertEquals(1, lines.size());
341+
Assert.assertEquals("32.0,32.0,34.0,36.0,38.0,40.0,40.0",lines.get(0));
342+
}
343+
344+
281345

282346
/* todo: more tests:
283-
* multi_numeric extended_stats , stats , percentiles.
284347
* filter/nested and then metric
285348
* histogram
286349
* geo
287350
*/
288351

289352

290-
private CSVResult getCsvResult(boolean flat, String query) throws SqlParseException, SQLFeatureNotSupportedException, IOException {
353+
private CSVResult getCsvResult(boolean flat, String query) throws SqlParseException, SQLFeatureNotSupportedException, Exception, CsvExtractorException {
291354
SearchDao searchDao = MainTestSuite.getSearchDao();
292355
QueryAction queryAction = searchDao.explain(query);
293356
Object execution = QueryActionElasticExecutor.executeAnyAction(searchDao.getClient(), queryAction);

0 commit comments

Comments
 (0)