Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit b2fb76c

Browse files
committed
NLPchina#114 csv - aggregation support for singleNumerics (sum,avg,count etc) and buckets (terms,date_histogram etc)
1 parent b83e233 commit b2fb76c

File tree

3 files changed

+299
-21
lines changed

3 files changed

+299
-21
lines changed

src/main/java/org/elasticsearch/plugin/nlpcn/executors/CSVResultRestExecutor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public void execute(Client client, Map<String, String> params, QueryAction query
2626
if(params.containsKey("separator")){
2727
separator = params.get("separator");
2828
}
29-
CSVResult result = CSVResultsExtractor.extractResults(queryResult,flat,separator);
29+
CSVResult result = new CSVResultsExtractor().extractResults(queryResult,flat,separator);
3030
String newLine = "\n";
3131
if(params.containsKey("newLine")){
3232
newLine = params.get("newLine");

src/main/java/org/elasticsearch/plugin/nlpcn/executors/CSVResultsExtractor.java

Lines changed: 164 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,187 @@
11
package org.elasticsearch.plugin.nlpcn.executors;
22

3+
import com.sun.org.apache.xpath.internal.operations.Mult;
4+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
5+
import org.elasticsearch.common.base.Joiner;
36
import org.elasticsearch.search.SearchHit;
47
import org.elasticsearch.search.SearchHits;
8+
import org.elasticsearch.search.aggregations.Aggregation;
9+
import org.elasticsearch.search.aggregations.AggregationBuilder;
10+
import org.elasticsearch.search.aggregations.Aggregations;
11+
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
12+
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregation;
13+
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGrid;
14+
import org.elasticsearch.search.aggregations.metrics.MetricsAggregator;
15+
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
16+
import org.elasticsearch.search.aggregations.metrics.scripted.ScriptedMetric;
17+
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStats;
518

619
import java.util.*;
720

821
/**
922
* Created by Eliran on 27/12/2015.
1023
*/
1124
public class CSVResultsExtractor {
12-
public static CSVResult extractResults(Object queryResult, boolean flat, String separator) {
25+
private int currentLineIndex;
26+
27+
public CSVResultsExtractor() {
28+
this.currentLineIndex = 0;
29+
}
30+
31+
public CSVResult extractResults(Object queryResult, boolean flat, String separator) {
1332
if(queryResult instanceof SearchHits){
1433
SearchHit[] hits = ((SearchHits) queryResult).getHits();
15-
Set<String> csvHeaders = new HashSet<>();
1634
List<Map<String,Object>> docsAsMap = new ArrayList<>();
17-
for(SearchHit hit : hits){
18-
Map<String, Object> doc = hit.sourceAsMap();
19-
mergeHeaders(csvHeaders,doc,flat);
20-
docsAsMap.add(doc);
21-
}
22-
List<String> headers = new ArrayList<>(csvHeaders);
35+
List<String> headers = createHeadersAndFillDocsMap(flat, hits, docsAsMap);
36+
List<String> csvLines = createCSVLinesFromDocs(flat, separator, docsAsMap, headers);
37+
return new CSVResult(headers,csvLines);
38+
}
39+
if(queryResult instanceof Aggregations){
40+
List<String> headers = new ArrayList<>();
41+
List<List<String>> lines = new ArrayList<>();
42+
lines.add(new ArrayList<String>());
43+
handleAggregations((Aggregations) queryResult, headers, lines);
2344

24-
List<String> csvLines = new ArrayList<>();
25-
for(Map<String,Object> doc : docsAsMap){
26-
String line = "";
27-
for(String header : headers){
28-
line += findFieldValue(header, doc, flat, separator);
29-
}
30-
csvLines.add(line.substring(0, line.length() - 1));
45+
List<String> csvLines = new ArrayList<>();
46+
for(List<String> simpleLine : lines){
47+
csvLines.add(Joiner.on(separator).join(simpleLine));
3148
}
3249

50+
//todo: need to handle more options for aggregations:
51+
//NumericMetricsAggregation.Multi : ExtendedStats,Stats,Percentiles
52+
//Aggregations that inherit from base
53+
//ScriptedMetric
54+
//TopHits
55+
//GeoBounds
56+
3357
return new CSVResult(headers,csvLines);
58+
3459
}
3560
return null;
3661
}
3762

38-
private static String findFieldValue(String header, Map<String, Object> doc, boolean flat, String separator) {
63+
private void handleAggregations(Aggregations aggregations, List<String> headers, List<List<String>> lines) {
64+
if(allNumericAggregations(aggregations)){
65+
lines.get(this.currentLineIndex).addAll(fillHeaderAndCreateLineForNumericAggregations(aggregations, headers));
66+
return;
67+
}
68+
//when the aggregations are not all metrics, only a single aggregation is supported.
69+
List<Aggregation> aggregationList = aggregations.asList();
70+
if(aggregationList.size() > 1){
71+
//todo: throw exception
72+
}
73+
Aggregation aggregation = aggregationList.get(0);
74+
//we want to skip singleBucketAggregations (nested,reverse_nested,filters)
75+
if(aggregation instanceof SingleBucketAggregation){
76+
Aggregations singleBucketAggs = ((SingleBucketAggregation) aggregation).getAggregations();
77+
handleAggregations(singleBucketAggs,headers,lines);
78+
return;
79+
}
80+
if(aggregation instanceof NumericMetricsAggregation){
81+
handleNumericMetricAggregation(headers,lines.get(currentLineIndex),aggregation);
82+
return;
83+
}
84+
if(aggregation instanceof MultiBucketsAggregation){
85+
MultiBucketsAggregation bucketsAggregation = (MultiBucketsAggregation) aggregation;
86+
String name = bucketsAggregation.getName();
87+
//checking because it can come from a sub aggregation again
88+
if(!headers.contains(name)){
89+
headers.add(name);
90+
}
91+
Collection<? extends MultiBucketsAggregation.Bucket> buckets = bucketsAggregation.getBuckets();
92+
93+
//clone current line.
94+
List<String> currentLine = lines.get(this.currentLineIndex);
95+
List<String> clonedLine = new ArrayList<>(currentLine);
96+
97+
//call handle_Agg with current_line++
98+
boolean firstLine = true;
99+
for (MultiBucketsAggregation.Bucket bucket : buckets) {
100+
//each bucket needs to add a new line with the current line copied => except for the first line
101+
String key = bucket.getKeyAsText().string();
102+
if(firstLine){
103+
firstLine = false;
104+
}
105+
else {
106+
currentLineIndex++;
107+
currentLine = new ArrayList<String>(clonedLine);
108+
lines.add(currentLine);
109+
}
110+
currentLine.add(key);
111+
handleAggregations(bucket.getAggregations(),headers,lines);
112+
113+
}
114+
}
115+
116+
}
117+
118+
private List<String> fillHeaderAndCreateLineForNumericAggregations(Aggregations aggregations, List<String> header) {
119+
List<String> line = new ArrayList<>();
120+
List<Aggregation> aggregationList = aggregations.asList();
121+
for(Aggregation aggregation : aggregationList){
122+
handleNumericMetricAggregation(header, line, aggregation);
123+
}
124+
return line;
125+
}
126+
127+
private void handleNumericMetricAggregation(List<String> header, List<String> line, Aggregation aggregation) {
128+
String name = aggregation.getName();
129+
if(!header.contains(name)){
130+
header.add(aggregation.getName());
131+
}
132+
if(aggregation instanceof NumericMetricsAggregation.SingleValue){
133+
line.add(((NumericMetricsAggregation.SingleValue) aggregation).getValueAsString());
134+
}
135+
//todo:Numeric MultiValue - Stats,ExtendedStats,Percentile...
136+
else {
137+
138+
}
139+
}
140+
141+
private boolean allNumericAggregations(Aggregations aggregations) {
142+
List<Aggregation> aggregationList = aggregations.asList();
143+
for(Aggregation aggregation : aggregationList){
144+
if(!(aggregation instanceof NumericMetricsAggregation)){
145+
return false;
146+
}
147+
}
148+
return true;
149+
}
150+
151+
private Aggregation skipAggregations(Aggregation firstAggregation) {
152+
while(firstAggregation instanceof SingleBucketAggregation){
153+
firstAggregation = getFirstAggregation(((SingleBucketAggregation) firstAggregation).getAggregations());
154+
}
155+
return firstAggregation;
156+
}
157+
158+
private Aggregation getFirstAggregation(Aggregations aggregations){
159+
return aggregations.asList().get(0);
160+
}
161+
162+
private List<String> createCSVLinesFromDocs(boolean flat, String separator, List<Map<String, Object>> docsAsMap, List<String> headers) {
163+
List<String> csvLines = new ArrayList<>();
164+
for(Map<String,Object> doc : docsAsMap){
165+
String line = "";
166+
for(String header : headers){
167+
line += findFieldValue(header, doc, flat, separator);
168+
}
169+
csvLines.add(line.substring(0, line.length() - 1));
170+
}
171+
return csvLines;
172+
}
173+
174+
private List<String> createHeadersAndFillDocsMap(boolean flat, SearchHit[] hits, List<Map<String, Object>> docsAsMap) {
175+
Set<String> csvHeaders = new HashSet<>();
176+
for(SearchHit hit : hits){
177+
Map<String, Object> doc = hit.sourceAsMap();
178+
mergeHeaders(csvHeaders,doc,flat);
179+
docsAsMap.add(doc);
180+
}
181+
return new ArrayList<>(csvHeaders);
182+
}
183+
184+
private String findFieldValue(String header, Map<String, Object> doc, boolean flat, String separator) {
39185
if(flat && header.contains(".")){
40186
String[] split = header.split("\\.");
41187
Object innerDoc = doc;
@@ -59,15 +205,15 @@ private static String findFieldValue(String header, Map<String, Object> doc, boo
59205
return separator;
60206
}
61207

62-
private static void mergeHeaders(Set<String> headers, Map<String, Object> doc, boolean flat) {
208+
private void mergeHeaders(Set<String> headers, Map<String, Object> doc, boolean flat) {
63209
if (!flat) {
64210
headers.addAll(doc.keySet());
65211
return;
66212
}
67213
mergeFieldNamesRecursive(headers, doc, "");
68214
}
69215

70-
private static void mergeFieldNamesRecursive(Set<String> headers, Map<String, Object> doc, String prefix) {
216+
private void mergeFieldNamesRecursive(Set<String> headers, Map<String, Object> doc, String prefix) {
71217
for(Map.Entry<String,Object> field : doc.entrySet()){
72218
Object value = field.getValue();
73219
if(value instanceof Map){

src/test/java/org/nlpcn/es4sql/CSVResultsExtractorTests.java

Lines changed: 134 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
import org.elasticsearch.plugin.nlpcn.executors.CSVResult;
66
import org.elasticsearch.plugin.nlpcn.executors.CSVResultsExtractor;
77
import org.elasticsearch.search.SearchHits;
8+
import org.elasticsearch.search.aggregations.Aggregations;
9+
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
10+
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;
811
import org.junit.Assert;
912
import org.junit.Test;
1013
import org.nlpcn.es4sql.exception.SqlParseException;
@@ -149,11 +152,140 @@ public void joinSearchResultNotNestedNotFlatNoAggs() throws SqlParseException, S
149152
);
150153

151154
}
155+
156+
@Test
157+
public void simpleNumericValueAgg() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
158+
String query = String.format("select count(*) from %s/dog ",TEST_INDEX);
159+
CSVResult csvResult = getCsvResult(false, query);
160+
161+
List<String> headers = csvResult.getHeaders();
162+
Assert.assertEquals(1, headers.size());
163+
Assert.assertEquals("COUNT(*)", headers.get(0));
164+
165+
166+
List<String> lines = csvResult.getLines();
167+
Assert.assertEquals(1, lines.size());
168+
Assert.assertEquals("2.0", lines.get(0));
169+
170+
}
171+
@Test
172+
public void simpleNumericValueAggWithAlias() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
173+
String query = String.format("select avg(age) as myAlias from %s/dog ",TEST_INDEX);
174+
CSVResult csvResult = getCsvResult(false, query);
175+
176+
List<String> headers = csvResult.getHeaders();
177+
Assert.assertEquals(1, headers.size());
178+
Assert.assertEquals("myAlias", headers.get(0));
179+
180+
181+
List<String> lines = csvResult.getLines();
182+
Assert.assertEquals(1, lines.size());
183+
Assert.assertEquals("3.0", lines.get(0));
184+
185+
}
186+
187+
@Test
188+
public void twoNumericAggWithAlias() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
189+
String query = String.format("select count(*) as count, avg(age) as myAlias from %s/dog ",TEST_INDEX);
190+
CSVResult csvResult = getCsvResult(false, query);
191+
192+
List<String> headers = csvResult.getHeaders();
193+
Assert.assertEquals(2, headers.size());
194+
Assert.assertEquals("count", headers.get(0));
195+
Assert.assertEquals("myAlias", headers.get(1));
196+
197+
198+
List<String> lines = csvResult.getLines();
199+
Assert.assertEquals(1, lines.size());
200+
Assert.assertEquals("2.0,3.0", lines.get(0));
201+
202+
}
203+
204+
@Test
205+
public void aggAfterTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
206+
String query = String.format("SELECT COUNT(*) FROM %s/account GROUP BY gender",TEST_INDEX);
207+
CSVResult csvResult = getCsvResult(false, query);
208+
List<String> headers = csvResult.getHeaders();
209+
Assert.assertEquals(2, headers.size());
210+
Assert.assertEquals("gender", headers.get(0));
211+
Assert.assertEquals("COUNT(*)", headers.get(1));
212+
213+
List<String> lines = csvResult.getLines();
214+
Assert.assertEquals(2, lines.size());
215+
Assert.assertTrue("m,507.0", lines.contains("m,507.0"));
216+
Assert.assertTrue("f,493.0", lines.contains("f,493.0"));
217+
218+
}
219+
@Test
220+
public void aggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
221+
String query = String.format("SELECT COUNT(*) FROM %s/account where age in (35,36) GROUP BY gender,age",TEST_INDEX);
222+
CSVResult csvResult = getCsvResult(false, query);
223+
List<String> headers = csvResult.getHeaders();
224+
Assert.assertEquals(3, headers.size());
225+
Assert.assertEquals("gender", headers.get(0));
226+
Assert.assertEquals("age", headers.get(1));
227+
Assert.assertEquals("COUNT(*)", headers.get(2));
228+
229+
List<String> lines = csvResult.getLines();
230+
Assert.assertEquals(4, lines.size());
231+
Assert.assertTrue("m,36,31.0", lines.contains("m,36,31.0"));
232+
// The male/age-35 bucket must appear as its own CSV row.
Assert.assertTrue("m,35,28.0", lines.contains("m,35,28.0"));
233+
Assert.assertTrue("f,36,21.0", lines.contains("f,36,21.0"));
234+
Assert.assertTrue("f,35,24.0", lines.contains("f,35,24.0"));
235+
236+
}
237+
@Test
238+
public void multipleAggAfterTwoTermsGroupBy() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
239+
String query = String.format("SELECT COUNT(*) , sum(balance) FROM %s/account where age in (35,36) GROUP BY gender,age",TEST_INDEX);
240+
CSVResult csvResult = getCsvResult(false, query);
241+
List<String> headers = csvResult.getHeaders();
242+
Assert.assertEquals(4, headers.size());
243+
Assert.assertEquals("gender", headers.get(0));
244+
Assert.assertEquals("age", headers.get(1));
245+
Assert.assertEquals("COUNT(*)", headers.get(2));
246+
Assert.assertEquals("SUM(balance)", headers.get(3));
247+
248+
List<String> lines = csvResult.getLines();
249+
Assert.assertEquals(4, lines.size());
250+
Assert.assertTrue("m,36,31.0,647425.0", lines.contains("m,36,31.0,647425.0"));
251+
Assert.assertTrue("m,35,28.0,678337.0", lines.contains("m,35,28.0,678337.0"));
252+
Assert.assertTrue("f,36,21.0,505660.0", lines.contains("f,36,21.0,505660.0"));
253+
Assert.assertTrue("f,35,24.0,472771.0", lines.contains("f,35,24.0,472771.0"));
254+
255+
}
256+
257+
@Test
258+
public void dateHistogramTest() throws SqlParseException, SQLFeatureNotSupportedException, IOException {
259+
String query = String.format("select count(*) from %s/online" +
260+
" group by date_histogram('field'='insert_time','interval'='4d','alias'='days')",TEST_INDEX);
261+
CSVResult csvResult = getCsvResult(false, query);
262+
List<String> headers = csvResult.getHeaders();
263+
Assert.assertEquals(2, headers.size());
264+
Assert.assertEquals("days", headers.get(0));
265+
Assert.assertEquals("COUNT(*)", headers.get(1));
266+
267+
List<String> lines = csvResult.getLines();
268+
Assert.assertEquals(3, lines.size());
269+
Assert.assertTrue("2014-08-14 00:00:00,477.0", lines.contains("2014-08-14 00:00:00,477.0"));
270+
Assert.assertTrue("2014-08-18 00:00:00,5664.0", lines.contains("2014-08-18 00:00:00,5664.0"));
271+
Assert.assertTrue("2014-08-22 00:00:00,3795.0", lines.contains("2014-08-22 00:00:00,3795.0"));
272+
273+
}
274+
275+
276+
/* todo: more tests:
277+
* multi_numeric extended_stats , stats , percentiles.
278+
* filter/nested and then metric
279+
* histogram
280+
* geo
281+
*/
282+
283+
152284
private CSVResult getCsvResult(boolean flat, String query) throws SqlParseException, SQLFeatureNotSupportedException, IOException {
153285
SearchDao searchDao = MainTestSuite.getSearchDao();
154286
QueryAction queryAction = searchDao.explain(query);
155-
SearchHits searchHits = (SearchHits) QueryActionElasticExecutor.executeAnyAction(searchDao.getClient(), queryAction);
156-
return CSVResultsExtractor.extractResults(searchHits, flat, ",");
287+
Object execution = QueryActionElasticExecutor.executeAnyAction(searchDao.getClient(), queryAction);
288+
return new CSVResultsExtractor().extractResults(execution, flat, ",");
157289
}
158290

159291

0 commit comments

Comments
 (0)