Improve StackOverFlowSurvey: name the grouped dataset and print the salary bucket column

James Lee · James Lee · commit 1ce27415435e · 2017-02-18T15:30:03.000Z
diff --git a/src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java b/src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java
@@ -53,12 +53,17 @@ public static void main(String[] args) throws Exception {
         castedResponse.orderBy(col(SALARY_MIDPOINT ).desc()).show();
 
         System.out.println("=== Group by country and aggregate by average salary middle point and max age middle point ===");
-        castedResponse.groupBy("country").agg(avg(SALARY_MIDPOINT), max(AGE_MIDPOINT)).show();
+        RelationalGroupedDataset datasetGroupByCountry = castedResponse.groupBy("country");
+        datasetGroupByCountry.agg(avg(SALARY_MIDPOINT), max(AGE_MIDPOINT)).show();
+
 
-        System.out.println("=== Group by salary bucket ===");
         Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, col(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));
-        responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(col(SALARY_MIDPOINT_BUCKET)).show();
 
+        System.out.println("=== With salary bucket column ===");
+        responseWithSalaryBucket.select(col(SALARY_MIDPOINT), col(SALARY_MIDPOINT_BUCKET)).show();
+
+        System.out.println("=== Group by salary bucket ===");
+        responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(col(SALARY_MIDPOINT_BUCKET)).show();
 
         session.stop();
     }