add salary bucket

James Lee · James Lee · commit 1c4966f72b68 · 2017-02-12T17:22:42.000Z
diff --git a/src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java b/src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java
@@ -14,6 +14,7 @@ public class StackOverFlowSurvey {
 
     private static final String AGE_MIDPOINT = "age_midpoint";
     private static final String SALARY_MIDPOINT = "salary_midpoint";
+    public static final String SALARY_MIDPOINT_BUCKET = "salary_midpoint_bucket";
 
     public static void main(String[] args) throws Exception {
 
@@ -57,5 +58,8 @@ public static void main(String[] args) throws Exception {
         System.out.println("=== Group by country and aggregate by average salary middle point and max age middle point ===");
         castedResponse.groupBy("country").agg(avg(SALARY_MIDPOINT), max(AGE_MIDPOINT)).show();
 
+        System.out.println("=== Group by salary bucket ===");
+        Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, new Column(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));
+        responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(new Column(SALARY_MIDPOINT_BUCKET)).show();
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,7 @@ public class StackOverFlowSurvey {`
`14`	`14`
`15`	`15`	`private static final String AGE_MIDPOINT = "age_midpoint";`
`16`	`16`	`private static final String SALARY_MIDPOINT = "salary_midpoint";`
	`17`	`+ public static final String SALARY_MIDPOINT_BUCKET = "salary_midpoint_bucket";`
`17`	`18`
`18`	`19`	`public static void main(String[] args) throws Exception {`
`19`	`20`
`@@ -57,5 +58,8 @@ public static void main(String[] args) throws Exception {`
`57`	`58`	`System.out.println("=== Group by country and aggregate by average salary middle point and max age middle point ===");`
`58`	`59`	`castedResponse.groupBy("country").agg(avg(SALARY_MIDPOINT), max(AGE_MIDPOINT)).show();`
`59`	`60`
	`61`	`+ System.out.println("=== Group by salary bucket ===");`
	`62`	`+ Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, new Column(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));`
	`63`	`+ responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(new Column(SALARY_MIDPOINT_BUCKET)).show();`
`60`	`64`	`}`
`61`	`65`	`}`