Prefer the static col() function over new Column(...) in the Spark SQL examples

James Lee · James Lee · commit e3e07c328277 · 2017-02-12T18:19:59.000Z
diff --git a/src/main/java/com/sparkTutorial/sparkSql/HousePriceSolution.java b/src/main/java/com/sparkTutorial/sparkSql/HousePriceSolution.java
@@ -3,12 +3,12 @@
 
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
-import org.apache.spark.sql.Column;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 
 import static org.apache.spark.sql.functions.avg;
+import static org.apache.spark.sql.functions.col;
 import static org.apache.spark.sql.functions.max;
 
 public class HousePriceSolution {
@@ -23,11 +23,11 @@ public static void main(String[] args) throws Exception {
 
         Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");
 
-        Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, new Column(PRICE).cast("long")).withColumn(PRICE_SQ_FT, new Column(PRICE_SQ_FT).cast("long"));
+        Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long")).withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));
 
         castedRealEstate.groupBy("Location")
                         .agg(avg(PRICE_SQ_FT), max(PRICE))
-                        .orderBy(new Column("avg(" + PRICE_SQ_FT + ")").desc())
+                        .orderBy(col("avg(" + PRICE_SQ_FT + ")").desc())
                         .show();
     }
 }
diff --git a/src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java b/src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java
@@ -2,12 +2,12 @@
 
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
-import org.apache.spark.sql.Column;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 
 import static org.apache.spark.sql.functions.avg;
+import static org.apache.spark.sql.functions.col;
 import static org.apache.spark.sql.functions.max;
 
 public class StackOverFlowSurvey {
@@ -33,33 +33,33 @@ public static void main(String[] args) throws Exception {
         responses.show(20);
 
         System.out.println("=== Print the so_region and self_identification columns of gender table ===");
-        responses.select(new Column("so_region"), new Column("self_identification")).show();
+        responses.select(col("so_region"),  col("self_identification")).show();
 
         System.out.println("=== Print records where the response is from Afghanistan ===");
-        responses.filter(new Column("country").equalTo("Afghanistan")).show();
+        responses.filter(col("country").equalTo("Afghanistan")).show();
 
         System.out.println("=== Print the count of occupations ===");
-        responses.groupBy(new Column("occupation")).count().show();
+        responses.groupBy(col("occupation")).count().show();
 
 
         System.out.println("=== Cast the salary mid point and age mid point to integer ===");
-        Dataset<Row> castedResponse = responses.withColumn(SALARY_MIDPOINT, new Column(SALARY_MIDPOINT).cast("integer"))
-                                               .withColumn(AGE_MIDPOINT, new Column(AGE_MIDPOINT).cast("integer"));
+        Dataset<Row> castedResponse = responses.withColumn(SALARY_MIDPOINT, col(SALARY_MIDPOINT).cast("integer"))
+                                               .withColumn(AGE_MIDPOINT, col(AGE_MIDPOINT).cast("integer"));
 
         System.out.println("=== Print out casted schema ===");
         castedResponse.printSchema();
 
         System.out.println("=== Print records with average mid age less than 20 ===");
-        castedResponse.filter(new Column(AGE_MIDPOINT).$less(20)).show();
+        castedResponse.filter(col(AGE_MIDPOINT).$less(20)).show();
 
         System.out.println("=== Print the result with salary middle point in descending order ===");
-        castedResponse.orderBy(new Column(SALARY_MIDPOINT ).desc()).show();
+        castedResponse.orderBy(col(SALARY_MIDPOINT ).desc()).show();
 
         System.out.println("=== Group by country and aggregate by average salary middle point and max age middle point ===");
         castedResponse.groupBy("country").agg(avg(SALARY_MIDPOINT), max(AGE_MIDPOINT)).show();
 
         System.out.println("=== Group by salary bucket ===");
-        Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, new Column(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));
-        responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(new Column(SALARY_MIDPOINT_BUCKET)).show();
+        Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, col(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));
+        responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(col(SALARY_MIDPOINT_BUCKET)).show();
     }
 }