Skip to content

Commit e3e07c3

Browse files
author
James Lee
committed
prefer column
1 parent 77d7e3a commit e3e07c3

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

src/main/java/com/sparkTutorial/sparkSql/HousePriceSolution.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,12 @@
33

44
import org.apache.log4j.Level;
55
import org.apache.log4j.Logger;
6-
import org.apache.spark.sql.Column;
76
import org.apache.spark.sql.Dataset;
87
import org.apache.spark.sql.Row;
98
import org.apache.spark.sql.SparkSession;
109

1110
import static org.apache.spark.sql.functions.avg;
11+
import static org.apache.spark.sql.functions.col;
1212
import static org.apache.spark.sql.functions.max;
1313

1414
public class HousePriceSolution {
@@ -23,11 +23,11 @@ public static void main(String[] args) throws Exception {
2323

2424
Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");
2525

26-
Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, new Column(PRICE).cast("long")).withColumn(PRICE_SQ_FT, new Column(PRICE_SQ_FT).cast("long"));
26+
Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long")).withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));
2727

2828
castedRealEstate.groupBy("Location")
2929
.agg(avg(PRICE_SQ_FT), max(PRICE))
30-
.orderBy(new Column("avg(" + PRICE_SQ_FT + ")").desc())
30+
.orderBy(col("avg(" + PRICE_SQ_FT + ")").desc())
3131
.show();
3232
}
3333
}

src/main/java/com/sparkTutorial/sparkSql/StackOverFlowSurvey.java

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,12 @@
22

33
import org.apache.log4j.Level;
44
import org.apache.log4j.Logger;
5-
import org.apache.spark.sql.Column;
65
import org.apache.spark.sql.Dataset;
76
import org.apache.spark.sql.Row;
87
import org.apache.spark.sql.SparkSession;
98

109
import static org.apache.spark.sql.functions.avg;
10+
import static org.apache.spark.sql.functions.col;
1111
import static org.apache.spark.sql.functions.max;
1212

1313
public class StackOverFlowSurvey {
@@ -33,33 +33,33 @@ public static void main(String[] args) throws Exception {
3333
responses.show(20);
3434

3535
System.out.println("=== Print the so_region and self_identification columns of gender table ===");
36-
responses.select(new Column("so_region"), new Column("self_identification")).show();
36+
responses.select(col("so_region"), col("self_identification")).show();
3737

3838
System.out.println("=== Print records where the response is from Afghanistan ===");
39-
responses.filter(new Column("country").equalTo("Afghanistan")).show();
39+
responses.filter(col("country").equalTo("Afghanistan")).show();
4040

4141
System.out.println("=== Print the count of occupations ===");
42-
responses.groupBy(new Column("occupation")).count().show();
42+
responses.groupBy(col("occupation")).count().show();
4343

4444

4545
System.out.println("=== Cast the salary mid point and age mid point to integer ===");
46-
Dataset<Row> castedResponse = responses.withColumn(SALARY_MIDPOINT, new Column(SALARY_MIDPOINT).cast("integer"))
47-
.withColumn(AGE_MIDPOINT, new Column(AGE_MIDPOINT).cast("integer"));
46+
Dataset<Row> castedResponse = responses.withColumn(SALARY_MIDPOINT, col(SALARY_MIDPOINT).cast("integer"))
47+
.withColumn(AGE_MIDPOINT, col(AGE_MIDPOINT).cast("integer"));
4848

4949
System.out.println("=== Print out casted schema ===");
5050
castedResponse.printSchema();
5151

5252
System.out.println("=== Print records with average mid age less than 20 ===");
53-
castedResponse.filter(new Column(AGE_MIDPOINT).$less(20)).show();
53+
castedResponse.filter(col(AGE_MIDPOINT).$less(20)).show();
5454

5555
System.out.println("=== Print the result with salary middle point in descending order ===");
56-
castedResponse.orderBy(new Column(SALARY_MIDPOINT ).desc()).show();
56+
castedResponse.orderBy(col(SALARY_MIDPOINT ).desc()).show();
5757

5858
System.out.println("=== Group by country and aggregate by average salary middle point and max age middle point ===");
5959
castedResponse.groupBy("country").agg(avg(SALARY_MIDPOINT), max(AGE_MIDPOINT)).show();
6060

6161
System.out.println("=== Group by salary bucket ===");
62-
Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, new Column(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));
63-
responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(new Column(SALARY_MIDPOINT_BUCKET)).show();
62+
Dataset<Row> responseWithSalaryBucket = castedResponse.withColumn(SALARY_MIDPOINT_BUCKET, col(SALARY_MIDPOINT).divide(20000).cast("integer").multiply(20000));
63+
responseWithSalaryBucket.groupBy(SALARY_MIDPOINT_BUCKET).count().orderBy(col(SALARY_MIDPOINT_BUCKET)).show();
6464
}
6565
}

0 commit comments

Comments (0)