Skip to content

Commit 77d7e3a

Browse files
author
James Lee
committed
add UkMarketSpaces join
1 parent 716cde3 commit 77d7e3a

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package com.sparkTutorial.sparkSql.join;
2+
3+
import org.apache.log4j.Level;
4+
import org.apache.log4j.Logger;
5+
import org.apache.spark.sql.Dataset;
6+
import org.apache.spark.sql.Row;
7+
import org.apache.spark.sql.SparkSession;
8+
9+
public class UkMarketSpaces {
10+
11+
public static void main(String[] args) throws Exception {
12+
13+
Logger.getLogger("org").setLevel(Level.ERROR);
14+
15+
SparkSession session = SparkSession.builder().appName("StackOverFlowSurvey").master("local[1]").getOrCreate();
16+
17+
Dataset<Row> marketSpace = session.read().option("header", "true").csv("in/uk-market-spaces-identifiable-data.csv");
18+
Dataset<Row> postCode = session.read().option("header", "true").csv("in/uk-postcode.csv");
19+
20+
21+
postCode.show();
22+
marketSpace.show();
23+
24+
Dataset<Row> joined = marketSpace.join(postCode, marketSpace.col("Postcode").startsWith(postCode.col("Postcode")));
25+
26+
joined.show();
27+
28+
joined.groupBy("Region").count().show(200);
29+
}
30+
}

0 commit comments

Comments
 (0)