Skip to content

Commit

Permalink
fix SQLContext as sqlCtx
Browse files Browse the repository at this point in the history
  • Loading branch information
yennanliu committed Oct 15, 2017
1 parent 3cf6b66 commit cfed330
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions spark_/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ def filter_column():
  df_train = sqlCtx.read.format('com.databricks.spark.csv')\
  .options(header='true', inferschema='true')\
  .load('/Users/yennanliu/NYC_Taxi_Trip_Duration/data/train.csv')
- # rdd
+ # dataframe -> rdd
  rdd_ = df_train.select('id','vendor_id','pickup_datetime').rdd
  # rdd -> dataframe
- df_xx = spark.createDataFrame(rdd_)
+ df_xx = sqlCtx.createDataFrame(rdd_)
  # dataframe -> sql
  df_xx.registerTempTable("df_xx_table")
- spark.sql(""" #
+ sqlCtx.sql("""
  SELECT id, count(*)
  FROM df_xx_table
  group by 1
Expand Down

0 comments on commit cfed330

Please sign in to comment.