第46课:Spark中的新解析引擎Catalyst源码初探
Spark 1.6.0 与 Spark 2.0.2 中 SQL 模块的版本迭代更新：刚看一个开头，SQLContext 的定义就不同了，SparkSession 闪亮登场。
但以后的学习仍基于 Spark 1.6.0。
Spark 1.6.0
/** * The entry point for working with structured data (rows and columns) in Spark. Allows the * creation of [[DataFrame]] objects as well as the execution of SQL queries. * * @groupname basic Basic Operations * @groupname ddl_ops Persistent Catalog DDL * @groupname cachemgmt Cached Table Management * @groupname genericdata Generic Data Sources * @groupname specificdata Specific Data Sources * @groupname config Configuration * @groupname dataframes Custom DataFrame Creation * @groupname Ungrouped Support functions for language integrated queries * * @since 1.0.0 */ class SQLContext private[sql]( @transient val sparkContext: SparkContext, @transient protected[sql] val cacheManager: CacheManager, @transient private[sql] val listener: SQLListener, val isRootContext: Boolean) extends org.apache.spark.Logging with Serializable {
Spark 2.0.2
/** * The entry point for working with structured data (rows and columns) in Spark 1.x. * * As of Spark 2.0, this is replaced by [[SparkSession]]. However, we are keeping the class * here for backward compatibility. * * @groupname basic Basic Operations * @groupname ddl_ops Persistent Catalog DDL * @groupname cachemgmt Cached Table Management * @groupname genericdata Generic Data Sources * @groupname specificdata Specific Data Sources * @groupname config Configuration * @groupname dataframes Custom DataFrame Creation * @groupname dataset Custom Dataset Creation * @groupname Ungrouped Support functions for language integrated queries * @since 1.0.0 */ class SQLContext private[sql](val sparkSession: SparkSession) extends Logging with Serializable {