File tree Expand file tree Collapse file tree 1 file changed +27
-0
lines changed
Expand file tree Collapse file tree 1 file changed +27
-0
lines changed Original file line number Diff line number Diff line change @@ -619,6 +619,11 @@ def uncacheTable(tableName):
619619 self ._ssql_ctx .uncacheTable (tableName )
620620
621621class HiveContext (SQLContext ):
622+ """
623+ An instance of the Spark SQL execution engine that integrates with data stored in Hive.
624+ Configuration for Hive is read from hive-site.xml on the classpath. It supports running both SQL
625+ and HiveQL commands.
626+ """
622627
623628 @property
624629 def _ssql_ctx (self ):
@@ -646,6 +651,28 @@ def hql(self, hqlQuery):
646651 return self .hiveql (hqlQuery )
647652
648653class LocalHiveContext (HiveContext ):
654+ """
655+ Starts up an instance of Hive where metadata is stored locally. An in-process metadata database
656+ is created with data stored in ./metadata. Warehouse data is stored in ./warehouse.
657+
658+ >>> import os
659+ >>> from pyspark.context import LocalHiveContext
660+ >>> hiveCtx = LocalHiveContext(sc)
661+ >>> try:
662+ ... suppress = hiveCtx.hql("DROP TABLE src")
663+ ... except Exception:
664+ ... pass
665+ >>> kv1 = os.path.join(os.environ["SPARK_HOME"], 'examples/src/main/resources/kv1.txt')
666+ >>> suppress = hiveCtx.hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
667+ >>> suppress = hiveCtx.hql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src" % kv1)
668+ >>> results = hiveCtx.hql("FROM src SELECT value").map(lambda r: int(r.value.split('_')[1]))
669+ >>> num = results.count()
670+ >>> reduce_sum = results.reduce(lambda x, y: x + y)
671+ >>> num
672+ 500
673+ >>> reduce_sum
674+ 130091
675+ """
649676
650677 def _get_hive_ctx (self ):
651678 return self ._jvm .LocalHiveContext (self ._jsc .sc ())
You can’t perform that action at this time.
0 commit comments