
Commit ded03e7

Added doc test for HiveContext
1 parent 22de1d4 commit ded03e7

1 file changed: +27 -0 lines changed

python/pyspark/context.py

Lines changed: 27 additions & 0 deletions
@@ -619,6 +619,11 @@ def uncacheTable(tableName):
         self._ssql_ctx.uncacheTable(tableName)
 
 class HiveContext(SQLContext):
+    """
+    An instance of the Spark SQL execution engine that integrates with data stored in Hive.
+    Configuration for Hive is read from hive-site.xml on the classpath. It supports running
+    both SQL and HiveQL commands.
+    """
 
     @property
     def _ssql_ctx(self):
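
As a side note (not part of this commit), here is a minimal sketch of how the HiveContext documented above might be used from PySpark. It assumes hive-site.xml is on the classpath and that a Hive table named src already exists; the application name and variable names are illustrative, and hql() is the method that appears on HiveContext in the next hunk.

from pyspark.context import SparkContext, HiveContext

# Assumed setup: a local SparkContext with a hypothetical application name.
sc = SparkContext("local", "HiveContextExample")
hiveCtx = HiveContext(sc)  # Hive configuration is read from hive-site.xml on the classpath

# hql() runs a HiveQL query; the result supports RDD-style operations
# such as map/count/reduce, as in the doctest added below.
rows = hiveCtx.hql("SELECT key, value FROM src")
print(rows.count())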
@@ -646,6 +651,28 @@ def hql(self, hqlQuery):
         return self.hiveql(hqlQuery)
 
 class LocalHiveContext(HiveContext):
+    """
+    Starts up an instance of Hive where metadata is stored locally. An in-process metadata
+    database is created with data stored in ./metadata. Warehouse data is stored in ./warehouse.
+
+    >>> import os
+    >>> from pyspark.context import LocalHiveContext
+    >>> hiveCtx = LocalHiveContext(sc)
+    >>> try:
+    ...     suppress = hiveCtx.hql("DROP TABLE src")
+    ... except Exception:
+    ...     pass
+    >>> kv1 = os.path.join(os.environ["SPARK_HOME"], 'examples/src/main/resources/kv1.txt')
+    >>> suppress = hiveCtx.hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+    >>> suppress = hiveCtx.hql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src" % kv1)
+    >>> results = hiveCtx.hql("FROM src SELECT value").map(lambda r: int(r.value.split('_')[1]))
+    >>> num = results.count()
+    >>> reduce_sum = results.reduce(lambda x, y: x + y)
+    >>> num
+    500
+    >>> reduce_sum
+    130091
+    """
 
     def _get_hive_ctx(self):
         return self._jvm.LocalHiveContext(self._jsc.sc())
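
The doctest above refers to a variable named sc that is never created inside the docstring; when PySpark's doctests are run, a SparkContext is typically injected into the doctest globals. Below is a minimal, hypothetical sketch of that pattern (the function name and application name are illustrative, not Spark's actual test harness):

import doctest

import pyspark.context
from pyspark.context import SparkContext

def run_context_doctests():
    # Copy the module globals and bind a SparkContext to the name `sc`
    # that the docstring examples reference.
    globs = pyspark.context.__dict__.copy()
    globs['sc'] = SparkContext('local[4]', 'PythonDocTests')
    failures, _attempted = doctest.testmod(pyspark.context, globs=globs)
    globs['sc'].stop()
    if failures:
        raise SystemExit(-1)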
