Commit 1b1c419

fix a couple of errors

1 parent 930d16b · commit 1b1c419

File tree: 2 files changed, +27 -16 lines

2 files changed

+27
-16
lines changed

python/pyspark/sql/tests.py

Lines changed: 13 additions & 11 deletions
@@ -32,6 +32,7 @@
 import datetime
 import array
 import math
+import types

 import py4j
 try:
@@ -2277,17 +2278,18 @@ def collected(a):
             return df.collect()[0]["myarray"][0]

         # test whether pyspark can correctly handle string types
-        string_types = set()
+        string_types = []
         if sys.version < "4":
-            string_types += set(['u'])
+            string_types += ['u']
             self.assertEqual(collected(array.array('u', ["a"])), "a")
         if sys.version < "3":
-            string_types += set(['c'])
+            string_types += ['c']
             self.assertEqual(collected(array.array('c', ["a"])), "a")

         # test whether pyspark can correctly handle int types
-        int_types = set(['b', 'h', 'i', 'l'])
-        for t in int_types:
+        int_types = ['b', 'h', 'i', 'l']
+        unsigned_types = ['B', 'H', 'I']
+        for t in int_types + unsigned_types:
             # Start from 1 and keep doubling the number until overflow.
             a = array.array(t, [1])
             while True:
@@ -2296,6 +2298,7 @@ def collected(a):
                     a[0] *= 2
                 except OverflowError:
                     break
+        for t in int_types:
             # Start from -1 and keep doubling the number until overflow
             a = array.array(t, [-1])
             while True:
@@ -2306,7 +2309,7 @@ def collected(a):
                     break

         # test whether pyspark can correctly handle float types
-        float_types = set(['f', 'd'])
+        float_types = ['f', 'd']
         for t in float_types:
             # test upper bound and precision
             a = array.array(t, [1.0])
@@ -2321,14 +2324,13 @@ def collected(a):
                 a[0] /= 2

         # make sure that the test case cover all supported types
-        supported_types = int_types + float_types + string_types
-        self.assertEqual(supported_types, _array_type_mappings.keys)
+        supported_types = int_types + unsigned_types + float_types + string_types
+        self.assertEqual(supported_types, types._array_type_mappings.keys)

-        all_type_codes = set()
         if sys.version < "3":
-            all_type_codes += set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
+            all_type_codes = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
         else:
-            all_type_codes += set(array.typecodes)
+            all_type_codes = set(array.typecodes)
         unsupported_types = all_type_codes - supported_types

         # test whether pyspark can correctly handle unsupported types
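
The positive-range coverage in the hunks above hinges on one probe: keep doubling a one-element array.array until the typecode's underlying C type overflows, asserting on each iteration that the value round-trips through Spark unchanged. Below is a minimal standalone sketch of that probe, assuming nothing from PySpark (the helper name max_positive_value is ours, not part of the test suite):

```python
import array


def max_positive_value(typecode):
    """Keep doubling a one-element array until the C type overflows.

    Returns the largest power of two that array.array(typecode) can store,
    i.e. the upper bound the corresponding Spark SQL type must accommodate.
    """
    a = array.array(typecode, [1])
    while True:
        try:
            a[0] *= 2
        except OverflowError:
            # a[0] still holds the last value that fit.
            return a[0]


if __name__ == "__main__":
    for t in ['b', 'h', 'i', 'l'] + ['B', 'H', 'I']:
        print("typecode %r: itemsize %d, largest stored power of two %d"
              % (t, array.array(t).itemsize, max_positive_value(t)))
```

Whatever value the probe ends on must still fit the Spark SQL type the typecode maps to, which is why the unsigned codes 'B', 'H' and 'I' need the wider mappings added in types.py below.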

python/pyspark/sql/types.py

Lines changed: 14 additions & 5 deletions
@@ -929,25 +929,33 @@ def _parse_datatype_json_value(json_value):
     datetime.time: TimestampType,
 }

+if sys.version < "3":
+    _type_mappings.update({
+        unicode: StringType,
+        long: LongType,
+    })
+
 # Mapping Python array types to Spark SQL DataType
 # We should be careful here. The size of these types in python depends on
 # C implementation (See: https://docs.python.org/2/library/array.html).
 # We need to make sure that this conversion does not lose any precision. And
 # this should be considered in test cases.
 _array_type_mappings = {
     'b': ByteType,
+    'B': ShortType,
     'h': ShortType,
+    'H': IntegerType,
     'i': IntegerType,
+    'I': LongType,
     'l': LongType,
+    #'L': not supported
+    #'q': not supported
+    #'Q': not supported
     'f': FloatType,
     'd': DoubleType
 }

-if sys.version < "3":
-    _type_mappings.update({
-        unicode: StringType,
-        long: LongType,
-    })
+

 # Type code 'u' in Python's array is deprecated since version 3.3, and will be
 # removed in version 4.0. See: https://docs.python.org/3/library/array.html
@@ -956,6 +964,7 @@ def _parse_datatype_json_value(json_value):
     'u': StringType
 })

+# Type code 'c' are only available at python 2
 if sys.version < "3":
     _array_type_mappings.update({
         'c': StringType
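
A rough illustration, not PySpark code, of the precision argument behind the new unsigned entries: an unsigned C type of n bytes holds values up to 2**(8*n) - 1, which exceeds the range of the signed Spark SQL type of the same width, so each unsigned typecode is promoted to the next wider signed type. The byte widths below are assumed (2/4/8 for ShortType/IntegerType/LongType), not read from PySpark:

```python
import array

# Assumed byte widths of Spark SQL's signed integral types (not read from PySpark).
SPARK_WIDTHS = {'ShortType': 2, 'IntegerType': 4, 'LongType': 8}

# The unsigned typecodes added to _array_type_mappings and their target types.
PROMOTIONS = {'B': 'ShortType', 'H': 'IntegerType', 'I': 'LongType'}

for typecode in sorted(PROMOTIONS):
    spark_type = PROMOTIONS[typecode]
    c_width = array.array(typecode).itemsize                   # platform dependent
    unsigned_max = 2 ** (8 * c_width) - 1                      # largest storable value
    signed_max = 2 ** (8 * SPARK_WIDTHS[spark_type] - 1) - 1   # Spark type's max
    verdict = "ok" if unsigned_max <= signed_max else "would lose precision"
    print("%r (max %d, %d bytes) -> %s (max %d): %s"
          % (typecode, unsigned_max, c_width, spark_type, signed_max, verdict))
```

'L', 'q' and 'Q' remain commented out as unsupported in the mapping; for the unsigned ones the likely reason is that their range can exceed LongType on 64-bit builds.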
