1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """
19 >>> from pyspark.context import SparkContext
20 >>> sc = SparkContext('local', 'test')
21 >>> b = sc.broadcast([1, 2, 3, 4, 5])
22 >>> b.value
23 [1, 2, 3, 4, 5]
24
25 >>> from pyspark.broadcast import _broadcastRegistry
26 >>> _broadcastRegistry[b.bid] = b
27 >>> from cPickle import dumps, loads
28 >>> loads(dumps(b)).value
29 [1, 2, 3, 4, 5]
30
31 >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect()
32 [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
33
34 >>> large_broadcast = sc.broadcast(list(range(10000)))
35 """
36
# Registry of broadcast variables keyed by broadcast id (``bid``); the module
# doctest populates it with ``_broadcastRegistry[b.bid] = b``.
# NOTE(review): presumably ``_from_id`` (not visible here) resolves ids through
# this dict when a pickled broadcast is loaded -- confirm.
37 _broadcastRegistry = {}
38
39
45
46
48 """
49 A broadcast variable created with
50 L{SparkContext.broadcast()<pyspark.context.SparkContext.broadcast>}.
51 Access its value through C{.value}.
52 """
53
54 - def __init__(self, bid, value, java_broadcast=None, pickle_registry=None):
55 """
56 Should not be called directly by users -- use
57 L{SparkContext.broadcast()<pyspark.context.SparkContext.broadcast>}
58 instead.
59 """
60 self.value = value
61 self.bid = bid
62 self._jbroadcast = java_broadcast
63 self._pickle_registry = pickle_registry
64
66 self._pickle_registry.add(self)
67 return (_from_id, (self.bid, ))
68