18 """
19 >>> from pyspark.conf import SparkConf
20 >>> from pyspark.context import SparkContext
21 >>> conf = SparkConf()
22 >>> conf.setMaster("local").setAppName("My app")
23 <pyspark.conf.SparkConf object at ...>
24 >>> conf.get("spark.master")
25 u'local'
26 >>> conf.get("spark.app.name")
27 u'My app'
28 >>> sc = SparkContext(conf=conf)
29 >>> sc.master
30 u'local'
31 >>> sc.appName
32 u'My app'
33 >>> sc.sparkHome == None
34 True
35
36 >>> conf = SparkConf(loadDefaults=False)
37 >>> conf.setSparkHome("/path")
38 <pyspark.conf.SparkConf object at ...>
39 >>> conf.get("spark.home")
40 u'/path'
41 >>> conf.setExecutorEnv("VAR1", "value1")
42 <pyspark.conf.SparkConf object at ...>
43 >>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")])
44 <pyspark.conf.SparkConf object at ...>
45 >>> conf.get("spark.executorEnv.VAR1")
46 u'value1'
47 >>> print conf.toDebugString()
48 spark.executorEnv.VAR1=value1
49 spark.executorEnv.VAR3=value3
50 spark.executorEnv.VAR4=value4
51 spark.home=/path
52 >>> sorted(conf.getAll(), key=lambda p: p[0])
53 [(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), (u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
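A brief illustrative sketch of contains(), get() with a default, and
setAll(); "spark.foo" and "spark.bar" are arbitrary keys used only for
demonstration:

>>> conf.contains("spark.executorEnv.VAR1")
True
>>> conf.contains("spark.master")
False
>>> conf.get("spark.master") is None
True
>>> conf.get("spark.master", "local[2]")
u'local[2]'
>>> conf.setAll([("spark.foo", "42"), ("spark.bar", "hello")])
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.foo")
u'42'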
54 """


class SparkConf(object):
    """
    Configuration for a Spark application. Used to set various Spark
    parameters as key-value pairs.

    Most of the time, you would create a SparkConf object with
    C{SparkConf()}, which will load values from C{spark.*} Java system
    properties as well. In this case, any parameters you set directly on
    the C{SparkConf} object take priority over system properties.

    For unit tests, you can also call C{SparkConf(False)} to skip
    loading external settings and get the same configuration no matter
    what the system properties are.

    All setter methods in this class support chaining. For example,
    you can write C{conf.setMaster("local").setAppName("My app")}.

    Note that once a SparkConf object is passed to Spark, it is cloned
    and can no longer be modified by the user.
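
    A minimal sketch of the chaining pattern described above (illustrative
    only; the master URL, application name, and C{spark.executor.memory}
    value are arbitrary examples)::

        conf = (SparkConf(loadDefaults=False)
                .setMaster("local[2]")
                .setAppName("Example app")
                .set("spark.executor.memory", "1g"))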
76 """

    def __init__(self, loadDefaults=True, _jvm=None, _jconf=None):
        """
        Create a new Spark configuration.

        @param loadDefaults: whether to load values from Java system
               properties (True by default)
        @param _jvm: internal parameter used to pass a handle to the
               Java VM; does not need to be set by users
        @param _jconf: optionally pass in an existing SparkConf handle
               to use its parameters
        """
        if _jconf:
            # Wrap an existing JVM SparkConf handle directly.
            self._jconf = _jconf
        else:
            # Otherwise make sure the Py4J gateway is up and create a fresh
            # JVM SparkConf, which loads spark.* system properties when
            # loadDefaults is True.
            from pyspark.context import SparkContext
            SparkContext._ensure_initialized()
            _jvm = _jvm or SparkContext._jvm
            self._jconf = _jvm.SparkConf(loadDefaults)

    def set(self, key, value):
        """Set a configuration property."""
        self._jconf.set(key, unicode(value))
        return self

    def setMaster(self, value):
        """Set master URL to connect to."""
        self._jconf.setMaster(value)
        return self

    def setAppName(self, value):
        """Set application name."""
        self._jconf.setAppName(value)
        return self

    def setSparkHome(self, value):
        """Set path where Spark is installed on worker nodes."""
        self._jconf.setSparkHome(value)
        return self

    def setExecutorEnv(self, key=None, value=None, pairs=None):
        """Set an environment variable to be passed to executors."""
        if (key is not None and pairs is not None) or (key is None and pairs is None):
            raise Exception("Either pass one key-value pair or a list of pairs")
        elif key is not None:
            self._jconf.setExecutorEnv(key, value)
        elif pairs is not None:
            for (k, v) in pairs:
                self._jconf.setExecutorEnv(k, v)
        return self

    def setAll(self, pairs):
        """
        Set multiple parameters, passed as a list of key-value pairs.

        @param pairs: list of key-value pairs to set
        """
        for (k, v) in pairs:
            self._jconf.set(k, v)
        return self

    def get(self, key, defaultValue=None):
        """Get the configured value for some key, or return a default otherwise."""
        if defaultValue is None:
            if not self._jconf.contains(key):
                return None
            return self._jconf.get(key)
        else:
            return self._jconf.get(key, defaultValue)

    def getAll(self):
        """Get all values as a list of key-value pairs."""
        pairs = []
        for elem in self._jconf.getAll():
            # Each element is a Scala Tuple2; _1()/_2() pull out key and value.
            pairs.append((elem._1(), elem._2()))
        return pairs

    def contains(self, key):
        """Does this configuration contain a given key?"""
        return self._jconf.contains(key)

    def toDebugString(self):
        """
        Returns a printable version of the configuration, as a list of
        key=value pairs, one per line.
        """
        return self._jconf.toDebugString()


def _test():
    import doctest
    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
    if failure_count:
        exit(-1)


if __name__ == "__main__":
    _test()