from pyspark.sql.functions import col, avg


class SparkStructuredStreamingProcessor:
    """Compute simple aggregates over batches of rows with the Spark DataFrame API.

    NOTE(review): this block was reconstructed from a line-number-mangled,
    out-of-order paste. The original `process_data` body was truncated right
    after the `createDataFrame` call; the aggregation below follows the
    original comments' stated intent ("calculate the average value of the
    data") — confirm against the full source.
    """

    def __init__(self, spark):
        # spark: an active SparkSession used to build DataFrames.
        self.spark = spark

    def process_data(self, data):
        """Return the average of the single-column rows in *data*.

        data: an iterable of one-element rows, e.g. [(1,), (2,), (3,)].
        """
        df = self.spark.createDataFrame(data, ["value"])
        # Aggregate to a single row, then pull out the scalar average.
        return df.agg(avg(col("value")).alias("avg_value")).collect()[0]["avg_value"]
from pyspark.streaming import StreamingContext


class SparkStreamingProcessor:
    """Compute simple aggregates over batches of data via Spark RDDs.

    NOTE(review): reconstructed from a line-number-mangled, out-of-order
    paste. The original `process_data` body was truncated right after the
    `parallelize` call; the `mean()` below follows the original comments'
    stated intent ("calculate the average value of the data") — confirm
    against the full source.
    """

    def __init__(self, sc):
        # sc: an active SparkContext. NOTE(review): despite the class name,
        # the visible code uses sc.parallelize (the RDD API), not a
        # StreamingContext — verify which was intended.
        self.sc = sc

    def process_data(self, data):
        """Distribute *data* as an RDD and return the average value."""
        stream = self.sc.parallelize(data)
        # RDD.mean() computes the arithmetic average in a single pass.
        return stream.mean()
from collections import deque


class StreamingAverageProcessor:
    """Maintain a bounded sliding window of samples and report their average.

    NOTE(review): reconstructed from a line-number-mangled paste. The class
    header and the tail of `process_data` were missing, so the class name is
    provisional (restore the original name if known) and the running-average
    return follows the original comments' stated intent ("calculate the
    average value of the data").
    """

    def __init__(self, buffer_size=1000):
        # deque(maxlen=...) drops the oldest sample automatically once the
        # window is full, giving an O(1) sliding window.
        self.buffer = deque(maxlen=buffer_size)

    def process_data(self, data):
        """Append *data* (a numeric sample) to the window; return the running average."""
        self.buffer.append(data)
        return sum(self.buffer) / len(self.buffer)
A local variable with the same name hides the variable defined in the outer scope, making the outer variable inaccessible and potentially confusing readers. For example:
filename = 'myfile.txt'
def read_file(filename):  # NOTE: this parameter shadows the global `filename`
    """Return every line of *filename* as a list (newlines preserved)."""
    with open(filename) as fh:
        return fh.readlines()
FILENAME = 'myfile.txt' # renamed global to UPPER_CASE as convention
def read_file(filename):
    """Read *filename* and return its contents as a list of lines."""
    # No shadowing here: the module-level constant is now FILENAME.
    with open(filename) as source:
        lines = source.readlines()
    return lines
Another common culprit is using the same parameter name inside a function as the name of a global variable the program relies on. For example:
def run_app(app):
    # This parameter `app` shadows the module-level `app` defined below.
    app.run()
if __name__ == '__main__':
    app = MyApp()  # This is a global variable!
    run_app(app)
To avoid redefining the global, consider not defining `app` at module level, but inside a `main()` function instead:
def run_app(app):
    # There is no longer a global `app` variable for this parameter to shadow.
    app.run()
def main():
    # `app` is now local to main(), not a module-level global.
    app = MyApp()
    run_app(app)
if __name__ == '__main__':
    main()  # entry point: all state lives inside main()'s scope