您可以使用 sc.addFile 上传文件,并使用 SparkFiles.get 获取这些文件在工作节点(worker)上的路径:
from pyspark import SparkFiles
# Create the SparkContext; the pyFiles entries are placeholders for the
# project's own Python files.
# NOTE(review): `SparkContext` and `conf` are not defined in the visible
# snippet -- presumably imported/built earlier in the file; confirm.
sc = SparkContext(conf=conf,
                  pyFiles=["All", "Python", "Files", "in", "your", "project"])

# Assuming both files are in your working directory
sc.addFile("nonbreaking_prefix.en")
sc.addFile("tokenizer.perl")
def classifier(path, content):
    """Resolve the files registered with ``sc.addFile`` and read one of them.

    Prints the local path of ``tokenizer.perl`` and loads every line of
    ``nonbreaking_prefix.en`` into a list.

    NOTE(review): ``path`` and ``content`` are not used by the visible body,
    and ``lines`` is neither used nor returned -- the snippet looks like an
    excerpt; confirm against the full function.
    """
    # Get path for uploaded files.
    # A single-argument print(...) prints the same on Python 2 and Python 3.
    print(SparkFiles.get("tokenizer.perl"))

    with open(SparkFiles.get("nonbreaking_prefix.en")) as fr:
        # Keep the lines exactly as read (trailing newlines included).
        lines = list(fr)