 import sys
 import tempfile
 
-
-from apache_beam import utils
 from apache_beam import version as beam_version
 from apache_beam.internal import pickler
+from apache_beam.io.filesystems_util import get_filesystem
 from apache_beam.runners.dataflow.internal import names
 from apache_beam.utils import processes
 from apache_beam.utils.pipeline_options import GoogleCloudOptions
@@ -158,6 +157,7 @@ def _stage_extra_packages(extra_packages, staging_location, temp_dir,
       name patterns.
   """
   resources = []
+  staging_filesystem = get_filesystem(staging_location)
   staging_temp_dir = None
   local_packages = []
   for package in extra_packages:
@@ -190,13 +190,14 @@ def _stage_extra_packages(extra_packages, staging_location, temp_dir,
       local_packages.append(package)
 
   if staging_temp_dir:
+    temp_fs = get_filesystem(staging_temp_dir)
     local_packages.extend(
-        [utils.path.join(staging_temp_dir, f) for f in os.listdir(
+        [temp_fs.join(staging_temp_dir, f) for f in os.listdir(
             staging_temp_dir)])
 
   for package in local_packages:
     basename = os.path.basename(package)
-    staged_path = utils.path.join(staging_location, basename)
+    staged_path = staging_filesystem.join(staging_location, basename)
     file_copy(package, staged_path)
     resources.append(basename)
   # Create a file containing the list of extra packages and stage it.
@@ -209,7 +210,7 @@ def _stage_extra_packages(extra_packages, staging_location, temp_dir,
   with open(os.path.join(temp_dir, EXTRA_PACKAGES_FILE), 'wt') as f:
     for package in local_packages:
       f.write('%s\n' % os.path.basename(package))
-  staged_path = utils.path.join(staging_location, EXTRA_PACKAGES_FILE)
+  staged_path = staging_filesystem.join(staging_location, EXTRA_PACKAGES_FILE)
   # Note that the caller of this function is responsible for deleting the
   # temporary folder where all temp files are created, including this one.
   file_copy(os.path.join(temp_dir, EXTRA_PACKAGES_FILE), staged_path)
@@ -284,13 +285,15 @@ def stage_job_resources(
     raise RuntimeError(
         'The --temp_location option must be specified.')
 
+  filesystem = get_filesystem(google_cloud_options.staging_location)
+
   # Stage a requirements file if present.
   if setup_options.requirements_file is not None:
     if not os.path.isfile(setup_options.requirements_file):
       raise RuntimeError('The file %s cannot be found. It was specified in the '
                          '--requirements_file command line option.' %
                          setup_options.requirements_file)
-    staged_path = utils.path.join(google_cloud_options.staging_location,
+    staged_path = filesystem.join(google_cloud_options.staging_location,
                                   REQUIREMENTS_FILE)
     file_copy(setup_options.requirements_file, staged_path)
     resources.append(REQUIREMENTS_FILE)
@@ -305,7 +308,7 @@ def stage_job_resources(
     populate_requirements_cache(
         setup_options.requirements_file, requirements_cache_path)
     for pkg in glob.glob(os.path.join(requirements_cache_path, '*')):
-      file_copy(pkg, utils.path.join(google_cloud_options.staging_location,
+      file_copy(pkg, filesystem.join(google_cloud_options.staging_location,
                                      os.path.basename(pkg)))
       resources.append(os.path.basename(pkg))
 
@@ -324,7 +327,7 @@ def stage_job_resources(
           'setup.py instead of %s' % setup_options.setup_file)
     tarball_file = _build_setup_package(setup_options.setup_file, temp_dir,
                                         build_setup_args)
-    staged_path = utils.path.join(google_cloud_options.staging_location,
+    staged_path = filesystem.join(google_cloud_options.staging_location,
                                   WORKFLOW_TARBALL_FILE)
     file_copy(tarball_file, staged_path)
     resources.append(WORKFLOW_TARBALL_FILE)
@@ -344,7 +347,7 @@ def stage_job_resources(
     pickled_session_file = os.path.join(temp_dir,
                                         names.PICKLED_MAIN_SESSION_FILE)
     pickler.dump_session(pickled_session_file)
-    staged_path = utils.path.join(google_cloud_options.staging_location,
+    staged_path = filesystem.join(google_cloud_options.staging_location,
                                   names.PICKLED_MAIN_SESSION_FILE)
     file_copy(pickled_session_file, staged_path)
     resources.append(names.PICKLED_MAIN_SESSION_FILE)
@@ -359,7 +362,7 @@ def stage_job_resources(
     else:
       stage_tarball_from_remote_location = False
 
-    staged_path = utils.path.join(google_cloud_options.staging_location,
+    staged_path = filesystem.join(google_cloud_options.staging_location,
                                   names.DATAFLOW_SDK_TARBALL_FILE)
     if stage_tarball_from_remote_location:
       # If --sdk_location is not specified then the appropriate package
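A minimal usage sketch (not part of this change) of the pattern the diff introduces: resolve a filesystem for the staging location once via get_filesystem(), then build staged paths with its join() instead of utils.path.join. The staging bucket below is hypothetical.

    from apache_beam.io.filesystems_util import get_filesystem

    # Hypothetical staging location; any path understood by get_filesystem works.
    staging_location = 'gs://my-bucket/staging'
    filesystem = get_filesystem(staging_location)

    # Build a staged path with the filesystem's join(), as the diff does.
    staged_path = filesystem.join(staging_location, 'requirements.txt')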