diff --git a/dataproc/README.md b/dataproc/README.md
index 98622be7dc1..150e90cc3f6 100644
--- a/dataproc/README.md
+++ b/dataproc/README.md
@@ -1,84 +1,3 @@
-# Cloud Dataproc API Examples
+These samples have been moved to the googleapis/python-dataproc repository:
 
-[![Open in Cloud Shell][shell_img]][shell_link]
-
-[shell_img]: http://gstatic.com/cloudssh/images/open-btn.png
-[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dataproc/README.md
-
-Sample command-line programs for interacting with the Cloud Dataproc API.
-
-See [the tutorial on the using the Dataproc API with the Python client
-library](https://cloud.google.com/dataproc/docs/tutorials/python-library-example)
-for information on a walkthrough you can run to try out the Cloud Dataproc API sample code.
-
-Note that while this sample demonstrates interacting with Dataproc via the API, the functionality demonstrated here could also be accomplished using the Cloud Console or the gcloud CLI.
-
-`list_clusters.py` is a simple command-line program to demonstrate connecting to the Cloud Dataproc API and listing the clusters in a region.
-
-`submit_job_to_cluster.py` demonstrates how to create a cluster, submit the
-`pyspark_sort.py` job, download the output from Google Cloud Storage, and output the result.
-
-`single_job_workflow.py` uses the Cloud Dataproc InstantiateInlineWorkflowTemplate API to create an ephemeral cluster, run a job, then delete the cluster with one API request.
-
-`pyspark_sort.py_gcs` is the same as `pyspark_sort.py` but demonstrates
- reading from a GCS bucket.
-
-## Prerequisites to run locally:
-
-* [pip](https://pypi.python.org/pypi/pip)
-
-Go to the [Google Cloud Console](https://console.cloud.google.com).
-
-Under API Manager, search for the Google Cloud Dataproc API and enable it.
-
-## Set Up Your Local Dev Environment
-
-To install, run the following commands. If you want to use  [virtualenv](https://virtualenv.readthedocs.org/en/latest/)
-(recommended), run the commands within a virtualenv.
-
-    * pip install -r requirements.txt
-
-## Authentication
-
-Please see the [Google cloud authentication guide](https://cloud.google.com/docs/authentication/).
-The recommended approach to running these samples is a Service Account with a JSON key.
-
-## Environment Variables
-
-Set the following environment variables:
-
-    GOOGLE_CLOUD_PROJECT=your-project-id
-    REGION=us-central1 # or your region
-    CLUSTER_NAME=waprin-spark7
-    ZONE=us-central1-b
-
-## Running the samples
-
-To run list_clusters.py:
-
-    python list_clusters.py $GOOGLE_CLOUD_PROJECT --region=$REGION
-
-`submit_job_to_cluster.py` can create the Dataproc cluster or use an existing cluster. To create a cluster before running the code, you can use the [Cloud Console](console.cloud.google.com) or run:
-
-    gcloud dataproc clusters create your-cluster-name
-
-To run submit_job_to_cluster.py, first create a GCS bucket (used by Cloud Dataproc to stage files) from the Cloud Console or with gsutil:
-
-    gsutil mb gs://<your-staging-bucket-name>
-
-Next, set the following environment variables:
-
-    BUCKET=your-staging-bucket
-    CLUSTER=your-cluster-name
-
-Then, if you want to use an existing cluster, run:
-
-    python submit_job_to_cluster.py --project_id=$GOOGLE_CLOUD_PROJECT --zone=us-central1-b --cluster_name=$CLUSTER --gcs_bucket=$BUCKET
-
-Alternatively, to create a new cluster, which will be deleted at the end of the job, run:
-
-    python submit_job_to_cluster.py --project_id=$GOOGLE_CLOUD_PROJECT --zone=us-central1-b --cluster_name=$CLUSTER --gcs_bucket=$BUCKET --create_new_cluster
-
-The script will setup a cluster, upload the PySpark file, submit the job, print the result, then, if it created the cluster, delete the cluster.
-
-Optionally, you can add the `--pyspark_file` argument to change from the default `pyspark_sort.py` included in this script to a new script.
+<https://github.com/googleapis/python-dataproc/tree/master/samples>
\ No newline at end of file
diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py
deleted file mode 100644
index b4d63d2e13f..00000000000
--- a/dataproc/create_cluster.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This sample walks a user through creating a Cloud Dataproc cluster using
-# the Python client library.
-#
-# This script can be run on its own:
-#   python create_cluster.py ${PROJECT_ID} ${REGION} ${CLUSTER_NAME}
-
-
-import sys
-
-# [START dataproc_create_cluster]
-from google.cloud import dataproc_v1 as dataproc
-
-
-def create_cluster(project_id, region, cluster_name):
-    """This sample walks a user through creating a Cloud Dataproc cluster
-       using the Python client library.
-
-       Args:
-           project_id (string): Project to use for creating resources.
-           region (string): Region where the resources should live.
-           cluster_name (string): Name to use for creating a cluster.
-    """
-
-    # Create a client with the endpoint set to the desired cluster region.
-    cluster_client = dataproc.ClusterControllerClient(client_options={
-        'api_endpoint': f'{region}-dataproc.googleapis.com:443',
-    })
-
-    # Create the cluster config.
-    cluster = {
-        'project_id': project_id,
-        'cluster_name': cluster_name,
-        'config': {
-            'master_config': {
-                'num_instances': 1,
-                'machine_type_uri': 'n1-standard-1'
-            },
-            'worker_config': {
-                'num_instances': 2,
-                'machine_type_uri': 'n1-standard-1'
-            }
-        }
-    }
-
-    # Create the cluster.
-    operation = cluster_client.create_cluster(project_id, region, cluster)
-    result = operation.result()
-
-    # Output a success message.
-    print(f'Cluster created successfully: {result.cluster_name}')
-    # [END dataproc_create_cluster]
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 4:
-        sys.exit('python create_cluster.py project_id region cluster_name')
-
-    project_id = sys.argv[1]
-    region = sys.argv[2]
-    cluster_name = sys.argv[3]
-    create_cluster(project_id, region, cluster_name)
diff --git a/dataproc/create_cluster_test.py b/dataproc/create_cluster_test.py
deleted file mode 100644
index 6b1d6806100..00000000000
--- a/dataproc/create_cluster_test.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import uuid
-
-from google.cloud import dataproc_v1 as dataproc
-import pytest
-
-import create_cluster
-
-
-PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT']
-REGION = 'us-central1'
-CLUSTER_NAME = 'py-cc-test-{}'.format(str(uuid.uuid4()))
-
-
-@pytest.fixture(autouse=True)
-def teardown():
-    yield
-
-    cluster_client = dataproc.ClusterControllerClient(client_options={
-        'api_endpoint': f'{REGION}-dataproc.googleapis.com:443'
-    })
-    # Client library function
-    operation = cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME)
-    # Wait for cluster to delete
-    operation.result()
-
-
-def test_cluster_create(capsys):
-    # Wrapper function for client library function
-    create_cluster.create_cluster(PROJECT_ID, REGION, CLUSTER_NAME)
-
-    out, _ = capsys.readouterr()
-    assert CLUSTER_NAME in out
diff --git a/dataproc/dataproc_e2e_donttest.py b/dataproc/dataproc_e2e_donttest.py
deleted file mode 100644
index 44cc03bfd42..00000000000
--- a/dataproc/dataproc_e2e_donttest.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" Integration tests for Dataproc samples.
-
-Creates a Dataproc cluster, uploads a pyspark file to Google Cloud Storage,
-submits a job to Dataproc that runs the pyspark file, then downloads
-the output logs from Cloud Storage and verifies the expected output."""
-
-import os
-
-import submit_job_to_cluster
-
-PROJECT = os.environ['GOOGLE_CLOUD_PROJECT']
-BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
-CLUSTER_NAME = 'testcluster3'
-ZONE = 'us-central1-b'
-
-
-def test_e2e():
-    output = submit_job_to_cluster.main(
-        PROJECT, ZONE, CLUSTER_NAME, BUCKET)
-    assert b"['Hello,', 'dog', 'elephant', 'panther', 'world!']" in output
diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py
deleted file mode 100644
index f9358376f9f..00000000000
--- a/dataproc/instantiate_inline_workflow_template.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This sample walks a user through instantiating an inline
-# workflow for Cloud Dataproc using the Python client library.
-#
-# This script can be run on its own:
-#   python instantiate_inline_workflow_template.py ${PROJECT_ID} ${REGION}
-
-
-import sys
-
-# [START dataproc_instantiate_inline_workflow_template]
-from google.cloud import dataproc_v1 as dataproc
-
-
-def instantiate_inline_workflow_template(project_id, region):
-    """This sample walks a user through submitting a workflow
-       for a Cloud Dataproc using the Python client library.
-
-       Args:
-           project_id (string): Project to use for running the workflow.
-           region (string): Region where the workflow resources should live.
-    """
-
-    # Create a client with the endpoint set to the desired region.
-    workflow_template_client = dataproc.WorkflowTemplateServiceClient(
-        client_options={
-            'api_endpoint': f'{region}-dataproc.googleapis.com:443'
-        }
-    )
-
-    parent = workflow_template_client.region_path(project_id, region)
-
-    template = {
-        'jobs': [
-            {
-                'hadoop_job': {
-                    'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/'
-                    'hadoop-mapreduce-examples.jar',
-                    'args': [
-                        'teragen',
-                        '1000',
-                        'hdfs:///gen/'
-                    ]
-                },
-                'step_id': 'teragen'
-            },
-            {
-                'hadoop_job': {
-                    'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/'
-                    'hadoop-mapreduce-examples.jar',
-                    'args': [
-                        'terasort',
-                        'hdfs:///gen/',
-                        'hdfs:///sort/'
-                    ]
-                },
-                'step_id': 'terasort',
-                'prerequisite_step_ids': [
-                    'teragen'
-                ]
-            }],
-        'placement': {
-            'managed_cluster': {
-                'cluster_name': 'my-managed-cluster',
-                'config': {
-                    'gce_cluster_config': {
-                        # Leave 'zone_uri' empty for 'Auto Zone Placement'
-                        # 'zone_uri': ''
-                        'zone_uri': 'us-central1-a'
-                    }
-                }
-            }
-        }
-    }
-
-    # Submit the request to instantiate the workflow from an inline template.
-    operation = workflow_template_client.instantiate_inline_workflow_template(
-        parent, template
-    )
-    operation.result()
-
-    # Output a success message.
-    print('Workflow ran successfully.')
-    # [END dataproc_instantiate_inline_workflow_template]
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        sys.exit('python instantiate_inline_workflow_template.py '
-                 + 'project_id region')
-
-    project_id = sys.argv[1]
-    region = sys.argv[2]
-    instantiate_inline_workflow_template(project_id, region)
diff --git a/dataproc/instantiate_inline_workflow_template_test.py b/dataproc/instantiate_inline_workflow_template_test.py
deleted file mode 100644
index 22673e4ee08..00000000000
--- a/dataproc/instantiate_inline_workflow_template_test.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import instantiate_inline_workflow_template
-
-
-PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT']
-REGION = 'us-central1'
-
-
-def test_workflows(capsys):
-    # Wrapper function for client library function
-    instantiate_inline_workflow_template.instantiate_inline_workflow_template(
-        PROJECT_ID, REGION
-    )
-
-    out, _ = capsys.readouterr()
-    assert "successfully" in out
diff --git a/dataproc/list_clusters.py b/dataproc/list_clusters.py
deleted file mode 100644
index 1639c413468..00000000000
--- a/dataproc/list_clusters.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Sample command-line program to list Cloud Dataproc clusters in a region.
-
-Example usage:
-python list_clusters.py --project_id=my-project-id --region=global
-
-"""
-import argparse
-
-from google.cloud import dataproc_v1
-from google.cloud.dataproc_v1.gapic.transports import (
-    cluster_controller_grpc_transport)
-
-
-# [START dataproc_list_clusters]
-def list_clusters(dataproc, project, region):
-    """List the details of clusters in the region."""
-    for cluster in dataproc.list_clusters(project, region):
-        print(('{} - {}'.format(cluster.cluster_name,
-                                cluster.status.State.Name(
-                                    cluster.status.state))))
-# [END dataproc_list_clusters]
-
-
-def main(project_id, region):
-
-    if region == 'global':
-        # Use the default gRPC global endpoints.
-        dataproc_cluster_client = dataproc_v1.ClusterControllerClient()
-    else:
-        # Use a regional gRPC endpoint. See:
-        # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints
-        client_transport = (
-            cluster_controller_grpc_transport.ClusterControllerGrpcTransport(
-                address='{}-dataproc.googleapis.com:443'.format(region)))
-        dataproc_cluster_client = dataproc_v1.ClusterControllerClient(
-            client_transport)
-
-    list_clusters(dataproc_cluster_client, project_id, region)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=(
-            argparse.RawDescriptionHelpFormatter))
-    parser.add_argument(
-        '--project_id', help='Project ID to access.', required=True)
-    parser.add_argument(
-        '--region', help='Region of clusters to list.', required=True)
-
-    args = parser.parse_args()
-    main(args.project_id, args.region)
diff --git a/dataproc/pyspark_sort.py b/dataproc/pyspark_sort.py
deleted file mode 100644
index 0ce2350ad02..00000000000
--- a/dataproc/pyspark_sort.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" Sample pyspark script to be uploaded to Cloud Storage and run on
-Cloud Dataproc.
-
-Note this file is not intended to be run directly, but run inside a PySpark
-environment.
-"""
-
-# [START dataproc_pyspark_sort]
-import pyspark
-
-sc = pyspark.SparkContext()
-rdd = sc.parallelize(['Hello,', 'world!', 'dog', 'elephant', 'panther'])
-words = sorted(rdd.collect())
-print(words)
-# [END dataproc_pyspark_sort]
diff --git a/dataproc/pyspark_sort_gcs.py b/dataproc/pyspark_sort_gcs.py
deleted file mode 100644
index f1961c378d3..00000000000
--- a/dataproc/pyspark_sort_gcs.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" Sample pyspark script to be uploaded to Cloud Storage and run on
-Cloud Dataproc.
-
-Note this file is not intended to be run directly, but run inside a PySpark
-environment.
-
-This file demonstrates how to read from a GCS bucket. See README.md for more
-information.
-"""
-
-# [START dataproc_pyspark_sort_gcs]
-import pyspark
-
-sc = pyspark.SparkContext()
-rdd = sc.textFile('gs://path-to-your-GCS-file')
-print(sorted(rdd.collect()))
-# [END dataproc_pyspark_sort_gcs]
diff --git a/dataproc/python-api-walkthrough.md b/dataproc/python-api-walkthrough.md
deleted file mode 100644
index 1a8d436f720..00000000000
--- a/dataproc/python-api-walkthrough.md
+++ /dev/null
@@ -1,170 +0,0 @@
-# Use the Python Client Library to call Dataproc APIs
-
-Estimated completion time: <walkthrough-tutorial-duration duration="5"></walkthrough-tutorial-duration>
-
-## Overview
-
-This [Cloud Shell](https://cloud.google.com/shell/docs/) walkthrough leads you
-through the steps to use the
-[Google Cloud Client Libraries for Python](https://googleapis.github.io/google-cloud-python/latest/dataproc/index.html)
-to programmatically interact with [Dataproc](https://cloud.google.com/dataproc/docs/).
-
-As you follow this walkthrough, you run Python code that calls
-[Dataproc gRPC APIs](https://cloud.google.com/dataproc/docs/reference/rpc/)
-to:
-
-* create a Dataproc cluster
-* submit a small PySpark word sort job to run on the cluster
-* get job status
-* tear down the cluster after job completion
-
-## Using the walkthrough
-
-The `submit_job_to_cluster.py file` used in this walkthrough is opened in the
-Cloud Shell editor when you launch the walkthrough. You can view
-the code as your follow the walkthrough steps.
-
-**For more information**: See [Dataproc&rarr;Use the Python Client Library](https://cloud.google.com/dataproc/docs/tutorials/python-library-example) for
-an explanation of how the code works.
-
-**To reload this walkthrough:** Run the following command from the
-`~/python-docs-samples/dataproc` directory in Cloud Shell:
-
-    cloudshell launch-tutorial python-api-walkthrough.md
-
-**To copy and run commands**: Click the "Paste in Cloud Shell" button
-  (<walkthrough-cloud-shell-icon></walkthrough-cloud-shell-icon>)
-  on the side of a code box, then press `Enter` to run the command.
-
-## Prerequisites (1)
-
-1. Create or select a Google Cloud Platform project to use for this tutorial.
-    * <walkthrough-project-billing-setup permissions=""></walkthrough-project-billing-setup>
-
-1. Click the link below to enable the Dataproc, Compute Engine, and Cloud Storage APIs
-   in a separate GCP console tab in your browser.
-    
-   **Note:** After you select your project and enable the APIs, return to this tutorial by clicking
-   on the **Cloud Shell** tab in your browser.
-    
-   * [Enable APIs](https://console.cloud.google.com/flows/enableapi?apiid=dataproc,compute_component,storage-component.googleapis.com&redirect=https://console.cloud.google.com)
-
-## Prerequisites (2)
-
-1. This walkthrough uploads a PySpark file (`pyspark_sort.py`) to a
-   [Cloud Storage bucket](https://cloud.google.com/storage/docs/key-terms#buckets) in
-   your project.
-   * You can use the [Cloud Storage browser page](https://console.cloud.google.com/storage/browser)
-   in Google Cloud Platform Console to view existing buckets in your project.
-
-   &nbsp;&nbsp;&nbsp;&nbsp;**OR**
-
-   * To create a new bucket, run the following command. Your bucket name must be unique.
-   ```bash
-   gsutil mb -p {{project-id}} gs://your-bucket-name
-   ```
-
-1.  Set environment variables.
-
-    * Set the name of your bucket.
-    ```bash
-    BUCKET=your-bucket-name
-    ```
-
-## Prerequisites (3)
-
-1. Set up a Python
-   [virtual environment](https://virtualenv.readthedocs.org/en/latest/)
-   in Cloud Shell.
-
-    * Create the virtual environment.
-    ```bash
-    virtualenv ENV
-    ```
-    * Activate the virtual environment.
-    ```bash
-    source ENV/bin/activate
-    ```
-
-1. Install library dependencies in Cloud Shell.
-    ```bash
-    pip install -r requirements.txt
-    ```
-
-## Create a cluster and submit a job
-
-1. Set a name for your new cluster.
-    ```bash
-    CLUSTER=new-cluster-name
-    ```
-
-1. Set a [zone](https://cloud.google.com/compute/docs/regions-zones/#available)
-   where your new cluster will be located. You can change the
-   "us-central1-a" zone that is pre-set in the following command.
-    ```bash
-    ZONE=us-central1-a
-    ```
-
-1. Run `submit_job.py` with the `--create_new_cluster` flag
-   to create a new cluster and submit the `pyspark_sort.py` job
-   to the cluster.
-
-    ```bash
-    python submit_job_to_cluster.py \
-    --project_id={{project-id}} \
-    --cluster_name=$CLUSTER \
-    --zone=$ZONE \
-    --gcs_bucket=$BUCKET \
-    --create_new_cluster
-    ```
-
-## Job Output
-
-Job output in Cloud Shell shows cluster creation, job submission,
-    job completion, and then tear-down of the cluster.
-
-     ...
-     Creating cluster...
-     Cluster created.
-     Uploading pyspark file to Cloud Storage.
-     new-cluster-name - RUNNING
-     Submitted job ID ...
-     Waiting for job to finish...
-     Job finished.
-     Downloading output file
-     .....
-     ['Hello,', 'dog', 'elephant', 'panther', 'world!']
-     ...
-     Tearing down cluster
-     ```
-## Congratulations on Completing the Walkthrough!
-<walkthrough-conclusion-trophy></walkthrough-conclusion-trophy>
-
----
-
-### Next Steps:
-
-* **View job details from the Console.** View job details by selecting the
-   PySpark job from the Dataproc 
-=
-   [Jobs page](https://console.cloud.google.com/dataproc/jobs)
-   in the Google Cloud Platform Console.
-
-* **Delete resources used in the walkthrough.**
-   The `submit_job_to_cluster.py` job deletes the cluster that it created for this
-   walkthrough.
-
-   If you created a bucket to use for this walkthrough,
-   you can run the following command to delete the
-   Cloud Storage bucket (the bucket must be empty).
-   ```bash
-   gsutil rb gs://$BUCKET
-   ```
-   You can run the following command to delete the bucket **and all
-   objects within it. Note: the deleted objects cannot be recovered.**
-   ```bash
-   gsutil rm -r gs://$BUCKET
-   ```
-
-* **For more information.** See the [Dataproc documentation](https://cloud.google.com/dataproc/docs/)
-   for API reference and product feature information.
diff --git a/dataproc/quickstart/quickstart.py b/dataproc/quickstart/quickstart.py
deleted file mode 100644
index 4159e281520..00000000000
--- a/dataproc/quickstart/quickstart.py
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# [START dataproc_quickstart]
-"""
-This quickstart sample walks a user through creating a Cloud Dataproc
-cluster, submitting a PySpark job from Google Cloud Storage to the
-cluster, reading the output of the job and deleting the cluster, all
-using the Python client library.
-
-Usage:
-    python quickstart.py --project_id <PROJECT_ID> --region <REGION> \
-        --cluster_name <CLUSTER_NAME> --job_file_path <GCS_JOB_FILE_PATH>
-"""
-
-import argparse
-import time
-
-from google.cloud import dataproc_v1 as dataproc
-from google.cloud import storage
-
-
-def quickstart(project_id, region, cluster_name, job_file_path):
-    # Create the cluster client.
-    cluster_client = dataproc.ClusterControllerClient(client_options={
-        'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
-    })
-
-    # Create the cluster config.
-    cluster = {
-        'project_id': project_id,
-        'cluster_name': cluster_name,
-        'config': {
-            'master_config': {
-                'num_instances': 1,
-                'machine_type_uri': 'n1-standard-1'
-            },
-            'worker_config': {
-                'num_instances': 2,
-                'machine_type_uri': 'n1-standard-1'
-            }
-        }
-    }
-
-    # Create the cluster.
-    operation = cluster_client.create_cluster(project_id, region, cluster)
-    result = operation.result()
-
-    print('Cluster created successfully: {}'.format(result.cluster_name))
-
-    # Create the job client.
-    job_client = dataproc.JobControllerClient(client_options={
-        'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
-    })
-
-    # Create the job config.
-    job = {
-        'placement': {
-            'cluster_name': cluster_name
-        },
-        'pyspark_job': {
-            'main_python_file_uri': job_file_path
-        }
-    }
-
-    job_response = job_client.submit_job(project_id, region, job)
-    job_id = job_response.reference.job_id
-
-    print('Submitted job \"{}\".'.format(job_id))
-
-    # Termimal states for a job.
-    terminal_states = {
-        dataproc.types.JobStatus.ERROR,
-        dataproc.types.JobStatus.CANCELLED,
-        dataproc.types.JobStatus.DONE
-    }
-
-    # Create a timeout such that the job gets cancelled if not in a
-    # terminal state after a fixed period of time.
-    timeout_seconds = 600
-    time_start = time.time()
-
-    # Wait for the job to complete.
-    while job_response.status.state not in terminal_states:
-        if time.time() > time_start + timeout_seconds:
-            job_client.cancel_job(project_id, region, job_id)
-            print('Job {} timed out after threshold of {} seconds.'.format(
-                job_id, timeout_seconds))
-
-        # Poll for job termination once a second.
-        time.sleep(1)
-        job_response = job_client.get_job(project_id, region, job_id)
-
-    # Cloud Dataproc job output gets saved to a GCS bucket allocated to it.
-    cluster_info = cluster_client.get_cluster(
-        project_id, region, cluster_name)
-
-    storage_client = storage.Client()
-    bucket = storage_client.get_bucket(cluster_info.config.config_bucket)
-    output_blob = (
-        'google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000'
-        .format(cluster_info.cluster_uuid, job_id))
-    output = bucket.blob(output_blob).download_as_string()
-
-    print('Job {} finished with state {}:\n{}'.format(
-        job_id,
-        job_response.status.State.Name(job_response.status.state),
-        output))
-
-    # Delete the cluster once the job has terminated.
-    operation = cluster_client.delete_cluster(project_id, region, cluster_name)
-    operation.result()
-
-    print('Cluster {} successfully deleted.'.format(cluster_name))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    parser.add_argument(
-        '--project_id',
-        type=str,
-        required=True,
-        help='Project to use for creating resources.')
-    parser.add_argument(
-        '--region',
-        type=str,
-        required=True,
-        help='Region where the resources should live.')
-    parser.add_argument(
-        '--cluster_name',
-        type=str,
-        required=True,
-        help='Name to use for creating a cluster.')
-    parser.add_argument(
-        '--job_file_path',
-        type=str,
-        required=True,
-        help='Job in GCS to execute against the cluster.')
-
-    args = parser.parse_args()
-    quickstart(args.project_id, args.region,
-               args.cluster_name, args.job_file_path)
-# [END dataproc_quickstart]
diff --git a/dataproc/quickstart/quickstart_test.py b/dataproc/quickstart/quickstart_test.py
deleted file mode 100644
index 3e17f6fa3e5..00000000000
--- a/dataproc/quickstart/quickstart_test.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import uuid
-
-from google.cloud import dataproc_v1 as dataproc
-from google.cloud import storage
-import pytest
-
-import quickstart
-
-
-PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT']
-REGION = 'us-central1'
-CLUSTER_NAME = 'py-qs-test-{}'.format(str(uuid.uuid4()))
-STAGING_BUCKET = 'py-dataproc-qs-bucket-{}'.format(str(uuid.uuid4()))
-JOB_FILE_NAME = 'sum.py'
-JOB_FILE_PATH = 'gs://{}/{}'.format(STAGING_BUCKET, JOB_FILE_NAME)
-SORT_CODE = (
-    "import pyspark\n"
-    "sc = pyspark.SparkContext()\n"
-    "rdd = sc.parallelize((1,2,3,4,5))\n"
-    "sum = rdd.reduce(lambda x, y: x + y)\n"
-)
-
-
-@pytest.fixture(autouse=True)
-def setup_teardown():
-    storage_client = storage.Client()
-    bucket = storage_client.create_bucket(STAGING_BUCKET)
-    blob = bucket.blob(JOB_FILE_NAME)
-    blob.upload_from_string(SORT_CODE)
-
-    yield
-
-    cluster_client = dataproc.ClusterControllerClient(client_options={
-      'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION)
-    })
-
-    # The quickstart sample deletes the cluster, but if the test fails
-    # before cluster deletion occurs, it can be manually deleted here.
-    clusters = cluster_client.list_clusters(PROJECT_ID, REGION)
-
-    for cluster in clusters:
-        if cluster.cluster_name == CLUSTER_NAME:
-            cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME)
-
-    blob.delete()
-    bucket.delete()
-
-
-def test_quickstart(capsys):
-    quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH)
-    out, _ = capsys.readouterr()
-
-    assert 'Cluster created successfully' in out
-    assert 'Submitted job' in out
-    assert 'finished with state DONE:' in out
-    assert 'successfully deleted' in out
diff --git a/dataproc/requirements-test.txt b/dataproc/requirements-test.txt
deleted file mode 100644
index 7e460c8c866..00000000000
--- a/dataproc/requirements-test.txt
+++ /dev/null
@@ -1 +0,0 @@
-pytest==6.0.1
diff --git a/dataproc/requirements.txt b/dataproc/requirements.txt
deleted file mode 100644
index 5dd9b1d8b1c..00000000000
--- a/dataproc/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-grpcio==1.31.0
-google-auth==1.20.1
-google-auth-httplib2==0.0.4
-google-cloud==0.34.0
-google-cloud-storage==1.30.0
-google-cloud-dataproc==1.1.1
diff --git a/dataproc/single_job_workflow.py b/dataproc/single_job_workflow.py
deleted file mode 100644
index b2754b06c1e..00000000000
--- a/dataproc/single_job_workflow.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/env python
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-r"""Sample Cloud Dataproc inline workflow to run a pyspark job on an ephermeral
-cluster.
-Example Usage to run the inline workflow on a managed cluster:
-python single_job_workflow.py --project_id=$PROJECT --gcs_bucket=$BUCKET \
-  --cluster_name=$CLUSTER --zone=$ZONE
-Example Usage to run the inline workflow on a global region managed cluster:
-python submit_job_to_cluster.py --project_id=$PROJECT --gcs_bucket=$BUCKET \
-  --cluster_name=$CLUSTER --zone=$ZONE --global_region
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import os
-
-from google.cloud import dataproc_v1
-from google.cloud import storage
-from google.cloud.dataproc_v1.gapic.transports import (
-    workflow_template_service_grpc_transport)
-
-
-DEFAULT_FILENAME = "pyspark_sort.py"
-waiting_callback = False
-
-
-def get_pyspark_file(pyspark_file=None):
-    if pyspark_file:
-        f = open(pyspark_file, "rb")
-        return f, os.path.basename(pyspark_file)
-    else:
-        """Gets the PySpark file from current directory."""
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        f = open(os.path.join(current_dir, DEFAULT_FILENAME), "rb")
-        return f, DEFAULT_FILENAME
-
-
-def get_region_from_zone(zone):
-    try:
-        region_as_list = zone.split("-")[:-1]
-        return "-".join(region_as_list)
-    except (AttributeError, IndexError, ValueError):
-        raise ValueError("Invalid zone provided, please check your input.")
-
-
-def upload_pyspark_file(project, bucket_name, filename, spark_file):
-    """Uploads the PySpark file in this directory to the configured input
-    bucket."""
-    print("Uploading pyspark file to Cloud Storage.")
-    client = storage.Client(project=project)
-    bucket = client.get_bucket(bucket_name)
-    blob = bucket.blob(filename)
-    blob.upload_from_file(spark_file)
-
-
-def run_workflow(dataproc, project, region, zone, bucket_name, filename,
-                 cluster_name):
-
-    parent = "projects/{}/regions/{}".format(project, region)
-    zone_uri = ("https://www.googleapis.com/compute/v1/projects/{}/zones/{}"
-                .format(project, zone))
-
-    workflow_data = {
-        "placement": {
-            "managed_cluster": {
-                "cluster_name": cluster_name,
-                "config": {
-                    "gce_cluster_config": {"zone_uri": zone_uri},
-                    "master_config": {
-                        "num_instances": 1,
-                        "machine_type_uri": "n1-standard-1",
-                    },
-                    "worker_config": {
-                        "num_instances": 2,
-                        "machine_type_uri": "n1-standard-1",
-                    },
-                },
-            }
-        },
-        "jobs": [
-            {
-                "pyspark_job": {
-                    "main_python_file_uri": "gs://{}/{}".format(
-                        bucket_name, filename)
-                },
-                "step_id": "pyspark-job",
-            }
-        ],
-    }
-
-    workflow = dataproc.instantiate_inline_workflow_template(parent,
-                                                             workflow_data)
-
-    workflow.add_done_callback(callback)
-    global waiting_callback
-    waiting_callback = True
-
-
-def callback(operation_future):
-    # Reset global when callback returns.
-    global waiting_callback
-    waiting_callback = False
-
-
-def wait_for_workflow_end():
-    """Wait for cluster creation."""
-    print("Waiting for workflow completion ...")
-    print("Workflow and job progress, and job driver output available from: "
-          "https://console.cloud.google.com/dataproc/workflows/")
-
-    while True:
-        if not waiting_callback:
-            print("Workflow completed.")
-            break
-
-
-def main(
-    project_id,
-    zone,
-    cluster_name,
-    bucket_name,
-    pyspark_file=None,
-    create_new_cluster=True,
-    global_region=True,
-):
-
-    # [START dataproc_get_workflow_template_client]
-    if global_region:
-        region = "global"
-        # Use the default gRPC global endpoints.
-        dataproc_workflow_client = dataproc_v1.WorkflowTemplateServiceClient()
-    else:
-        region = get_region_from_zone(zone)
-        # Use a regional gRPC endpoint. See:
-        # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints
-        client_transport = (workflow_template_service_grpc_transport
-                            .WorkflowTemplateServiceGrpcTransport(
-                                address="{}-dataproc.googleapis.com:443"
-                                .format(region)))
-        dataproc_workflow_client = dataproc_v1.WorkflowTemplateServiceClient(
-            client_transport
-        )
-    # [END dataproc_get_workflow_template_client]
-
-    try:
-        spark_file, spark_filename = get_pyspark_file(pyspark_file)
-        upload_pyspark_file(project_id, bucket_name, spark_filename,
-                            spark_file)
-
-        run_workflow(
-            dataproc_workflow_client,
-            project_id,
-            region,
-            zone,
-            bucket_name,
-            spark_filename,
-            cluster_name
-        )
-        wait_for_workflow_end()
-
-    finally:
-        spark_file.close()
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=(argparse
-                                              .RawDescriptionHelpFormatter))
-    parser.add_argument(
-        "--project_id", help="Project ID you want to access.", required=True
-    )
-    parser.add_argument(
-        "--zone", help="Zone to create clusters in/connect to", required=True
-    )
-    parser.add_argument(
-        "--cluster_name", help="Name of the cluster to create/connect to",
-        required=True
-    )
-    parser.add_argument(
-        "--gcs_bucket", help="Bucket to upload Pyspark file to", required=True
-    )
-    parser.add_argument(
-        "--pyspark_file", help="Pyspark filename. Defaults to pyspark_sort.py"
-    )
-    parser.add_argument("--global_region",
-                        action="store_true",
-                        help="If cluster is in the global region")
-
-    args = parser.parse_args()
-    main(
-        args.project_id,
-        args.zone,
-        args.cluster_name,
-        args.gcs_bucket,
-        args.pyspark_file,
-    )
diff --git a/dataproc/submit_job_to_cluster.py b/dataproc/submit_job_to_cluster.py
deleted file mode 100644
index 389cbec87aa..00000000000
--- a/dataproc/submit_job_to_cluster.py
+++ /dev/null
@@ -1,288 +0,0 @@
-#!/usr/bin/env python
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-r"""Sample command-line program to run a pyspark job on a new or existing
-cluster.
-
-Global region clusters are supported with --global_region flag.
-
-Example Usage to run the pyspark job on a new cluster:
-python submit_job_to_cluster.py --project_id=$PROJECT --gcs_bucket=$BUCKET \
-  --create_new_cluster --cluster_name=$CLUSTER --zone=$ZONE
-
-Example Usage to run the pyspark job on an existing global region cluster:
-python submit_job_to_cluster.py --project_id=$PROJECT --gcs_bucket=$BUCKET \
-  --global_region --cluster_name=$CLUSTER --zone=$ZONE
-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import os
-
-from google.cloud import dataproc_v1
-from google.cloud import storage
-from google.cloud.dataproc_v1.gapic.transports import (
-    cluster_controller_grpc_transport)
-from google.cloud.dataproc_v1.gapic.transports import (
-    job_controller_grpc_transport)
-
-
-DEFAULT_FILENAME = 'pyspark_sort.py'
-waiting_callback = False
-
-
-def get_pyspark_file(pyspark_file=None):
-    if pyspark_file:
-        f = open(pyspark_file, "rb")
-        return f, os.path.basename(pyspark_file)
-    else:
-        """Gets the PySpark file from current directory."""
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        f = open(os.path.join(current_dir, DEFAULT_FILENAME), "rb")
-        return f, DEFAULT_FILENAME
-
-
-def get_region_from_zone(zone):
-    try:
-        region_as_list = zone.split('-')[:-1]
-        return '-'.join(region_as_list)
-    except (AttributeError, IndexError, ValueError):
-        raise ValueError('Invalid zone provided, please check your input.')
-
-
-def upload_pyspark_file(project, bucket_name, filename, spark_file):
-    """Uploads the PySpark file in this directory to the configured input
-    bucket."""
-    print('Uploading pyspark file to Cloud Storage.')
-    client = storage.Client(project=project)
-    bucket = client.get_bucket(bucket_name)
-    blob = bucket.blob(filename)
-    blob.upload_from_file(spark_file)
-
-
-def download_output(project, cluster_id, output_bucket, job_id):
-    """Downloads the output file from Cloud Storage and returns it as a
-    string."""
-    print('Downloading output file.')
-    client = storage.Client(project=project)
-    bucket = client.get_bucket(output_bucket)
-    output_blob = (
-        ('google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000'.
-            format(cluster_id, job_id)))
-    return bucket.blob(output_blob).download_as_string()
-
-
-# [START dataproc_create_cluster]
-def create_cluster(dataproc, project, zone, region, cluster_name):
-    """Create the cluster."""
-    print('Creating cluster...')
-    zone_uri = \
-        'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
-            project, zone)
-    cluster_data = {
-        'project_id': project,
-        'cluster_name': cluster_name,
-        'config': {
-            'gce_cluster_config': {
-                'zone_uri': zone_uri
-            },
-            'master_config': {
-                'num_instances': 1,
-                'machine_type_uri': 'n1-standard-1'
-            },
-            'worker_config': {
-                'num_instances': 2,
-                'machine_type_uri': 'n1-standard-1'
-            }
-        }
-    }
-
-    cluster = dataproc.create_cluster(project, region, cluster_data)
-    cluster.add_done_callback(callback)
-    global waiting_callback
-    waiting_callback = True
-# [END dataproc_create_cluster]
-
-
-def callback(operation_future):
-    # Reset global when callback returns.
-    global waiting_callback
-    waiting_callback = False
-
-
-def wait_for_cluster_creation():
-    """Wait for cluster creation."""
-    print('Waiting for cluster creation...')
-
-    while True:
-        if not waiting_callback:
-            print("Cluster created.")
-            break
-
-
-# [START dataproc_list_clusters_with_detail]
-def list_clusters_with_details(dataproc, project, region):
-    """List the details of clusters in the region."""
-    for cluster in dataproc.list_clusters(project, region):
-        print(('{} - {}'.format(cluster.cluster_name,
-                                cluster.status.State.Name(
-                                    cluster.status.state))))
-# [END dataproc_list_clusters_with_detail]
-
-
-def get_cluster_id_by_name(dataproc, project_id, region, cluster_name):
-    """Helper function to retrieve the ID and output bucket of a cluster by
-    name."""
-    for cluster in dataproc.list_clusters(project_id, region):
-        if cluster.cluster_name == cluster_name:
-            return cluster.cluster_uuid, cluster.config.config_bucket
-
-
-# [START dataproc_submit_pyspark_job]
-def submit_pyspark_job(dataproc, project, region, cluster_name, bucket_name,
-                       filename):
-    """Submit the Pyspark job to the cluster (assumes `filename` was uploaded
-    to `bucket_name."""
-    job_details = {
-        'placement': {
-            'cluster_name': cluster_name
-        },
-        'pyspark_job': {
-            'main_python_file_uri': 'gs://{}/{}'.format(bucket_name, filename)
-        }
-    }
-
-    result = dataproc.submit_job(
-        project_id=project, region=region, job=job_details)
-    job_id = result.reference.job_id
-    print('Submitted job ID {}.'.format(job_id))
-    return job_id
-# [END dataproc_submit_pyspark_job]
-
-
-# [START dataproc_delete]
-def delete_cluster(dataproc, project, region, cluster):
-    """Delete the cluster."""
-    print('Tearing down cluster.')
-    result = dataproc.delete_cluster(
-        project_id=project, region=region, cluster_name=cluster)
-    return result
-# [END dataproc_delete]
-
-
-# [START dataproc_wait]
-def wait_for_job(dataproc, project, region, job_id):
-    """Wait for job to complete or error out."""
-    print('Waiting for job to finish...')
-    while True:
-        job = dataproc.get_job(project, region, job_id)
-        # Handle exceptions
-        if job.status.State.Name(job.status.state) == 'ERROR':
-            raise Exception(job.status.details)
-        elif job.status.State.Name(job.status.state) == 'DONE':
-            print('Job finished.')
-            return job
-# [END dataproc_wait]
-
-
-def main(project_id,
-         zone,
-         cluster_name,
-         bucket_name,
-         pyspark_file=None,
-         create_new_cluster=True,
-         global_region=True):
-
-    # [START dataproc_get_client]
-    if global_region:
-        region = 'global'
-        # Use the default gRPC global endpoints.
-        dataproc_cluster_client = dataproc_v1.ClusterControllerClient()
-        dataproc_job_client = dataproc_v1.JobControllerClient()
-    else:
-        region = get_region_from_zone(zone)
-        # Use a regional gRPC endpoint. See:
-        # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints
-        client_transport = (
-            cluster_controller_grpc_transport.ClusterControllerGrpcTransport(
-                address='{}-dataproc.googleapis.com:443'.format(region)))
-        job_transport = (
-            job_controller_grpc_transport.JobControllerGrpcTransport(
-                address='{}-dataproc.googleapis.com:443'.format(region)))
-        dataproc_cluster_client = dataproc_v1.ClusterControllerClient(
-            client_transport)
-        dataproc_job_client = dataproc_v1.JobControllerClient(job_transport)
-    # [END dataproc_get_client]
-
-    try:
-        spark_file, spark_filename = get_pyspark_file(pyspark_file)
-        if create_new_cluster:
-            create_cluster(dataproc_cluster_client, project_id, zone, region,
-                           cluster_name)
-            wait_for_cluster_creation()
-        upload_pyspark_file(project_id, bucket_name, spark_filename,
-                            spark_file)
-
-        list_clusters_with_details(dataproc_cluster_client, project_id,
-                                   region)
-
-        (cluster_id, output_bucket) = (
-                get_cluster_id_by_name(dataproc_cluster_client, project_id,
-                                       region, cluster_name))
-
-        # [START dataproc_call_submit_pyspark_job]
-        job_id = submit_pyspark_job(dataproc_job_client, project_id, region,
-                                    cluster_name, bucket_name, spark_filename)
-        # [END dataproc_call_submit_pyspark_job]
-
-        wait_for_job(dataproc_job_client, project_id, region, job_id)
-        output = download_output(project_id, cluster_id, output_bucket, job_id)
-        print('Received job output {}'.format(output))
-        return output
-    finally:
-        if create_new_cluster:
-            delete_cluster(dataproc_cluster_client, project_id, region,
-                           cluster_name)
-            spark_file.close()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=argparse.
-                                     RawDescriptionHelpFormatter)
-    parser.add_argument(
-        '--project_id', help='Project ID you want to access.', required=True)
-    parser.add_argument('--zone',
-                        help='Zone to create clusters in/connect to',
-                        required=True)
-    parser.add_argument('--cluster_name',
-                        help='Name of the cluster to create/connect to',
-                        required=True)
-    parser.add_argument('--gcs_bucket',
-                        help='Bucket to upload Pyspark file to',
-                        required=True)
-    parser.add_argument('--pyspark_file',
-                        help='Pyspark filename. Defaults to pyspark_sort.py')
-    parser.add_argument('--create_new_cluster',
-                        action='store_true',
-                        help='States if the cluster should be created')
-    parser.add_argument('--global_region',
-                        action='store_true',
-                        help='If cluster is in the global region')
-
-    args = parser.parse_args()
-    main(args.project_id, args.zone, args.cluster_name, args.gcs_bucket,
-         args.pyspark_file, args.create_new_cluster, args.global_region)