Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6860ee7

Browse files
committed
Added workflows sample
1 parent 20b6892 commit 6860ee7

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed

dataproc/workflows.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This sample walks a user through creating a workflow
16+
# for Cloud Dataproc using the Python client library.
17+
18+
import sys
19+
# [START dataproc_inline_workflow]
20+
from google.cloud import dataproc_v1 as dataproc
21+
22+
23+
def instantiate_inline_workflow(project_id, region):
24+
"""This sample walks a user through submitting a workflow
25+
for a Cloud Dataproc using the Python client library.
26+
27+
Args:
28+
project_id (string): Project to use for running the workflow.
29+
region (string): Region where the workflow resources should live.
30+
"""
31+
32+
# Create a client with the endpoint set to the desired region.
33+
workflow_client = dataproc.WorkflowTemplateServiceClient(
34+
client_options={
35+
'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)}
36+
)
37+
38+
parent = workflow_client.region_path(project_id, region)
39+
40+
template = {
41+
'jobs': [
42+
{
43+
'hadoop_job': {
44+
'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/'
45+
'hadoop-mapreduce-examples.jar',
46+
'args': [
47+
'teragen',
48+
'1000',
49+
'hdfs:///gen/'
50+
]
51+
},
52+
'step_id': 'teragen'
53+
},
54+
{
55+
'hadoop_job': {
56+
'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/'
57+
'hadoop-mapreduce-examples.jar',
58+
'args': [
59+
'terasort',
60+
'hdfs:///gen/',
61+
'hdfs:///sort/'
62+
]
63+
},
64+
'step_id': 'terasort',
65+
'prerequisite_step_ids': [
66+
'teragen'
67+
]
68+
}],
69+
'placement': {
70+
'managed_cluster': {
71+
'cluster_name': 'my-managed-cluster',
72+
'config': {
73+
'gce_cluster_config': {
74+
# Leave 'zone_uri' empty for 'autozone'
75+
# 'zone_uri': ''
76+
'zone_uri': 'us-central1-a'
77+
}
78+
}
79+
}
80+
}
81+
}
82+
83+
# Submit the request to instantiate the workflow from an inline template.
84+
operation = workflow_client.instantiate_inline_workflow_template(
85+
parent, template)
86+
operation.result()
87+
88+
# Output a success message.
89+
print('Workflow ran successfully.')
90+
# [END dataproc_inline_workflow]
91+
92+
93+
if __name__ == "__main__":
    # CLI usage: python workflows.py <project_id> <region>
    project_arg, region_arg = sys.argv[1], sys.argv[2]
    instantiate_inline_workflow(project_arg, region_arg)

dataproc/workflows_test.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import workflows
18+
19+
20+
# Project under test comes from the environment; the region is pinned
# so the sample's regional endpoint is deterministic.
PROJECT_ID = os.environ['GCLOUD_PROJECT']
REGION = 'us-central1'
22+
23+
24+
def test_workflows(capsys):
    """End-to-end check: run the inline-workflow sample and verify that
    it prints its success message."""
    workflows.instantiate_inline_workflow(PROJECT_ID, REGION)

    captured_out, _ = capsys.readouterr()
    assert "successfully" in captured_out

0 commit comments

Comments
 (0)