10 changes: 9 additions & 1 deletion feathr_project/feathr/spark_provider/_databricks_submission.py
@@ -231,7 +231,15 @@ def submit_feathr_job(
                 "coordinates": get_maven_artifact_fullname()}
             # Add json-schema dependency
             # TODO: find a proper way to deal with unresolved dependencies
-            submission_params["libraries"][1]["maven"] = {"coordinates": "com.github.everit-org.json-schema:org.everit.json.schema:1.9.1", "repo": "https://repository.mulesoft.org/nexus/content/repositories/public/"}
+            # Since we are adding another entry to the config, make sure the Spark config
+            # passed as part of execution also contains a "libraries" array of at least
+            # size 2; otherwise indexing element 1 raises an IndexError (list index out of range).
+            # Example from feathr_config.yaml:
+            #   config_template: {"run_name":"FEATHR_FILL_IN", ..., "libraries":[{}, {}], ...}
+
+            submission_params["libraries"][1]["maven"] = {
+                "coordinates": "com.github.everit-org.json-schema:org.everit.json.schema:1.9.1",
+                "repo": "https://repository.mulesoft.org/nexus/content/repositories/public/"
+            }
         else:
             submission_params["libraries"][0]["jar"] = self.upload_or_get_cloud_path(
                 main_jar_path)
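
For context, a defensive variant could grow the libraries list on demand instead of assuming index 1 already exists. This is only a minimal sketch of that alternative, not the code this PR ships:

# Illustration only: a one-entry libraries list, the case that would otherwise fail.
submission_params = {"libraries": [{"jar": "FEATHR_FILL_IN"}]}
maven_dep = {
    "coordinates": "com.github.everit-org.json-schema:org.everit.json.schema:1.9.1",
    "repo": "https://repository.mulesoft.org/nexus/content/repositories/public/",
}
# Pad the list so index 1 is always present before attaching the Maven dependency.
while len(submission_params["libraries"]) < 2:
    submission_params["libraries"].append({})
submission_params["libraries"][1]["maven"] = maven_dep
print(submission_params["libraries"])

The PR instead keeps the indexing as-is and documents the two-slot requirement in the config template, which is why both feathr_config.yaml files below change as well.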
@@ -92,7 +92,7 @@ spark_config:
 # config string including run time information, spark version, machine size, etc.
 # the config follows the format in the databricks documentation: https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/2.0/jobs#--request-structure-6
 # The fields marked as "FEATHR_FILL_IN" will be managed by Feathr. Other parameters are customizable; for example, you can change the node type, spark version, number of workers, instance pools, timeout, etc.
-config_template: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","node_type_id":"Standard_D3_v2","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"}},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
+config_template: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","node_type_id":"Standard_D3_v2","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"}},"libraries":[{"jar":"FEATHR_FILL_IN"}, {"maven":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
 # workspace dir for storing all the required configuration files and the jar resources. All the feature definitions will be uploaded here
 work_dir: "dbfs:/feathr_getting_started"
 # This is the location of the runtime jar for Spark job submission. If you have compiled the runtime yourself, you need to specify this location.
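
To see why the template needs two "libraries" slots, the sketch below (a trimmed copy of the template above, not Feathr's actual submission code) parses the JSON and fills index 1 the same way _databricks_submission.py does:

import json

template = (
    '{"run_name":"FEATHR_FILL_IN",'
    '"libraries":[{"jar":"FEATHR_FILL_IN"}, {"maven":"FEATHR_FILL_IN"}],'
    '"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN"}}'
)
submission_params = json.loads(template)
assert len(submission_params["libraries"]) >= 2  # index 1 exists, no IndexError
submission_params["libraries"][1]["maven"] = {
    "coordinates": "com.github.everit-org.json-schema:org.everit.json.schema:1.9.1",
    "repo": "https://repository.mulesoft.org/nexus/content/repositories/public/",
}
print(submission_params["libraries"][1])

With only the single {"jar": ...} entry from the old template, the same assignment would fail.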
2 changes: 1 addition & 1 deletion feathr_project/test/test_user_workspace/feathr_config.yaml
@@ -90,7 +90,7 @@ spark_config:
 workspace_token_value: ''
 # config string including run time information, spark version, machine size, etc.
 # the config follows the format in the databricks documentation: https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/2.0/jobs
-config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}
+config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}, {"maven":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}
 # Feathr job location. Supports local paths, paths starting with http(s)://, and paths starting with dbfs:/
 work_dir: 'dbfs:/feathr_getting_started'
 # this is the default location so end users don't have to compile the runtime again.
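
Users upgrading an existing config can check their own template before submitting a job. A minimal sketch, assuming pyyaml is installed and the usual spark_config.databricks nesting (adjust the path and keys to your layout):

import json
import yaml  # pyyaml, assumed available

# Sanity check: the Databricks config_template must carry at least two "libraries" slots.
with open("feathr_config.yaml") as f:
    cfg = yaml.safe_load(f)
template = cfg["spark_config"]["databricks"]["config_template"]
# The template may be a quoted JSON string or an inline YAML mapping.
if isinstance(template, str):
    template = json.loads(template)
assert len(template.get("libraries", [])) >= 2, "libraries needs at least 2 entries"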