Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
79 changes: 68 additions & 11 deletions deployments/common/manila/datashares.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,81 @@
# </meta:header>
#

# Shares that hold the published datasets. Each entry gives a short id, the
# name of the share, and the path the share is mounted at.
# Every key appears exactly once per entry: duplicate mapping keys are invalid
# YAML and most parsers silently keep only the last value.
datashares:

  - id: "GDR2"
    sharename: "aglais-data-gaia-dr2-6514"
    mountpath: "/data/gaia/GEDR2_6514"

  - id: "GEDR3"
    sharename: "aglais-data-gaia-edr3-11932"
    mountpath: "/data/gaia/GEDR3_11932"

  - id: "GEDR3-2048"
    sharename: "aglais-data-gaia-edr3-2048"
    mountpath: "/data/gaia/GEDR3_2048"

  - id: "GEDR3-4096"
    sharename: "aglais-data-gaia-edr3-4096"
    mountpath: "/data/gaia/GEDR3_4096"

  - id: "ALLWISE"
    sharename: "aglais-data-wise-allwise"
    mountpath: "/data/wise/ALLWISE"

  - id: "PS1"
    sharename: "aglais-data-panstarrs-ps1"
    mountpath: "/data/panstarrs/PS1"

  - id: "2MASS"
    sharename: "aglais-data-twomass-allsky"
    mountpath: "/data/twomass/2MASSPSC"

#
# These links are used for two things.
# 1) To simplify the normal data locations.
# 2) To hide the side effects of name changes.
#

# Symbolic links layered on top of the mounted data shares. Each entry names
# the link to create (linkpath) and the existing location it points at
# (linkdest). Every linkdest must resolve inside one of the share mountpaths.
datalinks:

  # User friendly location GEDR2_GAIASOURCE
  - linkpath: "/data/gaia/GEDR2/GEDR2_GAIASOURCE"
    linkdest: "/data/gaia/GEDR2_6514/GEDR2_6514_GAIASOURCE"

  # User friendly location GEDR3_GAIASOURCE
  - linkpath: "/data/gaia/GEDR3/GEDR3_GAIASOURCE"
    linkdest: "/data/gaia/GEDR3_2048/GEDR3_2048_GAIASOURCE"

  # User friendly location GEDR3_ALLWISE_BEST_NEIGHBOURS
  - linkpath: "/data/gaia/GEDR3/GEDR3_ALLWISE_BEST_NEIGHBOURS"
    linkdest: "/data/gaia/GEDR3_2048/GEDR3_2048_ALLWISE_BEST_NEIGHBOURS"

  # User friendly location GEDR3_PS1_BEST_NEIGHBOURS
  - linkpath: "/data/gaia/GEDR3/GEDR3_PS1_BEST_NEIGHBOURS"
    linkdest: "/data/gaia/GEDR3_2048/GEDR3_2048_PS1_BEST_NEIGHBOURS"

  # User friendly location GEDR3_2MASSPSC_BEST_NEIGHBOURS
  - linkpath: "/data/gaia/GEDR3/GEDR3_2MASSPSC_BEST_NEIGHBOURS"
    linkdest: "/data/gaia/GEDR3_2048/GEDR3_2048_2MASSPSC_BEST_NEIGHBOURS"

  # Name change was gaia/edr2
  # NOTE(review): the GDR2 share was previously mounted at /data/gaia/dr2,
  # not /data/gaia/edr2 - confirm which legacy path this link should cover.
  - linkpath: "/data/gaia/edr2"
    linkdest: "/data/gaia/GEDR2_6514"

  # Name change was gaia/edr3
  - linkpath: "/data/gaia/edr3"
    linkdest: "/data/gaia/GEDR3_11932"

  # Name change was wise/allwise
  - linkpath: "/data/wise/allwise"
    linkdest: "/data/wise/ALLWISE"

  # Name change was dr1
  - linkpath: "/data/panstarrs/dr1"
    linkdest: "/data/panstarrs/PS1"

  # Name change was twomass/allsky
  # The destination is the 2MASS share's mountpath (/data/twomass/2MASSPSC);
  # pointing at /data/twomass/2MASS would leave a dangling link.
  - linkpath: "/data/twomass/allsky"
    linkdest: "/data/twomass/2MASSPSC"

2 changes: 1 addition & 1 deletion deployments/common/manila/usershares.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# </meta:header>
#

shares:
usershares:

- id: "nch"
sharename: "aglais-user-nch"
Expand Down
2 changes: 1 addition & 1 deletion deployments/hadoop-yarn/ansible/51-cephfs-mount.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

---
- name: "Install and mount a CephFS share"
hosts: zeppelin:masters:workers
hosts: "{{ mnthost | default('zeppelin:masters:workers') }}"
gather_facts: false
vars_files:
- config/ansible.yml
Expand Down
43 changes: 43 additions & 0 deletions deployments/hadoop-yarn/ansible/61-data-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#
#

---
# Play: create the symbolic links listed under `datalinks` in the data shares
# file. Each list item is handed to tasks/create-linked.yml as the variables
# `linkpath` and `linkdest`.
# NOTE(review): this play targets zeppelin and workers only, while the share
# mount play defaults to zeppelin:masters:workers - confirm masters do not
# need the links.
- name: "Create data links"
  hosts: zeppelin:workers
  gather_facts: false
  vars_files:
    - config/ansible.yml
    - /tmp/ansible-vars.yml
    # Supplies the `datalinks` list iterated below.
    - /deployments/common/manila/datashares.yaml

  tasks:

    # One include per entry in `datalinks`.
    - name: "Linking data directories"
      include_tasks: "tasks/create-linked.yml"
      vars:
        linkpath: "{{item.linkpath}}"
        linkdest: "{{item.linkdest}}"
      loop: "{{ datalinks }}"



172 changes: 172 additions & 0 deletions deployments/hadoop-yarn/ansible/config/zrq-dev-small.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2020 -> 2021

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well caught

#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#
#

# Cluster definition for the zrq-dev-small configuration: shared variables
# under `vars`, a single `zeppelin` host under `hosts`, and the `masters` and
# `workers` groups under `children`.
# NOTE(review): the layout matches an Ansible YAML inventory - confirm against
# the playbooks that load it.
all:

vars:

# Hadoop vars

hdname: "hadoop-3.1.3"
hdbase: "/opt"
hdhome: "/opt/hadoop"

# Built from hdhome by template substitution.
hdconf: "{{hdhome}}/etc/hadoop"
hdhost: "master01"
hduser: "fedora"

# HDFS vars

hdfsconf: "/var/hdfs/conf"
hdfsuser: "fedora"

# Spark vars
spname: "spark-2.4.7"
spfull: "spark-2.4.7-bin-hadoop2.7"
spbase: "/opt"
sphome: "/opt/spark"
sphost: "master01"
spuser: "fedora"

# Multi-line Spark settings kept as one literal block (`|`); the `#` lines
# inside the block are part of the value, not YAML comments.
# NOTE(review): the block derives spark.executor.instances as "8w * 2" = 16,
# while the workers group below is worker[01:06] - confirm which is intended.
sparkconfig: |

# https://spark.apache.org/docs/latest/configuration.html
# https://spark.apache.org/docs/latest/running-on-yarn.html
# https://stackoverflow.com/questions/37871194/how-to-tune-spark-executor-number-cores-and-executor-memory

# Amount of memory to use for the driver process (where SparkContext is initialized).
# (small zeppelin node has 22G memory)
spark.driver.memory 10g
# Limit of total size of serialized results of all partitions for each Spark action.
# Setting a proper limit can protect the driver from out-of-memory errors.
spark.driver.maxResultSize 8g

# Amount of memory to use for the YARN Application Master
# (default 512m)
#spark.yarn.am.memory 512m
# Number of cores to use for the YARN Application Master in client mode.
# (default 1)
#spark.yarn.am.cores 1

# The number of cores to use on each executor.
# (small worker node has 6 cores)
spark.executor.cores 3
# Amount of memory to use per executor process.
# (small worker node has 22G memory and 6 cores)
# (22G - 512M)/2
# ((22 * 1024)-512)/2
spark.executor.memory 11008m

# The number of executors for static allocation.
# 8w * 2
spark.executor.instances 16

# Zeppelin vars
zepname: "zeppelin-0.8.2"
zepbase: "/home/fedora"
zephome: "/home/fedora/zeppelin-0.8.2-bin-all"
zephost: "zeppelin"
zepuser: "fedora"

hosts:

# The single Zeppelin node, with one 512 cinder volume mounted at /mnt/cinder/vdb.
zeppelin:
login: 'fedora'
image: 'Fedora-30-1.2'
flavor: 'general.v1.small'
discs:
- type: 'cinder'
size: 512
format: 'btrfs'
mntpath: "/mnt/cinder/vdb"
devname: 'vdb'
# Each *link / *dest pair names a link location and the directory it points at.
paths:
# Empty on Zeppelin, master, worker
hddatalink: "/var/hadoop/data"
hddatadest: "/mnt/local/vda/hadoop/data"
# Empty on Zeppelin
hdtemplink: "/var/hadoop/temp"
hdtempdest: "/mnt/local/vda/hadoop/temp"
# Empty on Zeppelin
hdlogslink: "/var/hadoop/logs"
hdlogsdest: "/mnt/local/vda/hadoop/logs"
# Used on Zeppelin
sptemplink: "/var/spark/temp"
sptempdest: "/mnt/cinder/vdb/spark/temp"

children:

masters:
hosts:
# Range pattern with identical bounds; hdhost and sphost above refer to "master01".
master[01:01]:
vars:
login: 'fedora'
image: 'Fedora-30-1.2'
flavor: 'general.v1.tiny'
# No additional discs are listed for masters.
discs: []
paths:
# Empty on Zeppelin, master, worker
hddatalink: "/var/hadoop/data"
hddatadest: "/mnt/local/vda/hadoop/data"
# Used on master
# /var/hadoop/temp/dfs/namesecondary/current/
hdtemplink: "/var/hadoop/temp"
hdtempdest: "/mnt/local/vda/hadoop/temp"
# Used on master
hdlogslink: "/var/hadoop/logs"
hdlogsdest: "/mnt/local/vda/hadoop/logs"
# Used on master
# /var/hdfs/meta/namenode/fsimage/current/
hdfsmetalink: "/var/hdfs/meta"
hdfsmetadest: "/mnt/local/vda/hadoop/meta"

workers:
hosts:
# Range pattern covering 01 through 06.
worker[01:06]:
vars:
login: 'fedora'
image: 'Fedora-30-1.2'
flavor: 'general.v1.small'
discs:
- type: 'cinder'
size: 512
format: 'btrfs'
mntpath: "/mnt/cinder/vdb"
devname: 'vdb'
paths:
# Empty on Zeppelin, master, worker
hddatalink: "/var/hadoop/data"
hddatadest: "/mnt/local/vda/hadoop/data"
# Used on workers
# /var/hadoop/temp/nm-local-dir/
hdtemplink: "/var/hadoop/temp"
hdtempdest: "/mnt/local/vda/hadoop/temp"
# Used on worker
hdlogslink: "/var/hadoop/logs"
hdlogsdest: "/mnt/local/vda/hadoop/logs"
# Workers only, empty
hdfslogslink: "/var/hdfs/logs"
hdfslogsdest: "/mnt/local/vda/hdfs/logs"
# Workers only, used
hdfsdatalink: "/var/hdfs/data"
hdfsdatadest: "/mnt/cinder/vdb/hdfs/data"

Loading