diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..dda7550 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,51 @@ +--- +name: CI + +on: + workflow_dispatch: + pull_request: + schedule: + - cron: "0 0 * * *" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + linting: + runs-on: ubuntu-latest + timeout-minutes: 15 + defaults: + run: + shell: bash -leo pipefail {0} + + permissions: + contents: read + + steps: + - name: checkout + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + cache: "pip" + cache-dependency-path: ".pre-commit-config.yaml" + + - name: Cache pre-commit environments + uses: actions/cache@v6 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }} + + - name: Check whether the citation metadata from CITATION.cff is valid + uses: citation-file-format/cffconvert-github-action@2.0.0 + with: + args: "--validate" + + - name: Pre-commit + run: | + python -m pip install pre-commit + SKIP=no-commit-to-branch pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a6bff42..bd67886 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,24 +1,24 @@ repos: # https://pycqa.github.io/isort/docs/configuration/black_compatibility.html#integration-with-pre-commit - repo: https://github.com/pycqa/isort - rev: 5.13.2 + rev: 9.0.0a3 hooks: - id: isort args: ["--profile", "black", "--filter-files"] - repo: https://github.com/psf/black - rev: 24.8.0 + rev: 26.5.1 hooks: - id: black args: ["--line-length=100"] # https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html?highlight=other%20tools#flake8 - repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 + rev: 7.3.0 hooks: - id: flake8 args: ["--max-line-length=100", "--extend-ignore=E203,E712"] # 
https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3d08fcc --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,9 @@ +# Changelog + +All notable changes to the Eventdisplay_AnalysisScripts_CTA project will be documented in this file. +Changes for upcoming releases can be found in the [docs/changes](docs/changes) directory. +Note that changes before release v2.5.0 are not documented here. + +This changelog is generated using [Towncrier](https://towncrier.readthedocs.io/). + + diff --git a/CITATION.cff b/CITATION.cff index 68e6d0a..00437f4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,9 +4,9 @@ cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. title: "Eventdisplay Analysis Scripts for CTA" abstract: "Run scripts for CTA. Allows to efficiently run all analysis steps starting from the raw MC files to sensitivities using Eventdisplay." 
-doi: "https://doi.org/10.5281/zenodo.4263710" -version: 1.7.0 -date-released: 2022-04-25 +doi: 10.5281/zenodo.4263710 +version: 2.4.0 +date-released: 2025-09-24 keywords: - "gamma-ray astronomy" - "astronomy software" @@ -22,18 +22,15 @@ authors: family-names: Maier affiliation: DESY orcid: https://orcid.org/0000-0001-9868-4700 -- given-names: Orel - family-names: Gueta - affiliation: DESY references: - scope: "Additional material" type: article title: "Eventdisplay: An Analysis and Reconstruction Package for Ground-based Gamma-ray Astronomy" authors: - - familiy-names: Maier + - family-names: Maier given-names: Gernot affiliation: DESY - - familiy-names: Holder + - family-names: Holder given-names: Jamie affiliation: "Department of Physics and Astronomy and the Bartol Research Institute, University of Delaware, Newark, DE 19716, USA" doi: 10.48550/arXiv.1708.04048 diff --git a/CTA.mainRunScriptsReduced.sh b/CTA.mainRunScriptsReduced.sh index 25dd1d0..5652e25 100755 --- a/CTA.mainRunScriptsReduced.sh +++ b/CTA.mainRunScriptsReduced.sh @@ -20,10 +20,14 @@ if [ $# -lt 2 ]; then prod5b-North-20deg-moon prod5b-North-40deg-moon prod5b-North-60deg-moon Prod6 analysis: prod6-North-20deg prod6-North-40deg prod6-North-52deg prod6-North-60deg - prod6-South-20deg + prod6-North-20deg-moon prod6-North-40deg-moon prod6-North-52deg-moon prod6-North-60deg-moon + prod6-South-20deg prod6-South-40deg prod6-South-52deg prod6-South-60deg + prod6-South-20deg-moon prod6-South-40deg-moon prod6-South-52deg-moon prod6-South-60deg-moon run modes: - MAKETABLES DISPBDT ANATABLES PREPARETMVA TRAIN ANGRES QC CUTS PHYS + MAKETABLES PREPAREDISPBDTDATASPLIT DISPBDT ANATABLES PREPAREANA XGBSTEREOTRAIN XGBSTEREOANA PREPARETMVA TRAIN ANGRES QC CUTS PHYS CLEANUP + + optional run modes: TRAIN_RECO_QUALITY TRAIN_RECO_METHOD " exit @@ -41,22 +45,38 @@ fi RECID="0" # run scripts are collected here -RUNSCRIPTDIR="${CTA_USER_LOG_DIR}/jobs/$(uuidgen)" -mkdir -p ${RUNSCRIPTDIR} +if [[ ${RUN} != "CLEANUP" ]] 
&& [[ ${RUN} != "PREPAREDISPBDTDATASPLIT" ]]; then + UUID=$(python -c "import uuid6; print(uuid6.uuid7())") + RUNSCRIPTDIR="${CTA_USER_LOG_DIR}/jobs/$UUID" + mkdir -p ${RUNSCRIPTDIR} +fi +echo ${RUNSCRIPTDIR} -if [[ ${RUN} == "MAKETABLES" ]] || [[ ${RUN} == "DISPBDT" ]] || [[ ${RUN} == "ANATABLES" ]] || [[ ${RUN} == "PREPARETMVA" ]]; then +if [[ ${RUN} == "MAKETABLES" ]] || [[ ${RUN} == "DISPBDT" ]] || [[ ${RUN} == "ANATABLES" ]] || [[ ${RUN} == "PREPARETMVA" ]] || [[ ${RUN} == "PREPAREANA" ]]; then ./CTA.runAnalysis.sh ${P2} ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} if [[ $SITE == "South" ]] || [[ $P2 == *"prod6"* ]]; then ./CTA.runAnalysis.sh ${P2}-sub ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} elif [[ $SITE == *"North"* ]]; then ./CTA.runAnalysis.sh ${P2}-LST ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} fi +elif [[ ${RUN} == "XGBSTEREOTRAIN" ]] || [[ ${RUN} == "XGBSTEREOANA" ]]; then + ./CTA.runAnalysis.sh ${P2} ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} + ./CTA.runAnalysis.sh ${P2} ${RUN} ${RECID} 3 3 3 3 ${RUNSCRIPTDIR} + if [[ $SITE == "South" ]] || [[ $P2 == *"prod6"* ]]; then + ./CTA.runAnalysis.sh ${P2}-sub ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} + ./CTA.runAnalysis.sh ${P2}-sub ${RUN} ${RECID} 3 3 3 3 ${RUNSCRIPTDIR} + fi +elif [[ ${RUN} == "CLEANUP" ]]; then + ./CTA.runAnalysis.sh ${P2} ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} +elif [[ ${RUN} == "PREPAREDISPBDTDATASPLIT" ]]; then + ./CTA.runAnalysis.sh ${P2} ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} + ./CTA.runAnalysis.sh ${P2}-sub ${RUN} ${RECID} 2 2 2 2 ${RUNSCRIPTDIR} else while IFS= read -r mult do ./CTA.runAnalysis.sh ${P2} ${RUN} ${RECID} $mult ${RUNSCRIPTDIR} done < NIM-${SITE}.dat - if [[ $SITE == "South" ]]; then + if [[ $SITE == "South" ]] || [[ $P2 == *"prod6"* ]]; then while IFS= read -r mult do ./CTA.runAnalysis.sh ${P2}-sub ${RUN} ${RECID} $mult ${RUNSCRIPTDIR} @@ -66,5 +86,7 @@ else fi fi -echo "#####" -echo "RUNSCRIPTDIR: ${RUNSCRIPTDIR%/}/${RUN}" +if [[ ${RUN} != "CLEANUP" ]] && [[ ${RUN} != 
"PREPAREDISPBDTDATASPLIT" ]]; then + echo "#####" + echo "RUNSCRIPTDIR: ${RUNSCRIPTDIR%/}/${RUN}" +fi diff --git a/CTA.runAllScripts.sh b/CTA.runAllScripts.sh new file mode 100755 index 0000000..b1749ae --- /dev/null +++ b/CTA.runAllScripts.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# Run all prod6 scripts over all zenith angles and NSB levels +# + +if [ $# -lt 1 ]; then + echo " + ./CTA.runAllScripts.sh + + run modes: + MAKETABLES PREPAREDISPBDTDATASPLIT DISPBDT ANATABLES PREPAREANA XGBSTEREOTRAIN XGBSTEREOANA PREPARETMVA TRAIN ANGRES QC CUTS PHYS CLEANUP + + optional run modes: TRAIN_RECO_QUALITY TRAIN_RECO_METHOD + + " + exit +fi +# run mode +RUN="$1" +SITE="South" + +for ZE in 20deg; do + for NSB in dark; do + dataset="prod6-${SITE}-${ZE}-${NSB}" + ./CTA.mainRunScriptsReduced.sh $dataset $RUN + done +done diff --git a/CTA.runAnalysis.sh b/CTA.runAnalysis.sh index 716bb5b..e41c467 100755 --- a/CTA.runAnalysis.sh +++ b/CTA.runAnalysis.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # analysis submission for production 3b/4/5/6 analysis # @@ -22,13 +22,16 @@ then prod5b-N prod5-South-20deg prod5-South-40deg prod5-South-60deg prod5b-North-20deg prod5b-North-40deg prod5b-North-60deg - add 'moon' for NSB5x data sets + add 'moon' for NSB5x data sets, 'fullmoon' for NSB30x prod3b-S20-SCT156Tel Prod6 analysis: prod6-North-20deg prod6-North-40deg prod6-North-52deg prod6-North-60deg - prod6-South-20deg + prod6-South-20deg prod6-South-40deg prod6-South-52deg prod6-South-60deg - possible run modes are EVNDISP MAKETABLES DISPBDT/DISPMLP ANATABLES PREPARETMVA TRAIN ANGRES QC CUTS PHYS + possible run modes are + EVNDISP MAKETABLES PREPAREDISPBDTDATASPLIT DISPBDT ANATABLES PREPAREANA XGBSTEREOTRAIN XGBSTEREOANA PREPARETMVA TRAIN ANGRES QC CUTS PHYS CLEANUP + + optional run modes: TRAIN_RECO_QUALITY TRAIN_RECO_METHOD [recids]: 0 = all telescopes (default), 1 = LSTs, 2 = MSTs, 3 = SSTs, 4 = MSTs+SSTs, 5 = LSTs+MSTs @@ -49,7 +52,7 @@ RUN="$2" [[ "$5" ]] && MST=$5 || MST="2" [[ "$6" ]] && SST=$6 
|| SST="2" [[ "$7" ]] && SCMST=$7 || SCMST="2" -[[ "$8" ]] && PDIR=${8} || PDIR="${CTA_USER_LOG_DIR%/}/" +[[ "$8" ]] && PDIR=${8} || PDIR="${CTA_USER_LOG_DIR%/}" echo "Telescope multiplicities: LST ${LST} MST ${MST} SST ${SST} SCMST ${SCMST}" ##################################### @@ -58,9 +61,6 @@ echo "Telescope multiplicities: LST ${LST} MST ${MST} SST ${SST} SCMST ${SCMST}" # even without using gridengine: do not remove this QSUBOPT="_M_P_X_cta_high_X__M_js_X_9" -##################################### -# output directory for script parameter files -mkdir -p "${PDIR%/}/tempRunParameterDir/" ##################################### # analysis dates and table dates @@ -71,6 +71,7 @@ EFFVERSION="V3" # (will be overwritten later) TDATE="g20200817" ANADATE="${TDATE}" +ANASOURCEDATE="" TMVADATE="${TDATE}" EFFDATE="${TDATE}" EFFDATE="g20221102" @@ -229,7 +230,6 @@ then ANADATE="g20231204" TMVADATE="${ANADATE}" EFFDATE="${ANADATE}" - EFFDATE="g20240315" PHYSDATE="${EFFDATE}" #################################### # prod5 - Paranal @@ -244,22 +244,21 @@ then else SITE="prod5-Paranal-20deg" fi - if [[ $P2 == *"moon"* ]]; then + if [[ $P2 == *"fullmoon"* ]]; then + SITE="${SITE}-NSB30x" + elif [[ $P2 == *"moon"* ]]; then SITE="${SITE}-NSB5x" fi - EDM="-sq52-LL" + EDM="-sq70-LL" if [[ $P2 == *"DL2plus"* ]]; then EDM="-sq10-LL-DL2plus" fi - ARRAY=( "subArray.prod5.South-BL.list" ) - ARRAY=( "subArray.prod5.South-Alpha-2LSTs42SSTs.list" ) - ARRAY=( "subArray.prod5.South-Alpha-2LSTs42SSTsBeta.list" ) - ARRAY=( "subArray.prod5.South-Alpha.list" ) + ARRAY=( "subArray.prod5.South-Beta.list" ) if [[ $P2 == *"sub"* ]]; then - ARRAY=( "subArray.prod5.South-Alpha-2LSTs42SSTsBeta-sub.list") - ARRAY=( "subArray.prod5.South-Alpha-sub.list" ) + ARRAY=( "subArray.prod5.South-Beta-sub.list" ) fi if [[ $P2 == *"Hyper"* ]] || [[ $P2 == *"hyper"* ]]; then +# ARRAY=( "subArray.prod5.South-HyperSST.list" ) ARRAY=( "subArray.prod5.South-Hyper.list" ) fi if [[ $P2 == *"LST"* ]]; then @@ -282,11 +281,11 
@@ then ARRAY=( "subArray.prod5.South-MSTF.list" ) fi ARRAYDIR="prod5" - TDATE="g20230823" + TDATE="g20250826" ANADATE="${TDATE}" + ANADATE="g20250906" TMVADATE="${ANADATE}" EFFDATE="${ANADATE}" - EFFDATE="g20240315" PHYSDATE="${EFFDATE}" #################################### # prod6 - Paranal and LaPalma @@ -295,9 +294,11 @@ then if [[ $P2 == *"South"* ]]; then NS="South" PLACE="Paranal" + EDM="-sq20-LL" else NS="North" PLACE="LaPalma" + EDM="-sq51-LL" fi SCT="" if [[ $P2 == *"SCT"* ]]; then @@ -319,24 +320,25 @@ then else SITE="${SITE}-dark" fi - EDM="-sq30-LL" - if [[ $P2 == "prod6-N"* ]]; then - EDM="-lin30-LL" - EDM="-sq230-LL" - fi if [[ $P2 == *"DL2plus"* ]]; then - EDM="-sq10-LL-DL2plus" + EDM="-sq50-LL-DL2plus" fi + ARRAY=( "subArray.prod6.${NS}ML${SCT}.list" ) ARRAY=( "subArray.prod6.${NS}Alpha${SCT}.list" ) if [[ $P2 == *"sub"* ]]; then + ARRAY=( "subArray.prod6.${NS}ML-sub.list" ) ARRAY=( "subArray.prod6.${NS}Alpha-sub.list" ) fi if [[ $P2 == *"Hyper"* ]] || [[ $P2 == *"hyper"* ]]; then - ARRAY=( "subArray.prod6.NorthHyper.list" ) + ARRAY=( "subArray.prod6.${NS}Hyper.list" ) fi ARRAYDIR="prod6" - TDATE="g20240826" - ANADATE="${TDATE}" + TDATE="g20260610" + # Reuse the existing reconstruction files, but keep products from the new + # XGBoost analysis under a new analysis date. + ANASOURCEDATE="g20260325" + ANADATE="g20260629" + XGBDATE="g20260629" TMVADATE="${ANADATE}" EFFDATE="${ANADATE}" PHYSDATE="${EFFDATE}" @@ -348,7 +350,10 @@ fi if [[ -z ${PHYSDATE} ]]; then PHYSDATE=${EFFDATE} fi -# should be either onSource or cone (default is cone) +if [[ -z ${ANASOURCEDATE} ]]; then + ANASOURCEDATE=${ANADATE} +fi +# NOT USED ANYMORE! 
Keep 'cone' OFFAXIS="cone" ##################################### @@ -374,6 +379,7 @@ OBSTIME=( "5h" "30m" "100s" ) OBSTIME=( "500h" "250h" "100h" "50h" "20h" "10h" "5h" "1h" "5m" "30m" "100s" ) OBSTIME=( "50h" "30m" ) OBSTIME=( "5h" ) +OBSTIME=( "5h" "30m" "100s" ) OBSTIME=( "50h" ) echo "$RUN" "$SITE" @@ -387,7 +393,7 @@ echo "RUN: $RUN" # run eventdisplay if [[ $RUN == "EVNDISP" ]] then - # Keep DST files on disk (require a lot of disk space + # Keep DST files on disk (require a lot of disk space) KEEPDST="0" for ((i = 0; i < ${#PARTICLE[@]}; i++ )) do @@ -411,7 +417,18 @@ then $QSUBOPT cd ../ done - continue + exit 0 +fi +# remove from PHYS directory any unreasonable files (e.g. LST4 requirement for 2 LST array) +if [[ $RUN == "CLEANUP" ]]; then + PHYSDIR="${CTA_USER_DATA_DIR%/}/analysis/AnalysisData/${SITE}${EDM}/Phys-${PHYSDATE}" + ./utilities/removeUnreasonablePhysFiles.sh ${PHYSDIR} + exit +fi +# Prepare file lists required for DispBDT training +if [[ $RUN == "PREPAREDISPBDTDATASPLIT" ]]; then + ./analysis/CTA.separateDispTrainingEvndispFiles.sh "${SITE}${EDM}" "${ARRAYDIR}/$ARRAY" + exit fi ########################################## # for the following: duplicate the array list adding the scaling to array names @@ -419,6 +436,9 @@ if [[ ! -e ${ARRAYDIR}/$ARRAY ]]; then echo "Error: array file not found: ${ARRAYDIR}/$ARRAY" exit fi +##################################### +# output directory for script parameter files +mkdir -p "${PDIR%/}/tempRunParameterDir/" NXARRAY=$(cat ${ARRAYDIR}/$ARRAY) NFILARRAY=${PDIR%/}/tempRunParameterDir/temp.$ARRAY.list rm -f "$NFILARRAY" @@ -431,15 +451,10 @@ done # dispBDT training if [[ $RUN == "DISP"* ]] then - if [[ $RUN == "DISPMLP" ]]; then - BDTDIR="MLPdisp." - RUNPAR="${CTA_EVNDISP_AUX_DIR}/ParameterFiles/TMVA.MLPDisp.runparameter" - else - BDTDIR="BDTdisp." - RUNPAR="${CTA_EVNDISP_AUX_DIR}/ParameterFiles/TMVA.BDTDisp.runparameter" - fi + BDTDIR="BDTdisp." 
+ RUNPAR="${CTA_EVNDISP_AUX_DIR}/ParameterFiles/TMVA.BDTDisp.runparameter" QCPAR="${CTA_EVNDISP_AUX_DIR}/ParameterFiles/TMVA.BDTDispQualityCuts.runparameter" - DDIR="${CTA_USER_DATA_DIR}/analysis/AnalysisData/${SITE}${EDM}/" + DDIR="${CTA_USER_DATA_DIR%/}/analysis/AnalysisData/${SITE}${EDM}/" for A in $NXARRAY do cd ./analysis/ @@ -549,9 +564,11 @@ do touch "$PARA" echo "WRITING PARAMETERFILE $PARA" EFFDIR=EffectiveArea-"$OOTIME"-ID$ID$AZ-$ETYPF-$EFFDATE-$EFFVERSION - EFFFULLDIR="${CTA_USER_DATA_DIR}/analysis/AnalysisData/${SITE}${EDM}/EffectiveAreas/${EFFDIR}/" + EFFFULLDIR="${CTA_USER_DATA_DIR%/}/analysis/AnalysisData/${SITE}${EDM}/EffectiveAreas/${EFFDIR}/" echo "MSCWSUBDIRECTORY ${MSCWSUBDIRECTORY}" >> "$PARA" echo "TMVASUBDIR BDT-${TMVAVERSION}-ID$ID$AZ-$TMVATYPF-$TMVADATE" >> "$PARA" + echo "TMVA_RECO_METHOD BDT-RECO-METHOD-${TMVAVERSION}-ID$ID$AZ-$TMVATYPF-$TMVADATE" >> "$PARA" + echo "TMVA_RECO_QUALITY BDT-RECO-QUALITY-${TMVAVERSION}-ID$ID$AZ-$TMVATYPF-$TMVADATE" >> "$PARA" echo "EFFAREASUBDIR ${EFFDIR}" >> "$PARA" EFFBDIR=EffectiveArea-50h-ID$ID$AZ-$ETYPF-$EFFDATE-$EFFVERSION echo "EFFAREASUBBASEDIR $EFFBDIR" >> "$PARA" @@ -584,36 +601,82 @@ do fi cd ./analysis/ ########################################## -# prepare train BDTs - if [[ $RUN == "PREPARETMVA" ]] +# XGB stereo analysis training and analysis + if [[ $RUN == "XGBSTEREOTRAIN" ]] + then + # Train XGB independently of AZ + if [ ${o} -eq 0 ] && [[ -z ${AZ} ]] + then + ./CTA.XGBSTEREO.sub_train.sh \ + "$NFILARRAY" \ + ${SITE}${EDM} \ + "$PARA" \ + "XGBStereo_${XGBDATE}" \ + $QSUBOPT \ + ${PDIR}/${RUN} + fi + elif [[ $RUN == "XGBSTEREOANA" ]] + then + if [ ${o} -eq 0 ] + then + ./CTA.XGBSTEREO.sub_analyse.sh \ + "$NFILARRAY" \ + ${SITE}${EDM} \ + "$PARA" \ + "XGBStereo_${XGBDATE}" \ + $QSUBOPT \ + "$AZ" \ + ${PDIR}/${RUN} + fi +########################################## +# prepare TMVA training events + elif [[ $RUN == "PREPARETMVA" ]] then if [ ${o} -eq 0 ] && [[ ! 
-z ${AZ} ]] then ./CTA.prepareTMVA.sub_train.sh \ "$NFILARRAY" \ - $OFFAXIS \ ${SITE}${EDM} \ "$PARA" \ $QSUBOPT \ $AZ \ ${PDIR}/${RUN} - fi + fi +########################################## +# prepare disjoint training and analysis files + elif [[ $RUN == "PREPAREANA" ]] + then + if [ ${o} -eq 0 ] && [[ -z ${AZ} ]] + then + ./CTA.prepareAnalysis_no_sub.sh \ + "$NFILARRAY" \ + ${SITE}${EDM} \ + "$PARA" \ + "Analysis-ID$ID-${ANASOURCEDATE}" + fi ########################################## # train BDTs # (note: BDT training does not need to be done for all observing periods) - elif [[ $RUN == "TRAIN" ]] || [[ $RUN == "TMVA" ]] + elif [[ $RUN == TRAIN* ]] || [[ $RUN == "TMVA" ]] then + if [ $RUN == "TRAIN_RECO_METHOD" ]; then + TMVA_RUN_MODE="TrainAngularReconstructionMethod" + elif [ $RUN == "TRAIN_RECO_QUALITY" ]; then + TMVA_RUN_MODE="TrainReconstructionQuality" + else + TMVA_RUN_MODE="TrainGammaHadronSeparation" + fi if [ ${o} -eq 0 ] && [[ ! -z ${AZ} ]] then ./CTA.TMVA.sub_train.sh \ + "$TMVA_RUN_MODE" \ "$NFILARRAY" \ - $OFFAXIS \ ${SITE}${EDM} \ "$PARA" \ $QSUBOPT \ $AZ \ ${PDIR}/${RUN} - fi + fi ########################################## # IRFs: angular resolution elif [[ $RUN == "ANGRES" ]] @@ -683,7 +746,7 @@ do if [[ $OFFAXIS == "cone" ]] then ./CTA.WPPhysWriter.sub.sh \ - "$NFILARRAY "\ + "$NFILARRAY" \ ${EFFFULLDIR}/BDT."$OOTIME"-${EFFVERSION}.$EFFDATE \ $OOTIME \ DESY.$PHYSDATE.${EFFVERSION}.ID$ID$AZ$ETYPF.${SITE}${EDM} \ diff --git a/DSTgeneration/README.md b/DSTgeneration/README.md index cf9e3c9..11eec03 100644 --- a/DSTgeneration/README.md +++ b/DSTgeneration/README.md @@ -21,9 +21,8 @@ Execute: ./generateDSTFiles.sh ``` -Output is written to: +Output is written to: ``` $CTA_USER_DATA_DIR/DST_testDevelopment_prod5/ ``` - diff --git a/DSTgeneration/generateDSTFiles.sh b/DSTgeneration/generateDSTFiles.sh index 22d9bfb..caf645a 100755 --- a/DSTgeneration/generateDSTFiles.sh +++ b/DSTgeneration/generateDSTFiles.sh @@ -1,5 +1,5 @@ #!/bin/sh -# +# # generate 
DST files # @@ -111,7 +111,7 @@ elif [[ $SITE == "Paranal" ]]; then OFILE="${ODIR}/${N}-${TYPE}-Emin05TeV" $EVNDISPSYS/bin/CTA.convert_hessio_to_VDST -a ${ARRAY} -minenergy 5. -c ${IPR} -o ${OFILE}.root ${MCFILE} > ${OFILE}.log - # superE + # superE OFILE="${ODIR}/${N}-${TYPE}-Emin20TeV" $EVNDISPSYS/bin/CTA.convert_hessio_to_VDST -a ${ARRAY} -minenergy 20. -c ${IPR} -o ${OFILE}.root ${MCFILE} > ${OFILE}.log diff --git a/IPRgeneration/README.md b/IPRgeneration/README.md index eb2a162..9865f60 100644 --- a/IPRgeneration/README.md +++ b/IPRgeneration/README.md @@ -9,7 +9,10 @@ To produce IPR graphs for all telescopes, run the scripts in the following order 1. `producePedestals.sh` - This will run sim_telarray to produce the pedestal files. Notice to change environmental variable at the top to point to a scratch area where intermediate results will be saved. You will also need a dummy CORSIKA file "dummy1.corsika.gz" in your scratch area, which can be be set in the script as SCRATCH. This script requires adjustments at the top of the script. -2. `produceIPRGraphs.sh` - Calculate the IPR graphs. +2. `convertToDST.sh` - This will convert the simtel_array output to a `dst.root` file required as input to the next step. +In case of errors: ensure that the Eventdisplay installation uses the same pre-processor flags (e.g. PROD6) as the sim_telarray installation. + +3. `produceIPRGraphs.sh` - Calculate the IPR graphs. Convert the output from sim_telarray to DST files and calulate IPR graphs. This requires having EVENTDISPLAY installed. -3. mergeIPRGraphs.sh - Merge all graphs calculated into root file (save also all log files into this root file) +4. 
mergeIPRGraphs.sh - Merge all graphs calculated into root file (save also all log files into this root file) diff --git a/IPRgeneration/convertToDST.sh b/IPRgeneration/convertToDST.sh index 7b79ed6..54bb7a3 100755 --- a/IPRgeneration/convertToDST.sh +++ b/IPRgeneration/convertToDST.sh @@ -1,6 +1,5 @@ #!/bin/sh -# Produce IPR graphs from NSB simulations -# Converts first simtel_array ouput file to DST +# Converts first simtel_array output file to DST for IPR generation # # Requires an Eventdisplay installation # including all environmental variables @@ -8,7 +7,7 @@ if [ $# -lt 1 ]; then echo " -./produceIPRGraphs.sh [production (default=PROD6; optional PROD5) +./convertToDST.sh [production (default=PROD6; optional PROD5) " exit fi @@ -16,13 +15,6 @@ SCRATCH=${1} [[ "$2" ]] && PROD=$2 || PROD="PROD6" CDIR=$(pwd) -if [[ $PROD == "PROD5" ]]; then - RUNPARA="EVNDISP.prod5.reconstruction.runparameter" -else - RUNPARA="EVNDISP.prod6.reconstruction.runparameter" -fi -echo "Using ${RUNPARA} for Production ${PROD}" - FLIST=$(find $SCRATCH -name "*.simtel.gz") for F in $FLIST diff --git a/IPRgeneration/geometry-1-telescope.lis b/IPRgeneration/geometry-1-telescope.lis index 56a6051..d00491f 100644 --- a/IPRgeneration/geometry-1-telescope.lis +++ b/IPRgeneration/geometry-1-telescope.lis @@ -1 +1 @@ -1 \ No newline at end of file +1 diff --git a/IPRgeneration/mergeIPRGraphs.C b/IPRgeneration/mergeIPRGraphs.C index 462f27c..7e11345 100755 --- a/IPRgeneration/mergeIPRGraphs.C +++ b/IPRgeneration/mergeIPRGraphs.C @@ -4,9 +4,9 @@ * eventdisplay analysis * * root -l -q -b 'mergeIPRGraphs.C( "output.root", "inputdirectory/" )' - *. - *. If the output file name contains the word "halfmoon", e.g., prod5-halfmoon-ze-20-IPR.root, - *. then the halfmoon pedestal files are merged. + * + * If the output file name contains the word "halfmoon", e.g., prod5-halfmoon-ze-20-IPR.root, + * then the halfmoon pedestal files are merged. 
*/ vector< string > get_file_list( string iFileListName ) @@ -69,4 +69,3 @@ void mergeIPRGraphs( string iMergedFile = "prod6-ze-20-IPR.root", } f->Close(); } - diff --git a/IPRgeneration/mergeIPRGraphs.sh b/IPRgeneration/mergeIPRGraphs.sh index fa53180..1945e0e 100755 --- a/IPRgeneration/mergeIPRGraphs.sh +++ b/IPRgeneration/mergeIPRGraphs.sh @@ -17,9 +17,9 @@ IPRFILE=${2} CDIR=$(pwd) # list of files to be merged -FLIST=${SCRATCH}/pedestals-${MOON}-ze-${ZE}.list +FLIST="${SCRATCH}/pedestals.list" rm -f ${FLIST} -find $SCRATCH -name "*.pedestal.root" > ${FLIST} +find $SCRATCH -name "*.pedestal.root" | sort > ${FLIST} echo "IPR files to be merged (from ${FLIST}):" cat ${FLIST} @@ -27,7 +27,8 @@ cat ${FLIST} root -l -q -b 'mergeIPRGraphs.C( '\"$IPRFILE\"', '\"$FLIST\"' )' # add log files -for logFileNow in $(ls ${SCRATCH}/*.log) +LOGFILES=$(find $SCRATCH -name "*.log" | sort) +for logFileNow in $LOGFILES do logFile=$(basename -- "$logFileNow") fileTitle="${logFile%.*}" diff --git a/IPRgeneration/produceIPRGraphs.sh b/IPRgeneration/produceIPRGraphs.sh index b5eef94..01b23a2 100755 --- a/IPRgeneration/produceIPRGraphs.sh +++ b/IPRgeneration/produceIPRGraphs.sh @@ -9,11 +9,11 @@ if [ $# -lt 1 ]; then echo " -./produceIPRGraphs.sh [production (default=PROD6; optional PROD5) + ./produceIPRGraphs.sh <(full) directory with simtel files> [production (default=PROD6; optional PROD5) " exit fi -SCRATCH=${1} +SCRATCH=$(realpath "$1") [[ "$2" ]] && PROD=$2 || PROD="PROD6" CDIR=$(pwd) @@ -38,10 +38,9 @@ do -sourcefile ${SCRATCH}/${FILEN}.dst.root \ -runmode=1 -singlepedestalrootfile=1 \ -donotusepeds -usePedestalsInTimeSlices=0 \ - -calibrationsumwindow=10 -calibrationsumfirst=0 \ + -calibrationsumwindow=30 -calibrationsumfirst=0 \ -reconstructionparameter ${RUNPARA} \ - -nopedestalsintimeslices -combine_pedestal_channels ${ADC} \ - >& ${SCRATCH}/${FILEN}.pedestal.log + -nopedestalsintimeslices -combine_pedestal_channels ${ADC} mv -f dst.root ${SCRATCH}/${FILEN}.pedestal.root cd 
${CDIR} diff --git a/IPRgeneration/producePedestals.sh b/IPRgeneration/producePedestals.sh index 5e48d82..b4524c6 100755 --- a/IPRgeneration/producePedestals.sh +++ b/IPRgeneration/producePedestals.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Produce pedestal files from NSB simulations using sim_telarray # # script prepared for prod5/prod6 simulations @@ -41,56 +41,83 @@ SCRATCH="." if [[ $PROD == "PROD5" ]]; then TELTYPES=( LST MST-FlashCam MST-NectarCam SST ) - TELTYPES=( SST ) else TELTYPES=( LST MST-FlashCam MST-NectarCam SST SCT MAGIC ) - SITE=( CTA_NORTH CTA_SOUTH CTA_NORTH CTA_SOUTH CTA_SOUTH CTA_NORTH ) fi -# dedicated scratch directory -SCRATCH=${SCRATCH}/${PROD}/ze${ZE}deg-${MOONSET} -mkdir -p ${SCRATCH} -echo "Writing all data products to ${SCRATCH}" -echo "(use this directory as input for all following analysis steps)" for i in "${!TELTYPES[@]}" do T="${TELTYPES[$i]}" - echo "Simulating $T for ${PROD}" - outputFile="${SCRATCH}/pedestals-${T}${MOON}-ze-${ZE}-1k.simtel.gz" - rm -f $outputFile + for SITE in CTA_NORTH CTA_SOUTH + do + # dedicated scratch directory + SCRATCH=./${PROD}/${SITE}-ze${ZE}deg-${MOONSET} + mkdir -p ${SCRATCH} + echo "Writing all data products to ${SCRATCH}" + echo "(use this directory as input for all following analysis steps)" + if [[ $T == "SST" ]] && [[ $SITE == "CTA_NORTH" ]] + then + continue + fi + if [[ $T == "MAGIC" ]] && [[ $SITE == "CTA_SOUTH" ]] + then + continue + fi + echo "Simulating $T for ${PROD} at $SITE" - if [[ $PROD == "PROD5" ]] && [[ $T == "SST" ]]; then - CFG="${SIM_TELARRAY_PATH}/cfg/CTA/CTA-${PROD}-${T}.cfg" - elif [[ $PROD == "PROD5" ]]; then - CFG="${SIM_TELARRAY_PATH}/cfg/CTA/CTA-PROD4-${T}.cfg" - else - CFG="${SIM_TELARRAY_PATH}/cfg/CTA/CTA-${PROD}-${T}.cfg" - fi - INCLUDEOPT="" - if [[ $T == "MAGIC" ]]; then - CFG="${SIM_TELARRAY_PATH}/cfg/MAGIC/MAGIC1.cfg" - INCLUDEOPT="-I${SIM_TELARRAY_PATH}/cfg/MAGIC" - fi + outputFile="${SCRATCH}/pedestals-${SITE}-${T}${MOON}-ze-${ZE}-1k" + rm -f "${outputFile}" 
"${outputFile}.simtel.gz" "${outputFile}.log" - if [[ $PROD == "PROD5" ]]; then - SITEOPT="" - else - SITEOPT="-D\"${SITE[$i]}\"" - fi + if [[ $PROD == "PROD5" ]] && [[ $T == "SST" ]]; then + CFG="${SIM_TELARRAY_PATH}/cfg/CTA/CTA-${PROD}-${T}.cfg" + elif [[ $PROD == "PROD5" ]]; then + CFG="${SIM_TELARRAY_PATH}/cfg/CTA/CTA-PROD4-${T}.cfg" + else + CFG="${SIM_TELARRAY_PATH}/cfg/CTA/CTA-${PROD}-${T}.cfg" + fi - ${SIM_TELARRAY_PATH}/bin/sim_telarray -c ${CFG} \ - -I${SIM_TELARRAY_PATH}/cfg/CTA -I${SIM_TELARRAY_PATH}/cfg/common \ - -I${SIM_TELARRAY_PATH}/cfg/hess ${INCLUDEOPT} ${SITEOPT} -C Altitude=2150 -C iobuf_maximum=1000000000 \ - ${MOONOPT} -DNUM_TELESCOPES=1 -C maximum_telescopes=1 \ - -C atmospheric_transmission=atm_trans_2150_1_10_0_0_2150.dat \ - -DNSB_AUTOSCALE -C telescope_theta=${ZENITH} -C telescope_phi=180 \ - -C pedestal_events=1000 \ - -C output_file=$outputFile \ - ${CDIR}/dummy1.corsika.gz >& ${SCRATCH}/sim_telarray${MOON}-${T}-ze-${ZE}.log & + INCLUDEOPT="" + if [[ $T == "MAGIC" ]]; then + CFG="${SIM_TELARRAY_PATH}/cfg/MAGIC/MAGIC1.cfg" + INCLUDEOPT="-I${SIM_TELARRAY_PATH}/cfg/MAGIC" + fi + if [[ $PROD == "PROD5" ]]; then + SITEOPT="" + ALT="-C Altitude=2150" + ATMO="-C atmospheric_transmission=atm_trans_2150_1_10_0_0_2150.dat" + TEL="" + elif [[ $PROD == "PROD6" ]]; then + SITEOPT="-D${SITE}" + if [[ $SITE == "CTA_NORTH" ]]; then + ALT="-C Altitude=2156" + ATMO="-C atmospheric_transmission=atm_trans_2156_1_3_2_0_0_0.1_0.1.dat" + TEL="" + # North: LST1 is different - generate pedestals for 2/3/4 and use that also for LST1 + if [[ $T == "LST" ]]; then + TEL="-DLST2" + fi + else + ALT="-C Altitude=2147" + ATMO="-C atmospheric_transmission=atm_trans_2147_1_10_2_0_2147.dat" + TEL="" + fi + fi + + ${SIM_TELARRAY_PATH}/bin/sim_telarray -c ${CFG} \ + -I${SIM_TELARRAY_PATH}/cfg/CTA -I${SIM_TELARRAY_PATH}/cfg/common \ + -I${SIM_TELARRAY_PATH}/cfg/hess ${INCLUDEOPT} ${SITEOPT} ${ALT} -C iobuf_maximum=1000000000 \ + ${MOONOPT} -DNUM_TELESCOPES=1 -C 
maximum_telescopes=1 ${TEL} \ + ${ATMO} \ + -DNSB_AUTOSCALE -C telescope_theta=${ZENITH} -C telescope_phi=180 \ + -C pedestal_events=1000 \ + -C output_file=${outputFile}.simtel.gz \ + ${CDIR}/dummy1.corsika.gz >& ${outputFile}.log + + done done # minor cleanup diff --git a/IPRgeneration/run_prod6_all.sh b/IPRgeneration/run_prod6_all.sh index 9260ea7..2c9d112 100755 --- a/IPRgeneration/run_prod6_all.sh +++ b/IPRgeneration/run_prod6_all.sh @@ -1,30 +1,36 @@ #!/bin/sh -# Run analysis for all zenith angles and light levels +# Run analysis for all run modes, site, zenith angles and light levels # -if [ $# -lt 1 ]; then - echo " - ./run_all.sh - " - exit -fi -RUNMODE=${1} - -for Z in 20.0 40.0 52.0 60.0 +# for RUNMODE in producePedestals convertToDST produceIPRGraphs mergeIPRGraphs +for RUNMODE in convertToDST produceIPRGraphs mergeIPRGraphs do - for M in dark half + for Z in 20.0 40.0 52.0 60.0 do - ZE=${Z%.*} - if [[ $RUNMODE == "producePedestals" ]]; then - ./producePedestals.sh PROD6 "${Z}" ${M} >& log_pedestals_${Z}_${M} & - elif [[ $RUNMODE == "convertToDST" ]]; then - ./convertToDST.sh PROD6/ze${ZE}deg-${M} >& log_convert_${Z}_${M} & - elif [[ $RUNMODE == "produceIPRGraphs" ]]; then - ./produceIPRGraphs.sh PROD6/ze${ZE}deg-${M} >& log_ipr_${Z}_${M} & - elif [[ $RUNMODE == "mergeIPRGraphs" ]]; then - ./mergeIPRGraphs.sh PROD6/ze${ZE}deg-${M} prod6-${M}-ze${ZE}deg-IPR.root - else - echo "Unknown run mode, should be one of producePedestals/produceIPRGraphs/mergeIPRGraphs" - fi + for M in dark half + do + ZE=${Z%.*} + echo $RUNMODE $M $Z $ZE + if [ "$RUNMODE" = "producePedestals" ]; then + ./producePedestals.sh PROD6 "${Z}" "${M}" > "pedestals_${Z}_${M}.log" 2>&1 + else + for SITE in CTA_NORTH CTA_SOUTH + do + DATADIR="PROD6/${SITE}-ze${ZE}deg-${M}" + if [ "$RUNMODE" = "convertToDST" ]; then + ./convertToDST.sh "${DATADIR}" > "${DATADIR}/convert_${SITE}_${Z}_${M}.log" 2>&1 + elif [ "$RUNMODE" = "produceIPRGraphs" ]; then + ./produceIPRGraphs.sh "${DATADIR}" > 
"${DATADIR}/ipr_${SITE}_${Z}_${M}.log" 2>&1 + elif [ "$RUNMODE" = "mergeIPRGraphs" ]; then + if [ "$SITE" = "CTA_NORTH" ]; then + S="north" + else + S="south" + fi + ./mergeIPRGraphs.sh "${DATADIR}" "prod6-${S}-${M}-ze${ZE}deg-IPR.root" + fi + done + fi + done done done diff --git a/IPRgeneration/setupPackage.sh b/IPRgeneration/setupPackage.sh index ed2c145..359af7b 100644 --- a/IPRgeneration/setupPackage.sh +++ b/IPRgeneration/setupPackage.sh @@ -1,13 +1,13 @@ #!/bin/bash # make sure zstd is available -source /cvmfs/cta.in2p3.fr/software/centos7/gcc48_default/tools/zstd/v1.4.5/setupPackage.sh +source /cvmfs/sw.cta-observatory.org/software/centos7/gcc48_noOpt/tools/zstd/v1.5.2/setupPackage.sh # use our version of gsl (for random numbers in sim_telarray) - reset LD_LIBRARY_PATH -source /cvmfs/cta.in2p3.fr/software/centos7/gcc48_default/tools/gsl/v1.15/setupPackage.sh +source /cvmfs/sw.cta-observatory.org/software/centos7/gcc48_noOpt/tools/gsl/v1.15/setupPackage.sh # Main directory # USER: Change this path to the grid package you want to use! # TODO -export CTA_PROD6_PATH="/cvmfs/cta.in2p3.fr/software/centos7/gcc83_noOpt/simulations/corsika_simtelarray/2022-08-03-sc/" +export CTA_PROD6_PATH="/cvmfs/sw.cta-observatory.org/software/centos7/gcc83_noOpt/simulations/corsika_simtelarray/2024-02-05/" export CTA_PATH=${CTA_PROD6_PATH} # CORSIKA and SIMTEL @@ -22,4 +22,3 @@ export SIMTEL_CONFIG_PREPROCESSOR="${SIM_TELARRAY_PATH}/bin/pfp -v -I." export MCDATA_PATH=${PWD}/Data export CORSIKA_DATA=${MCDATA_PATH}/corsika export SIM_TELARRAY_DATA=${MCDATA_PATH}/sim_telarray - diff --git a/LICENSE b/LICENSE index d30be47..c3a152b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2019, Eventdisplay Developers +Copyright (c) 2019-2026 Eventdisplay Developers (Gernot Maier, DESY) All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/NIM-North-sub.dat b/NIM-North-sub.dat new file mode 100644 index 0000000..1cfc36b --- /dev/null +++ b/NIM-North-sub.dat @@ -0,0 +1,3 @@ +2 2 2 2 +3 3 3 3 +4 4 4 4 diff --git a/NIM-South-sub.dat b/NIM-South-sub.dat index dce5267..10df77a 100644 --- a/NIM-South-sub.dat +++ b/NIM-South-sub.dat @@ -1,6 +1,5 @@ -2 1 2 2 -3 1 3 3 -4 1 4 4 2 2 2 2 3 3 3 3 4 4 4 4 +5 5 5 5 +6 6 6 6 diff --git a/NIM-South.dat b/NIM-South.dat index 3f4e29c..05c207f 100644 --- a/NIM-South.dat +++ b/NIM-South.dat @@ -1,9 +1,3 @@ -2 1 2 2 -2 1 3 2 -2 1 3 3 -2 1 4 2 -2 1 4 3 -2 1 5 3 2 2 2 2 2 2 3 2 2 2 3 3 @@ -11,15 +5,11 @@ 2 3 3 2 2 3 4 3 2 3 5 3 -3 1 3 3 -3 1 4 3 -3 1 4 4 -3 1 5 3 +2 4 4 3 +2 4 5 3 3 3 3 3 3 3 4 3 3 3 5 3 3 4 4 4 -4 1 4 4 -4 1 5 4 4 4 4 4 4 4 5 4 diff --git a/README.md b/README.md index c1d4df3..bbe40d4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Eventdisplay Analysis Scripts for CTA [![DOI](https://zenodo.org/badge/221257176.svg)](https://zenodo.org/badge/latestdoi/221257176) +[![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/4d356e6133ee4548ba8e4650c25c3a03)](https://app.codacy.com/gh/Eventdisplay/Eventdisplay_AnalysisScripts_CTA?utm_source=github.com&utm_medium=referral&utm_content=Eventdisplay/Eventdisplay_AnalysisScripts_CTA&utm_campaign=Badge_Grade) Run scripts for CTA. Allows to efficiently run all analysis steps starting from the raw MC files to sensitivities using Eventdisplay. @@ -12,15 +13,18 @@ Run scripts for CTA. Allows to efficiently run all analysis steps starting from All scripts expect the following setup for directories. Any deviation from this will break the scripts. 
Main directory for all analysis products: -``` + +```console ${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET} ``` -Main directory for all software and auxilaury files plus all log files from the analysis: -``` + +Main directory for all software and auxiliary files plus all log files from the analysis: + +```console ${CTA_USER_WORK_DIR}/analysis/AnalysisData/${DSET} ``` -$DSET is the name of the data set to be analysed, e.g. *prod3b-paranal20deg_SCT-sq08-LL*. +$DSET is the name of the data set to be analysed, e.g. *prod3b-paranal20deg_SCT-sq08-LL*. The following subdirectories are expected: @@ -35,7 +39,7 @@ Requires root to be installed and *$ROOTSYS* to be set (use ROOT versions >=6.20 Install *Eventdisplay_AnalysisScripts_CTA* from github and select the corresponding branch to work with (e.g., prod5-v08): -``` +```console git clone https://github.com/Eventdisplay/Eventdisplay_AnalysisScripts_CTA.git cd Eventdisplay_AnalysisScripts_CTA git checkout prod5-v08 @@ -43,12 +47,13 @@ git checkout prod5-v08 Install and compile eventdisplay (expect all Eventdisplay repositories with same branch names): -``` +```console cd install ./prepareProductionBinaries.sh prod5-v08 ``` This installs the following packages: + - hessioxx - sofa - Eventdisplay analysis files @@ -60,7 +65,7 @@ Note that only the data set name needs to be given here (e.g., *prod3b-paranal20 Before running any scripts, the correct paths for all executables and libraries needs to be set. 
-``` +```console cd ${CTA_USER_DATA_DIR}/analysis/AnalysisData/Eventdisplay_AnalysisScripts_CTA source ./setSoftwarePaths.sh ${DSET} ``` @@ -83,8 +88,9 @@ In this directory, the list of files for the different particle types are: - *electron.list* - *proton.list* -e.g., -``` +e.g., + +```console /lustre/fs24/group/cta/prod3b/CTA-ProdX-Download-DESY/Prod3b_Paranal_20deg_HB9//electron/electron_20deg_0deg_run945___cta-prod3-sct_desert-2150m-Paranal-SCT.simtel.gz /lustre/fs24/group/cta/prod3b/CTA-ProdX-Download-DESY/Prod3b_Paranal_20deg_HB9//electron/electron_20deg_0deg_run2962___cta-prod3-sct_desert-2150m-Paranal-SCT.simtel.gz /lustre/fs24/group/cta/prod3b/CTA-ProdX-Download-DESY/Prod3b_Paranal_20deg_HB9//electron/electron_20deg_180deg_run2634___cta-prod3-sct_desert-2150m-Paranal-SCT.simtel.gz @@ -94,30 +100,35 @@ e.g., ## Running the analysis -Central execution scripts are [CTA.mainRunScriptsReduced.sh](CTA.mainRunScriptsReduced.sh) and [CTA.runAnalysis.sh](CTA.runAnalysis.sh). +Central execution scripts are [CTA.mainRunScriptsReduced.sh](CTA.mainRunScriptsReduced.sh) and [CTA.runAnalysis.sh](CTA.runAnalysis.sh). In the best case, no changes are required to these scripts. e.g., to run the first step of the analysis with evndisp, do -``` + +```console ./CTA.mainRunScriptsReduced.sh prod6-Paranal-20deg-dark-sq10-LL EVNDISP ``` + (or set any other data set, as outlined in ./CTA.mainRunScriptsReduced.sh) To submit script, check the log file directory printed to the screen (the directory with the UUID) and then run: -``` + +```console ./utilities/submit_scripts_to_htcondor.sh submit ``` -Try this first without the submit argument and check the `submit.txt` file. + +Try this first without the submit argument and check the `submit.txt` file. This assumes the HTCondor job submission system. Gridengine will work after changing the variable `SUBC` from `condor` to `qsub` in the scripts `analysis/*sub*`. 
The script `./CTA.mainRunScriptsReduced.sh` does the following: -- read a list of arrays from a subdirectory specificed for your data set in ./CTA.runAnalysis.sh (e.g., prod3b/subArray.prod3b.South-SCT.list) +- read a list of arrays from a subdirectory specified for your data set in ./CTA.runAnalysis.sh (e.g., prod3b/subArray.prod3b.South-SCT.list) - execute scripts to submit jobs from the ./analysis directory - all output products are written to *${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET}* - for all telescope multiplicity dependent analysis, this is done for the multiplicities defined in `NIM-South.dat` and `NIM-South-sub.dat`. On the list of arrays: + - arrays are defined by the telescope numbering as defined during the simulations. - array layout definition files can be found in *$$CTA_EVNDISP_AUX_DIR}/DetectorGeometry* @@ -142,30 +153,30 @@ Testing the results for consistent is important; please look into the testProduc Set links to EVNDISP production directory: -``` +```console cd utitilities ./linkEvndispProduction.sh prod5-LaPalma-20deg-EVNDISP prod5-LaPalma-20deg-v02-LL ../prod5/subArray.prod5.North-noHyper-N.list ``` Set links for hyper array analysis: -``` +```console cd utiltities ./prepareHyperProduction.sh prod5-LaPalma-20deg-v01-LL prod5-LaPalma-20deg-h01-LL ../prod5/subArray.prod5.North-noHyper.list ``` Count number of files in production directories: -``` +```console cd utilities ./countFilesinProduction.sh prod5-LaPalma-20deg-EVNDISP ../prod5/subArray.prod5.North-noHyper.list EVNDISP ``` -e.g., cross checks that number of EVNDISP files is correct +e.g., cross checks that number of EVNDISP files is correct -## Licence +## License -License: BSD-3 (see LICENCE file) +License: BSD-3 (see LICENSE file) ## Contact diff --git a/analysis/CTA.DISPTRAINING.qsub_analyse.sh b/analysis/CTA.DISPTRAINING.qsub_analyse.sh index 31be991..673fa13 100755 --- a/analysis/CTA.DISPTRAINING.qsub_analyse.sh +++ b/analysis/CTA.DISPTRAINING.qsub_analyse.sh @@ -15,6 
+15,8 @@ TMVAO=TTT DSET="DATASET" ARRAY=AAA QC="QQQQ" +# WEIGHT="MCe0*MCe0/(1.-loss)/(1.-loss)/(1.-loss)/(1.-loss)/(1.-loss)/(1.-loss)" +WEIGHT="1." # set the right observatory (environmental variables) source $EVNDISPSYS/setObservatory.sh CTA @@ -67,7 +69,8 @@ $EVNDISPSYS/bin/trainTMVAforAngularReconstruction $TLIST \ ${TMVAO} \ ${ADIR} \ "" \ - ${QC} \ + "${QC}" \ + "${WEIGHT}" \ 0 > $ODIR/${BDT}-${TTYPE}.training.log 2>&1 ######################################### diff --git a/analysis/CTA.DISPTRAINING.sub_analyse.sh b/analysis/CTA.DISPTRAINING.sub_analyse.sh index d6580c0..dfa4394 100755 --- a/analysis/CTA.DISPTRAINING.sub_analyse.sh +++ b/analysis/CTA.DISPTRAINING.sub_analyse.sh @@ -22,10 +22,10 @@ h_cpu="47:29:00" h_vmem="24000M" tmpdir_size="1G" -if [ $# -lt 5 ] +if [ $# -lt 7 ] then echo - echo "CTA.DISPTRAINING_sub_analyse.sh [scaling] [qsub options (optional)] [job_dir]" + echo "CTA.DISPTRAINING_sub_analyse.sh [qsub options] [job_dir]" echo "" echo " e.g. cta-ultra3, ISDC3700m, ... " echo " training results will be written to this directory (full path)" @@ -33,6 +33,7 @@ then echo " layout name with telescope type ID and scaling (e.g. S.3HB1)" echo " file name of list of TMVA parameter file" echo " layout scaling (e.g. 5); give 99 to ignore scaling" + echo " DISP quality-cut parameter file" echo echo " (note 1: hardwired telescope types in this script)" echo " (note 2: disp core training switched off)" @@ -49,16 +50,17 @@ RECID=$3 ARRAY=$4 TMVAP=$5 SCALING=999 -if [ -n $6 ] +if [ -n "$6" ] then SCALING=$6 fi -TMVAQC="" -if [ -n $8 ] -then - TMVAQC="$7" +TMVAQC="$7" +if [ ! 
-r "$TMVAQC" ]; then + echo "Error: DISP quality-cut parameter file not found: $TMVAQC" >&2 + exit 1 fi -if [ -n $8 ] +QSUBOPT="" +if [ -n "$8" ] then QSUBOPT="$8" fi @@ -88,7 +90,7 @@ EVNDISP="EVNDISP" DATE=`date +"%y%m%d"` QLOG=$CTA_USER_LOG_DIR/$DATE/DISPTRAINING/ SHELLDIR=$CTA_USER_LOG_DIR/$DATE/DISPTRAINING/ -if [ -n ${9} ]; then +if [ -n "${9}" ]; then QLOG=${9} SHELLDIR=${QLOG} fi @@ -149,7 +151,7 @@ fi if [[ $TMVAP == *"MLP"* ]]; then declare -a MLPLIST=( "MLPDisp" "MLPDispEnergy" "MLPDispError" "MLPDispCore" ) else - declare -a MLPLIST=( "BDTDisp" "BDTDispEnergy" "BDTDispError" "BDTDispCore" "BDTDispPhi" ) + declare -a MLPLIST=( "BDTDisp" "BDTDispEnergy" "BDTDispError" "BDTDispCore" "BDTDispPhi" "BDTDispSign" ) fi ######################################### @@ -168,12 +170,9 @@ do echo $QC let "NSTEP = $NSTEP + 1" - OFFDIR=${ODIR}.T${NSTEP} - OFFDIR=${ODIR}.E${NSTEP} - # removed cross and tgrad - OFFDIR=${ODIR}.S${NSTEP} - # tgrad^2 to trad - OFFDIR=${ODIR}.R${NSTEP} + # output directory (match CTA.MSCW_ENERGY.qsub_analyse_MC.sh) + # e.g. 
J1 + OFFDIR=${ODIR}.J${NSTEP} #################### # output directory TDIR="${OFFDIR}/${MLP}/${MCAZ}/" @@ -188,42 +187,15 @@ do echo "==========================================================================" #################### - # input file list - rm -f $SHELLDIR/tempList.list - find $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/${EVNDISP}/gamma_cone/ -name "*[_,.]${MCAZ}*.root" > $SHELLDIR/tempList.list - NFIL=`wc -l $SHELLDIR/tempList.list | awk '{print $1}'` - echo "Total number of files available: $NFIL" - # only use NN% of all evndisp files for training - # (for LSTs: use more, as there are less telescopes) - # South: 10% - # North: 20% - k=`expr 0.2*$NFIL | bc` - if [[ $DSET == *"LaPalma"* ]] - then - k=$(echo $NFIL | awk '{printf "%d\n",$1*0.25}') - elif [[ $DSET == *"SCT"* ]] - then - k=$(echo $NFIL | awk '{printf "%d\n",$1*0.30}') - else - k=$(echo $NFIL | awk '{printf "%d\n",$1*0.10}') - fi - # SV1 arrays: mix directories by hand!! - if [[ $ARRAY == *"SV1"* ]] - then - k=$(echo $NFIL) - fi - if [[ $ARRAY == *"DISP"* ]] - then - k=$(echo $NFIL | awk '{printf "%d\n",$1*0.50}') - fi + # input file list (based on lists generated by separateDispTrainingEvndispFiles) TLIST="$SHELLDIR/EDISP-$DSET-$ARRAY-$SCALING-$MCAZ-$TELTYPE-$MLP-$NSTEP.list" - rm -f $TLIST - shuf -n $k $SHELLDIR/tempList.list > $TLIST - echo "List of $k input files for training: $TLIST" + cp -f $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/EVNDISP.TRAIN/gamma_cone_${MCAZ}.list ${TLIST} + shuf ${TLIST} -o ${TLIST} + echo "List of $(wc -l < ${TLIST}) input files for training: $TLIST" #################### # prepare run scripts - FNAM="$SHELLDIR/EDISP-$ARRAY-$SCALING-$MCAZ-$TELTYPE-$MLP-$NSTEP" + FNAM="$SHELLDIR/EDISP-$ARRAY-$DSET-$SCALING-$MCAZ-$TELTYPE-$MLP-$NSTEP" cp $FSCRIPT.sh $FNAM.sh sed -i -e "s|OFILE|$TDIR|" \ diff --git a/analysis/CTA.EFFAREA.qsub_analyse_list.sh b/analysis/CTA.EFFAREA.qsub_analyse_list.sh index 6e2fc81..ea181ae 100644 --- 
a/analysis/CTA.EFFAREA.qsub_analyse_list.sh +++ b/analysis/CTA.EFFAREA.qsub_analyse_list.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # calculate effective areas and instrument response functions for CTA # @@ -28,6 +28,12 @@ MCAZ="PPPMCAZ" PARTID="PARTIDNOTSET" ###################################################################### +if [[ ! -n "$TMPDIR" ]]; +then + TMPDIR="$CTA_USER_DATA_DIR/tmp" + mkdir -p "$TMPDIR" +fi + # Choose PARTICLE type from job id for SGE qsub system re='^[0-9]+$' if ! [[ $PARTID =~ $re ]] ; then @@ -73,7 +79,9 @@ then DL2FILLING=`grep DL2 $ANAPAR | awk {'print $2'}` fi -NIMAGESMIN=`grep NIMAGESMIN $ANAPAR | awk {'print $2'}` +NIMAGESMIN=$(grep NIMAGESMIN "$ANAPAR" | awk '{print $2}') +# multiplicity dependent model selection +XGBMINTEL=23 # get telescope type dependent cuts NCUTLST=`grep NLST $ANAPAR | awk {'print $2'}` NCUTMST=`grep NMST $ANAPAR | awk {'print $2'}` @@ -125,12 +133,17 @@ if [ -z $EFFAREABASEDIR ] then EFFAREABASEDIR=$EFFAREADIR fi -# see if strict separation of training/testing events if possible -# (mscw files would be in a directory ....EFF) -if [ -e ${PRODBASEDIR}/$ARRAY/${ANADIR}.EFFAREA.MCAZ${MCAZ} ] +# Require the explicitly prepared analysis partition. Falling back to ANADIR +# would allow files used for machine-learning training into the final IRFs. +SPLITANADIR=${ANADIR}.EFFAREA.MCAZ${MCAZ} +if [ ! -d "${PRODBASEDIR}/$ARRAY/${SPLITANADIR}" ] then - ANADIR=${ANADIR}.EFFAREA.MCAZ${MCAZ} + echo "Error: analysis/test data directory not found:" >&2 + echo "${PRODBASEDIR}/$ARRAY/${SPLITANADIR}" >&2 + echo "Run CTA.prepareAnalysis_no_sub.sh before the effective-area analysis." >&2 + exit 1 fi +ANADIR=${SPLITANADIR} # observation time OBSTIME=`grep OBSERVINGTIME_H $ANAPAR | awk {'print $2'}` @@ -505,6 +518,7 @@ do echo "* FILLINGMODE 3" >> $MSCF fi # fill IRFs only + echo "* RECONSTRUCTIONTYPE XGBSTEREO ${XGBMINTEL}" >> $MSCF echo "* ENERGYRECONSTRUCTIONMETHOD 1" >> $MSCF echo "* ENERGYAXISBINS 60 -2. 4." 
>> $MSCF echo "* ENERGYAXISBINHISTOS 25 -1.9 3.1" >> $MSCF @@ -585,10 +599,19 @@ do ############################## # run effective area code - ${EVNDISPSYS}/bin/makeEffectiveArea $MSCF $OFIX.root > $OLOG.log + if ! ${EVNDISPSYS}/bin/makeEffectiveArea $MSCF $OFIX.root > $OLOG.log; then + echo "makeEffectiveArea failed for $OFIX.root" >&2 + touch $OLOG.SMALLFILE + continue + fi # cross check if run was successfull # (expect simply > 800k) + if [[ ! -e $OFIX.root ]]; then + echo "Effective-area output not found: $OFIX.root" >&2 + touch $OLOG.SMALLFILE + continue + fi DS=$(du -k $OFIX.root | cut -f 1) if [[ ${DS} -le $minimumsize ]]; then touch $OLOG.SMALLFILE diff --git a/analysis/CTA.EFFAREA.sub_analyse_list.sh b/analysis/CTA.EFFAREA.sub_analyse_list.sh index 6dab8a1..4a17975 100755 --- a/analysis/CTA.EFFAREA.sub_analyse_list.sh +++ b/analysis/CTA.EFFAREA.sub_analyse_list.sh @@ -9,10 +9,10 @@ set -e SUBC="condor" h_cpu="11:29:00" -h_vmem="40000M" +h_vmem="24000M" tmpdir_size="15G" -if [ $# -lt 6 ] +if [ $# -lt 6 ] then echo "" echo "./CTA.EFFAREA.sub_analyse_list.sh [filling mode] [qsub options] [job_dir] [direction (e.g. _180deg)]" @@ -24,7 +24,7 @@ then echo " template for gamma/hadron cut file" echo " (suffix must be .gamma/.CRbck ; this will be added by this script)" echo " examples can be found in $CTA_EVNDISP_AUX_DIR/GammaHadronCutFiles" - echo + echo echo "" echo " file with analysis parameter" echo " examples can be found in $CTA_EVNDISP_AUX_DIR/ParameterFiles/" @@ -64,9 +64,9 @@ if [ -n $7 ] then QSUBOPT="$7" fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} -QSUBOPT=${QSUBOPT//\"/} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//\"/} # set directories source ../setSoftwarePaths.sh $DSET @@ -76,7 +76,7 @@ CDIR="$CTA_EVNDISP_AUX_DIR/GammaHadronCutFiles/" # read values from parameter file if [ ! 
-e $ANAPAR ] then - echo "error: analysis parameter file not found: $ANAPAR" + echo "error: analysis parameter file not found: $ANAPAR" exit fi echo "reading analysis parameter from $ANAPAR" @@ -106,7 +106,7 @@ mkdir -p $QSHELLDIR mkdir -p $QDIR ################################################# -# set particle types +# set particle types # (don't expect to have cone for all data sets) if [ $GMOD = "0" ] || [ $GMOD = "3" ] then @@ -159,7 +159,6 @@ do chmod u+x $QSHELLDIR/$FNAM.sh - echo $QSHELLDIR/$FNAM.sh ########################################### # submit the job script @@ -173,6 +172,7 @@ do sed -e "s|PARTIDNOTSET|$PARTICLEID|" "${QSHELLDIR}/${FNAM}.sh" > "${QSHELLDIR}/${FNAM}-${PARTICLEID}.sh" chmod u+x "${QSHELLDIR}/${FNAM}-${PARTICLEID}.sh" ./condorSubmission.sh "${QSHELLDIR}/${FNAM}-${PARTICLEID}.sh" $h_vmem $tmpdir_size + echo "$QSHELLDIR/${FNAM}-${PARTICLEID}.sh" done rm -f "$QSHELLDIR/$FNAM.sh" fi @@ -187,6 +187,7 @@ do sed -e "s|PARTIDNOTSET|$PARTICLEID|" "${QSHELLDIR}/${FNAM}.sh" > "${QSHELLDIR}/${FNAM}-${PARTICLEID}.sh" chmod u+x "${QSHELLDIR}/${FNAM}-${PARTICLEID}.sh" ./condorSubmission.sh "${QSHELLDIR}/${FNAM}-${PARTICLEID}.sh" $h_vmem $tmpdir_size + echo "$QSHELLDIR/${FNAM}-${PARTICLEID}.sh" done rm -f "$QSHELLDIR/$FNAM.sh" fi diff --git a/analysis/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.sh b/analysis/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.sh index 62e966c..04f25b2 100755 --- a/analysis/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.sh +++ b/analysis/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.sh @@ -127,8 +127,12 @@ do #################################################################### # execute converter - SIMFIL=`ls $TMPDIR/*.simtel.${EXTE}` - echo "TMPDIR FILES " $SIMFIL + SIMFIL="$TMPDIR/$(basename "$IFIL0")" + if [ ! 
-f "$SIMFIL" ]; then + echo "ERROR: copied simulation file not found: $SIMFIL" >&2 + exit 1 + fi + echo "TMPDIR FILE: $SIMFIL" if [[ $DSET == *"prod3"* ]] then if [[ $DSET == *"paranal"* ]] && [[ $DSET != *"prod3b"* ]] @@ -189,7 +193,7 @@ do then $EVNDISPSYS/bin/logFile convLog $TMPDIR/${RUNN}.root $TMPDIR/$OFIL.$N.convert.log fi - cp -v -f $TMPDIR/[0-9]*.root ${ODIR}/${RUNN}HD_${ILINE}_${MCAZ}deg.root + cp -v -f $TMPDIR/[0-9]*.root ${ODIR}/${RUNN}CTAO_${ILINE}_${MCAZ}deg.root else echo "No root files found!" if [ -e $TMPDIR/$OFIL.$N.convert.log ]; then diff --git a/analysis/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.sh b/analysis/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.sh index 118c76c..8e3c8c8 100755 --- a/analysis/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.sh +++ b/analysis/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.sh @@ -102,7 +102,9 @@ then elif [[ $DSET == *"prod5"* ]] then ARRAYCUTS="EVNDISP.prod5.reconstruction.runparameter" - if [[ $DSET == *"moon"* ]] || [[ $DSET == *"Moon"* ]] || [[ $DSET == *"NSB5x"* ]]; then + if [[ $DSET == *"fullmoon"* ]] || [[ $DSET == *"NSB30x"* ]]; then + PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod5/prod5-fullmoon-IPR.root" + elif [[ $DSET == *"moon"* ]] || [[ $DSET == *"Moon"* ]] || [[ $DSET == *"NSB5x"* ]]; then PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod5/prod5-halfmoon-IPR.root" else PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod5/prod5-IPR.root" @@ -111,12 +113,16 @@ elif [[ $DSET == *"prod6"* ]] then ARRAYCUTS="EVNDISP.prod6.reconstruction.runparameter" ZE=$(echo $DSET | cut -d'-' -f3) + obs="north" + if [[ $DSET == *"Paranal"* ]]; then + obs="south" + fi if [[ $DSET == *"fullmoon"* ]]; then - PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod6/prod6-full-ze${ZE}-IPR.root" + PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod6/prod6-${obs}-full-ze${ZE}-IPR.root" elif [[ $DSET == *"moon"* ]] || [[ $DSET == *"Moon"* ]] || [[ $DSET == *"NSB5x"* ]]; then - 
PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod6/prod6-half-ze${ZE}-IPR.root" + PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod6/prod6-${obs}-half-ze${ZE}-IPR.root" else - PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod6/prod6-dark-ze${ZE}-IPR.root" + PEDFIL="$CTA_EVNDISP_AUX_DIR/Calibration/prod6/prod6-${obs}-dark-ze${ZE}-IPR.root" fi else echo "error: unknown production in $DSET" @@ -203,7 +209,7 @@ do then DCACHEOPT=" -l cta_dcache=1 " fi - echo "$DCACHEOPT" + echo "DCACHEOPT $DCACHEOPT" if [[ $NRUN -ne 0 ]] then @@ -222,7 +228,7 @@ do done echo "submit script with $FNAM.sh" -echo "condor_submit $FNAM.sh.condor requirements='OpSysAndVer==\"AlmaLinux9\"'" +echo "condor_submit $FNAM.sh.condor" echo "writing queue log and error files to $QLOG" exit diff --git a/analysis/CTA.MSCW_ENERGY.qsub_analyse_MC.sh b/analysis/CTA.MSCW_ENERGY.qsub_analyse_MC.sh index d8deb59..fb64f30 100755 --- a/analysis/CTA.MSCW_ENERGY.qsub_analyse_MC.sh +++ b/analysis/CTA.MSCW_ENERGY.qsub_analyse_MC.sh @@ -2,7 +2,6 @@ # # script to analyse CTA MC files with lookup tables # -# TABFIL=TABLEFILE RECID=RECONSTRUCTIONID @@ -124,14 +123,18 @@ else exit fi +# number of telescopes +NTEL=$(grep -vE '^\s*#|^$' $LISFILE | wc -l) +echo "Number of telescopes in array: $NTEL" + ######################################### # options for simple stereo reconstruction MOPT="$MOPT -redo_stereo_reconstruction -sub_array_sim_telarray_counting $LISFILE" -if [[ $DSET == *"LaPalma"* ]]; then +if [[ $NTEL == "2" ]]; then MOPT="$MOPT -minangle_stereo_reconstruction=15." else - MOPT="$MOPT -minangle_stereo_reconstruction=10." + MOPT="$MOPT -minangle_stereo_reconstruction=5." fi # IMPORTANT: this must be the same or lower value as in dispBDT training if [[ $RECID == "1" ]]; then @@ -142,15 +145,14 @@ elif [[ $RECID == "2" ]]; then MOPT="$MOPT -maxdistfraction=0.75" else MOPT="$MOPT -maxloss=0.2 -minfui=0." 
- MOPT="$MOPT -maxdistfraction=0.80" fi ######################################### # disp reconstruction -# +# MVATYPE="BDT" # disp main directory name -DISPSUBDIR="DISPBDT/${MVATYPE}disp.${ARRAY}.R1" +DISPSUBDIR="DISPBDT/${MVATYPE}disp.${ARRAY}.J1" echo "CHECKING ${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET}/${DISPSUBDIR/${ARRAY}/HYPERARRAY}" if [[ -d ${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET}/${DISPSUBDIR/${ARRAY}/HYPERARRAY} ]]; then DISPSUBDIR="${DISPSUBDIR/${ARRAY}/HYPERARRAY}" @@ -160,9 +162,9 @@ else fi echo "DISPDIR $DISPSUBDIR" ######################################### -# unpack disp XML files for all telescope -# types to tmpdir -for ML in ${MVATYPE}Disp ${MVATYPE}DispError ${MVATYPE}DispEnergy +# unpack disp XML files for all telescope +# types to tmpdir (not all of them might be used) +for ML in ${MVATYPE}Disp ${MVATYPE}DispError ${MVATYPE}DispEnergy ${MVATYPE}DispCore ${MVATYPE}DispSign do MLDDIR="${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET}/${DISPSUBDIR}/${ML}/${MCAZ}/" echo "Unpacking ${ML} from ${MLDDIR}" @@ -183,7 +185,7 @@ do # exit # fi fi - done + done done ######################################### @@ -196,13 +198,22 @@ MOPT="$MOPT -tmva_nimages_max_stereo_reconstruction=100 -tmva_filename_stereo_re DISPERRORDIR="${TMPDIR}/${MVATYPE}DispError/${MCAZ}/${MVATYPE}DispError_${MVATYPE}_" MOPT="$MOPT -tmva_filename_disperror_reconstruction $DISPERRORDIR -tmva_disperror_weight 50" +########################################################################################################## +# options for DISP method (direction sign) +DISPSIGNDIR="${TMPDIR}/${MVATYPE}DispSign/${MCAZ}/${MVATYPE}DispSign_${MVATYPE}_" +MOPT="$MOPT -tmva_filename_dispsign_reconstruction $DISPSIGNDIR" + ########################################################################################################## # options for DISP method (core) # (switch on for single-telescope analysis) -# 
DISPCOREDIR="${TMPDIR}/${MVATYPE}DispCore/${MCAZ}/${MVATYPE}DispCore_${MVATYPE}_" -# if [[ $ARRAY == *"SV1"* ]]; then -# MOPT="$MOPT -tmva_filename_core_reconstruction $DISPCOREDIR" -# fi +DISPCOREDIR="${TMPDIR}/${MVATYPE}DispCore/${MCAZ}/${MVATYPE}DispCore_${MVATYPE}_" +if [[ $ARRAY == *"1LSTs"* ]] || [[ $ARRAY == *"01MSTs"* ]]; then + MOPT="$MOPT -tmva_filename_core_reconstruction $DISPCOREDIR" +fi +# single telescope multiplicity (note expectation on array naming) +if [[ $ARRAY == *"1LSTs00MSTs"* ]] || [[ $ARRAY == *"0LSTs01MSTs"* ]]; then + MINIMAGE=1 +fi ########################################################################################################## # options for DISP method (energy) @@ -251,10 +262,9 @@ fi if [ -e $TMPDIR/iList.list ] && [ -e $TMPDIR/$TFIL.root ] then $EVNDISPSYS/bin/logFile mscwTableList $TMPDIR/$TFIL.root $TMPDIR/iList.list - rm -f $TMPDIR/iList.list rm -f $IFIL fi - + mv -f -v $TMPDIR/$TFIL.root $ODIR/ # sleep diff --git a/analysis/CTA.MSCW_ENERGY.qsub_make_tables.sh b/analysis/CTA.MSCW_ENERGY.qsub_make_tables.sh index 5b06c3c..3978305 100755 --- a/analysis/CTA.MSCW_ENERGY.qsub_make_tables.sh +++ b/analysis/CTA.MSCW_ENERGY.qsub_make_tables.sh @@ -71,7 +71,7 @@ then LISFILE=$CTA_EVNDISP_AUX_DIR/DetectorGeometry/CTA.prod6${DARR}.lis elif [[ $DSET == *"prod3"* ]] then - if [[ $DSET == *"paranal"* ]] && [[ $DSET != *"prod3b"* ]] + if [[ $DSET == *"paranal"* ]] && [[ $DSET != *"prod3b"* ]] then DARR=${ARRAY%??} LISFILE=$CTA_EVNDISP_AUX_DIR/DetectorGeometry/CTA.prod3${DARR}.lis @@ -100,7 +100,7 @@ fi if [[ $DSET == *"prod3b"* ]] && [[ $DSET != *"SCT"* ]] then MOPT="$MOPT -teltypeweightfile $CTA_EVNDISP_AUX_DIR/DetectorGeometry/CTA.prod3b.TelescopeWeights.dat" -fi +fi echo $MOPT ######################################### diff --git a/analysis/CTA.MSCW_ENERGY.sub_analyse_MC.sh b/analysis/CTA.MSCW_ENERGY.sub_analyse_MC.sh index 44650d8..1945c14 100755 --- a/analysis/CTA.MSCW_ENERGY.sub_analyse_MC.sh +++ 
b/analysis/CTA.MSCW_ENERGY.sub_analyse_MC.sh @@ -1,11 +1,10 @@ -#!/bin/sh +#!/bin/bash # # script to analyse CTA MC files with lookup tables # -# SUBC="condor" h_cpu="11:29:00" -h_vmem="4000M" +h_vmem="8000M" tmpdir_size="12G" if [ $# -lt 7 ] @@ -43,12 +42,12 @@ fi MCAZ=$7 MINIMAGE=$8 QSUBOPT="" -if [ -n $9 ] +if [ -n "$9" ] then QSUBOPT="$9" fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} ######################################### # software paths @@ -66,7 +65,7 @@ fi DATE=`date +"%y%m%d"` QLOG=$CTA_USER_LOG_DIR/$DATE/ANALYSETABLES/ SHELLDIR="$QLOG/$ANADIR/" -if [ -n ${10} ]; then +if [ -n "${10}" ]; then QLOG=${10} SHELLDIR=${QLOG} fi @@ -89,45 +88,43 @@ for SUBAR in $VARRAY do echo "STARTING ARRAY $SUBAR" -# output directory ODIR="${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET}/${SUBAR}/${ANADIR}" mkdir -p ${ODIR} -######################################### -# loop over all particle types + ######################################### + # loop over all particle types for ((m = 0; m < $NPART; m++ )) do PART=${VPART[$m]} -# delete all old files (data and log files) for the particle type and azimuth angle - rm -f ${ODIR}/${PART}*ID${RECID}_${MCAZ}* + # delete all old files (data and log files) for the particle type and azimuth angle + rm -f ${ODIR}/${PART}*ID${RECID}_${MCAZ}* -# take $FILEN files and combine them into one mscw file - FILEN=125 - if [ $PART = "proton" ] - then - FILEN=500 - fi - -######################################### -# input files lists + # take $FILEN files and combine them into one mscw file + FILEN=125 + if [ $PART = "proton" ] + then + FILEN=500 + fi - TMPLIST=${ODIR}/$PART$NC"."$SUBAR"_ID"${RECID}${MCAZ}"-"$DSET".list" - rm -f $TMPLIST - echo $TMPLIST ${MCAZ} - find $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$SUBAR/EVNDISP/$PART/ -name "*[0-9]*[\.,_]${MCAZ}*.root" > $TMPLIST - NTMPLIST=`wc -l $TMPLIST | awk '{print $1}'` - echo "total number of files for particle type $PART 
($MCAZ) : $NTMPLIST" - NJOBTOT=$(( NTMPLIST / (FILEN - 1))) - echo "total number of jobs: $NJOBTOT" + ######################################### + # input files lists + TMPLIST=$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$SUBAR/EVNDISP.ANALYSIS/${PART}_${MCAZ}.list + NTMPLIST=$(wc -l < "$TMPLIST") + echo "total number of files for particle type $PART ($MCAZ) : $NTMPLIST" + NJOBTOT=$(( NTMPLIST / (FILEN - 1))) + if [[ $NJOBTOT == 0 ]]; then + NJOBTOT=1 + fi + echo "total number of jobs: $NJOBTOT" -# output file name for mscw_energy - TFIL=$PART$NC"."$SUBAR"_ID${RECID}_${MCAZ}-"$DSET + # output file name for mscw_energy + TFIL=$PART"."$SUBAR"_ID${RECID}_${MCAZ}-"$DSET -# skeleton script + # skeleton script FSCRIPT="CTA.MSCW_ENERGY.qsub_analyse_MC" -# name of script actually submitted to the queue + # name of script actually submitted to the queue FNAM="$SHELLDIR/MSCW.ana-$DSET-ID$RECID-$PART-${MCAZ}-array$SUBAR-$6" sed -e "s|TABLEFILE|$TABLE|" \ @@ -139,15 +136,15 @@ do -e "s|FILELIST|${TMPLIST}|" \ -e "s|FILELENGTH|$FILEN|" \ -e "s|NNNIMAGE|$MINIMAGE| " \ - -e "s|AAAAADIR|$ANADIR|" $FSCRIPT.sh > $FNAM.sh + -e "s|AAAAADIR|$ANADIR|" $FSCRIPT.sh > $FNAM.sh chmod u+x $FNAM.sh echo "run script written to $FNAM.sh" echo "queue log and error files written to $QLOG" -# submit the job + # submit the job if [[ $SUBC == *qsub* ]]; then - qsub $QSUBOPT -t 1-$NJOBTOT:1 -l h_cpu=${h_cpu} -l h_rss=${h_vmem} -l tmpdir_size=${tmpdir_size} -V -o $QLOG -e $QLOG "$FNAM.sh" + qsub $QSUBOPT -t 1-$NJOBTOT:1 -l h_cpu=${h_cpu} -l h_rss=${h_vmem} -l tmpdir_size=${tmpdir_size} -V -o $QLOG -e $QLOG "$FNAM.sh" elif [[ $SUBC == *condor* ]]; then for (( i=1 ; i<=$NJOBTOT ; i++ )); do sed -e "s|PIDNOTSET|$i|" "${FNAM}.sh" > "${FNAM}-${i}.sh" @@ -158,5 +155,3 @@ do fi done done - -exit diff --git a/analysis/CTA.MSCW_ENERGY.sub_make_tables.sh b/analysis/CTA.MSCW_ENERGY.sub_make_tables.sh index eadcec7..3f2ec10 100755 --- a/analysis/CTA.MSCW_ENERGY.sub_make_tables.sh +++ 
b/analysis/CTA.MSCW_ENERGY.sub_make_tables.sh @@ -46,7 +46,7 @@ fi DSET=$5 AZ=$6 [[ "$7" ]] && MINTEL=$7 || MINTEL="4" -if [ -n $8 ] +if [ -n "$8" ] then QSUBOPT="$8" fi @@ -79,7 +79,7 @@ fi # in case you submit a lot of scripts: QLOG=/dev/null QLOG=$CTA_USER_LOG_DIR/$DATE/MAKETABLES/ SHELLDIR=$CTA_USER_LOG_DIR/$DATE/MAKETABLES/ -if [ -n ${9} ]; then +if [ -n "${9}" ]; then QLOG=${9} SHELLDIR=${QLOG} fi @@ -100,7 +100,7 @@ do TAFIL=$TFIL # run scripts - FNAM="$SHELLDIR/EMSCW.table-$TAFIL-W$MEANDIST-${ARRAY}${AZ}" + FNAM="$SHELLDIR/EMSCW.table-$TAFIL-${ARRAY}${AZ}" cp $FSCRIPT.sh $FNAM.sh cp $FSCRIPT.sh $FNAM.sh diff --git a/analysis/CTA.TMVA.qsub_train.sh b/analysis/CTA.TMVA.qsub_train.sh index 239248c..a6bc046 100755 --- a/analysis/CTA.TMVA.qsub_train.sh +++ b/analysis/CTA.TMVA.qsub_train.sh @@ -20,7 +20,6 @@ do echo ${PFIL}.runparameter - echo "JOB ID ${SGE_JOB_ID}" > "${PFIL}".log ${EVNDISPSYS}/bin/trainTMVAforGammaHadronSeparation "${PFIL}".runparameter >> "${PFIL}".log CDIR=$(dirname "$PFIL".log) diff --git a/analysis/CTA.TMVA.sub_train.sh b/analysis/CTA.TMVA.sub_train.sh index 9761ea4..281e73a 100755 --- a/analysis/CTA.TMVA.sub_train.sh +++ b/analysis/CTA.TMVA.sub_train.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # script to train cuts/MVAs with TMVA # @@ -7,20 +7,20 @@ SUBC="condor" h_cpu="11:29:00" -h_vmem="4000M" +h_vmem="16000M" tmpdir_size="1G" if [ $# -lt 4 ] then echo - echo "CTA.TMVA.sub_train.sh [qsub options] [direction (e.g. _180deg)] [job_dir]" + echo "CTA.TMVA.sub_train.sh [qsub options] [direction (e.g. _180deg)] [job_dir]" echo "" - echo " text file with list of subarray IDs" + echo " mva run mode ('TrainGammaHadronSeparation', 'TrainAngularReconstructionMethod', 'TrainReconstructionQuality')" echo - echo " calculate tables for on source or different wobble offsets" + echo " text file with list of subarray IDs" echo echo " e.g. cta-ultra3, ISDC3700, ... " - echo + echo echo " e.g. 
for north: \"_180deg\", for south: \"_0deg\", for all directions: no option" echo echo " note 1: keywords ENERGYBINS and OUTPUTFILE are ignored in the runparameter file" @@ -39,9 +39,10 @@ fi ANAPAR=$4 if [ ! -e "$ANAPAR" ] then - echo "error: analysis parameter file not found: $ANAPAR" + echo "error: analysis parameter file not found: $ANAPAR" exit fi +TMVARUNMODE="$1" echo "reading analysis parameter from $ANAPAR" NIMAGESMIN=$(grep NIMAGESMIN "$ANAPAR" | awk {'print $2'}) NCUTLST=$(grep NLST "$ANAPAR" | awk {'print $2'}) @@ -49,61 +50,45 @@ NCUTMST=$(grep NMST "$ANAPAR" | awk {'print $2'}) NCUTSST=$(grep NSST "$ANAPAR" | awk {'print $2'}) NCUTMSCT=$(grep NSCMST "$ANAPAR" | awk {'print $2'}) ANADIR=$(grep MSCWSUBDIRECTORY "$ANAPAR" | awk {'print $2'}) -DDIR=$(grep TMVASUBDIR "$ANAPAR" | awk {'print $2'}) +if [ "$TMVARUNMODE" == "TrainAngularReconstructionMethod" ]; then + DDIR=$(grep TMVA_RECO_METHOD "$ANAPAR" | awk {'print $2'}) +elif [ "$TMVARUNMODE" == "TrainReconstructionQuality" ]; then + DDIR=$(grep TMVA_RECO_QUALITY "$ANAPAR" | awk {'print $2'}) +else + DDIR=$(grep TMVASUBDIR "$ANAPAR" | awk {'print $2'}) +fi RECID=$(grep RECID "$ANAPAR" | awk {'print $2'}) -echo "Analysis parameter: " "$NIMAGESMIN" "$ANADIR" "$DDIR" DSET=$3 +echo "Analysis parameter: " "$NIMAGESMIN" "$ANADIR" "$DDIR" OFIL="BDT" -CONE="FALSE" -if [[ $2 == cone ]] -then - CONE="TRUE" -fi -VARRAY=$(awk '{printf "%s ",$0} END {print ""}' "$1") +VARRAY=$(awk '{printf "%s ",$0} END {print ""}' "$2") ###################################################### # TMVA parameters are detetermined from data set name RPAR="$CTA_EVNDISP_AUX_DIR/ParameterFiles/TMVA.BDT" RXPAR=$(basename "$RPAR".runparameter runparameter) ##################################### -if [ -n "$6" ] -then - MCAZ=$6 -fi - +MCAZ=${6:-$MCAZ} # batch farm submission options -if [ -n "$5" ] -then - QSUBOPT="$5" -fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} -QSUBOPT=${QSUBOPT//\"/} +QSUBOPT=${5:-$QSUBOPT} +QSUBOPT=${QSUBOPT//_X_/ } 
+QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//\"/} +# log dir +DATE=$(date +"%y%m%d") +LDIR=$CTA_USER_LOG_DIR/$DATE/TRAIN/ +LDIR=${7:-$LDIR} ##################################### # energy bins -# default EMIN=( -1.90 -1.90 -1.45 -1.20 -0.95 -0.50 -0.10 0.45 0.90 ) EMAX=( -1.40 -1.30 -1.15 -0.80 -0.25 0.25 0.75 1.50 2.50 ) -# 20201009 -#EMIN=( -1.90 -1.20 -0.75 0.00 0.50 ) -#EMAX=( -1.15 -0.50 0.25 1.50 2.50 ) NENE=${#EMIN[@]} ##################################### -# offset bins -if [ $CONE == "TRUE" ] -then - OFFMIN=( 0.0 1.0 2.0 2.5 4.0 5.0 ) - OFFMAX=( 3.0 3.0 3.5 4.5 5.0 6.0 ) - OFFMEA=( 0.5 1.5 2.5 3.5 4.5 5.5 ) - GTYPE="cone10_evndisp" -else - OFFMIN=( "0.0" ) - OFFMAX=( "3." ) -# value used until 2015-11-09 -# OFFMAX=( "1.e10" ) - OFFMEA=( 0.0 ) -fi +# offset bins +OFFMIN=( 0.0 1.0 2.0 2.5 4.0 5.0 ) +OFFMAX=( 3.0 3.0 3.5 4.5 5.0 6.0 ) +OFFMEA=( 0.5 1.5 2.5 3.5 4.5 5.5 ) NOFF=${#OFFMIN[@]} ###################################### @@ -118,11 +103,6 @@ fi ###################################### # log files -DATE=$(date +"%y%m%d") -LDIR=$CTA_USER_LOG_DIR/$DATE/TMVATRAINING/ -if [ -n ${7} ]; then - LDIR=${7} -fi QLOG=$LDIR mkdir -p "$LDIR" echo "Log directory: " "$LDIR" @@ -145,7 +125,7 @@ do then echo "No training file found - continuing" echo ${FFF} - exit + continue fi echo "Teltype cuts: LSTs ($NCUTLST) MSTS ($NCUTMST) SSTs ($NCUTSST) MSCTs ($NCUTMSCT)" echo ${FFF} @@ -153,7 +133,7 @@ do NTELTYPE=$(echo ${NTELTYPESTRING} | awk '{print $1}') # find correct index for each cut for (( N = 0; N < $NTELTYPE; N++ )) - do + do TELTYP=$(echo ${NTELTYPESTRING}| cut -d " " -f $((N+2))) if [[ $TELTYP == "NOTELESCOPETYPE" ]]; then echo "Error: telescope type not found: $N" @@ -172,8 +152,6 @@ do then TYPECUT="${TYPECUT})" fi - #NTYPECUT="NTtype==$NTELTYPE" - #TYPECUT="$NTYPECUT\&\&$TYPECUT" TYPECUT="$TYPECUT" echo "Telescope type cut: $TYPECUT" @@ -207,8 +185,9 @@ echo "* ENERGYBINS 1 ${EMIN[$i]} ${EMAX[$i]} echo "* PREEVENTLIST ${PREEVENTLIST}" >> $RFIL.runparameter 
############################################################ # setting the cuts in the run parameter file - + sed -i -e "s|MINIMAGES|$NIMAGESMIN|;s|MINIMAGETYPECUT|$TYPECUT|" \ + -e "s|TMVA_RUN_MODE|$TMVARUNMODE|" \ -e 's|ENERGYVARIABLE|ErecS|;s|ENERGYCHI2VARIABLE|EChi2S|g;s|ENERGYDEVARIABLE|dES|g' $RFIL.runparameter done rm -f -v ${ODIR}/TMVA.BDT.runparameter @@ -229,5 +208,3 @@ echo "* ENERGYBINS 1 ${EMIN[$i]} ${EMAX[$i]} fi done done - -exit diff --git a/analysis/CTA.WPPhysWriter.qsub.sh b/analysis/CTA.WPPhysWriter.qsub.sh index 18058a9..053c162 100755 --- a/analysis/CTA.WPPhysWriter.qsub.sh +++ b/analysis/CTA.WPPhysWriter.qsub.sh @@ -32,7 +32,7 @@ $EVNDISPSYS/bin/writeCTAWPPhysSensitivityFiles $AXRRAY $OXBSTIME $DXDIR $OXUTNAM ############################################################################ if [ -e $OXUTNAME.$AXRRAY.$OXBSTIME.log ] -then +then DE=$(grep "error filling" $OXUTNAME.$AXRRAY.$OXBSTIME.log) DF=$(grep "error, cannot find effective area tree" $OXUTNAME.$AXRRAY.$OXBSTIME.log) if [[ -z ${DE} ]] && [[ -z ${DF} ]]; then diff --git a/analysis/CTA.WPPhysWriter.sub.sh b/analysis/CTA.WPPhysWriter.sub.sh index 0c9908b..05c8069 100755 --- a/analysis/CTA.WPPhysWriter.sub.sh +++ b/analysis/CTA.WPPhysWriter.sub.sh @@ -10,7 +10,7 @@ tmpdir_size="1G" if [ $# -lt 7 ] then - echo + echo echo "./CTA.WPPhysWriter.sh [off-axis fine binning (default=FALSE)] [name] [job_dir] [qsub options]" echo echo " text file with list of subarray IDs" @@ -34,25 +34,25 @@ if [ -n "$8" ]; then fi PNAME="" -if [ -n $9 ]; then +if [ -n "$9" ]; then PNAME="$9" fi # log files DATE=`date +"%y%m%d"` FDIR=${CTA_USER_LOG_DIR}/$DATE/WPPHYSWRITER/ -if [ -n ${10} ]; then +if [ -n "${10}" ]; then FDIR="${10}" fi mkdir -p ${FDIR} echo "log directory: " ${FDIR} QSUBOPT="" -if [ -n ${11} ]; then +if [ -n "${11}" ]; then QSUBOPT="${11}" fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} 
############################################################################ # software paths diff --git a/analysis/CTA.XGBSTEREO.qsub_analyse.sh b/analysis/CTA.XGBSTEREO.qsub_analyse.sh new file mode 100755 index 0000000..748e915 --- /dev/null +++ b/analysis/CTA.XGBSTEREO.qsub_analyse.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# train XGB stereo for CTA +# + +MSCW_FILE="FFILE" +XGBDIR="DIRXGB" +MINTEL=TELMIN +XGB="xgb_stereo" +DSET="DATASET" +ENV_BIN="CONDA_ENV_BIN" +MAXCORES=1 + +# set environmental variables +if [ -z "${EVNDISPSYS:-}" ] || [ ! -r "$EVNDISPSYS/setObservatory.sh" ]; then + echo "Error: EVNDISPSYS is unset or setObservatory.sh is not readable." >&2 + exit 1 +fi +source "$EVNDISPSYS/setObservatory.sh" CTA || exit 1 + +# ENV_BIN is resolved once by the submission script. Calling the executable +# directly avoids conda startup and package-metadata access in every batch job. +XGB_APPLY="${ENV_BIN}/eventdisplay-ml-apply-xgb-stereo" +if [ ! -x "$XGB_APPLY" ]; then + echo "Error: incomplete eventdisplay-ml environment at '$ENV_BIN'." 
>&2 + exit 1 +fi +export PATH="${ENV_BIN}:${PATH}" +export CONDA_PREFIX="${ENV_BIN%/bin}" + +# hardwired max training images to three +[ "$MINTEL" -ge 3 ] && MINTEL=3 + +PREFIX="${XGBDIR}/dispdir_bdt_mintel${MINTEL}" +MODEL_OPTIONS=( --model_prefix "${PREFIX}" ) +# For NIM2 output, use different models for 2 and +# high multiplicity events +OMINTEL=${MINTEL} +[ "$OMINTEL" -eq 2 ] && OMINTEL=23 +if [ "$MINTEL" -eq 2 ]; then + MODEL_OPTIONS+=( + --model_prefix_high_multiplicity + "${XGBDIR}/dispdir_bdt_mintel3" + ) + OMINTEL=23 +fi + +if [[ $DSET == *"LaPalma"* ]]; then + site="CTAO-NORTH" +else + site="CTAO-SOUTH" +fi + +OFIL=$(basename $MSCW_FILE .root) +ODIR=$(dirname $MSCW_FILE) +OFIL="${ODIR}/${OFIL}.${XGB}_mintel${OMINTEL}" +LOGFILE="${OFIL}.log" +rm -f "$LOGFILE" + +echo "LOG FILE: $LOGFILE" + +"$XGB_APPLY" \ + --input_file "$MSCW_FILE" \ + "${MODEL_OPTIONS[@]}" \ + --output_file "${OFIL}.root" \ + --max_cores $MAXCORES \ + --observatory $site >| "${LOGFILE}" 2>&1 +status=$? + +exit "$status" diff --git a/analysis/CTA.XGBSTEREO.qsub_train.sh b/analysis/CTA.XGBSTEREO.qsub_train.sh new file mode 100755 index 0000000..d15d7d9 --- /dev/null +++ b/analysis/CTA.XGBSTEREO.qsub_train.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# +# train XGB stereo for CTA +# + +ODIR=OUTPUTDIR +LLIST=MSCWLIST +MINTEL=TELMIN +DSET="DATASET" +ENV_BIN="CONDA_ENV_BIN" +P="0.90" +N="500000000" +MAXCORES=NCORE + +# set environmental variables +if [ -z "${EVNDISPSYS:-}" ] || [ ! -r "$EVNDISPSYS/setObservatory.sh" ]; then + echo "Error: EVNDISPSYS is unset or setObservatory.sh is not readable." >&2 + exit 1 +fi +source "$EVNDISPSYS/setObservatory.sh" CTA || exit 1 + +# ENV_BIN is resolved once by the submission script. Calling the executable +# directly avoids Conda startup and package-metadata access in every batch job. +XGB_TRAIN="${ENV_BIN}/eventdisplay-ml-train-xgb-stereo" +if [ ! -x "$XGB_TRAIN" ]; then + echo "Error: incomplete eventdisplay-ml environment at '$ENV_BIN'." 
>&2 + exit 1 +fi +export PATH="${ENV_BIN}:${PATH}" +export CONDA_PREFIX="${ENV_BIN%/bin}" + +# output data files are written to this directory +mkdir -p "${ODIR}" || exit 1 +echo -e "Output files will be written to:\n ${ODIR}" + +PREFIX="${ODIR}/dispdir_bdt_mintel${MINTEL}" +LOGFILE="${PREFIX}.log" +rm -f "$LOGFILE" + +if [[ $DSET == *"LaPalma"* ]]; then + site="CTAO-NORTH" +else + site="CTAO-SOUTH" +fi + +{ + echo "Host: $(hostname)" + echo "Environment: $CONDA_PREFIX" + echo "Trainer: $XGB_TRAIN" +} > "${LOGFILE}" 2>&1 + +"$XGB_TRAIN" \ + --input_file_list "$LLIST" \ + --model_prefix "${PREFIX}" \ + --max_cores $MAXCORES \ + --observatory $site \ + --max_tel_per_type 10 \ + --min_images $MINTEL --memory_profile \ + --train_test_fraction $P --max_events $N >> "${LOGFILE}" 2>&1 +status=$? + +exit "$status" diff --git a/analysis/CTA.XGBSTEREO.sub_analyse.sh b/analysis/CTA.XGBSTEREO.sub_analyse.sh new file mode 100755 index 0000000..eba87c9 --- /dev/null +++ b/analysis/CTA.XGBSTEREO.sub_analyse.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# +# XGB stereo analysis training +# + +SUBC="condor" +h_cpu="0:29:00" +h_vmem="12000M" +tmpdir_size="1G" +env_name="eventdisplay_ml_cta" + +if [ $# -lt 4 ] +then + echo + echo "/CTA.XGBSTEREO.sub_analyse [qsub options] [direction (e.g. _180deg)] [job_dir]" + echo "" + echo " text file with list of subarray IDs" + echo + echo " e.g. cta-ultra3, ISDC3700, ... " + echo + echo " e.g. for north: \"_180deg\", for south: \"_0deg\", for all directions: no option" + echo + exit +fi + +####################################### +# read values from parameter file +ANAPAR=$3 +if [ ! 
-e "$ANAPAR" ] +then + echo "error: analysis parameter file not found: $ANAPAR" + exit +fi +echo "reading analysis parameter from $ANAPAR" +NIMAGESMIN=$(grep NIMAGESMIN "$ANAPAR" | awk {'print $2'}) +NCUTLST=$(grep NLST "$ANAPAR" | awk {'print $2'}) +NCUTMST=$(grep NMST "$ANAPAR" | awk {'print $2'}) +NCUTSST=$(grep NSST "$ANAPAR" | awk {'print $2'}) +NCUTMSCT=$(grep NSCMST "$ANAPAR" | awk {'print $2'}) +ANADIR=$(grep MSCWSUBDIRECTORY "$ANAPAR" | awk {'print $2'}) +RECID=$(grep RECID "$ANAPAR" | awk {'print $2'}) +DSET=$2 +MODELDIR="$4" +echo "Analysis parameter: " "$NIMAGESMIN" "$ANADIR" "$DSET" +VARRAY=$(awk '{printf "%s ",$0} END {print ""}' "$1") + +MCAZ=${6:-$MCAZ} +# batch farm submission options +QSUBOPT=${5:-$QSUBOPT} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//\"/} +# log dir +DATE=$(date +"%y%m%d") +LDIR=$CTA_USER_LOG_DIR/$DATE/XGBSTEREOTRAINING/ +LDIR=${7:-$LDIR} +# Evaluation mscw files only +ANADIR=${ANADIR}.EFFAREA.MCAZ${MCAZ} + +###################################### +# software paths +source ../setSoftwarePaths.sh "$DSET" +# checking the path for binary +if [ -z "$EVNDISPSYS" ] +then + echo "no EVNDISPSYS env variable defined" + exit 1 +fi + +# Resolve the Conda environment once at submission time. The generated jobs +# use its binaries directly and therefore do not run Conda on worker nodes. +CONDA_RUNNER="${CONDA_EXE:-}" +if [ ! -x "$CONDA_RUNNER" ]; then + CONDA_RUNNER=$(type -P conda || type -P micromamba) +fi +if [ ! -x "$CONDA_RUNNER" ]; then + echo "no conda executable found" + exit 1 +fi +if ! CONDA_ENV_BIN=$("$CONDA_RUNNER" run -n "$env_name" python -c \ + 'import os, sys; print(os.path.dirname(sys.executable))'); then + echo "Conda environment '$env_name' is unavailable" + exit 1 +fi +if [ ! 
-x "$CONDA_ENV_BIN/eventdisplay-ml-apply-xgb-stereo" ]; then + echo "Conda environment '$env_name' is unavailable or eventdisplay-ml is not installed" + exit 1 +fi +echo "Using Conda environment binaries: $CONDA_ENV_BIN" + +###################################### +# log files +QLOG=$LDIR +mkdir -p "$LDIR" +echo "Log directory: " "$LDIR" + +########################### +# particle types +VPART=( "gamma_onSource" "gamma_cone" "electron" "proton" ) +NPART=${#VPART[@]} + +###################################### +# script name template +FSCRIPT="CTA.XGBSTEREO.qsub_analyse" + +############################################################### +# loop over all arrays +for ARRAY in $VARRAY +do + XGBDIR=$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$MODELDIR + + for ((m = 0; m < $NPART; m++ )) + do + PART=${VPART[$m]} + echo "STARTING $DSET PARTICLE $PART ARRAY $ARRAY MCAZ $MCAZ" + + FILES=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$PART."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root) + + i=0 + for FILE in $FILES + do + ((i++)) + + FNAM=$LDIR/$FSCRIPT.$DSET.$ARRAY.AZ${MCAZ}.ID${RECID}.${PART}-${i}.${NIMAGESMIN} + sed -e "s|FFILE|$FILE|" \ + -e "s|DATASET|$DSET|" \ + -e "s|TELMIN|$NIMAGESMIN|" \ + -e "s|CONDA_ENV_BIN|$CONDA_ENV_BIN|" \ + -e "s|DIRXGB|$XGBDIR|" $FSCRIPT.sh > $FNAM.sh + chmod u+x $FNAM.sh + echo "SCRIPT $FNAM.sh" + + # submit job to queue + if [[ $SUBC == *qsub* ]]; then + qsub $QSUBOPT -V -l h_cpu=${h_cpu} -l h_rss=${h_vmem} -l tmpdir_size=${tmpdir_size} -o $QLOG -e $QLOG "$FNAM.sh" + elif [[ $SUBC == *condor* ]]; then + ./condorSubmission.sh ${FNAM}.sh $h_vmem $tmpdir_size + fi + done + done +done diff --git a/analysis/CTA.XGBSTEREO.sub_train.sh b/analysis/CTA.XGBSTEREO.sub_train.sh new file mode 100755 index 0000000..6f9c761 --- /dev/null +++ b/analysis/CTA.XGBSTEREO.sub_train.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# +# XGB stereo analysis training +# +# +# + +SUBC="condor" +h_cpu="47:29:00" +h_vmem="64000M" +tmpdir_size="1G" +ncore=8 
+env_name="eventdisplay_ml_cta" + +if [ $# -lt 4 ] +then + echo + echo "CTA.XGBSTEREO.sub_train.sh [qsub options] [job_dir]" + echo "" + echo " text file with list of subarray IDs" + echo + echo " e.g. cta-ultra3, ISDC3700, ... " + echo + exit +fi + +####################################### +# read values from parameter file +ANAPAR=$3 +if [ ! -e "$ANAPAR" ] +then + echo "error: analysis parameter file not found: $ANAPAR" + exit +fi +echo "reading analysis parameter from $ANAPAR" +NIMAGESMIN=$(grep NIMAGESMIN "$ANAPAR" | awk {'print $2'}) +XGBMINTEL="${NIMAGESMIN}" +[ "$XGBMINTEL" -ge 3 ] && XGBMINTEL=3 +NCUTLST=$(grep NLST "$ANAPAR" | awk {'print $2'}) +NCUTMST=$(grep NMST "$ANAPAR" | awk {'print $2'}) +NCUTSST=$(grep NSST "$ANAPAR" | awk {'print $2'}) +NCUTMSCT=$(grep NSCMST "$ANAPAR" | awk {'print $2'}) +ANADIR=$(grep MSCWSUBDIRECTORY "$ANAPAR" | awk {'print $2'}) +RECID=$(grep RECID "$ANAPAR" | awk {'print $2'}) +DSET=$2 +echo "Analysis parameter: " "$NIMAGESMIN" "$ANADIR" "$DSET" +VARRAY=$(awk '{printf "%s ",$0} END {print ""}' "$1") +ODIRNAME="$4" + +# batch farm submission options +QSUBOPT=${5:-$QSUBOPT} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//\"/} +# log dir +DATE=$(date +"%y%m%d") +LDIR=$CTA_USER_LOG_DIR/$DATE/$ODIRNAME +LDIR=${6:-$LDIR} + +###################################### +# software paths +source ../setSoftwarePaths.sh "$DSET" +# checking the path for binary +if [ -z "$EVNDISPSYS" ] +then + echo "no EVNDISPSYS env variable defined" + exit 1 +fi + +# Resolve the Conda environment once at submission time. The generated jobs +# use its binaries directly and therefore do not run Conda on worker nodes. +CONDA_RUNNER="${CONDA_EXE:-}" +if [ ! -x "$CONDA_RUNNER" ]; then + CONDA_RUNNER=$(type -P conda || type -P micromamba) +fi +if [ ! -x "$CONDA_RUNNER" ]; then + echo "no conda executable found" + exit 1 +fi +if ! 
CONDA_ENV_BIN=$("$CONDA_RUNNER" run -n "$env_name" python -c \ + 'import os, sys; print(os.path.dirname(sys.executable))'); then + echo "Conda environment '$env_name' is unavailable" + exit 1 +fi +if [ ! -x "$CONDA_ENV_BIN/eventdisplay-ml-train-xgb-stereo" ]; then + echo "Conda environment '$env_name' is unavailable or eventdisplay-ml is not installed" + exit 1 +fi +echo "Using Conda environment binaries: $CONDA_ENV_BIN" + +###################################### +# log files +QLOG=$LDIR +mkdir -p "$LDIR" +echo "Log directory: " "$LDIR" + +###################################### +# script name template +FSCRIPT="CTA.XGBSTEREO.qsub_train" + +############################################################### +# loop over all arrays +for ARRAY in $VARRAY +do + echo "STARTING $DSET ARRAY $ARRAY" + + ODIR=$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/${ODIRNAME}"" + mkdir -p "$ODIR" + MODELFILE="${ODIR}/dispdir_bdt_mintel${XGBMINTEL}.joblib.gz" + if [ -e "$MODELFILE" ] + then + echo "FOUND $MODELFILE - skipping training job" + continue + else + echo "MODEL FILE NOT FOUND $MODELFILE" + fi + # Use the centrally prepared file-level training partition. This is the + # union of per-direction splits and is therefore exactly disjoint from the + # files used by XGBSTEREOANA and EFFAREA. + TRAINDIR=$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/${ANADIR}.TRAIN.MCAZ + if [ ! -d "$TRAINDIR" ]; then + echo "No training data directory found: $TRAINDIR" + echo "Run CTA.prepareAnalysis_no_sub.sh before XGB stereo training." + exit 1 + fi + SIGNALTRAINLIST=${ODIR}/training_files.list + rm -f "${SIGNALTRAINLIST}" + find "$TRAINDIR" -maxdepth 1 -name "gamma_cone.*.mscw.root" -print | sort > "${SIGNALTRAINLIST}" + if [ ! 
-s "$SIGNALTRAINLIST" ]; then + echo "No gamma_cone training files found in $TRAINDIR" + exit 1 + fi + + FNAM=$LDIR/$FSCRIPT.$DSET.$ARRAY.ID${RECID}.NIM${XGBMINTEL} + sed -e "s|MSCWLIST|$SIGNALTRAINLIST|" \ + -e "s|DATASET|$DSET|" \ + -e "s|TELMIN|$XGBMINTEL|" \ + -e "s|NCORE|$ncore|" \ + -e "s|CONDA_ENV_BIN|$CONDA_ENV_BIN|" \ + -e "s|OUTPUTDIR|$ODIR|" $FSCRIPT.sh > $FNAM.sh + chmod u+x $FNAM.sh + echo "SCRIPT $FNAM.sh" + +# submit job to queue + if [[ $SUBC == *qsub* ]]; then + qsub $QSUBOPT -V -l h_cpu=${h_cpu} -l h_rss=${h_vmem} -l tmpdir_size=${tmpdir_size} -o $QLOG -e $QLOG "$FNAM.sh" + elif [[ $SUBC == *condor* ]]; then + ./condorSubmission.sh ${FNAM}.sh $h_vmem $tmpdir_size "" $ncore + fi +done diff --git a/analysis/CTA.prepareAnalysis_no_sub.sh b/analysis/CTA.prepareAnalysis_no_sub.sh new file mode 100755 index 0000000..d48966b --- /dev/null +++ b/analysis/CTA.prepareAnalysis_no_sub.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# +# Prepare disjoint training and analysis data sets using symbolic links. +# This is a local operation; no batch job is submitted. + +set -e +shopt -s nullglob + +usage() +{ + cat <<'EOF' +CTA.prepareAnalysis_no_sub.sh [source analysis directory] + + text file with one subarray ID per line + e.g. prod6-LaPalma-20deg-dark-sq51-LL + MSCWSUBDIRECTORY defines the target analysis date + [source analysis directory] directory containing the original mscw files; + defaults to MSCWSUBDIRECTORY + +For every subarray, this creates: + .TRAIN.MCAZ_0deg and .TRAIN.MCAZ_180deg + .EFFAREA.MCAZ_0deg and .EFFAREA.MCAZ_180deg + .TRAIN.MCAZ and .EFFAREA.MCAZ (both directions combined) + +gamma_cone and proton files are split 1/2 training and 1/2 analysis. +gamma_onSource and electron files are analysis-only. Splitting is performed +independently for each direction. 
+ +Example for reusing an older reconstruction with a new analysis date: + CTA.prepareAnalysis_no_sub.sh arrays.list DATASET new.runparameter \ + Analysis-ID0-g20260325 +EOF +} + +if [ "$#" -lt 3 ]; then + usage + exit 1 +fi + +SUBARRAY_LIST=$1 +DSET=$2 +ANAPAR=$3 + +if [ ! -r "$SUBARRAY_LIST" ]; then + echo "Error: subarray list not readable: $SUBARRAY_LIST" >&2 + exit 1 +fi +if [ ! -r "$ANAPAR" ]; then + echo "Error: analysis parameter file not readable: $ANAPAR" >&2 + exit 1 +fi +if [ -z "${CTA_USER_DATA_DIR:-}" ]; then + echo "Error: CTA_USER_DATA_DIR is not set" >&2 + exit 1 +fi + +TARGET_ANALYSIS=$(awk '$1 == "MSCWSUBDIRECTORY" { print $2; exit }' "$ANAPAR") +if [ -z "$TARGET_ANALYSIS" ]; then + echo "Error: MSCWSUBDIRECTORY is missing from $ANAPAR" >&2 + exit 1 +fi +SOURCE_ANALYSIS=${4:-$TARGET_ANALYSIS} +case "$TARGET_ANALYSIS:$SOURCE_ANALYSIS" in + *"/"*) + echo "Error: analysis directory arguments must be directory names, not paths" >&2 + exit 1 + ;; +esac + +DATASET_DIR="${CTA_USER_DATA_DIR%/}/analysis/AnalysisData/$DSET" +DIRECTIONS=( "_0deg" "_180deg" ) + +link_file() +{ + local source_file=$1 + local target_dir=$2 + local target_file + target_file="$target_dir/$(basename "$source_file")" + + if [ -e "$target_file" ] || [ -L "$target_file" ]; then + echo "Error: duplicate target name: $target_file" >&2 + exit 1 + fi + ln -s "$source_file" "$target_file" +} + +link_partition() +{ + local source_dir=$1 + local particle=$2 + local direction=$3 + local divisor=$4 + local train_dir=$5 + local analysis_dir=$6 + local file + local index=0 + local count=0 + local files=( "${source_dir}/${particle}.${ARRAY}_ID${RECID}${direction}"*.mscw.root ) + + if [ "${#files[@]}" -eq 0 ]; then + echo "Error: no $particle files for $ARRAY $direction in $source_dir" >&2 + exit 1 + fi + + for file in "${files[@]}"; do + index=$((index + 1)) + count=$((count + 1)) + if [ $((index % divisor)) -eq 0 ]; then + link_file "$file" "$analysis_dir" + else + link_file "$file" 
"$train_dir" + fi + done + echo " $particle $direction: $((count - count / divisor)) train, $((count / divisor)) analysis" +} + +link_analysis_only() +{ + local source_dir=$1 + local particle=$2 + local direction=$3 + local analysis_dir=$4 + local file + local count=0 + local files=( "${source_dir}/${particle}.${ARRAY}_ID${RECID}${direction}"*.mscw.root ) + + if [ "${#files[@]}" -eq 0 ]; then + echo "Error: no $particle files for $ARRAY $direction in $source_dir" >&2 + exit 1 + fi + for file in "${files[@]}"; do + link_file "$file" "$analysis_dir" + count=$((count + 1)) + done + echo " $particle $direction: $count analysis-only" +} + +combine_directions() +{ + local kind=$1 + local combined="$ARRAY_DIR/$TARGET_ANALYSIS.$kind.MCAZ" + local direction + local file + + rm -rf "$combined" + mkdir -p "$combined" + for direction in "${DIRECTIONS[@]}"; do + for file in "$ARRAY_DIR/$TARGET_ANALYSIS.$kind.MCAZ$direction"/*.root; do + [ -L "$file" ] || continue + link_file "$(readlink "$file")" "$combined" + done + done +} + +check_source_files() +{ + local source_dir=$1 + local direction + local particle + local files + + for direction in "${DIRECTIONS[@]}"; do + for particle in gamma_cone proton gamma_onSource electron; do + files=( "${source_dir}/${particle}.${ARRAY}_ID${RECID}${direction}"*.mscw.root ) + if [ "${#files[@]}" -eq 0 ]; then + echo "Error: no $particle files for $ARRAY $direction in $source_dir" >&2 + exit 1 + fi + done + done +} + +RECID=$(awk '$1 == "RECID" { print $2; exit }' "$ANAPAR") +if [ -z "$RECID" ]; then + echo "Error: RECID is missing from $ANAPAR" >&2 + exit 1 +fi + +while IFS= read -r ARRAY || [ -n "$ARRAY" ]; do + # Permit empty lines and comments in subarray lists. + ARRAY=${ARRAY%%#*} + ARRAY=${ARRAY//[[:space:]]/} + [ -n "$ARRAY" ] || continue + + ARRAY_DIR="$DATASET_DIR/$ARRAY" + SOURCE_DIR="$ARRAY_DIR/$SOURCE_ANALYSIS" + if [ ! 
-d "$SOURCE_DIR" ]; then + echo "Error: source analysis directory not found: $SOURCE_DIR" >&2 + exit 1 + fi + # Check the complete source set before replacing any existing split. + check_source_files "$SOURCE_DIR" + + echo "Preparing $DSET/$ARRAY from $SOURCE_ANALYSIS into $TARGET_ANALYSIS" + for direction in "${DIRECTIONS[@]}"; do + TRAIN_DIR="$ARRAY_DIR/$TARGET_ANALYSIS.TRAIN.MCAZ$direction" + ANALYSIS_DIR="$ARRAY_DIR/$TARGET_ANALYSIS.EFFAREA.MCAZ$direction" + + rm -rf "$TRAIN_DIR" "$ANALYSIS_DIR" + mkdir -p "$TRAIN_DIR" "$ANALYSIS_DIR" + + link_partition "$SOURCE_DIR" gamma_cone "$direction" 2 "$TRAIN_DIR" "$ANALYSIS_DIR" + link_partition "$SOURCE_DIR" proton "$direction" 2 "$TRAIN_DIR" "$ANALYSIS_DIR" + link_analysis_only "$SOURCE_DIR" gamma_onSource "$direction" "$ANALYSIS_DIR" + link_analysis_only "$SOURCE_DIR" electron "$direction" "$ANALYSIS_DIR" + done + + combine_directions TRAIN + combine_directions EFFAREA +done < "$SUBARRAY_LIST" diff --git a/analysis/CTA.prepareTMVA.sub_train.sh b/analysis/CTA.prepareTMVA.sub_train.sh index 1710abb..760d225 100755 --- a/analysis/CTA.prepareTMVA.sub_train.sh +++ b/analysis/CTA.prepareTMVA.sub_train.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # script to prepare event files for TMVA # @@ -13,14 +13,12 @@ tmpdir_size="1G" if [ $# -lt 4 ] then echo - echo "CTA.prepareTMVA.sub_train.sh [qsub options] [direction (e.g. _180deg)] [job_dir]" + echo "CTA.prepareTMVA.sub_train.sh [qsub options] [direction (e.g. _180deg)] [job_dir]" echo "" echo " text file with list of subarray IDs" echo - echo " calculate tables for on source or different wobble offsets" - echo echo " e.g. cta-ultra3, ISDC3700, ... " - echo + echo echo " e.g. for north: \"_180deg\", for south: \"_0deg\", for all directions: no option" echo echo " note 1: keywords ENERGYBINS and OUTPUTFILE are ignored in the runparameter file" @@ -36,10 +34,10 @@ fi ####################################### # read values from parameter file -ANAPAR=$4 +ANAPAR=$3 if [ ! 
-e "$ANAPAR" ] then - echo "error: analysis parameter file not found: $ANAPAR" + echo "error: analysis parameter file not found: $ANAPAR" exit fi echo "reading analysis parameter from $ANAPAR" @@ -49,57 +47,32 @@ NCUTMST=$(grep NMST "$ANAPAR" | awk {'print $2'}) NCUTSST=$(grep NSST "$ANAPAR" | awk {'print $2'}) NCUTMSCT=$(grep NSCMST "$ANAPAR" | awk {'print $2'}) ANADIR=$(grep MSCWSUBDIRECTORY "$ANAPAR" | awk {'print $2'}) -DDIR=$(grep TMVASUBDIR "$ANAPAR" | awk {'print $2'}) RECID=$(grep RECID "$ANAPAR" | awk {'print $2'}) -echo "Analysis parameter: " "$NIMAGESMIN" "$ANADIR" "$DDIR" -DSET=$3 -CONE="FALSE" -if [[ $2 == cone ]] -then - CONE="TRUE" -fi +DSET=$2 +echo "Analysis parameter: " "$NIMAGESMIN" "$ANADIR" "$DSET" VARRAY=$(awk '{printf "%s ",$0} END {print ""}' "$1") ###################################################### # TMVA parameters are detetermined from data set name RPAR="$CTA_EVNDISP_AUX_DIR/ParameterFiles/TMVA.BDT" ##################################### -if [ -n "$6" ] -then - MCAZ=$6 -fi - +MCAZ=${5:-$MCAZ} # batch farm submission options -if [ -n "$5" ] -then - QSUBOPT="$5" -fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} -QSUBOPT=${QSUBOPT//\"/} +QSUBOPT=${4:-$QSUBOPT} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//\"/} +# log dir +DATE=$(date +"%y%m%d") +LDIR=$CTA_USER_LOG_DIR/$DATE/PRETMVATRAINING/ +LDIR=${6:-$LDIR} ##################################### # offset bins -if [ $CONE == "TRUE" ] -then - OFFMIN=( 0.0 1.0 2.0 2.5 4.0 5.0 ) - OFFMAX=( 3.0 3.0 3.5 4.5 5.0 6.0 ) - OFFMEA=( 0.5 1.5 2.5 3.5 4.5 5.5 ) - DSUF="gamma_cone" - GTYPE="cone10_evndisp" - ASUF="gamma_onSource" - ATYPE="baseline_evndisp" -else - OFFMIN=( "0.0" ) - OFFMAX=( "3." 
) -# value used until 2015-11-09 -# OFFMAX=( "1.e10" ) - OFFMEA=( 0.0 ) - DSUF="gamma_onSource" - GTYPE="baseline_evndisp" - ASUF="gamma_cone" - ATYPE="cone10_evndisp" -fi +OFFMIN=( 0.0 1.0 2.0 2.5 4.0 5.0 ) +OFFMAX=( 3.0 3.0 3.5 4.5 5.0 6.0 ) +OFFMEA=( 0.5 1.5 2.5 3.5 4.5 5.5 ) +ASUF="gamma_onSource" NOFF=${#OFFMIN[@]} ###################################### @@ -114,11 +87,6 @@ fi ###################################### # log files -DATE=$(date +"%y%m%d") -LDIR=$CTA_USER_LOG_DIR/$DATE/PRETMVATRAINING/ -if [ -n ${7} ]; then - LDIR=${7} -fi QLOG=$LDIR mkdir -p "$LDIR" echo "Log directory: " "$LDIR" @@ -131,90 +99,25 @@ FSCRIPT="CTA.prepareTMVA.qsub_train" # loop over all arrays for ARRAY in $VARRAY do - echo "STARTING $DSET ARRAY $ARRAY MCAZ $MCAZ" - -# signal and background files -# (no electrons are used for the background training) -# ensure mixed training set for the two different pointing directions -# two lists for signal and background, alternating from previous lists -# (list must be sorted; and then mixed) -# Splitmode=BLOCK -# SFIL1, BFIL1 used for training -# SFIL2, BFIL2 used for testing and analysis - -# different namings for GRID and local productions - if [[ ${DSET:0:2} == "GR" ]] - then - SFIL1=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/gamma*"deg$MCAZ"*"$GTYPE"*.mscw.root | sort -g | awk 'NR % 2 == 1') - SFIL2=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/gamma*"deg$MCAZ"*"$GTYPE"*.mscw.root | sort -g | awk 'NR % 2 == 0') - BFIL1=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/proton*"deg$MCAZ"*mscw.root | sort -g | awk 'NR % 2 == 1') - BFIL2=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/proton*"deg$MCAZ"*mscw.root | sort -g | awk 'NR % 2 == 0') - GFIL=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/gamma*"deg$MCAZ"*"$ATYPE"*.mscw.root) - EFIL=$(ls -1 
$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/elec*"deg$MCAZ"*mscw.root) - elif [[ $DSET == *"NSB"* ]] - then - SFIL1=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$DSUF*"_ID"$RECID"$MCAZ"*.mscw.root | sort -g | awk 'NR % 2 == 1') - SFIL2=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$DSUF*"_ID"$RECID"$MCAZ"*.mscw.root | sort -g | awk 'NR % 2 == 0') - BFIL1=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/proton*"_ID"$RECID"$MCAZ"*mscw.root | sort -g | awk 'NR % 2 == 1') - BFIL2=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/proton*"_ID"$RECID"$MCAZ"*mscw.root | sort -g | awk 'NR % 2 == 0') - GFIL=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$ASUF*"_ID"$RECID"$MCAZ"*.mscw.root) - EFIL=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/elec*"_ID"$RECID"$MCAZ"*mscw.root) - else - SFIL1=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$DSUF."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root | sort -g | awk 'NR % 2 == 1') - SFIL2=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$DSUF."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root | sort -g | awk 'NR % 2 == 0') - BFIL1=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/proton."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root | sort -g | awk 'NR % 2 == 1') - BFIL2=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/proton."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root | sort -g | awk 'NR % 2 == 0') - GFIL=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/$ASUF."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root) - EFIL=$(ls -1 $CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/$ANADIR/electron."$ARRAY"_ID"$RECID$MCAZ"*.mscw.root) - fi - -########################################################## -# set links for events used in effective area calculation -# (separate training and events used for analysis) -# NOTE: ASSUME THAT THIS IS NOT 
CHANGED -# IF DIRECTORY EXISTS, NO NEW ONES ARE CREATED - ANAEFF="$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/${ANADIR}.EFFAREA.MCAZ${MCAZ}" - # rm -rf $ANAEFF - if [ ! -e "$ANAEFF" ] - then - mkdir -p "$ANAEFF" - # testing signal list - for arg in $SFIL2 - do - BW=$(basename "$arg") - ln -s $arg "$ANAEFF"/"$BW" - done - # testing proton list - for arg in $BFIL2 - do - BW=$(basename "$arg") - ln -s $arg "$ANAEFF"/"$BW" - done - # depending on CONE parameter: either onSource (if CONE=TRUE) or cone (if CONE=FALSE) - for arg in $GFIL - do - BW=$(basename $arg) - ln -s $arg $ANAEFF/$BW - done - # electrons (not used in training) - for arg in $EFIL - do - BW=$(basename $arg) - ln -s $arg $ANAEFF/$BW - done - else - echo "EXISTING LINKED ANALYSIS (MSCW) FILES with testing events" + echo "STARTING $DSET ARRAY $ARRAY MCAZ $MCAZ" + + # File-level training/test assignment is prepared locally by + # CTA.prepareAnalysis_no_sub.sh. Consume the explicit training partition + # here instead of independently repeating the modulo selection. + TRAINDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/$DSET/$ARRAY/${ANADIR}.TRAIN.MCAZ${MCAZ}" + if [ ! -d "$TRAINDIR" ]; then + echo "No training data directory found: $TRAINDIR" + echo "Run CTA.prepareAnalysis_no_sub.sh before preparing TMVA events." + exit 1 fi - ############################################################### - # add a 'continue' here if linking file is the main purpose - #continue - ############################################################### + SIGNALTRAINLIST=$(find "$TRAINDIR" -maxdepth 1 -name "gamma_cone.*.mscw.root" -print | sort) + BACKGROUNDTRAINLIST=$(find "$TRAINDIR" -maxdepth 1 -name "proton.*.mscw.root" -print | sort) ############################################################### # get number of telescopes depending of telescope types # use first file - set -- $SFIL1 + set -- $SIGNALTRAINLIST # check the file exists - otherwise continue if [ -z "$1" ] || [ ! 
-e "$1" ] then @@ -229,7 +132,7 @@ do NTYPECUT="NTtype==$NTELTYPE" # find correct index for each cut for (( N = 0; N < $NTELTYPE; N++ )) - do + do TELTYP=$(echo ${NTELTYPESTRING}| cut -d " " -f $((N+2))) if [[ $TELTYP == "NOTELESCOPETYPE" ]]; then echo "Error: telescope type not found: $N" @@ -258,11 +161,11 @@ do touch "${TEMPPAR}" # write signal and background files # (note: training is in splitmode=block) - for arg in $SFIL1 + for arg in $SIGNALTRAINLIST do echo "* SIGNALFILE $arg" >> "${TEMPPAR}" done - for arg in $BFIL1 + for arg in $BACKGROUNDTRAINLIST do echo "* BACKGROUNDFILE $arg" >> "${TEMPPAR}" done @@ -287,8 +190,9 @@ echo "* ENERGYBINS 1 -5. 5. cat "${TEMPPAR}" >> $RFIL.runparameter ############################################################ # setting the cuts in the run parameter file - + sed -i -e "s|MINIMAGES|$NIMAGESMIN|;s|MINIMAGETYPECUT|$TYPECUT|" \ + -e "s|TMVA_RUN_MODE|WriteTrainingEvents|" \ -e 's|ENERGYVARIABLE|ErecS|;s|ENERGYCHI2VARIABLE|EChi2S|g;s|ENERGYDEVARIABLE|dES|g' $RFIL.runparameter FNAM=$LDIR/$FSCRIPT.$DSET.$ARRAY.${OFFMEA[$W]}.AZ${MCAZ}.ID${RECID}.NIMAGES${NIMAGESMIN} diff --git a/analysis/CTA.separateDispTrainingEvndispFiles.sh b/analysis/CTA.separateDispTrainingEvndispFiles.sh new file mode 100755 index 0000000..b0a4cd1 --- /dev/null +++ b/analysis/CTA.separateDispTrainingEvndispFiles.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# +# Prepare disjoint file lists for DispBDT training and subsequent analysis +# +# - randomly selects configured fractions of each particle type for training +# - writes training lists to "/EVNDISP.TRAIN/" +# - writes the complementary analysis lists to "/EVNDISP.ANALYSIS/" + + +if [ ! 
-n "$2" ] +then + echo "./separateDispTrainingEvndispFiles.sh " + echo + echo "Used for gamma_onSource and gamma_cone only" + echo "(hardwired fraction of files)" + exit +fi + +# Fractions used for train +declare -A LOOKUP=( +# [gamma_cone]=5 # 20% + [gamma_cone]=2 # 50% + [gamma_onSource]=10 # 10% + [proton]=1000 # 0.1% (not used) + [electron]=1000 # 0.1% (not used) +) + +# data set +HDIR="${CTA_USER_DATA_DIR%/}/analysis/AnalysisData/$1" +if [[ ! -d ${HDIR} ]]; then + echo "Error: dataset directory not found" + exit +fi +HYPERARRAY="N.hyperarray" +if [[ $1 == *Paranal* ]]; then + HYPERARRAY="S.hyperarray" +fi + +if [[ ! -e ${2} ]]; then + echo "Error: layout list not found ${2}" + exit +fi + + +# Fill list with all EVNDISP files (randomized) +echo "Filling file lists from Hyperarray" +for MCAZ in 0deg 180deg +do + for P in gamma_cone gamma_onSource proton electron + do + ALL_FILE_LIST=${HDIR}/${HYPERARRAY}/EVNDISP/${P}_${MCAZ}.all.list + rm -f ${ALL_FILE_LIST} + find $HDIR/${HYPERARRAY}/EVNDISP/${P}/ -name "*[_,.]${MCAZ}*.root" | shuf > ${ALL_FILE_LIST} + echo " Found $(wc -l ${ALL_FILE_LIST}) files for ${P} ${MCAZ}" + done +done + +# list of arrays +ALIST=$(cat $2) +for ARRAY in $ALIST +do + echo "Working on layout $ARRAY into ${HDIR}/${ARRAY}" + + # Analysis files + ANADIR="${HDIR}/${ARRAY}/EVNDISP.ANALYSIS" + mkdir -p ${ANADIR} + rm -rf ${ANADIR}/* + # Training files + TRAINDIR="${HDIR}/${ARRAY}/EVNDISP.TRAIN" + mkdir -p ${TRAINDIR} + rm -rf ${TRAINDIR}/* + + for MCAZ in 0deg 180deg + do + for P in gamma_cone gamma_onSource proton electron + do + echo " Filling ${P} for ${MCAZ} with fraction of ${LOOKUP[$P]}" + + ALL_FILE_LIST=${HDIR}/${HYPERARRAY}/EVNDISP/${P}_${MCAZ}.all.list + TRAIN_FILE_LIST=${TRAINDIR}/${P}_${MCAZ}.list + ANA_FILE_LIST=${ANADIR}/${P}_${MCAZ}.list + + awk -v n="${LOOKUP[$P]}" 'NR % n == 0' "$ALL_FILE_LIST" > "$TRAIN_FILE_LIST" + awk -v n="${LOOKUP[$P]}" 'NR % n != 0' "$ALL_FILE_LIST" > "$ANA_FILE_LIST" + echo " Training files: $(wc -l 
${TRAIN_FILE_LIST})" + echo " Analysis files: $(wc -l ${ANA_FILE_LIST})" + done + + done +done diff --git a/analysis/condorSubmission.sh b/analysis/condorSubmission.sh index 86a5ba5..fc5e87d 100755 --- a/analysis/condorSubmission.sh +++ b/analysis/condorSubmission.sh @@ -3,7 +3,7 @@ if [ "$1" = "-h" ]; then echo " -UTILITY.condorSubmission.sh [submission script] [memory request] [disk request] +UTILITY.condorSubmission.sh [submission script] [memory request] [disk request] -------------------------------------------------------------------------------- " @@ -11,6 +11,7 @@ exit fi SUBFIL=${1}.condor +NCORE=${5:-1} rm -f ${SUBFIL} echo "JobBatchName = ${1}" > ${SUBFIL} echo "Executable = ${1}" > ${SUBFIL} @@ -20,9 +21,9 @@ fi echo "Log = ${1}.\$(Cluster)_\$(Process).log" >> ${SUBFIL} echo "Output = ${1}.\$(Cluster)_\$(Process).output" >> ${SUBFIL} echo "Error = ${1}.\$(Cluster)_\$(Process).error" >> ${SUBFIL} -echo "Log = ${1}.\$(Cluster)_\$(Process).log" >> ${SUBFIL} echo "request_memory = ${2}" >> ${SUBFIL} echo "request_disk = ${3}" >> ${SUBFIL} +echo "request_cpus = ${NCORE}" >> ${SUBFIL} echo "getenv = True" >> ${SUBFIL} echo "priority = 50" >> ${SUBFIL} # echo "max_materialize = 50" >> ${SUBFIL} diff --git a/analysis/prepareSummaryTrees.sh b/analysis/prepareSummaryTrees.sh index 5656b8c..c83b077 100755 --- a/analysis/prepareSummaryTrees.sh +++ b/analysis/prepareSummaryTrees.sh @@ -4,7 +4,7 @@ # expect following (standard) environmental variables # $EVNDISPSYS # $DATADIR -# +# # dataset used for layout optimisation DDAT="g20210409-20deg" diff --git a/docs/changes/.gitkeep b/docs/changes/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/changes/63.feature.md b/docs/changes/63.feature.md new file mode 100644 index 0000000..802717f --- /dev/null +++ b/docs/changes/63.feature.md @@ -0,0 +1,2 @@ +- Add analysis parameters for Prod6-South Paranal analysis (including layout lists). +- Introduce multi-core HTCondor submission. 
diff --git a/docs/changes/63.maintenance.md b/docs/changes/63.maintenance.md new file mode 100644 index 0000000..1545e17 --- /dev/null +++ b/docs/changes/63.maintenance.md @@ -0,0 +1 @@ +Change to ROOT version 6.40.00. diff --git a/docs/changes/64.maintenance.md b/docs/changes/64.maintenance.md new file mode 100644 index 0000000..ea44ecd --- /dev/null +++ b/docs/changes/64.maintenance.md @@ -0,0 +1 @@ +Initial setup of Changelog generation using towncrier. diff --git a/docs/changes/65.feature.md b/docs/changes/65.feature.md new file mode 100644 index 0000000..ed55bc7 --- /dev/null +++ b/docs/changes/65.feature.md @@ -0,0 +1 @@ +Introduce usage of high-multiplicity model for XGBoost. diff --git a/docs/changes/66.feature.md b/docs/changes/66.feature.md new file mode 100644 index 0000000..c0a162d --- /dev/null +++ b/docs/changes/66.feature.md @@ -0,0 +1 @@ +Improve efficiency of python scripts: don't run Conda on nodes. diff --git a/docs/changes/67.feature.md b/docs/changes/67.feature.md new file mode 100644 index 0000000..3b3a343 --- /dev/null +++ b/docs/changes/67.feature.md @@ -0,0 +1 @@ +Improve separation of training and test data for TMVA and XGBoost analysis scripts. diff --git a/docs/changes/69.bugfix.md b/docs/changes/69.bugfix.md new file mode 100644 index 0000000..b9207b0 --- /dev/null +++ b/docs/changes/69.bugfix.md @@ -0,0 +1 @@ +Fix minor-medium bugs reported by Copilot code review.
diff --git a/install/README.md b/install/README.md index 5d5686c..c27228d 100644 --- a/install/README.md +++ b/install/README.md @@ -12,7 +12,7 @@ Usage: ./prepareProductionBinaries.sh ``` -This will install the required binaries and data files into +This will install the required binaries and data files into ``` $CTA_USER_WORK_DIR/analysis/AnalysisData/ ``` diff --git a/install/prepareProductionBinaries.sh b/install/prepareProductionBinaries.sh index 1d7d831..ccfdacd 100755 --- a/install/prepareProductionBinaries.sh +++ b/install/prepareProductionBinaries.sh @@ -8,7 +8,7 @@ if [ $# -lt 1 ]; then echo " -./prepareProductionBinaries.sh +./prepareProductionBinaries.sh will install hessioxx, Eventdisplay analysis files and code @@ -95,10 +95,14 @@ install_hessio() then export HESSIOCFLAGS="-DCTA_PROD4 -DMAXIMUM_TELESCOPES=180 -DWITH_GSL_RNG" EFLAGS="PROD5" - elif [[ $DSET = *"prod6"* ]] + elif [[ $DSET = *"prod6-LaPalma"* ]] then export HESSIOCFLAGS="-DCTA_PROD6_SC -DMAXIMUM_TELESCOPES=120 -DWITH_GSL_RNG" - EFLAGS="PROD6" + EFLAGS="PROD6-North" + elif [[ $DSET = *"prod6-Paranal"* ]] + then + export HESSIOCFLAGS="-DCTA_PROD6 -DMAXIMUM_TELESCOPES=180 -DWITH_GSL_RNG" + EFLAGS="PROD6-South" else echo "unknown production" exit @@ -118,7 +122,7 @@ mkdir -p $CODEDIR || return echo "Software installation into $CODEDIR" echo "Preparing binaries for $DSET" -echo +echo echo "Getting Eventdisplay..." 
cd $CODEDIR if [[ $VERSION == "main" ]]; then @@ -130,7 +134,7 @@ fi install_hessio cd $EVNDISPSYS -./install_sofa.sh +./install_sofa.sh CI cd ${TDIR} source ../setSoftwarePaths.sh ${DSET} diff --git a/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST.sh b/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST.sh index ed17ba8..e450673 100755 --- a/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST.sh +++ b/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST.sh @@ -60,4 +60,3 @@ mkdir -p $CTA_USER_LOG_DIR"/analysis/AnalysisData/"$DSET/LOGFILES mv -v -f $OFIL.tar.gz $CTA_USER_LOG_DIR"/analysis/AnalysisData/"$DSET/LOGFILES/ exit - diff --git a/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh b/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh index 7793ab9..2829cff 100755 --- a/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh +++ b/prod1/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh @@ -71,4 +71,3 @@ mkdir -p $CTA_USER_LOG_DIR"/analysis/AnalysisData/"$DSET/LOGFILES-$LOGF mv -v -f $OFIL.tar.gz $CTA_USER_LOG_DIR"/analysis/AnalysisData/"$DSET/LOGFILES-$LOGF/ exit - diff --git a/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST.sh b/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST.sh index 366dd1c..9862ed3 100755 --- a/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST.sh +++ b/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST.sh @@ -79,7 +79,7 @@ do # QLOG=$CTA_USER_LOG_DIR/$DATE/EVNDISP/ # mkdir -p $QLOG QLOG="/dev/null" - + echo "submitting $AFIL ($LDIR)" FNAM="$SHELLDIR/E-$DSET-$PART-$ARRAY-$FLL" diff --git a/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh b/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh index 7cde80c..42adfdb 100755 --- a/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh +++ b/prod1/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod1.sh @@ -69,7 +69,7 @@ fi NRUN=`wc -l $RUNLIST | awk '{print $1}'` 
RUNFROMTO="1-$NRUN" - + ######################################### # output directory for error/output from batch system diff --git a/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh b/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh index f21593f..d6aa93b 100755 --- a/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh +++ b/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh @@ -141,7 +141,7 @@ do cp -v -f $TMPDIR/$OFIL.root $ODIR/VDST/ fi ls -lh $TMPDIR/*.root -# clean up +# clean up rm -f $TMPDIR/$OFIL.root rm -f $TMPDIR/[0-9]*.root echo "===================================================================" diff --git a/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh b/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh index f52bb7e..531f35a 100755 --- a/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh +++ b/prod2/CTA.EVNDISP.qsub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh @@ -45,7 +45,7 @@ fi ################################ -# sim_telarray files +# sim_telarray files # string in file 1 IDFIL1="prod2_desert" @@ -92,7 +92,7 @@ echo "COPYING FILES TO $TMPDIR" # check if files are on local disc or on dCache # (note: DESY only, this can not handle mixed lists) z=1 -for F in $IFIL1 $IFIL2 +for F in $IFIL1 $IFIL2 do G=`basename $F` mkdir -p $TMPDIR"/FILE$z" @@ -133,7 +133,7 @@ do # remove spaces N=`echo $N | tr -d ' '` -# start a log file +# start a log file SLOG=$TMPDIR/$OFIL.$N.shell.log touch $SLOG echo "files in $TMPDIR" >> $SLOG @@ -162,7 +162,7 @@ do ls -l $IFIL2 echo $SIMFIL ls -l $SIMFIL - + #################################################################### # execute converter echo "TMPDIR FILES " $SIMFIL >> $SLOG @@ -191,7 +191,7 @@ do fi ls -lh $TMPDIR/*.root >> $SLOG ls -lh $TMPDIR/*.log >> $SLOG -# clean up +# clean up rm -f $TMPDIR/$OFIL.root rm -f $TMPDIR/[0-9]*.root rm -f $SIMFIL diff --git 
a/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh b/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh index 44674b8..8b3669b 100755 --- a/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh +++ b/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh @@ -71,8 +71,8 @@ if [ -n $7 ] then QSUBOPT="$7" fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} # checking the path for binary if [ -z $EVNDISPSYS ] @@ -175,7 +175,7 @@ do if [[ $NRUN -ne 0 ]] then - qsub $QSUBOPT -t $RUNFROMTO:1 -l h_cpu=47:29:00 -l os=sl6 -l tmpdir_size=40G -l h_rss=4G -V -o $QLOG -e $QLOG "$FNAM.sh" + qsub $QSUBOPT -t $RUNFROMTO:1 -l h_cpu=47:29:00 -l os=sl6 -l tmpdir_size=40G -l h_rss=4G -V -o $QLOG -e $QLOG "$FNAM.sh" fi done diff --git a/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh b/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh index 74a0a62..8ccdf46 100755 --- a/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh +++ b/prod2/CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2_merge.sh @@ -66,8 +66,8 @@ if [ -n $7 ] then QSUBOPT="$7" fi -QSUBOPT=${QSUBOPT//_X_/ } -QSUBOPT=${QSUBOPT//_M_/-} +QSUBOPT=${QSUBOPT//_X_/ } +QSUBOPT=${QSUBOPT//_M_/-} # checking the path for binary if [ -z $EVNDISPSYS ] @@ -166,7 +166,7 @@ do if [[ $NRUN -ne 0 ]] then - qsub $QSUBOPT -t $RUNFROMTO:1 -l h_cpu=47:29:00 -l os=sl6 -l tmpdir_size=40G -l h_rss=4G -V -o $QLOG -e $QLOG "$FNAM.sh" + qsub $QSUBOPT -t $RUNFROMTO:1 -l h_cpu=47:29:00 -l os=sl6 -l tmpdir_size=40G -l h_rss=4G -V -o $QLOG -e $QLOG "$FNAM.sh" fi done diff --git a/prod2/CTA.runProd2Analysis.MCPaper.sh b/prod2/CTA.runProd2Analysis.MCPaper.sh index 9fbb79f..f25249c 100755 --- a/prod2/CTA.runProd2Analysis.MCPaper.sh +++ b/prod2/CTA.runProd2Analysis.MCPaper.sh @@ -7,9 +7,9 @@ ############################################## -if [ $# -ne 2 ] +if [ 
$# -ne 2 ] then - echo + echo echo "./CTA.runProd2Analysis.sh " echo echo " N=prod2-North, S=prod2-South, SM=prod2-South-merged, P1=prod1, LAYOUT=array layout analysis" @@ -71,8 +71,8 @@ then # Armazones # SITE=( "prod2-Armazones-NS" ) ########################################################## -# 40 deg data sets -# started: +# 40 deg data sets +# started: # SITE=( "prod2-Aar-40deg-NS" "prod2-Leoncito-40deg-NS" "prod2-LeoncitoPP-40deg-NS" ) ########################################################## # NSB data sets @@ -230,7 +230,7 @@ do # standard evndisplay analysis else ./CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh $ARRAY $LIST $N $S$M 0 $i $QSUBOPT $TRG - fi + fi done continue fi @@ -266,7 +266,7 @@ do OOTIME=${OBSTIME[$o]} ########################################## -# loop over all shower directions +# loop over all shower directions # (i.e. North and South) for ((a = 0; a < ${#MCAZ[@]}; a++ )) do @@ -292,11 +292,11 @@ do echo "OBSERVINGTIME_H $OOTIME" >> $PARA echo "GETXOFFYOFFAFTERCUTS yes" >> $PARA ########################################## -# train BDTs +# train BDTs # (note: BDT training does not need to be done for all observing periods) if [[ $RUN == "TRAIN" ]] then - echo "$AZ " + echo "$AZ " ./CTA.TMVA.sub_train.sh $ARRAY $OFFAXIS $S$M $PARA $QSUBOPT $AZ ########################################## # IRFs: angular resolution @@ -337,6 +337,6 @@ do done done done - echo + echo echo "(end of script)" done diff --git a/prod2/CTA.runProd2Analysis.sh b/prod2/CTA.runProd2Analysis.sh index 473a717..3bd3ba2 100755 --- a/prod2/CTA.runProd2Analysis.sh +++ b/prod2/CTA.runProd2Analysis.sh @@ -7,9 +7,9 @@ ############################################## -if [ $# -ne 3 ] +if [ $# -ne 3 ] then - echo + echo echo "./CTA.runProd2Analysis.sh " echo echo " N=prod2-North, S=prod2-South, SM=prod2-South-merged, P1=prod1" @@ -68,7 +68,7 @@ then # data sets with trgmask files # SITE=( "prod2-SAC100-NS" "prod2-Leoncito-NS" ) 
########################################################## -# 40 deg data sets +# 40 deg data sets # (change run list directories!) # SITE=( "prod2-Aar-40deg-NS" "prod2-Leoncito-40deg-NS" "prod2-LeoncitoPP-40deg-NS" ) ########################################################## @@ -203,7 +203,7 @@ do # standard evndisplay analysis else ./CTA.EVNDISP.sub_convert_and_analyse_MC_VDST_ArrayJob.prod2.sh $ARRAY $LIST $N $S 0 $i $QSUBOPT $TRG - fi + fi done continue fi @@ -260,11 +260,11 @@ do echo "GETXOFFYOFFAFTERCUTS yes" >> $PARA EFFDIR="/lustre/fs9/group/cta/users/$USER/CTA/analysis/AnalysisData/$S/$EFFDIR/" ########################################## -# train BDTs +# train BDTs # (note: BDT training does not need to be done for all observing periods) if [[ $RUN == "TRAIN" ]] then - echo "$AZ " + echo "$AZ " ./CTA.TMVA.sub_train.sh $ARRAY onSource $S $PARA $QSUBOPT $AZ ./CTA.TMVA.sub_train.sh $ARRAY cone $S $PARA $QSUBOPT $AZ ########################################## @@ -301,6 +301,6 @@ do done done done - echo + echo echo "(end of script)" done diff --git a/prod2/subArray.2NN.list b/prod2/subArray.2NN.list index 25d228e..aa4dfd9 100644 --- a/prod2/subArray.2NN.list +++ b/prod2/subArray.2NN.list @@ -1,5 +1,5 @@ N.2NN N.2Nb -N.2Nd +N.2Nd N.2NN-LST N.2Nf-LST diff --git a/prod3b/README.md b/prod3b/README.md index 2274ca3..fe6a0a7 100644 --- a/prod3b/README.md +++ b/prod3b/README.md @@ -16,7 +16,7 @@ ROOT installed (v6.14 or newer) ## Software installation -Install all necessary software using +Install all necessary software using ./prepareProductionBinaries.sh @@ -33,7 +33,7 @@ d. Eventdisplay software from github ($CTA_USER_DATA_DIR/analysis/AnalysisData/$ ## Analysis -Main analysis script is +Main analysis script is To start the analysis do: @@ -42,7 +42,7 @@ To start the analysis do: source ./setSoftwarePaths.sh prod3b-LaPalma-20degu05b-LL 2. 
start analysis - + ./CTA.runAnalysis.sh ### Eventdisplay analysis steps @@ -57,5 +57,3 @@ Requires: To run the La Palma analysis, e.g. do: ./CTA.runAnalysis.sh N20deg EVNDISP 0 - - diff --git a/prod3b/subArray.prod3b.South-SCTAlpha.list b/prod3b/subArray.prod3b.South-SCTAlpha.list index 2f0ae5a..d4ac8ac 100644 --- a/prod3b/subArray.prod3b.South-SCTAlpha.list +++ b/prod3b/subArray.prod3b.South-SCTAlpha.list @@ -13,4 +13,4 @@ S.SCTAlpha-v2.M2-14MSTs11SCTs-MSTF S.SCTAlpha-v2.M3-14MSTs11SCTs-MSTF S.SCTAlpha-v2.M4-14MSTs11SCTs-MSTF S.SCTAlpha-v2.C0-25MSTs40SSTs-MSTF -S.SCTAlpha-v2.F2-25SCTs40SSTs \ No newline at end of file +S.SCTAlpha-v2.F2-25SCTs40SSTs diff --git a/prod5/prepare_top4.sh b/prod5/prepare_top4.sh index 01c3263..e327f01 100755 --- a/prod5/prepare_top4.sh +++ b/prod5/prepare_top4.sh @@ -1,7 +1,7 @@ # script to extract certain telescope combinations # from large telescope lists # -# hardwired: +# hardwired: # - OF: output file # - IF: input (large) list # - list of MSTs and SST diff --git a/prod5/subArray.prod5.South-70SSTs-sub.list b/prod5/subArray.prod5.South-70SSTs-sub.list new file mode 100644 index 0000000..26a22ad --- /dev/null +++ b/prod5/subArray.prod5.South-70SSTs-sub.list @@ -0,0 +1,2 @@ +S.50SST.hyperarray +S.37SSTs-50SST.hyperarray diff --git a/prod5/subArray.prod5.South-AlphaC8aj-sub.list b/prod5/subArray.prod5.South-AlphaC8aj-sub.list index 85cfa05..89e70d5 100644 --- a/prod5/subArray.prod5.South-AlphaC8aj-sub.list +++ b/prod5/subArray.prod5.South-AlphaC8aj-sub.list @@ -1,2 +1,3 @@ S.C8aj-37SSTs S.M6-14MSTs-MSTF +S.BL-0LSTs25MSTs00SSTs-MSTF diff --git a/prod5/subArray.prod5.South-Beta-sub.list b/prod5/subArray.prod5.South-Beta-sub.list new file mode 100644 index 0000000..6e4c720 --- /dev/null +++ b/prod5/subArray.prod5.South-Beta-sub.list @@ -0,0 +1,13 @@ +S.BL-0LSTs25MSTs00SSTs-MSTF +S.BL-2LSTsF +S.BL-3LSTs00MSTs00SSTs-MSTF +S.BL-4LSTs00MSTs00SSTs-MSTF +S.C8aj-37SSTs +S.C8aj-42SSTs +S.M6-10MSTs-MSTF +S.M6-14MSTs-MSTF 
+S.SV2-0LSTs02MSTs00SSTs-MSTF +S.SV3f-0LSTs00MSTs05SSTs-MSTF +S.SV3f-0LSTs00MSTs15SSTs-MSTF +S.SV3f-0LSTs00MSTs25SSTs-MSTF +S.SV3f-0LSTs05MSTs00SSTs-MSTF diff --git a/prod5/subArray.prod5.South-Beta.list b/prod5/subArray.prod5.South-Beta.list new file mode 100644 index 0000000..68e3fac --- /dev/null +++ b/prod5/subArray.prod5.South-Beta.list @@ -0,0 +1,13 @@ +S-M6C8aj-14MSTs37SSTs-MSTF +S-M6C8sv-2LSTs14MSTs37SSTs-MSTF +S-M6C8sv-2LSTs14MSTs42SSTs-MSTF +S.SV2b-0LSTs02MSTs05SSTs-MSTF +S.SV2b-2LSTs02MSTs05SSTs-MSTF +S.SV3f-0LSTs02MSTs15SSTs-MSTF +S.SV3f-0LSTs02MSTs25SSTs-MSTF +S.SV3f-0LSTs05MSTs15SSTs-MSTF +S.SV3f-0LSTs05MSTs25SSTs-MSTF +S.SV3f-2LSTs02MSTs15SSTs-MSTF +S.SV3f-2LSTs05MSTs05SSTs-MSTF +S.SV3f-2LSTs05MSTs15SSTs-MSTF +S.SV3f-2LSTs05MSTs25SSTs-MSTF diff --git a/prod5/subArray.prod5.South-HyperSST.list b/prod5/subArray.prod5.South-HyperSST.list new file mode 100644 index 0000000..e80e071 --- /dev/null +++ b/prod5/subArray.prod5.South-HyperSST.list @@ -0,0 +1 @@ +S.50SST.hyperarray diff --git a/prod5/subArray.prod5.South-SV-5MSTs.list b/prod5/subArray.prod5.South-SV-5MSTs.list new file mode 100644 index 0000000..31f38f7 --- /dev/null +++ b/prod5/subArray.prod5.South-SV-5MSTs.list @@ -0,0 +1,12 @@ +S.SV3a-0LSTs04MSTs18SSTs-MSTF +S.SV3a-0LSTs04MSTs18SSTs-MSTN +S.SV3a-2LSTs05MSTs05SSTs-MSTF +S.SV3b-0LSTs04MSTs18SSTs-MSTF +S.SV3b-0LSTs04MSTs18SSTs-MSTN +S.SV3b-2LSTs05MSTs05SSTs-MSTF +S.SV3c-0LSTs04MSTs18SSTs-MSTF +S.SV3c-0LSTs04MSTs18SSTs-MSTN +S.SV3c-2LSTs05MSTs05SSTs-MSTF +S.SV3d-2LSTs05MSTs05SSTs-MSTF +S.SV3e-2LSTs05MSTs05SSTs-MSTF +S.SV3f-2LSTs05MSTs05SSTs-MSTF diff --git a/prod5/subArray.prod5.South-SV3f-sub.list b/prod5/subArray.prod5.South-SV3f-sub.list new file mode 100644 index 0000000..a7fa096 --- /dev/null +++ b/prod5/subArray.prod5.South-SV3f-sub.list @@ -0,0 +1,5 @@ +S.SV3f-0LSTs00MSTs05SSTs-MSTF +S.SV3f-0LSTs00MSTs15SSTs-MSTF +S.SV3f-0LSTs00MSTs25SSTs-MSTF +S.SV3f-0LSTs02MSTs00SSTs-MSTF +S.SV3f-0LSTs05MSTs00SSTs-MSTF diff --git 
a/prod5/subArray.prod5.South-SV3f-v2-sub.list b/prod5/subArray.prod5.South-SV3f-v2-sub.list new file mode 100644 index 0000000..39e417c --- /dev/null +++ b/prod5/subArray.prod5.South-SV3f-v2-sub.list @@ -0,0 +1,3 @@ +S.SV3f-0LSTs00MSTs05SSTs-MSTF +S.SV3f-0LSTs00MSTs15SSTs-MSTF +S.SV3f-0LSTs00MSTs25SSTs-MSTF diff --git a/prod5/subArray.prod5.South-SV3f-v2.list b/prod5/subArray.prod5.South-SV3f-v2.list new file mode 100644 index 0000000..4f6a399 --- /dev/null +++ b/prod5/subArray.prod5.South-SV3f-v2.list @@ -0,0 +1,5 @@ +S.SV3f-0LSTs02MSTs15SSTs-MSTF +S.SV3f-0LSTs05MSTs15SSTs-MSTF +S.SV3f-0LSTs02MSTs25SSTs-MSTF +S.SV3f-0LSTs05MSTs25SSTs-MSTF +S.SV3f-2LSTs05MSTs25SSTs-MSTF diff --git a/prod5/subArray.prod5.South-SV3f-v3-sub.list b/prod5/subArray.prod5.South-SV3f-v3-sub.list new file mode 100644 index 0000000..7ef2ac0 --- /dev/null +++ b/prod5/subArray.prod5.South-SV3f-v3-sub.list @@ -0,0 +1 @@ +S.SV3f-0LSTs00MSTs25SSTs-MSTF diff --git a/prod5/subArray.prod5.South-SV3f.list b/prod5/subArray.prod5.South-SV3f.list new file mode 100644 index 0000000..2d9ae5c --- /dev/null +++ b/prod5/subArray.prod5.South-SV3f.list @@ -0,0 +1,8 @@ +S.SV3f-0LSTs02MSTs15SSTs-MSTF +S.SV3f-0LSTs02MSTs25SSTs-MSTF +S.SV3f-0LSTs05MSTs15SSTs-MSTF +S.SV3f-0LSTs05MSTs25SSTs-MSTF +S.SV3f-2LSTs02MSTs15SSTs-MSTF +S.SV3f-2LSTs05MSTs05SSTs-MSTF +S.SV3f-2LSTs05MSTs15SSTs-MSTF +S.SV3f-2LSTs05MSTs25SSTs-MSTF diff --git a/prod6/subArray.prod6.NorthAlpha-sub.list b/prod6/subArray.prod6.NorthAlpha-sub.list index 23bf0e4..49b07d6 100644 --- a/prod6/subArray.prod6.NorthAlpha-sub.list +++ b/prod6/subArray.prod6.NorthAlpha-sub.list @@ -1,2 +1,5 @@ -N.Am-0LSTs09MSTs -N.Am-4LSTs00MSTs +N.Am-0LSTs05MSTs +N.Am-0LSTs03MSTs +N.Am-3LSTs00MSTs +N.Am-2LSTs00MSTs +N.AmTS-0LSTs05MSTs diff --git a/prod6/subArray.prod6.NorthAlpha.list b/prod6/subArray.prod6.NorthAlpha.list index 7a59465..86c30bd 100644 --- a/prod6/subArray.prod6.NorthAlpha.list +++ b/prod6/subArray.prod6.NorthAlpha.list @@ -1,3 +1,6 @@ N.Am-4LSTs09MSTs 
N.Am-0LSTs09MSTs N.Am-4LSTs00MSTs +N.Am-4LSTs03MSTs +N.Am-4LSTs05MSTs +N.AmTS-4LSTs05MSTs diff --git a/prod6/subArray.prod6.NorthML-sub.list b/prod6/subArray.prod6.NorthML-sub.list new file mode 100644 index 0000000..a61fa6b --- /dev/null +++ b/prod6/subArray.prod6.NorthML-sub.list @@ -0,0 +1,3 @@ +N.Am-3LSTs00MSTs +N.Am-2LSTs00MSTs +N.Am-1LSTs00MSTs diff --git a/prod6/subArray.prod6.NorthML.list b/prod6/subArray.prod6.NorthML.list new file mode 100644 index 0000000..2541eb7 --- /dev/null +++ b/prod6/subArray.prod6.NorthML.list @@ -0,0 +1 @@ +N.Am-4LSTs01MSTs diff --git a/prod6/subArray.prod6.SouthAlpha-sub.list b/prod6/subArray.prod6.SouthAlpha-sub.list index b64eb85..25c4de7 100644 --- a/prod6/subArray.prod6.SouthAlpha-sub.list +++ b/prod6/subArray.prod6.SouthAlpha-sub.list @@ -1,2 +1,14 @@ -S.Am-0LSTs14MSTs00SSTs +S.Am-0LSTs00MSTs05SSTs +S.Am-0LSTs00MSTs13SSTs +S.Am-0LSTs00MSTs15SSTs +S.Am-0LSTs00MSTs22SSTs S.Am-0LSTs00MSTs37SSTs +S.Am-0LSTs00MSTs42SSTs +S.Am-0LSTs02MSTs00SSTs +S.Am-0LSTs05MSTs00SSTs +S.Am-0LSTs10MSTs00SSTs +S.Am-0LSTs11MSTs00SSTs +S.Am-0LSTs14MSTs00SSTs +S.Am-2LSTs00MSTs00SSTs +S.Am-3LSTs00MSTs00SSTs +S.Am-4LSTs00MSTs00SSTs diff --git a/prod6/subArray.prod6.SouthAlpha.list b/prod6/subArray.prod6.SouthAlpha.list index ec7374b..f26f889 100644 --- a/prod6/subArray.prod6.SouthAlpha.list +++ b/prod6/subArray.prod6.SouthAlpha.list @@ -1 +1,18 @@ +S.Am-0LSTs02MSTs05SSTs +S.Am-0LSTs05MSTs13SSTs +S.Am-0LSTs10MSTs13SSTs +S.Am-0LSTs10MSTs15SSTs +S.Am-0LSTs10MSTs22SSTs +S.Am-0LSTs11MSTs37SSTs S.Am-0LSTs14MSTs37SSTs +S.Am-0LSTs14MSTs42SSTs +S.Am-2LSTs02MSTs05SSTs +S.Am-2LSTs05MSTs13SSTs +S.Am-2LSTs10MSTs13SSTs +S.Am-2LSTs10MSTs15SSTs +S.Am-2LSTs10MSTs22SSTs +S.Am-2LSTs11MSTs37SSTs +S.Am-2LSTs14MSTs37SSTs +S.Am-2LSTs14MSTs42SSTs +S.Am-4LSTs14MSTs37SSTs +S.Am-4LSTs14MSTs42SSTs diff --git a/setSoftwarePaths.sh b/setSoftwarePaths.sh index a637c37..e26561e 100755 --- a/setSoftwarePaths.sh +++ b/setSoftwarePaths.sh @@ -20,13 +20,14 @@ fi TDIR=$(pwd) # SL7 export 
ROOTSYS=/afs/ifh.de/group/cta/cta/software/root/root-6.20.04_build/ -export ROOTSYS=/afs/ifh.de/group/cta/cta/software/root/root_v6.30.02.Linux-almalinux9.3-x86_64-gcc11.4/ +export ROOTSYS=/afs/ifh.de/group/cta/cta/software/root/root_v6.40.00.Linux-almalinux9.7-x86_64-gcc11.5/ # main working directory (logs and code) DSET="${1}" -export WORKDIR="${CTA_USER_WORK_DIR%/}/analysis/AnalysisData/${DSET}" +SUBDIR="analysis/AnalysisData" +export WORKDIR="${CTA_USER_WORK_DIR%/}/${SUBDIR}/${DSET}" # main data results -# export DATADIR="${CTA_USER_DATA_DIR}/analysis/AnalysisData/${DSET}" +# export DATADIR="${CTA_USER_DATA_DIR}/${SUBDIR}/${DSET}" # ROOT installation expected if [[ -z ${ROOTSYS} ]]; then @@ -40,6 +41,7 @@ ROOTCONF=`root-config --libdir` export LD_LIBRARY_PATH=${ROOTCONF} # EVNDISPSYS settings +export EVNDISPSCRIPTS="$TDIR" if [[ -d ${WORKDIR}/code/Eventdisplay/ ]]; then export EVNDISPSYS="${WORKDIR}/code/Eventdisplay/" elif [[ -d ${WORKDIR}/code ]]; then @@ -56,6 +58,7 @@ if [[ -e ${EVNDISPSYS}/hessioxxx ]]; then else export HESSIOSYS=${WORKDIR}/code/hessioxxx fi +#export HESSIOSYS=/cvmfs/sw.cta-observatory.org/software/centos7/gcc83_noOpt/simulations/corsika_simtelarray/2024-02-05/hessioxxx export LD_LIBRARY_PATH=$HESSIOSYS/lib:${LD_LIBRARY_PATH} if [ $VBFSYS ] @@ -64,8 +67,8 @@ then fi export ROOT_INCLUDE_PATH=${EVNDISPSYS}/inc -export CTA_EVNDISP_AUX_DIR=${WORKDIR}/Eventdisplay_AnalysisFiles_CTA/ +export CTA_EVNDISP_AUX_DIR=${WORKDIR}/Eventdisplay_AnalysisFiles_CTA export OBS_EVNDISP_AUX_DIR=${CTA_EVNDISP_AUX_DIR} -export CTA_USER_LOG_DIR="${WORKDIR}/LOGS/" +export CTA_USER_LOG_DIR="${WORKDIR}/LOGS" export SOFASYS=${EVNDISPSYS}/sofa diff --git a/testProduction/README.md b/testProduction/README.md index 9e4023f..965f5b8 100644 --- a/testProduction/README.md +++ b/testProduction/README.md @@ -34,7 +34,7 @@ This script: - counts number of linked mscw_energy files used as input to effective area stage (observe the numbers) - test for existence of effective area 
output files -e.g., +e.g., ``` ./test-CTA.EFFAREA.sh prod5b-LaPalma-20deg-sq08-LL subArray.prod5b.North-test2.list 0 ``` diff --git a/testProduction/test-CTA.DISPTRAINING.sh b/testProduction/test-CTA.DISPTRAINING.sh index 8700978..960c4ae 100755 --- a/testProduction/test-CTA.DISPTRAINING.sh +++ b/testProduction/test-CTA.DISPTRAINING.sh @@ -21,7 +21,7 @@ BDTS=".T1" # directory with disp results HDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/${1}/DISPBDT/" if [[ ! -d ${HDIR} ]]; then - echo "ERROR: directory with data not found: ${HDIR}" + echo "ERROR: directory with data not found: ${HDIR}" exit fi @@ -31,8 +31,8 @@ for A in ${ALIST} do echo "Layout $A" DDIR=${HDIR}/BDTdisp.${A}${BDTS} - if [[ ! -d ${DDIR} ]]; then - echo "ERROR: directory with disp data not found: ${DDIR}" + if [[ ! -d ${DDIR} ]]; then + echo "ERROR: directory with disp data not found: ${DDIR}" continue fi for BDT in BDTDisp BDTDispEnergy BDTDispError BDTDispCore diff --git a/testProduction/test-CTA.EFFAREA.sh b/testProduction/test-CTA.EFFAREA.sh index dd6db61..6c3593f 100755 --- a/testProduction/test-CTA.EFFAREA.sh +++ b/testProduction/test-CTA.EFFAREA.sh @@ -31,7 +31,7 @@ NWBINS=6 # (end of) hardwired values ########################################################## -############## +############## # check array function checkArray() { if [[ ! -e ${FILENAME}.root ]] || [[ ! 
-s ${FILENAME}.root ]]; then @@ -56,7 +56,7 @@ if [[ -e ${FILENAME}.SMALLFILE ]]; then rm -f ${FILENAME}.SMALLFILE else FILELOGSTATUS="FALSE" - FFN=$(basename ${FILENAME}.SMALLFILE) + FFN=$(basename ${FILENAME}.SMALLFILE) DFN=$(dirname ${FILENAME}.SMALLFILE) echo "Error: effective area small file ${FFN} in ${DFN}" fi @@ -84,7 +84,7 @@ do echo "ERROR: directory with input data not found: ${ADIR}" fi # number of MSCW files - for P in gamma_onSource gamma_cone proton electron + for P in gamma_onSource gamma_cone proton electron do NF=$(ls -1 ${ADIR}/${P}*mscw.root | wc -l) if [[ $NF == "0" ]]; then @@ -105,8 +105,8 @@ do #for E in BDT.${OBSTIME}-V3.${EFFDATE} for E in BDT.30m-V3.${EFFDATE} do - #################### - # multiplicity loop + #################### + # multiplicity loop # (tmp: only MST and SST implemented) for M in 2 3 4 5 6 do @@ -174,7 +174,7 @@ do cd ./testProduction fi done - done + done done done done diff --git a/testProduction/test-CTA.EVNDISP.sh b/testProduction/test-CTA.EVNDISP.sh index 1bca11b..4db4fec 100755 --- a/testProduction/test-CTA.EVNDISP.sh +++ b/testProduction/test-CTA.EVNDISP.sh @@ -5,14 +5,14 @@ if [ ! -n "$2" ] then echo "./test-CTA.EVNDISP.sh " - echo + echo echo exit fi HDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/$1" if [[ ! -d ${HDIR} ]]; then - echo "Error: directory with data not found" + echo "Error: directory with data not found" exit fi diff --git a/testProduction/test-CTA.TMVA.sh b/testProduction/test-CTA.TMVA.sh index 14faac5..70ff09d 100755 --- a/testProduction/test-CTA.TMVA.sh +++ b/testProduction/test-CTA.TMVA.sh @@ -5,7 +5,7 @@ if [ ! -n "$3" ] then echo "test-TMVA.sh " - echo + echo echo " runmodes: EVNDISP" echo exit @@ -13,7 +13,7 @@ fi HDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/$1" if [[ ! 
-d ${HDIR} ]]; then - echo "Error: directory with data not found" + echo "Error: directory with data not found" exit fi diff --git a/towncrier.toml b/towncrier.toml new file mode 100644 index 0000000..3c0918b --- /dev/null +++ b/towncrier.toml @@ -0,0 +1,24 @@ +[tool.towncrier] +name = "Eventdisplay_AnalysisScripts_CTA" +directory = "docs/changes" +filename = "CHANGELOG.md" +underlines = [ "", "", "" ] +title_format = "## [{version}](https://github.com/Eventdisplay/Eventdisplay_AnalysisScripts_CTA/releases/tag/{version}) - {project_date}" +issue_format = "[#{issue}](https://github.com/Eventdisplay/Eventdisplay_AnalysisScripts_CTA/issues/{issue})" +start_string = "\n" + +[tool.towncrier.fragment.feature] +name = "New Feature" +showcontent = true + +[tool.towncrier.fragment.bugfix] +name = "Bugfixes" +showcontent = true + +[tool.towncrier.fragment.maintenance] +name = "Maintenance" +showcontent = true + +[tool.towncrier.fragment.doc] +name = "Documentation" +showcontent = true diff --git a/utilities/CTA.DISPTRAINING.trainingOptions.sh b/utilities/CTA.DISPTRAINING.trainingOptions.sh index e527d71..57d20a4 100755 --- a/utilities/CTA.DISPTRAINING.trainingOptions.sh +++ b/utilities/CTA.DISPTRAINING.trainingOptions.sh @@ -1,10 +1,10 @@ #!/bin/sh # -# simple script to step through different TMVA training options +# simple script to step through different TMVA training options # -# can be used with the run scripts +# can be used with the run scripts # analysis/CTA.DISPTRAINING.sub_analyse.sh -# will step through all parameter lines +# will step through all parameter lines # # NTrees=2000:BoostType=Grad:IgnoreNegWeightsInTraining:Shrinkage=0.1:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=20:MaxDepth=6:PruneMethod=ExpectedError @@ -60,4 +60,3 @@ do done done done - diff --git a/utilities/linkEvndispProductionToProductionDirectory.sh b/utilities/linkEvndispProductionToProductionDirectory.sh new file mode 100755 index 0000000..6e577bf --- /dev/null +++ 
b/utilities/linkEvndispProductionToProductionDirectory.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# link EVNDISP directories in analysis directories to +# production directory +# +if [ ! -n "$2" ] +then + echo "./linkEvndispProductionToProductionDirectory.sh " + echo + exit +fi + +# File lists +HDIR="$1" +if [[ ! -d ${HDIR} ]]; then + echo "Error: directory with file lists set not found" + exit +fi +TDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/${2}" +mkdir -p ${TDIR} + +if [[ $(basename $HDIR) == *"LaPalma"* ]]; then + A="N.hyperarray" +else + A="S.hyperarray" +fi + + +mkdir -p ${TDIR}/${A}/EVNDISP + +for P in gamma_cone gamma_onSource proton electron +do + rm -f ${TDIR}/${A}/EVNDISP/${P}/* + mkdir -p ${TDIR}/${A}/EVNDISP/${P} + echo "${TDIR}/${A}/EVNDISP/${P}" + FLIST=${HDIR}/${P}.list + FILES=$(cat $FLIST) + for F in $FILES; do + ln -s $F ${TDIR}/${A}/EVNDISP/${P}/$(basename $F) + done +done diff --git a/utilities/linkEvndispProduction.sh b/utilities/linkEvndispProductiontoHyperArrayDirectory.sh similarity index 80% rename from utilities/linkEvndispProduction.sh rename to utilities/linkEvndispProductiontoHyperArrayDirectory.sh index 85f9204..0e90f8f 100755 --- a/utilities/linkEvndispProduction.sh +++ b/utilities/linkEvndispProductiontoHyperArrayDirectory.sh @@ -1,22 +1,22 @@ #!/bin/bash # # link EVNDISP directories in analysis directories to -# general EVNDISP production +# general EVNDISP production # (assume that EVNDISP is stable in the development) # if [ ! -n "$4" ] then - echo "./linkEvndispProduction.sh " + echo "./linkEvndispProductiontoHyperArrayDirectory.sh " echo echo " North or South" - echo + echo exit fi # EVNDISP directory HDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/$1" if [[ ! 
-d ${HDIR} ]]; then - echo "Error: directory with EVNDISP data set not found" + echo "Error: directory with EVNDISP data set not found" exit fi TDIR="$CTA_USER_DATA_DIR/analysis/AnalysisData/${2}" diff --git a/utilities/linkEvndispProductiontoHyperArrayDirectory_all.sh b/utilities/linkEvndispProductiontoHyperArrayDirectory_all.sh new file mode 100755 index 0000000..38a8ccd --- /dev/null +++ b/utilities/linkEvndispProductiontoHyperArrayDirectory_all.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Link EVNDISP production directories to the hyperarray directories for all data sets in the list +# +# Note that many parameters are hardwired; the second LIST and DSET assignments below override the first ones. + +LIST="../prod6/subArray.prod6.SouthAlpha.list" +LIST="../prod6/subArray.prod6.SouthAlpha-sub.list" +DSET="prod6-LaPalma-ZEdeg-NSB-sq51-LL" +DSET="prod6-Paranal-ZEdeg-NSB-sq20-LL" + +for Z in 20 40 52 60; do + for N in moon; do + FSET=${DSET/ZE/$Z} + FSET=${FSET/NSB/$N} + echo $FSET + ./linkEvndispProductiontoHyperArrayDirectory.sh $FSET $FSET $LIST South + done + done diff --git a/utilities/pack_public_IRFs.sh b/utilities/pack_public_IRFs.sh index b95109b..a917f68 100755 --- a/utilities/pack_public_IRFs.sh +++ b/utilities/pack_public_IRFs.sh @@ -65,7 +65,7 @@ do for O in ${OBS} do if [[ ${O} == "50h" ]] || [[ ${O} == "5h" ]] ; then - # full arrays + # full arrays if [[ ${SITE} == "South" ]]; then M="NIM3LST3MST3SST4SCMST3" else diff --git a/utilities/prepareAnalysis.sh b/utilities/prepareAnalysis.sh index ad40488..c37fdc5 100755 --- a/utilities/prepareAnalysis.sh +++ b/utilities/prepareAnalysis.sh @@ -94,4 +94,3 @@ linksubarrays ( install ) - diff --git a/utilities/removeUnreaseonablePhysFiles.sh b/utilities/removeUnreaseonablePhysFiles.sh deleted file mode 100755 index 322b90d..0000000 --- a/utilities/removeUnreaseonablePhysFiles.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# -# remove PHYS files which are definitely empty -# e.g., LST4 for 2LST arrays -# -if [ !
-n "$1" ] -then - echo "./removeUnreaseonablePhysFiles.sh " - echo - echo - exit -fi - -if [[ ! -d "$1" ]]; then - echo "Error, directory does not exist" - exit -fi - -#OPTION="-delete -print" -OPTION="-print -delete" - -# North SV arrays -find "$1" -name "*LST3MST*2LSTs00MSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4*2LSTs00MSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4*3LSTs00MSTs-*" ${OPTION} -find "$1" -name "*NIM3LST4*3LSTs00MSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4MST4*0LSTs03MSTs-*" ${OPTION} -find "$1" -name "*MST3*0LSTs02MSTs-MSTN*" ${OPTION} -find "$1" -name "*MST4*0LSTs02MSTs-MSTN*" ${OPTION} -find "$1" -name "*MST4*0LSTs03MSTs-MSTN*" ${OPTION} -find "$1" -name "*LST4*2LSTs00MSTs-MSTN*" ${OPTION} -find "$1" -name "*LST4*2LSTs00MSTs-MSTN*" ${OPTION} - - -################## -# South LST arrays -# 4 LSTs -find "$1" -name "*NIM5LST5*4LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM6LST6*4LSTs00MSTs00SSTs-*" ${OPTION} -# 3 LSTs -find "$1" -name "*NIM5LST5*3LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM6LST6*3LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4*3LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4*S.BL-3LSTs[A-Z]*" ${OPTION} -# 2 LSTs -find "$1" -name "*NIM5LST5*2LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM6LST6*2LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4*2LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM3LST3*2LSTs00MSTs00SSTs-*" ${OPTION} -find "$1" -name "*NIM3LST3*S.BL-2LSTs[A-Z]*" ${OPTION} -find "$1" -name "*NIM4LST4*S.BL-2LSTs[A-Z]*" ${OPTION} -find "$1" -name "*NIM[3-9]LST*S-*2LSTs*" ${OPTION} -find "$1" -name "*NIM[4-9]LST*S-*3LSTs*" ${OPTION} -find "$1" -name "*NIM[5-9]LST*S-*4LSTs*" ${OPTION} -# 2LST 3 MSTs -find "$1" -name "*NIM3LST3MST3*2LSTs03MSTs*" ${OPTION} -find "$1" -name "*NIM4LST4MST4*2LSTs03MSTs*" ${OPTION} - -# South SST arrays only -find "$1" -name "*ID2*-[0-9][0-9]SSTs.*" ${OPTION} - -# South MST arrays only -find "$1" -name "*ID3*-[0-9][0-9]MSTs-MSTF*" ${OPTION} - -# 
North LST arrays -find "$1" -name "*NIM4LST4*3LSTs00MSTs-*" ${OPTION} -find "$1" -name "*NIM4LST4*2LSTs00MSTs-*" ${OPTION} -find "$1" -name "*NIM3LST3*2LSTs00MSTs-*" ${OPTION} - diff --git a/utilities/removeUnreasonablePhysFiles.sh b/utilities/removeUnreasonablePhysFiles.sh new file mode 100755 index 0000000..010f43c --- /dev/null +++ b/utilities/removeUnreasonablePhysFiles.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# +# remove PHYS files which are definitely empty +# e.g., LST4 for 2LST arrays +# +# $1: directory searched recursively (find) for the name patterns listed below +# +if [ ! -n "$1" ] +then + echo "./removeUnreasonablePhysFiles.sh " + echo + echo + exit +fi + +if [[ ! -d "$1" ]]; then + echo "Error, directory does not exist: $1" >&2 + exit 1 +fi + +# find actions: print each match, then delete it (expanded unquoted below so that it splits into two words) +OPTION="-print -delete" + +# North SV arrays +find "$1" -name "*LST3MST*2LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM3LST3*2LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM4LST4*2LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM4LST4*3LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM3LST4*3LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM4LST4MST4*0LSTs03MSTs.*" ${OPTION} +find "$1" -name "*NIM4LST4MST4*3LSTs00MSTs.*" ${OPTION} +find "$1" -name "*NIM4LST4MST4*2LSTs00MSTs.*" ${OPTION} +find "$1" -name "*NIM3LST3MST3*2LSTs00MSTs.*" ${OPTION} +find "$1" -name "*MST3*0LSTs02MSTs-MSTN*" ${OPTION} +find "$1" -name "*MST4*0LSTs02MSTs-MSTN*" ${OPTION} +find "$1" -name "*MST4*0LSTs03MSTs-MSTN*" ${OPTION} +find "$1" -name "*LST4*2LSTs00MSTs-MSTN*" ${OPTION} +find "$1" -name "*MST4*LSTs03MSTs*" ${OPTION} + + +################## +# South LST arrays +# 4 LSTs +find "$1" -name "*NIM5LST5*4LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM6LST6*4LSTs00MSTs00SSTs*" ${OPTION} +# 3 LSTs +find "$1" -name "*NIM5LST5*3LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM6LST6*3LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM4LST4*3LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM4LST4*S.BL-3LSTs[A-Z]*" ${OPTION} +# 2 LSTs +find "$1" -name "*NIM5LST5*2LSTs00MSTs00SSTs*" ${OPTION}
+find "$1" -name "*NIM6LST6*2LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM4LST4*2LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM3LST3*2LSTs00MSTs00SSTs*" ${OPTION} +find "$1" -name "*NIM3LST3*S.BL-2LSTs[A-Z]*" ${OPTION} +find "$1" -name "*NIM4LST4*S.BL-2LSTs[A-Z]*" ${OPTION} +find "$1" -name "*NIM4LST4*-2LSTs*" ${OPTION} +find "$1" -name "*NIM3LST3*-2LSTs*" ${OPTION} +find "$1" -name "*NIM[3-9]LST*S-*2LSTs*" ${OPTION} +find "$1" -name "*NIM[4-9]LST*S-*3LSTs*" ${OPTION} +find "$1" -name "*NIM[5-9]LST*S-*4LSTs*" ${OPTION} +# 2LST 3 MSTs +find "$1" -name "*NIM3LST3MST3*2LSTs03MSTs*" ${OPTION} +find "$1" -name "*NIM4LST4MST4*2LSTs03MSTs*" ${OPTION} +# 2 MSTs +find "$1" -name "*NIM3LST3MST3*0LSTs02MSTs*" ${OPTION} +find "$1" -name "*NIM3*MST4*0LSTs02MST*" ${OPTION} +find "$1" -name "*NIM4LST4MST4*02MST*" ${OPTION} +find "$1" -name "*NIM4LST4MST4*0LSTs02MSTs00SSTs*" ${OPTION} +find "$1" -name "*ID0NIM2LST2MST3*02MSTs*" ${OPTION} +# 3 MSTs +find "$1" -name "*NIM4LST4MST4*0LSTs03MSTs00SSTs*" ${OPTION} + +# South SST arrays only +find "$1" -name "*ID2*-[0-9][0-9]SSTs.*" ${OPTION} + +# South MST arrays only +find "$1" -name "*ID3*-[0-9][0-9]MSTs-MSTF*" ${OPTION} + +# NIM6 +find "$1" -name "*ID0*NIM6*0LSTs05MSTs00SSTs*" ${OPTION} +find "$1" -name "*ID0*NIM6*0LSTs02MSTs00SSTs*" ${OPTION} +find "$1" -name "*ID0*NIM5*0LSTs02MSTs00SSTs*" ${OPTION} +find "$1" -name "*ID0*NIM6*0LSTs00MSTs05SSTs*" ${OPTION} +find "$1" -name "*ID0*NIM6*0LSTs02MSTs05SSTs*" ${OPTION} +find "$1" -name "*ID0*NIM6*2LSTs02MSTs05SSTs*" ${OPTION} +find "$1" -name "*ID0*NIM1*" ${OPTION} + +# North LST arrays +find "$1" -name "*NIM4LST4*3LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM4LST4*2LSTs00MSTs-*" ${OPTION} +find "$1" -name "*NIM3LST3*2LSTs00MSTs-*" ${OPTION} diff --git a/utilities/submit_scripts_to_htcondor.sh b/utilities/submit_scripts_to_htcondor.sh index 8870262..742be55 100755 --- a/utilities/submit_scripts_to_htcondor.sh +++ b/utilities/submit_scripts_to_htcondor.sh @@ -12,37 +12,46 @@
set -e if [ $# -lt 1 ] then echo " - ./submit_scripts_to_htcondor.sh + ./submit_scripts_to_htcondor.sh [submit/nosubmit] [priority] " exit fi JDIR=${1} - -SUBMITF=${1}/submit.txt -rm -f ${SUBMITF} -touch ${SUBMITF} - -echo "Writing HTCondor job submission file ${SUBMITF}" - -echo "executable = \$(file)" >> ${SUBMITF} -echo "log = \$(file).log" >> ${SUBMITF} -echo "output = \$(file).output" >> ${SUBMITF} -echo "error = \$(file).error" >> ${SUBMITF} - -# assume that all condor files have similar requests -CONDORFILE=$(find ${JDIR} -name "*.condor" | head -n 1) -echo "$(grep -h request_memory $CONDORFILE)" >> ${SUBMITF} -echo "$(grep -h request_disk $CONDORFILE)" >> ${SUBMITF} -echo "getenv = True" >> ${SUBMITF} -echo "max_materialize = 5000" >> ${SUBMITF} -echo "priority = 50" >> ${SUBMITF} -echo "queue file matching files *.sh" >> ${SUBMITF} - -PDIR=$(pwd) -if [[ ${2} == "submit" ]]; then - cd ${JDIR} - condor_submit submit.txt requirements='OpSysAndVer=="AlmaLinux9"' - cd ${PDIR} +PRIORITY="${3:-1}" +SUBMITF="${JDIR}/submit.txt" + +echo "Writing HTCondor job submission file ${SUBMITF} (job priority $PRIORITY) for ${JDIR}" +if find "${JDIR}" -name "*.condor" -print -quit | grep -q .; then + rm -f "${SUBMITF}" + touch "${SUBMITF}" + + # subdirectories referenced by the log/output/error entries written below + mkdir -p "${JDIR}/log" "${JDIR}/output" "${JDIR}/error" + + echo "executable = \$(file)" >> "${SUBMITF}" + echo "log = log/\$(file).log" >> "${SUBMITF}" + echo "output = output/\$(file).output" >> "${SUBMITF}" + echo "error = error/\$(file).error" >> "${SUBMITF}" + + # assume that all condor files have similar requests + CONDORFILE=$(find "${JDIR}" -name "*.condor" | head -n 1) + echo "$(grep -h request_memory "$CONDORFILE")" >> "${SUBMITF}" + echo "$(grep -h request_disk "$CONDORFILE")" >> "${SUBMITF}" + echo "$(grep -h request_cpus "$CONDORFILE")" >> "${SUBMITF}" + echo "getenv = True" >> "${SUBMITF}" + echo "max_materialize = 1800" >> "${SUBMITF}" + echo "priority = $PRIORITY" >> "${SUBMITF}" + echo "queue file matching files *.sh" >> "${SUBMITF}" + +
PDIR=$(pwd) + if [[ ${2} == "submit" ]]; then + cd "${JDIR}" + condor_submit submit.txt requirements='OpSysAndVer=="AlmaLinux9"' + cd "${PDIR}" + fi +else + echo "Error: no condor files found in ${JDIR}" >&2 + exit 1 fi