from os.path import join

# Chromosome labels '1' through '22', kept as strings.
CHROMOSOMES = list(map(str, range(1, 23)))

# The three PLINK file extensions used by this workflow.
PLINK_FORMATS = ['bed', 'bim', 'fam']

# Same extensions as PLINK_FORMATS with 'nosex' appended.
PLINK_FORMATS_EXT = [*PLINK_FORMATS, 'nosex']

# Root directory that every reference file path below is joined onto.
REF_DIR = config["ref_dir"]


def _reference_file(entry):
    """Return REF_DIR joined with config["reference"][entry]["file"]."""
    return join(REF_DIR, config["reference"][entry]["file"])


LIFT_CHAIN = _reference_file("lift_chain")
GRCH37_FASTA = _reference_file("GRCh37_fasta")

# True only when the configured mode is exactly "client".
IS_CLIENT = config["mode"] == "client"
# Optional key: treated as False when it is absent from the config.
USE_SIMULATED_IBD = config.get("use_simulated_ibd", False)

GENETIC_MAP = _reference_file("GENETIC_MAP")
GENETIC_MAP_GRCH37 = _reference_file("genetic_map_GRCh37")
REF_VCF = _reference_file("vcfRef")
REF_HAPS = _reference_file("refHaps")
CMMAP = _reference_file("cmmap")
SITE_1000GENOME = _reference_file("SITE_1000GENOME")

# Pipeline switches taken directly from the workflow config (all keys required).
assembly = config['assembly']
need_remove_imputation = config['remove_imputation']
# WARNING: need_imputation=True requires need_phase=True as well;
# this is not enforced here, so the config must keep them consistent.
need_phase, need_imputation = config['phase'], config['impute']


# Preferred memory budget (in GB) for memory-hungry jobs.
_IDEAL_LARGE_MEM_GB = 20


def _mem_gb_for_ram_hungry_jobs():
    """Return the memory (GB) to request for RAM-hungry jobs.

    This is the ideal amount (_IDEAL_LARGE_MEM_GB) capped by the
    ``mem_gb`` value from the workflow config, i.e. the smaller of the two.
    """
    available_gb = config["mem_gb"]
    if available_gb < _IDEAL_LARGE_MEM_GB:
        return available_gb
    return _IDEAL_LARGE_MEM_GB


# Aggregate target rule: it has no outputs of its own and only lists the
# preprocessed files (a .vcf.gz plus bed/bim/fam files and a mapped .bim)
# that must exist. As the first rule visible in this file it is presumably
# the default target — confirm no rule is declared earlier.
rule all:
    input:
        vcf="preprocessed/data.vcf.gz",
        bed="preprocessed/data.bed",
        bim="preprocessed/data.bim",
        fam="preprocessed/data.fam",
        # NOTE(review): the keyword says "fam" but the target is a .bim file —
        # confirm whether the name or the path is the intended one.
        fam_mapped="preprocessed/data_mapped.bim"
        # rules.report_benchmark_summary.output


# Pull in the shared preprocessing rules file; presumably it defines the rules
# that produce the targets requested by `rule all` — verify against that file.
include: "../../rules/preprocessing.smk"