from os.path import join

# Root directory for reference data; every reference path below is joined
# onto it.
REF_DIR = config["ref_dir"]

# File names of the individual reference resources, as listed in the config.
_reference_files = config["reference"]


def _ref_path(key):
    """Return the path of the reference file stored under `key`, inside REF_DIR."""
    return join(REF_DIR, _reference_files[key])


# Reference resource paths. The variable names are kept exactly as they were
# because rule files included later may refer to them.
GRCH37_FASTA = _ref_path("GRCh37_fasta")
GENETIC_MAP = _ref_path("GENETIC_MAP")
GENETIC_MAP_GRCH37 = _ref_path("genetic_map_GRCh37")
vcfRef = _ref_path("vcfRef")
refHaps = _ref_path("refHaps")
lift_chain = _ref_path("lift_chain")
cmmap = _ref_path("cmmap")
SITE_1000GENOME = _ref_path("SITE_1000GENOME")
hapmap_fam = _ref_path("hapmap_fam")
AFFYMETRIX_CHIP = _ref_path("affymetrix_chip")
pedsim_map = _ref_path("pedsim_map")

# Name of the relatives-detection flow; it decides which rule file is included.
flow = config["flow"]

# Extensions of a PLINK binary fileset.
PLINK_FORMATS = ["bed", "bim", "fam"]

# Chromosome labels "1" through "22", as strings.
CHROMOSOMES = list(map(str, range(1, 23)))

# Remaining settings, taken verbatim from the config.
assembly = config["assembly"]
need_phase = config["phase"]
need_imputation = config["impute"]
need_remove_imputation = config["remove_imputation"]

# Amount of memory (GB) considered ideal for most memory-hungry jobs.
_IDEAL_LARGE_MEM_GB = 20


def _mem_gb_for_ram_hungry_jobs():
    """Return the memory, in GB, to request for RAM-hungry jobs.

    The result is _IDEAL_LARGE_MEM_GB unless config["mem_gb"] is smaller,
    in which case the configured value is returned instead.
    """
    available_gb = config["mem_gb"]
    if available_gb < _IDEAL_LARGE_MEM_GB:
        return available_gb
    return _IDEAL_LARGE_MEM_GB


# Aggregate target rule: it has no outputs of its own and only lists the final
# files to build. results/accuracy.png is produced by evaluate_accuracy below.
rule all:
    input:
        "results/relatives.tsv",
        "results/accuracy.png"


# Runs scripts/hapmap.py (inside the envs/hapmap.yaml conda environment) on the
# Affymetrix chip VCF and the HapMap fam file configured above.
rule postprocess:
    input:
        vcf=AFFYMETRIX_CHIP,
        fam=hapmap_fam
    output:
        kin="kinship/reheaded_data.kinship",
        vcf="input.vcf.gz",
        # Declared temp(): Snakemake deletes it once no remaining job needs it.
        iid_vcf=temp("input_iid.vcf.gz"),
        samples="input.samples",
        # Consumed by evaluate_accuracy as its `fam` input.
        fam="kinship/reheaded_data.fam"
    conda:
        "envs/hapmap.yaml"
    script:
        "scripts/hapmap.py"

# Preprocessing rules are included unconditionally, whatever the flow.
include: "../../rules/preprocessing.smk"

# Load the relatives-detection rule set selected by config["flow"].
if flow == 'ibis':
    include: "../../rules/relatives_ibis.smk"
elif flow == 'germline':
    include: "../../rules/relatives.smk"
elif flow == 'ibis_king':
    include: "../../rules/relatives_ibis_king.smk"
else:
    # Fail fast on a mistyped or unsupported flow: otherwise none of the
    # flow-specific rule files is loaded and the mistake only surfaces later
    # as a much less obvious workflow error.
    raise ValueError(
        f"Unsupported flow {flow!r} in config; "
        "expected one of: 'ibis', 'germline', 'ibis_king'"
    )

# Runs scripts/evaluate.py on the relatives table and the fam file written by
# postprocess. Declared outputs are the accuracy, precision/recall,
# confusion-matrix and pedigree plots plus an updated relatives table.
rule evaluate_accuracy:
    input:
        rel="results/relatives.tsv",
        fam=rules.postprocess.output["fam"]
    output:
        accuracy="results/accuracy.png",
        pr="results/precision_recall.png",
        conf_matrix="results/confusion_matrix.png",
        updated_rel="results/updated_relatives.tsv",
        pedigree_plot="results/pedigree_plot.png"
    params:
        # 'ersa' for the ibis flow, 'both' for every other flow.
        source="ersa" if flow == "ibis" else "both",
        # Plot path handed to the script only for the ibis flow (None otherwise).
        # It is a param, not a declared output, so Snakemake does not track it.
        # NOTE(review): presumably the script writes this file - confirm in evaluate.py.
        po_fs_plot="results/po_fs_plot.png" if flow == "ibis" else None
    log:
        "logs/evaluation/accuracy.log"
    conda:
        "../../envs/evaluation.yaml"
    script:
        "../../scripts/evaluate.py"

# NOTE(review): disabled alternative definition of evaluate_accuracy. It sits
# inside a bare string literal, so it is never registered as a rule. It refers
# to rules.merge_king_ersa, which is not defined in the visible part of this
# file - confirm whether this block can simply be deleted.
'''
rule evaluate_accuracy:
    input:
        rel=rules.merge_king_ersa.output[0],
        fam=rules.postprocess.output['fam']
    output:
        'results/accuracy.png'
    script:
        '../../scripts/evaluate.py'
'''

