diff --git a/pipeline/UniProt2Reactome_All_Levels.txt.gz b/pipeline/UniProt2Reactome_All_Levels.txt.gz
deleted file mode 100644
index f8ab478..0000000
Binary files a/pipeline/UniProt2Reactome_All_Levels.txt.gz and /dev/null differ
diff --git a/pipeline/merge_data.py b/pipeline/merge_data.py
index 1094a13..fe3e338 100755
--- a/pipeline/merge_data.py
+++ b/pipeline/merge_data.py
@@ -20,44 +20,45 @@
 indir = args.indir # directory with outputs from pull_data.sh
 outdir = args.outdir # (default is '.') directory where outputs from this will go
 flybase = args.flybase # (default is 'NA') FB for Flybase and DME Reactome annotations, NA for none
-orthologs = args.orthologs # $3/orthofinder/Orthologues_"$noext"-cluster/"$noext"-cluster__v__dromel-cluster.tsv from pathannotator.sh script
+orthologs = args.orthologs # $outdir/orthofinder/Orthologues_"$noext"-cluster/"$noext"-cluster__v__dromel-cluster.tsv from pathannotator.sh script
 outbase = args.outbase # file basename for output files supplied to the pathannotator.sh wrapper script
 pd.set_option('display.max_columns', None)
 
 #READ API TABLES INTO PANDAS DATAFRAMES
 if kofam == "no" and species != "NA":
-    ncbi_ver = pd.read_table(f"{indir}/ncbiver.tsv", dtype=str)
-    ncbi_spec = pd.read_table(f"{indir}/conv_ncbi-proteinid_{species}.tsv", dtype=str)
-    spec_ko = pd.read_table(f"{indir}/link_{species}_ko.tsv", dtype=str)
-    spec_pathway = pd.read_table(f"{indir}/link_pathway_{species}.tsv", dtype=str)
-    list_pathway_spec = pd.read_table(f"{indir}/list_pathway_{species}.tsv", dtype=str)
-    ko_pathway = pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str)
-    pathway = pd.read_table(f"{indir}/list_pathway.tsv", dtype=str)
+    if os.path.exists(f"{indir}/ko_ncbi.tsv") and os.path.getsize(f"{indir}/ko_ncbi.tsv") > 0:
+        ncbi_ver = pd.read_table(f"{indir}/ncbiver.tsv", dtype=str)
+        ncbi_spec = pd.read_table(f"{indir}/conv_ncbi-proteinid_{species}.tsv", dtype=str)
+        spec_ko = pd.read_table(f"{indir}/link_{species}_ko.tsv", dtype=str)
+        spec_pathway = pd.read_table(f"{indir}/link_pathway_{species}.tsv", dtype=str)
+        list_pathway_spec = pd.read_table(f"{indir}/list_pathway_{species}.tsv", dtype=str)
+        ko_pathway = pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str)
+        pathway = pd.read_table(f"{indir}/list_pathway.tsv", dtype=str)
     #ADD HEADERS TO DATAFRAME COLUMNS
-    ncbi_ver.columns = ['Input_protein_ID_version', 'Input_protein_ID']
-    ncbi_spec.columns = ['KEGG_genes_ID', 'Input_protein_ID']
-    spec_ko.columns = ['KEGG_KO', 'KEGG_genes_ID']
-    ko_pathway.columns = ['KEGG_ref_pathway', 'KEGG_KO']
-    pathway.columns = ['KEGG_ref_pathway', 'KEGG_ref_pathway_name']
-    spec_pathway.columns = ['KEGG_genes_ID', f"KEGG_{species}_pathway"]
-    list_pathway_spec.columns = [f"KEGG_{species}_pathway", f"KEGG_{species}_pathway_name"]
+        ncbi_ver.columns = ['Input_protein_ID_version', 'Input_protein_ID']
+        ncbi_spec.columns = ['KEGG_genes_ID', 'Input_protein_ID']
+        spec_ko.columns = ['KEGG_KO', 'KEGG_genes_ID']
+        ko_pathway.columns = ['KEGG_ref_pathway', 'KEGG_KO']
+        pathway.columns = ['KEGG_ref_pathway', 'KEGG_ref_pathway_name']
+        spec_pathway.columns = ['KEGG_genes_ID', f"KEGG_{species}_pathway"]
+        list_pathway_spec.columns = [f"KEGG_{species}_pathway", f"KEGG_{species}_pathway_name"]
     #MERGE DATAFRAMES INTO ONE FOR REFERENCE PATHWAYS
-    ncbi_ver_spec = pd.merge(ncbi_ver, ncbi_spec, on='Input_protein_ID', how='inner')
-    ncbi_ver_spec_ko = pd.merge(ncbi_ver_spec, spec_ko, on='KEGG_genes_ID', how='inner')
-    ncbi_ver_spec_ko_pathway = pd.merge(ncbi_ver_spec_ko, ko_pathway, on='KEGG_KO', how='inner')
-    ncbi_ver_spec_ko_pathway_pathname = pd.merge(ncbi_ver_spec_ko_pathway, pathway, on='KEGG_ref_pathway', how='left')
-    ncbi_ver_spec_ko_pathway_pathname.drop(['Input_protein_ID', 'KEGG_genes_ID'], axis=1, inplace=True)
-    ncbi_ver_spec_ko_pathway_pathname.rename(columns={'Input_protein_ID_version': 'Input_protein_ID'}, inplace=True)
-    ncbi_ver_spec_ko_pathway_pathname = ncbi_ver_spec_ko_pathway_pathname.drop_duplicates()
-    ncbi_ver_spec_ko_pathway_pathname.to_csv(f"{outdir}/{outbase}_KEGG_ref.tsv", sep='\t', index=False)
+        ncbi_ver_spec = pd.merge(ncbi_ver, ncbi_spec, on='Input_protein_ID', how='inner')
+        ncbi_ver_spec_ko = pd.merge(ncbi_ver_spec, spec_ko, on='KEGG_genes_ID', how='inner')
+        ncbi_ver_spec_ko_pathway = pd.merge(ncbi_ver_spec_ko, ko_pathway, on='KEGG_KO', how='inner')
+        ncbi_ver_spec_ko_pathway_pathname = pd.merge(ncbi_ver_spec_ko_pathway, pathway, on='KEGG_ref_pathway', how='left')
+        ncbi_ver_spec_ko_pathway_pathname.drop(['Input_protein_ID', 'KEGG_genes_ID'], axis=1, inplace=True)
+        ncbi_ver_spec_ko_pathway_pathname.rename(columns={'Input_protein_ID_version': 'Input_protein_ID'}, inplace=True)
+        ncbi_ver_spec_ko_pathway_pathname = ncbi_ver_spec_ko_pathway_pathname.drop_duplicates()
+        ncbi_ver_spec_ko_pathway_pathname.to_csv(f"{outdir}/{outbase}_KEGG_ref.tsv", sep='\t', index=False)
     #MERGE DATAFRAMES INTO ONE FOR { species } PATHWAYS
-    ncbi_ver_spec_ko_specpath = pd.merge(ncbi_ver_spec_ko, spec_pathway, on='KEGG_genes_ID', how='inner')
-    ncbi_ver_spec_ko_specpath_specpathname = pd.merge(ncbi_ver_spec_ko_specpath, list_pathway_spec, on=f"KEGG_{species}_pathway", how='left')
-    ncbi_ver_spec_ko_specpath_specpathname.drop(['Input_protein_ID', 'KEGG_genes_ID'], axis=1, inplace=True)
-    ncbi_ver_spec_ko_specpath_specpathname.rename(columns={'Input_protein_ID_version': 'Input_protein_ID'}, inplace=True)
-    ncbi_ver_spec_ko_specpath_specpathname = ncbi_ver_spec_ko_specpath_specpathname.drop_duplicates()
-    ncbi_ver_spec_ko_specpath_specpathname.to_csv(f"{outdir}/{outbase}_KEGG_species.tsv", sep='\t', index=False)
+        ncbi_ver_spec_ko_specpath = pd.merge(ncbi_ver_spec_ko, spec_pathway, on='KEGG_genes_ID', how='inner')
+        ncbi_ver_spec_ko_specpath_specpathname = pd.merge(ncbi_ver_spec_ko_specpath, list_pathway_spec, on=f"KEGG_{species}_pathway", how='left')
+        ncbi_ver_spec_ko_specpath_specpathname.drop(['Input_protein_ID', 'KEGG_genes_ID'], axis=1, inplace=True)
+        ncbi_ver_spec_ko_specpath_specpathname.rename(columns={'Input_protein_ID_version': 'Input_protein_ID'}, inplace=True)
+        ncbi_ver_spec_ko_specpath_specpathname = ncbi_ver_spec_ko_specpath_specpathname.drop_duplicates()
+        ncbi_ver_spec_ko_specpath_specpathname.to_csv(f"{outdir}/{outbase}_KEGG_species.tsv", sep='\t', index=False)
 
     #ADD FLYBASE AND REACTOME ANNOTATIONS WHEN DME IS THE SPECIFIED SPECIES
     if flybase == "FB" and species == "dme":
         #READ INTO DATAFRAMES
@@ -156,24 +157,25 @@
         print("You have not requested Flybase annotations.")
 elif kofam == "yes" and species == "NA":
     #READ API TABLES INTO PANDAS DATAFRAMES
-    ncbi_ver = pd.read_table(f"{indir}/ncbiver.tsv", dtype=str)
-    ncbi_ko = pd.read_table(f"{indir}/ko_ncbi.tsv", dtype=str)
-    ko_pathway = pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str)
-    pathway = pd.read_table(f"{indir}/list_pathway.tsv", dtype=str)
+    if os.path.exists(f"{indir}/ko_ncbi.tsv") and os.path.getsize(f"{indir}/ko_ncbi.tsv") > 0:
+        ncbi_ver = pd.read_table(f"{indir}/ncbiver.tsv", dtype=str)
+        ncbi_ko = pd.read_table(f"{indir}/ko_ncbi.tsv", dtype=str)
+        ko_pathway = pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str)
pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str) + pathway = pd.read_table(f"{indir}/list_pathway.tsv", dtype=str) #ADD HEADERS TO DATAFRAME COLUMNS - ncbi_ver.columns = ['Input_protein_ID_version', 'Input_protein_ID'] - ncbi_ko.columns = ['KEGG_KO', 'Input_protein_ID'] - ko_pathway.columns = ['KEGG_ref_pathway', 'KEGG_KO'] - pathway.columns = ['KEGG_ref_pathway', 'KEGG_ref_pathway_name'] + ncbi_ver.columns = ['Input_protein_ID_version', 'Input_protein_ID'] + ncbi_ko.columns = ['KEGG_KO', 'Input_protein_ID'] + ko_pathway.columns = ['KEGG_ref_pathway', 'KEGG_KO'] + pathway.columns = ['KEGG_ref_pathway', 'KEGG_ref_pathway_name'] #MERGE DATAFRAMES INTO ONE FOR REFERENCE PATHWAYS - ncbi_ver_ko = pd.merge(ncbi_ver, ncbi_ko, on='Input_protein_ID', how='inner') - ncbi_ver_ko_pathway = pd.merge(ncbi_ver_ko, ko_pathway, on='KEGG_KO', how='inner') - ncbi_ver_ko_pathway_pathname = pd.merge(ncbi_ver_ko_pathway, pathway, on='KEGG_ref_pathway', how='left') - ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname[["Input_protein_ID_version","Input_protein_ID","KEGG_KO","KEGG_ref_pathway","KEGG_ref_pathway_name"]] - ncbi_ver_ko_pathway_pathname.drop('Input_protein_ID', axis=1, inplace=True) - ncbi_ver_ko_pathway_pathname.rename(columns={"Input_protein_ID_version": "Input_protein_ID"}, inplace=True) - ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname.drop_duplicates() - ncbi_ver_ko_pathway_pathname.to_csv(f"{outdir}/{outbase}_KEGG_ref.tsv", sep='\t', index=False) + ncbi_ver_ko = pd.merge(ncbi_ver, ncbi_ko, on='Input_protein_ID', how='inner') + ncbi_ver_ko_pathway = pd.merge(ncbi_ver_ko, ko_pathway, on='KEGG_KO', how='inner') + ncbi_ver_ko_pathway_pathname = pd.merge(ncbi_ver_ko_pathway, pathway, on='KEGG_ref_pathway', how='left') + ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname[["Input_protein_ID_version","Input_protein_ID","KEGG_KO","KEGG_ref_pathway","KEGG_ref_pathway_name"]] + ncbi_ver_ko_pathway_pathname.drop('Input_protein_ID', axis=1, inplace=True) + ncbi_ver_ko_pathway_pathname.rename(columns={"Input_protein_ID_version": "Input_protein_ID"}, inplace=True) + ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname.drop_duplicates() + ncbi_ver_ko_pathway_pathname.to_csv(f"{outdir}/{outbase}_KEGG_ref.tsv", sep='\t', index=False) if flybase == "FB": #READ INTO DATAFRAMES fbgn_CG = pd.read_table(f"{indir}/Fbgn_CG.tsv", dtype=str) @@ -230,39 +232,40 @@ print("You have not requested Flybase annotations.") elif kofam == "yes" and species != "NA": #READ API TABLES INTO PANDAS DATAFRAMES - ncbi_ver = pd.read_table(f"{indir}/ncbiver.tsv", dtype=str) - ncbi_ko = pd.read_table(f"{indir}/ko_ncbi.tsv", dtype=str) - ko_pathway = pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str) - pathway = pd.read_table(f"{indir}/list_pathway.tsv", dtype=str) - spec_ko = pd.read_table(f"{indir}/link_{species}_ko.tsv", dtype=str) - spec_pathway = pd.read_table(f"{indir}/link_pathway_{species}.tsv", dtype=str) - list_pathway_spec = pd.read_table(f"{indir}/list_pathway_{species}.tsv", dtype=str) + if os.path.exists(f"{indir}/ko_ncbi.tsv") and os.path.getsize(f"{indir}/ko_ncbi.tsv") > 0: + ncbi_ver = pd.read_table(f"{indir}/ncbiver.tsv", dtype=str) + ncbi_ko = pd.read_table(f"{indir}/ko_ncbi.tsv", dtype=str) + ko_pathway = pd.read_table(f"{indir}/link_ko_pathway.tsv", dtype=str) + pathway = pd.read_table(f"{indir}/list_pathway.tsv", dtype=str) + spec_ko = pd.read_table(f"{indir}/link_{species}_ko.tsv", dtype=str) + spec_pathway = pd.read_table(f"{indir}/link_pathway_{species}.tsv", 
+        list_pathway_spec = pd.read_table(f"{indir}/list_pathway_{species}.tsv", dtype=str)
     #ADD HEADERS TO DATAFRAME COLUMNS
-    ncbi_ver.columns = ['Input_protein_ID_version', 'Input_protein_ID']
-    ncbi_ko.columns = ['KEGG_KO', 'Input_protein_ID']
-    spec_ko.columns = ['KEGG_KO', 'KEGG_genes_ID']
-    ko_pathway.columns = ['KEGG_ref_pathway', 'KEGG_KO']
-    pathway.columns = ['KEGG_ref_pathway', 'KEGG_ref_pathway_name']
-    spec_pathway.columns = ['KEGG_genes_ID', f"KEGG_{species}_pathway"]
-    list_pathway_spec.columns = [f"KEGG_{species}_pathway", f"KEGG_{species}_pathway_name"]
+        ncbi_ver.columns = ['Input_protein_ID_version', 'Input_protein_ID']
+        ncbi_ko.columns = ['KEGG_KO', 'Input_protein_ID']
+        spec_ko.columns = ['KEGG_KO', 'KEGG_genes_ID']
+        ko_pathway.columns = ['KEGG_ref_pathway', 'KEGG_KO']
+        pathway.columns = ['KEGG_ref_pathway', 'KEGG_ref_pathway_name']
+        spec_pathway.columns = ['KEGG_genes_ID', f"KEGG_{species}_pathway"]
+        list_pathway_spec.columns = [f"KEGG_{species}_pathway", f"KEGG_{species}_pathway_name"]
     #MERGE DATAFRAMES INTO ONE FOR REFERENCE PATHWAYS
-    ncbi_ver_ko = pd.merge(ncbi_ver, ncbi_ko, on='Input_protein_ID', how='inner')
-    ncbi_ver_ko_pathway = pd.merge(ncbi_ver_ko, ko_pathway, on='KEGG_KO', how='inner')
-    ncbi_ver_ko_pathway_pathname = pd.merge(ncbi_ver_ko_pathway, pathway, on='KEGG_ref_pathway', how='left')
-    ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname[["Input_protein_ID_version","Input_protein_ID","KEGG_KO","KEGG_ref_pathway","KEGG_ref_pathway_name"]]
-    ncbi_ver_ko_pathway_pathname.drop('Input_protein_ID', axis=1, inplace=True)
-    ncbi_ver_ko_pathway_pathname.rename(columns={"Input_protein_ID_version": "Input_protein_ID"}, inplace=True)
-    ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname.drop_duplicates()
-    ncbi_ver_ko_pathway_pathname.to_csv(f"{outdir}/{outbase}_KEGG_ref.tsv", sep='\t', index=False)
+        ncbi_ver_ko = pd.merge(ncbi_ver, ncbi_ko, on='Input_protein_ID', how='inner')
+        ncbi_ver_ko_pathway = pd.merge(ncbi_ver_ko, ko_pathway, on='KEGG_KO', how='inner')
+        ncbi_ver_ko_pathway_pathname = pd.merge(ncbi_ver_ko_pathway, pathway, on='KEGG_ref_pathway', how='left')
+        ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname[["Input_protein_ID_version","Input_protein_ID","KEGG_KO","KEGG_ref_pathway","KEGG_ref_pathway_name"]]
+        ncbi_ver_ko_pathway_pathname.drop('Input_protein_ID', axis=1, inplace=True)
+        ncbi_ver_ko_pathway_pathname.rename(columns={"Input_protein_ID_version": "Input_protein_ID"}, inplace=True)
+        ncbi_ver_ko_pathway_pathname = ncbi_ver_ko_pathway_pathname.drop_duplicates()
+        ncbi_ver_ko_pathway_pathname.to_csv(f"{outdir}/{outbase}_KEGG_ref.tsv", sep='\t', index=False)
     #MERGE DATAFRAMES INTO ONE FOR { species } PATHWAYS
-    ncbi_ver_spec_ko = pd.merge(ncbi_ver_ko, spec_ko, on='KEGG_KO', how='inner')
-    ncbi_ver_spec_ko_specpath = pd.merge(ncbi_ver_spec_ko, spec_pathway, on='KEGG_genes_ID', how='inner')
-    ncbi_ver_spec_ko_specpath_specpathname = pd.merge(ncbi_ver_spec_ko_specpath, list_pathway_spec, on=f"KEGG_{species}_pathway", how='left')
-    ncbi_ver_spec_ko_specpath_specpathname = ncbi_ver_spec_ko_specpath_specpathname[["KEGG_genes_ID","Input_protein_ID_version","Input_protein_ID","KEGG_KO",f"KEGG_{species}_pathway",f"KEGG_{species}_pathway_name"]]
-    ncbi_ver_spec_ko_specpath_specpathname.drop(['Input_protein_ID', 'KEGG_genes_ID'], axis=1, inplace=True)
-    ncbi_ver_spec_ko_specpath_specpathname.rename(columns={'Input_protein_ID_version': 'Input_protein_ID'}, inplace=True)
-    ncbi_ver_spec_ko_specpath_specpathname = ncbi_ver_spec_ko_specpath_specpathname.drop_duplicates()
-    ncbi_ver_spec_ko_specpath_specpathname.to_csv(f"{outdir}/{outbase}_KEGG_species.tsv", sep='\t', index=False)
+        ncbi_ver_spec_ko = pd.merge(ncbi_ver_ko, spec_ko, on='KEGG_KO', how='inner')
+        ncbi_ver_spec_ko_specpath = pd.merge(ncbi_ver_spec_ko, spec_pathway, on='KEGG_genes_ID', how='inner')
+        ncbi_ver_spec_ko_specpath_specpathname = pd.merge(ncbi_ver_spec_ko_specpath, list_pathway_spec, on=f"KEGG_{species}_pathway", how='left')
+        ncbi_ver_spec_ko_specpath_specpathname = ncbi_ver_spec_ko_specpath_specpathname[["KEGG_genes_ID","Input_protein_ID_version","Input_protein_ID","KEGG_KO",f"KEGG_{species}_pathway",f"KEGG_{species}_pathway_name"]]
+        ncbi_ver_spec_ko_specpath_specpathname.drop(['Input_protein_ID', 'KEGG_genes_ID'], axis=1, inplace=True)
+        ncbi_ver_spec_ko_specpath_specpathname.rename(columns={'Input_protein_ID_version': 'Input_protein_ID'}, inplace=True)
+        ncbi_ver_spec_ko_specpath_specpathname = ncbi_ver_spec_ko_specpath_specpathname.drop_duplicates()
+        ncbi_ver_spec_ko_specpath_specpathname.to_csv(f"{outdir}/{outbase}_KEGG_species.tsv", sep='\t', index=False)
 
     #ADD FLYBASE AND REACTOME ANNOTATIONS
     if flybase == "FB" and species == "dme":
         #READ INTO DATAFRAMES
diff --git a/pipeline/pathannot_to_gmt.py b/pipeline/pathannot_to_gmt.py
index 3073004..fc16d83 100644
--- a/pipeline/pathannot_to_gmt.py
+++ b/pipeline/pathannot_to_gmt.py
@@ -158,114 +158,30 @@
     justlist = alltogether['Input_protein_ID'].apply(pd.Series)
     alltogether = pd.concat([alltogether.drop('Input_protein_ID', axis=1), justlist], axis=1)
     alltogether.to_csv(f"{outdir}/{outbase}_all_pathways.gmt", sep='\t', header=False, index=False)
-else:
-    print ("Cannot find the proper combination of output files.")
-
-#REMOVE ANY TRAILING TABS FROM EMPTY DF FIELDS
-gmtfile=f"{outdir}/{outbase}_all_pathways.gmt"
-notab=f"{outdir}/gmt.tmp"
-
-with open(gmtfile, 'r') as infile, open(notab, 'w') as outfile:
-    for line in infile:
-        cleaned_line = re.sub(r'\t+\n', '\n', line)
-        outfile.write(cleaned_line)
-
-os.rename(notab, gmtfile)
-
-#############################################################################################################
-'''
-#FIND THE OUTPUT FILES TO COMBINE
-keggref = f"{pathannotator}/*KEGG_ref.tsv"
-keggreffile = glob.glob(keggref)
-keggreffile = str(keggreffile[0])
-#READ TABLES INTO PANDAS DATAFRAMES AND ADD HEADERS TO IPRS
-kr = pd.read_table(f"{keggreffile}", dtype=str)
-kr.columns = ['Input_protein_ID', 'KEGG_KO', 'KEGG_ref_pathway', 'KEGG_ref_pathway_name']
-#DROP UNWANTED COLUMNS (KEEP PROTEIN ACCESSIONS (1), INTERPRO ANNOTATIONS (12) AND PATHWAY ANNOTATIONS (15)
-kr = kr.drop(columns=['KEGG_KO', 'KEGG_ref_pathway_name'])
-#MAKE HEADERS MATCH FOR ALL DFS
-kr.columns = ['Input_protein_ID', 'Pathway_or_domain']
-#ADD SECOND COLUMN TO EACH DF WITH 'OPTIONAL DESCRIPTION' FOR GMT FORMAT
-kr.insert(loc=1, column='Description', value='KEGG_reference_pathway')
-#REMOVE DUPLICATE ROWS
-kr = kr.drop_duplicates()
-
-if os.path.exists(f"{pathannotator}/*KEGG_species.tsv"):
-#keggspec = f"{pathannotator}/*KEGG_species.tsv"
-    keggspecfile = glob.glob(keggspec)
-    keggspecfile = str(keggspecfile[0])
-    #READ TABLES INTO PANDAS DATAFRAMES AND ADD HEADERS TO IPRS
-    ks = pd.read_table(f"{keggspecfile}", dtype=str)
-    ks.columns = ['Input_protein_ID', 'KEGG_KO', 'KEGG_species_pathway', 'KEGG_species_pathway_name']
-    #DROP UNWANTED COLUMNS (KEEP PROTEIN ACCESSIONS (1), INTERPRO ANNOTATIONS (12) AND PATHWAY ANNOTATIONS (15)
-    ks = ks.drop(columns=['KEGG_KO', 'KEGG_species_pathway_name'])
-    #MAKE HEADERS MATCH FOR ALL DFS
-    kr.columns = ['Input_protein_ID', 'Pathway_or_domain']
-    #ADD SECOND COLUMN TO EACH DF WITH 'OPTIONAL DESCRIPTION' FOR GMT FORMAT
-    ks.insert(loc=1, column='Description', value='KEGG_species_pathway')
-    #REMOVE DUPLICATE ROWS
-    ks = ks.drop_duplicates()
-
-if os.path.exists(f"{pathannotator}/*flybase.tsv"):
-#flybase = f"{pathannotator}/*flybase.tsv"
-    flybasefile = glob.glob(flybasetsv)
+elif keggreffile == [] and keggspecfile == [] and flybasefile != [] and reactomefile != []:
+    print ("FB, RT outputs are present")
+    #FIND OUTPUT FILES BASED ON PATTERN MATCH
     flybasefile = str(flybasefile[0])
+    reactomefile = str(reactomefile[0])
     #READ TABLES INTO PANDAS DATAFRAMES AND ADD HEADERS TO IPRS
     fb = pd.read_table(f"{flybasefile}", dtype=str)
     fb.columns = ['Input_protein_ID', 'KEGG_KO', 'Flybase_pathway_ID', 'Flybase_pathway_name']
-    #DROP UNWANTED COLUMNS (KEEP PROTEIN ACCESSIONS (1), INTERPRO ANNOTATIONS (12) AND PATHWAY ANNOTATIONS (15)
-    fb = fb.drop(columns=['KEGG_KO', 'Flybase_pathway_name'])
-    #MAKE HEADERS MATCH FOR ALL DFS
-    fb.columns = ['Input_protein_ID', 'Pathway_or_domain']
-    #ADD SECOND COLUMN TO EACH DF WITH 'OPTIONAL DESCRIPTION' FOR GMT FORMAT
-    fb.insert(loc=1, column='Description', value='FlyBase_pathway')
-    #REMOVE DUPLICATE ROWS
-    fb = fb.drop_duplicates()
-
-if os.path.exists(f"{pathannotator}/*reactome.tsv"):
-#reactome = f"{pathannotator}/*reactome.tsv"
-    reactomefile = glob.glob(reactometsv)
-    reactomefile = str(reactomefile[0])
-    #READ TABLES INTO PANDAS DATAFRAMES AND ADD HEADERS TO IPRS
     rt = pd.read_table(f"{reactomefile}", dtype=str)
     rt.columns = ['Input_protein_ID', 'UniProt_ID', 'Reactome_pathway_ID', 'Reactome_pathway_name']
     #DROP UNWANTED COLUMNS (KEEP PROTEIN ACCESSIONS (1), INTERPRO ANNOTATIONS (12) AND PATHWAY ANNOTATIONS (15)
+    fb = fb.drop(columns=['KEGG_KO', 'Flybase_pathway_name'])
     rt = rt.drop(columns=['UniProt_ID', 'Reactome_pathway_name'])
     #MAKE HEADERS MATCH FOR ALL DFS
+    fb.columns = ['Input_protein_ID', 'Pathway_or_domain']
     rt.columns = ['Input_protein_ID', 'Pathway_or_domain']
     #ADD SECOND COLUMN TO EACH DF WITH 'OPTIONAL DESCRIPTION' FOR GMT FORMAT
+    fb.insert(loc=1, column='Description', value='FlyBase_pathway')
     rt.insert(loc=1, column='Description', value='Reactome_pathway')
     #REMOVE DUPLICATE ROWS
+    fb = fb.drop_duplicates()
     rt = rt.drop_duplicates()
-
-#if 'keggreffile' in locals() and 'keggspecfile' in locals() and 'flybasefile' in locals() and 'reactomefile' in locals():
-if not kr.empty and not ks.empty and not fb.empty and not rt.empty:
-    print ("KR, KS, FB, RT outputs are all present")
-    alltogether = pd.concat([kr, ks, fb, rt])
-    alltogether = alltogether.groupby(['Pathway_or_domain', 'Description'])['Input_protein_ID'].agg(list).reset_index()
-    justlist = alltogether['Input_protein_ID'].apply(pd.Series)
-    alltogether = pd.concat([alltogether.drop('Input_protein_ID', axis=1), justlist], axis=1)
-    alltogether.to_csv(f"{outdir}/{outbase}_all_pathways.gmt", sep='\t', header=False, index=False)
-#elif 'keggreffile' in locals() and 'keggspecfile' in locals() and 'flybasefile' not in locals() and 'reactomefile' not in locals():
-elif not kr.empty and not ks.empty and fb.empty and rt.empty:
-    print ("KR and KS outputs are present")
-    alltogether = pd.concat([kr, ks])
-    alltogether = alltogether.groupby(['Pathway_or_domain', 'Description'])['Input_protein_ID'].agg(list).reset_index()
-    justlist = alltogether['Input_protein_ID'].apply(pd.Series)
-    alltogether = pd.concat([alltogether.drop('Input_protein_ID', axis=1), justlist], axis=1)
-    alltogether.to_csv(f"{outdir}/{outbase}_all_pathways.gmt", sep='\t', header=False, index=False)
-#elif 'keggreffile' in locals() and 'keggspecfile' not in locals() and 'flybasefile' in locals() and 'reactomefile' in locals():
-elif not kr.empty and ks.empty and not rb.empty and not rt.empty:
-    print ("KR, FB and RT outputs are present")
-    alltogether = pd.concat([kr, fb, rt])
-    alltogether = alltogether.groupby(['Pathway_or_domain', 'Description'])['Input_protein_ID'].agg(list).reset_index()
-    justlist = alltogether['Input_protein_ID'].apply(pd.Series)
-    alltogether = pd.concat([alltogether.drop('Input_protein_ID', axis=1), justlist], axis=1)
-    alltogether.to_csv(f"{outdir}/{outbase}_all_pathways.gmt", sep='\t', header=False, index=False)
-#elif 'keggreffile' in locals() and 'keggspecfile' not in locals() and 'flybasefile' not in locals() and 'reactomefile' not in locals():
-elif not kr.empty and ks.empty and fb.empty and rt.empty:
-    print ("only KR output is present")
-    alltogether = kr
+    #BRING ALL DATA TOGETHER IN GMT FORMAT
+    alltogether = pd.concat([fb, rt])
     alltogether = alltogether.groupby(['Pathway_or_domain', 'Description'])['Input_protein_ID'].agg(list).reset_index()
     justlist = alltogether['Input_protein_ID'].apply(pd.Series)
     alltogether = pd.concat([alltogether.drop('Input_protein_ID', axis=1), justlist], axis=1)
@@ -283,4 +199,3 @@
         outfile.write(cleaned_line)
 
 os.rename(notab, gmtfile)
-'''
diff --git a/pipeline/pathannotator.sh b/pipeline/pathannotator.sh
index 9a8c067..e6c2587 100755
--- a/pipeline/pathannotator.sh
+++ b/pipeline/pathannotator.sh
@@ -1,6 +1,10 @@
 #! /bin/bash
 #CHECK FOR OUTDIR. IF IT DOESN'T EXIST CREATE IT
+if [ -z "$outdir" ]; then outdir="."; fi
+if [ ! -d "$outdir" ]; then mkdir -p "$outdir"; fi
+
+
 
 if [ -f "$outdir"/link_ko_pathway.tsv ]; then rm "$outdir"/link_ko_pathway.tsv; fi
 if [ -f "$outdir"/list_pathway.tsv ]; then rm "$outdir"/list_pathway.tsv; fi
 if [ -f "$outdir"/conv_ncbi-proteinid_"$keggcode".tsv ]; then rm "$outdir"/conv_ncbi-proteinid_"$keggcode".tsv; fi
@@ -21,7 +25,7 @@ if [ -n "$(ls $outdir/fbgn_annotation_ID_fb* 2>/dev/null)" ]; then rm $outdir/fb
 if [ -n "$(ls $outdir/dmel-all-translation*.fasta* 2>/dev/null)" ]; then rm $outdir/dmel-all-translation*.fasta*; fi
 if [ -n "$(ls $outdir/fbgn_fbtr_fbpp_fb* 2>/dev/null)" ]; then rm $outdir/fbgn_fbtr_fbpp_fb*; fi
 if [ -f "$outdir"/Fbgn_fbpp.tsv ]; then rm "$outdir"/Fbgn_fbpp.tsv; fi
-if [ -d "$outdir"/tmp ]; then rm -r "$outdir"/tmp; fi
+#if [ -d "$outdir"/tmp ]; then rm -r "$outdir"/tmp; fi
 if [ -f "$outdir"/tmp.txt ]; then rm "$outdir"/tmp.txt; fi
 if [ -f "$outdir"/ncbiversion.tmp ]; then rm "$outdir"/ncbiversion.tmp; fi
 if [ -f "$outdir"/ncbiver.tsv ]; then rm "$outdir"/ncbiver.tsv; fi
@@ -87,10 +91,8 @@ fi
 #######################################################################################################
 #SET DEFAULTS IF OPTIONS NOT PROVIDED
 if [ -z "${flybase}" ]; then $flybase == 'NA'; fi
-if [ -z "${outdir}" ]; then $outdir == '.'; fi
 if [ -z "${keggcode}" ]; then $keggcode == 'NA'; fi
 
-if [ ! -d "$outdir" ]; then mkdir -p "$outdir"; fi
-d "$outdir" ]; then mkdir -p "$outdir"; fi #GETTING NUMBER OF AVAILABLE PROCESSORS FOR USE IN THREADING avail=$(getconf _NPROCESSORS_ONLN) @@ -222,8 +224,14 @@ then #FILTER KOFAM HERE echo "Filtering KofamScan results" grep -P "^\*" $outdir/kofam_result_full.txt >> $outdir/kofam_filtered_asterisk.txt - awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv - sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + if [ -s $outdir/kofam_filtered_asterisk.txt ] + then + echo "Filtered KofamScan results NOT empty. Proceeding with KEGG annotation." + awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv + sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + else + echo "Filtered KofamScan results EMPTY. Moving on to FlyBase and Reactome annotation." + fi #IF FB AND NOT 'DME' RUN ORTHOFINDER AND PROCEED TO MERGE (INCLUDING FLYBASE) if [ "$keggcode" != "dme" ] && [ "$flybase" == "FB" ]; @@ -307,8 +315,14 @@ then #FILTER KOFAM HERE echo "Filtering KofamScan results" grep -P "^\*" $outdir/kofam_result_full.txt >> $outdir/kofam_filtered_asterisk.txt - awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv - sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + if [ -s $outdir/kofam_filtered_asterisk.txt ] + then + echo "Filtered KofamScan results NOT empty. Proceeding with KEGG annotation." + awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv + sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + else + echo "Filtered KofamScan results EMPTY. Moving on to FlyBase and Reactome annotation." + fi #IF FB AND NOT 'DME' RUN ORTHOFINDER AND PROCEED TO MERGE (INCLUDING FLYBASE) if [ "$keggcode" != "dme" ] && [ "$flybase" == "FB" ]; @@ -405,8 +419,14 @@ else #ELSE MEANS THESE ARE NOT NCBI PROTEIN IDS. #FILTER KOFAM HERE echo "Filtering KofamScan results" grep -P "^\*" $outdir/kofam_result_full.txt >> $outdir/kofam_filtered_asterisk.txt - awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv - sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + if [ -s $outdir/kofam_filtered_asterisk.txt ] + then + echo "Filtered KofamScan results NOT empty. Proceeding with KEGG annotation." + awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv + sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + else + echo "Filtered KofamScan results EMPTY. Moving on to FlyBase and Reactome annotation." + fi #IF FB RUN ORTHOFINDER AND PROCEED TO MERGE (INCLUDING FLYBASE) if [ "$flybase" == FB ]; @@ -488,8 +508,14 @@ else #ELSE MEANS THESE ARE NOT NCBI PROTEIN IDS. #FILTER KOFAM HERE echo "Filtering KofamScan results" grep -P "^\*" $outdir/kofam_result_full.txt >> $outdir/kofam_filtered_asterisk.txt - awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv - sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + if [ -s $outdir/kofam_filtered_asterisk.txt ] + then + echo "Filtered KofamScan results NOT empty. Proceeding with KEGG annotation." + awk '{ print $3"\t"$2 }' $outdir/kofam_filtered_asterisk.txt > $outdir/ko_ncbi.tsv + sed -i 's/\..*$//' $outdir/ko_ncbi.tsv + else + echo "Filtered KofamScan results EMPTY. Moving on to FlyBase and Reactome annotation." + fi #IF FB RUN ORTHOFINDER AND PROCEED TO MERGE (INCLUDING FLYBASE) if [ "$flybase" == FB ];