Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions benchmarks/fplx_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
'integrin alpha': {'FPLX': 'ITGA'},
'DC': {'MESH': 'D003713'},
'BMD': {'MESH': 'D015519'}}
'PTPMeg2': {'HGNC': '9661'},
'alpha4': {'HGNC': '5461'}}


incorrect_assertions = {'IGF': {'HGNC': '5464'},
Expand Down
2 changes: 1 addition & 1 deletion gilda/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.11.1'
__version__ = '0.12.0'

import logging

Expand Down
32 changes: 31 additions & 1 deletion gilda/generate_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def generate_chebi_terms():
row['COMPOUND_ID'])
continue
db = 'CHEBI'
name = str(row['NAME'])
name = str(row['NAME']).strip()
chebi_name = \
chebi_client.get_chebi_name_from_id(chebi_id, offline=True)
if chebi_name is None:
Expand Down Expand Up @@ -593,6 +593,8 @@ def terms_from_obo_json_entry(entry, prefix, ignore_mappings=False,
if doid_name:
db, db_id, name = 'DOID', doid, doid_name

name = name.strip()

# Add a term for the name first
name_term = Term(
norm_text=normalize(name),
Expand Down Expand Up @@ -624,6 +626,8 @@ def terms_from_obo_json_entry(entry, prefix, ignore_mappings=False,
if match:
synonym = match.groups()[0]

synonym = synonym.strip()

synonym_term = Term(
norm_text=normalize(synonym),
text=synonym,
Expand Down Expand Up @@ -653,6 +657,31 @@ def _generate_obo_terms(prefix, ignore_mappings=False, map_to_ns=None):
return terms


def _split_entrez_field(value):
    """Split a pipe-delimited gene_info field into clean, non-empty entries."""
    # A value matching ``na_values`` is loaded as NaN (a float) rather than a
    # string, and '-' is treated as marking a field with no entries.
    if not isinstance(value, str) or value == '-':
        return []
    stripped = (entry.strip() for entry in value.split('|'))
    # Drop empty segments (e.g. from 'A||B' or a trailing '|'): Term raises
    # ValueError on empty text, which would abort the whole generation.
    return [entry for entry in stripped if entry]


def generate_entrez_terms():
    """Generate synonym terms for human genes from the Entrez gene_info file.

    The human gene_info table is downloaded from the NCBI FTP site. Each
    row whose Entrez gene ID can be mapped to an HGNC ID contributes one
    term per entry in its ``Synonyms`` and ``Other_designations`` columns.
    Entries are whitespace-stripped; empty entries and entries fully
    wrapped in parentheses are skipped.

    Returns
    -------
    list of Term
        Terms grounded to HGNC with status 'synonym', source 'entrez',
        organism '9606', and the Entrez gene ID recorded as the source
        identifier in the 'EGID' namespace.
    """
    import pandas as pd
    df = pd.read_csv('https://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/'
                     'Homo_sapiens.gene_info.gz', sep='\t',
                     keep_default_na=False, na_values=['_'])
    terms = []
    for _, row in df.iterrows():
        entrez_id = str(row['GeneID'])
        hgnc_id = hgnc_client.get_hgnc_from_entrez(entrez_id)
        # Only genes with an HGNC mapping can be grounded to HGNC
        if not hgnc_id:
            continue
        hgnc_symbol = hgnc_client.get_hgnc_name(hgnc_id)
        candidates = (_split_entrez_field(row['Synonyms'])
                      + _split_entrez_field(row['Other_designations']))
        for syn in candidates:
            # Skip entries that are entirely enclosed in parentheses
            if syn.startswith('(') and syn.endswith(')'):
                continue
            terms.append(
                Term(normalize(syn), syn, 'HGNC', hgnc_id, hgnc_symbol,
                     'synonym', 'entrez', organism='9606',
                     source_db='EGID', source_id=entrez_id)
            )
    return terms


def _make_mesh_mappings():
# Load MeSH ID/label mappings
from .resources import MESH_MAPPINGS_PATH
Expand Down Expand Up @@ -689,6 +718,7 @@ def get_all_terms():
generate_uniprot_terms(),
generate_famplex_terms(),
generate_hgnc_terms(),
generate_entrez_terms(),
generate_chebi_terms(),
generate_go_terms(),
generate_mesh_terms(),
Expand Down
2 changes: 1 addition & 1 deletion gilda/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Term(object):

def __init__(self, norm_text, text, db, id, entry_name, status, source,
organism=None, source_db=None, source_id=None):
if not text:
if not text or not isinstance(text, str):
raise ValueError('Text for Term cannot be empty')
self.norm_text = norm_text
self.text = text
Expand Down
2 changes: 1 addition & 1 deletion gilda/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_organisms():
assert matches3[0].term.id == 'P63163'
# Here we use SMN again but prioritize human and get three bad groundings
matches4 = ground('SMN', organisms=['9606', '10090'])
assert len(matches4) == 2, matches4
assert len(matches4) == 3, matches4
assert all(m.term.organism == '9606' for m in matches4)
# Finally we try grounding SMN1 with mouse prioritized, don't find a match
# and end up with the human gene grounding
Expand Down
8 changes: 4 additions & 4 deletions gilda/tests/test_grounder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ def test_grounder():
assert entry.id == '6407', entry

scores = gr.ground('kras')
assert len(scores) == 1, scores
assert len(scores) == 2, scores
assert appreq(scores[0].score, 0.9845), scores
scores = gr.ground('k-ras')
assert len(scores) == 1, scores
assert len(scores) == 2, scores
assert appreq(scores[0].score, 0.9936), scores
scores = gr.ground('KRAS')
assert len(scores) == 1, scores
assert len(scores) == 2, scores
assert appreq(scores[0].score, 1.0), scores
scores = gr.ground('bRaf')
assert len(scores) == 1, scores
Expand All @@ -38,7 +38,7 @@ def test_grounder_num_entries():
entries = gr.lookup('NPM1')
assert len(entries) == 4, entries
entries = gr.lookup('H4')
assert len(entries) == 7, entries
assert len(entries) == 9, entries


def test_grounder_depluralize():
Expand Down