Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pypfb"
version = "0.5.25"
version = "0.5.26"
description = "Python SDK for PFB format"
authors = ["CTDS UChicago <[email protected]>"]
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion src/pfb/etl/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ async def load_to_es(self):
i = 0
for row in self.spanning_tree_rows:
submission_json = {}
for (node_id, node_name) in row:
for node_id, node_name in row:
submission_json[node_name] = node_id
await self.helper.insert_document("spanning_tree_index", submission_json, i)
i += 1
45 changes: 36 additions & 9 deletions src/pfb/exporters/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

from ..cli import to_command

# Maps plural parent node names to their singular form. The TSV export
# code swaps a relation's destination node name for the singular value
# when the name appears as a key here, before using it to look up the
# relation columns for that parent.
PLURAL_PARENTS = {
"subjects": "subject",
"timings": "timing",
"persons": "person",
"programs": "program",
"projects": "project",
}


@to_command.command("tsv", short_help="Convert PFB to tsv.")
@click.argument("output", default="./tsvs/", type=click.Path(file_okay=False))
Expand Down Expand Up @@ -61,18 +69,26 @@ def _to_tsv(reader, dir_path, handlers_by_name):
relations_by_node[node["name"]][link["dst"]]["id"] = name_id
name_submitter_id = link["name"] + ".submitter_id"
if "submitter_id" not in relations_by_node[node["name"]][link["dst"]]:
relations_by_node[node["name"]][link["dst"]]["submitter_id"] = name_submitter_id

relations_by_node[node["name"]][link["dst"]][
"submitter_id"
] = name_submitter_id

fields_by_name = {node["name"]: node["fields"] for node in reader.schema}

# Add the relation fields to the list of fields by node
for node_name, links in relations_by_node.items():
for linked_node_name, linked_values in links.items():
for attr_name, attr_value in linked_values.items():
if {'name': attr_value, 'type': ['null', 'string'],} not in fields_by_name[node_name]:
fields_by_name[node_name].append({'name': attr_value, 'type': ['null', 'string'],})

if {
"name": attr_value,
"type": ["null", "string"],
} not in fields_by_name[node_name]:
fields_by_name[node_name].append(
{
"name": attr_value,
"type": ["null", "string"],
}
)

for row in reader:
name = row["name"]
Expand All @@ -92,9 +108,15 @@ def _to_tsv(reader, dir_path, handlers_by_name):
if "submitter_id" in obj:
node_submitter_ids[record_id] = obj["submitter_id"]

if {"name": "id", "type": ["null", "string"],} not in fields:
if {
"name": "id",
"type": ["null", "string"],
} not in fields:
fields.append(
{"name": "id", "type": ["null", "string"],}
{
"name": "id",
"type": ["null", "string"],
}
)

obj["id"] = record_id
Expand All @@ -103,12 +125,17 @@ def _to_tsv(reader, dir_path, handlers_by_name):
parent_node = r["dst_name"]
parent_id = r["dst_id"]

if parent_node in PLURAL_PARENTS:
parent_node = PLURAL_PARENTS[parent_node]

if relations_by_node[name][parent_node]["id"]:
obj[relations_by_node[name][parent_node]["id"]] = r["dst_id"]

if relations_by_node[name][parent_node]["submitter_id"]:
if parent_id in node_submitter_ids:
obj[relations_by_node[name][parent_node]["submitter_id"]] = node_submitter_ids[parent_id]
obj[
relations_by_node[name][parent_node]["submitter_id"]
] = node_submitter_ids[parent_id]
else:
obj[relations_by_node[name][parent_node]["submitter_id"]] = "null"

Expand Down Expand Up @@ -146,7 +173,7 @@ def _to_tsv(reader, dir_path, handlers_by_name):
):
if field["name"] not in obj:
continue
value = obj[field["name"]] if field["name"] in obj else None
value = obj[field["name"]] if field["name"] in obj else None
data_row.append(value)

w.writerow(data_row)
Expand Down
2 changes: 1 addition & 1 deletion src/pfb/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __next__(self):
to_update = {}
for name, value in list(obj.items()):
if value and self.is_encode(rv["name"], name):
if isinstance(value,list):
if isinstance(value, list):
thing = []
for val in value:
thing.append(decode_enum(val))
Expand Down