Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions odml/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ def __init__(self, author=None, date=None, version=None, repository=None, oid=No
self._date = None
self.date = date

# Allow recording the name of the file from which this document was loaded.
# It is kept only for reference during processing and is not serialized to a file.
self._origin_file_name = None

def __repr__(self):
return "<Doc %s by %s (%d sections)>" % (self._version, self._author,
len(self._sections))
Expand Down
19 changes: 17 additions & 2 deletions odml/tools/odmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@

import datetime
import json
import sys
import yaml

from os.path import basename

from . import xmlparser
from .dict_parser import DictWriter, DictReader
from ..info import FORMAT_VERSION
Expand All @@ -17,6 +20,11 @@
from .rdf_converter import RDFReader, RDFWriter
from ..validation import Validation

try:
unicode = unicode
except NameError:
unicode = str


class ODMLWriter:
"""
Expand Down Expand Up @@ -58,10 +66,10 @@ def to_string(self, odml_document):
string_doc = ''

if self.parser == 'XML':
string_doc = str(xmlparser.XMLWriter(odml_document))
string_doc = unicode(xmlparser.XMLWriter(odml_document))
elif self.parser == "RDF":
# Use turtle as default output format for now.
string_doc = RDFWriter(odml_document).get_rdf_str("turtle")
string_doc = RDFWriter(odml_document).get_rdf_str("xml")
else:
self.parsed_doc = DictWriter().to_dict(odml_document)

Expand All @@ -74,6 +82,9 @@ def to_string(self, odml_document):
string_doc = json.dumps(odml_output, indent=4,
cls=JSONDateTimeSerializer)

if sys.version_info.major < 3:
string_doc = string_doc.encode("utf-8")

return string_doc


Expand Down Expand Up @@ -122,6 +133,8 @@ def from_file(self, file, doc_format=None):
return

self.doc = DictReader().to_odml(self.parsed_doc)
# Provide the original file name via the in-memory document
self.doc._origin_file_name = basename(file)
return self.doc

elif self.parser == 'JSON':
Expand All @@ -133,6 +146,8 @@ def from_file(self, file, doc_format=None):
return

self.doc = DictReader().to_odml(self.parsed_doc)
# Provide the original file name via the in-memory document
self.doc._origin_file_name = basename(file)
return self.doc

elif self.parser == 'RDF':
Expand Down
31 changes: 20 additions & 11 deletions odml/tools/rdf_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def save_element(self, e, node=None):
fmt = e.format()

if not node:
curr_node = URIRef(odmlns + str(e.id))
curr_node = URIRef(odmlns + unicode(e.id))
else:
curr_node = node

Expand All @@ -88,6 +88,11 @@ def save_element(self, e, node=None):
if isinstance(fmt, Document.__class__):
self.g.add((self.hub_root, odmlns.hasDocument, curr_node))

# If available, add the document's file name to the document node
# so we can identify where the data came from.
if hasattr(e, "_origin_file_name"):
self.g.add((curr_node, odmlns.hasFileName, Literal(e._origin_file_name)))

for k in fmt.rdf_map_keys:
if k == 'id':
continue
Expand All @@ -101,7 +106,7 @@ def save_element(self, e, node=None):
self.g.add((curr_node, fmt.rdf_map(k), terminology_node))
else:
# adding terminology to the hub and to link with the doc
node = URIRef(odmlns + str(uuid.uuid4()))
node = URIRef(odmlns + unicode(uuid.uuid4()))
self.g.add((node, RDF.type, URIRef(terminology_url)))
self.g.add((self.hub_root, odmlns.hasTerminology, node))
self.g.add((curr_node, fmt.rdf_map(k), node))
Expand All @@ -111,20 +116,20 @@ def save_element(self, e, node=None):
k == 'sections' and len(getattr(e, k)) > 0:
sections = getattr(e, k)
for s in sections:
node = URIRef(odmlns + str(s.id))
node = URIRef(odmlns + unicode(s.id))
self.g.add((curr_node, fmt.rdf_map(k), node))
self.save_element(s, node)
elif isinstance(fmt, Section.__class__) and \
k == 'properties' and len(getattr(e, k)) > 0:
properties = getattr(e, k)
for p in properties:
node = URIRef(odmlns + str(p.id))
node = URIRef(odmlns + unicode(p.id))
self.g.add((curr_node, fmt.rdf_map(k), node))
self.save_element(p, node)
elif isinstance(fmt, Property.__class__) and \
k == 'value' and len(getattr(e, k)) > 0:
values = getattr(e, k)
seq = URIRef(odmlns + str(uuid.uuid4()))
seq = URIRef(odmlns + unicode(uuid.uuid4()))
self.g.add((seq, RDF.type, RDF.Seq))
self.g.add((curr_node, fmt.rdf_map(k), seq))
# rdflib so far does not respect RDF:li item order
Expand All @@ -133,15 +138,15 @@ def save_element(self, e, node=None):
# this should be reversed to RDF:li again!
# see https://github.com/RDFLib/rdflib/issues/280
# -- keep until supported
# bag = URIRef(odmlns + str(uuid.uuid4()))
# bag = URIRef(odmlns + unicode(uuid.uuid4()))
# self.g.add((bag, RDF.type, RDF.Bag))
# self.g.add((curr_node, fmt.rdf_map(k), bag))
# for v in values:
# self.g.add((bag, RDF.li, Literal(v)))

counter = 1
for v in values:
pred = "%s_%s" % (str(RDF), counter)
pred = "%s_%s" % (unicode(RDF), counter)
self.g.add((seq, URIRef(pred), Literal(v)))
counter = counter + 1

Expand Down Expand Up @@ -222,7 +227,11 @@ def to_odml(self):

def from_file(self, filename, doc_format):
self.g = Graph().parse(source=filename, format=doc_format)
return self.to_odml()
docs = self.to_odml()
for d in docs:
# Provide original file name via the document
d._origin_file_name = os.path.basename(filename)
return docs

def from_string(self, file, doc_format):
self.g = Graph().parse(source=StringIO(file), format=doc_format)
Expand All @@ -242,7 +251,7 @@ def parse_document(self, doc_uri):
doc_attrs[attr[0]] = doc_uri.split("#", 1)[1]
else:
if len(elems) > 0:
doc_attrs[attr[0]] = str(elems[0].toPython())
doc_attrs[attr[0]] = unicode(elems[0].toPython())

return {'Document': doc_attrs, 'odml-version': FORMAT_VERSION}

Expand All @@ -264,7 +273,7 @@ def parse_section(self, sec_uri):
sec_attrs[attr[0]] = sec_uri.split("#", 1)[1]
else:
if len(elems) > 0:
sec_attrs[attr[0]] = str(elems[0].toPython())
sec_attrs[attr[0]] = unicode(elems[0].toPython())
self._check_mandatory_attrs(sec_attrs)
return sec_attrs

Expand Down Expand Up @@ -293,7 +302,7 @@ def parse_property(self, prop_uri):
prop_attrs[attr[0]] = prop_uri.split("#", 1)[1]
else:
if len(elems) > 0:
prop_attrs[attr[0]] = str(elems[0].toPython())
prop_attrs[attr[0]] = unicode(elems[0].toPython())
self._check_mandatory_attrs(prop_attrs)
return prop_attrs

Expand Down
3 changes: 2 additions & 1 deletion odml/tools/version_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def _parse_xml(self):
doc = doc.replace(elem, val)

# Make sure encoding is present for the xml parser
doc = doc.encode('utf-8')
if sys.version_info.major > 2:
doc = doc.encode('utf-8')

# Make pretty print available by resetting format
parser = ET.XMLParser(remove_blank_text=True)
Expand Down
8 changes: 7 additions & 1 deletion odml/tools/xmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from lxml.builder import E
# this is needed for py2exe to include lxml completely
from lxml import _elementpath as _dummy
from os.path import basename

try:
from StringIO import StringIO
Expand Down Expand Up @@ -187,7 +188,12 @@ def from_file(self, xml_file):
raise ParserException(e.msg)

self._handle_version(root)
return self.parse_element(root)
doc = self.parse_element(root)

# Provide the original file name via the in-memory document
if isinstance(xml_file, unicode):
doc._origin_file_name = basename(xml_file)
return doc

def from_string(self, string):
try:
Expand Down
2 changes: 1 addition & 1 deletion test/test_parser_odml.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_json_file(self):

def test_rdf_file(self):
self.rdf_writer.write_file(self.odml_doc, self.rdf_file)
rdf_doc = self.rdf_reader.from_file(self.rdf_file, "turtle")
rdf_doc = self.rdf_reader.from_file(self.rdf_file, "xml")

self.assertEqual(self.odml_doc, rdf_doc[0])

Expand Down