1+ # Copyright (c) 2014, The MITRE Corporation. All rights reserved.
2+ # See LICENSE.txt for complete terms.
3+
4+ import maec
5+ from distutils .version import StrictVersion
6+ from lxml import etree
7+
class UnsupportedVersionError(Exception):
    """The document's ``schema_version`` is not one this library supports.

    Raised by ``EntityParser._check_version`` when the version declared on
    the root element is not in the parser's list of supported versions.
    """
10+
class UnknownVersionError(Exception):
    """The document does not declare a version at all.

    Raised by ``EntityParser._check_version`` when the root element has no
    (or an empty) ``schema_version`` attribute.
    """
13+
class UnsupportedRootElement(Exception):
    """The document root is neither a MAEC_Package nor a MAEC_Bundle.

    Raised by ``EntityParser._check_root`` when the root element's tag is
    not one of the two recognised MAEC root elements.
    """
16+
class EntityParser(object):
    """Parse MAEC XML instance documents into binding or python-maec objects.

    After a document root has been checked, exactly one of ``is_bundle`` /
    ``is_package`` is True and records which kind of document was seen last.
    """

    # Fully-qualified (Clark notation) tags of the two accepted root elements.
    _BUNDLE_TAG = "{http://maec.mitre.org/XMLSchema/maec-bundle-4}MAEC_Bundle"
    _PACKAGE_TAG = "{http://maec.mitre.org/XMLSchema/maec-package-2}MAEC_Package"

    # Accepted values of the root element's ``schema_version`` attribute.
    _SUPPORTED_VERSIONS = ('4.1', '2.1')

    def __init__(self):
        self.is_bundle = False
        self.is_package = False

    @staticmethod
    def _get_root(tree):
        """Return the root element of @tree.

        @tree may be an ElementTree-like object (has ``getroot()``) or
        already an element, in which case it is returned unchanged.
        """
        try:
            return tree.getroot()
        except AttributeError:
            return tree

    def _check_version(self, tree):
        """Return True if the instance document @tree is a supported version.

        Raises:
        UnknownVersionError -- the root has no (or an empty) schema_version
        UnsupportedVersionError -- the schema_version is not supported
        """
        root = self._get_root(tree)

        document_version = root.attrib.get('schema_version')
        if not document_version:
            raise UnknownVersionError("No version attribute set on xml instance. Unable to determine version compatibility")

        supported = self._SUPPORTED_VERSIONS
        if document_version not in supported:
            raise UnsupportedVersionError("Your python-maec library supports MAEC %s, or the MAEC Bundle Schema at %s and MAEC Package Schema at %s. Document version was %s" % (supported[0], supported[0], supported[1], document_version))

        return True

    def _check_root(self, tree):
        """Validate the root element of @tree and record the document type.

        Sets ``is_bundle`` / ``is_package`` from the root tag. Both flags are
        recomputed on every call so a reused parser never carries over the
        type of a previously parsed document.

        Returns True on success.

        Raises:
        UnsupportedRootElement -- the root is neither MAEC_Bundle nor MAEC_Package
        """
        root = self._get_root(tree)

        # Compare against each tag explicitly: ``tag != A or B`` would always
        # be truthy because B is a non-empty string.
        self.is_bundle = root.tag == self._BUNDLE_TAG
        self.is_package = root.tag == self._PACKAGE_TAG

        if not (self.is_bundle or self.is_package):
            raise UnsupportedRootElement("Document root element must be an instance of MAEC_Package or MAEC_Bundle")

        return True

    def _apply_input_namespaces(self, tree, entity):
        """Store the root's namespace map on @entity as {namespace: alias}.

        The mapping is written to ``entity.__input_namespaces__``.
        """
        root = self._get_root(tree)

        # items() rather than iteritems() so this runs on Python 2 and 3.
        entity.__input_namespaces__ = {
            ns: alias for alias, ns in root.nsmap.items()
        }

    def _load_tree(self, xml_file):
        """Parse @xml_file with lxml and return the resulting ElementTree."""
        # NOTE: huge_tree=True lifts libxml2's safety limits on document
        # depth/size; keep that in mind when parsing untrusted input.
        parser = etree.ETCompatXMLParser(huge_tree=True)
        return etree.parse(xml_file, parser=parser)

    def _build_binding(self, tree, check_version):
        """Validate @tree and build the matching MAEC binding object from it."""
        # Check the root and determine the type of document we're dealing with
        self._check_root(tree)

        if check_version:
            self._check_version(tree)

        root = self._get_root(tree)
        binding_obj = None
        if self.is_package:
            import maec.bindings.maec_package as maec_package_binding
            binding_obj = maec_package_binding.PackageType().factory()
            binding_obj.build(root)
        elif self.is_bundle:
            import maec.bindings.maec_bundle as maec_bundle_binding
            binding_obj = maec_bundle_binding.BundleType().factory()
            binding_obj.build(root)

        return binding_obj

    def parse_xml_to_obj(self, xml_file, check_version=True):
        """Creates a MAEC binding object from the supplied xml file.

        Arguments:
        xml_file -- A filename/path or a file-like object representing a MAEC instance document
        check_version -- Inspect the version before parsing.
        """
        tree = self._load_tree(xml_file)
        return self._build_binding(tree, check_version)

    def parse_xml(self, xml_file, check_version=True):
        """Creates a python-maec Bundle or Package object from the supplied xml_file.

        Arguments:
        xml_file -- A filename/path or a file-like object representing a MAEC instance (i.e. Package or Bundle) document
        check_version -- Inspect the version before parsing.
        """
        # Parse exactly once: the tree is needed both for the binding object
        # and for the namespace map, and a file-like input cannot be re-read.
        tree = self._load_tree(xml_file)
        binding_obj = self._build_binding(tree, check_version)

        api_obj = None
        if self.is_package:
            from maec.package.package import Package  # resolve circular dependencies
            api_obj = Package().from_obj(binding_obj)
        elif self.is_bundle:
            from maec.bundle.bundle import Bundle  # resolve circular dependencies
            api_obj = Bundle().from_obj(binding_obj)

        if api_obj is not None:
            self._apply_input_namespaces(tree, api_obj)

        return api_obj
0 commit comments