@@ -816,15 +816,15 @@ def __init__(self, version,
816816 expand = 1 ,
817817 cjk_check = True ):
818818 self .changed = []
819- file = open_data (UNICODE_DATA , version )
820819 table = [None ] * 0x110000
821- while 1 :
822- s = file .readline ()
823- if not s :
824- break
825- s = s .strip ().split (";" )
826- char = int (s [0 ], 16 )
827- table [char ] = s
820+ with open_data (UNICODE_DATA , version ) as file :
821+ while 1 :
822+ s = file .readline ()
823+ if not s :
824+ break
825+ s = s .strip ().split (";" )
826+ char = int (s [0 ], 16 )
827+ table [char ] = s
828828
829829 cjk_ranges_found = []
830830
@@ -855,69 +855,74 @@ def __init__(self, version,
855855 self .table = table
856856 self .chars = list (range (0x110000 )) # unicode 3.2
857857
858- file = open_data (COMPOSITION_EXCLUSIONS , version )
859858 self .exclusions = {}
860- for s in file :
861- s = s .strip ()
862- if not s :
863- continue
864- if s [0 ] == '#' :
865- continue
866- char = int (s .split ()[0 ],16 )
867- self .exclusions [char ] = 1
859+ with open_data (COMPOSITION_EXCLUSIONS , version ) as file :
860+ for s in file :
861+ s = s .strip ()
862+ if not s :
863+ continue
864+ if s [0 ] == '#' :
865+ continue
866+ char = int (s .split ()[0 ],16 )
867+ self .exclusions [char ] = 1
868868
869869 widths = [None ] * 0x110000
870- for s in open_data (EASTASIAN_WIDTH , version ):
871- s = s .strip ()
872- if not s :
873- continue
874- if s [0 ] == '#' :
875- continue
876- s = s .split ()[0 ].split (';' )
877- if '..' in s [0 ]:
878- first , last = [int (c , 16 ) for c in s [0 ].split ('..' )]
879- chars = list (range (first , last + 1 ))
880- else :
881- chars = [int (s [0 ], 16 )]
882- for char in chars :
883- widths [char ] = s [1 ]
870+ with open_data (EASTASIAN_WIDTH , version ) as file :
871+ for s in file :
872+ s = s .strip ()
873+ if not s :
874+ continue
875+ if s [0 ] == '#' :
876+ continue
877+ s = s .split ()[0 ].split (';' )
878+ if '..' in s [0 ]:
879+ first , last = [int (c , 16 ) for c in s [0 ].split ('..' )]
880+ chars = list (range (first , last + 1 ))
881+ else :
882+ chars = [int (s [0 ], 16 )]
883+ for char in chars :
884+ widths [char ] = s [1 ]
885+
884886 for i in range (0 , 0x110000 ):
885887 if table [i ] is not None :
886888 table [i ].append (widths [i ])
887889
888890 for i in range (0 , 0x110000 ):
889891 if table [i ] is not None :
890892 table [i ].append (set ())
891- for s in open_data (DERIVED_CORE_PROPERTIES , version ):
892- s = s .split ('#' , 1 )[0 ].strip ()
893- if not s :
894- continue
895893
896- r , p = s .split (";" )
897- r = r .strip ()
898- p = p .strip ()
899- if ".." in r :
900- first , last = [int (c , 16 ) for c in r .split ('..' )]
901- chars = list (range (first , last + 1 ))
902- else :
903- chars = [int (r , 16 )]
904- for char in chars :
905- if table [char ]:
906- # Some properties (e.g. Default_Ignorable_Code_Point)
907- # apply to unassigned code points; ignore them
908- table [char ][- 1 ].add (p )
909-
910- for s in open_data (LINE_BREAK , version ):
911- s = s .partition ('#' )[0 ]
912- s = [i .strip () for i in s .split (';' )]
913- if len (s ) < 2 or s [1 ] not in MANDATORY_LINE_BREAKS :
914- continue
915- if '..' not in s [0 ]:
916- first = last = int (s [0 ], 16 )
917- else :
918- first , last = [int (c , 16 ) for c in s [0 ].split ('..' )]
919- for char in range (first , last + 1 ):
920- table [char ][- 1 ].add ('Line_Break' )
894+ with open_data (DERIVED_CORE_PROPERTIES , version ) as file :
895+ for s in file :
896+ s = s .split ('#' , 1 )[0 ].strip ()
897+ if not s :
898+ continue
899+
900+ r , p = s .split (";" )
901+ r = r .strip ()
902+ p = p .strip ()
903+ if ".." in r :
904+ first , last = [int (c , 16 ) for c in r .split ('..' )]
905+ chars = list (range (first , last + 1 ))
906+ else :
907+ chars = [int (r , 16 )]
908+ for char in chars :
909+ if table [char ]:
910+ # Some properties (e.g. Default_Ignorable_Code_Point)
911+ # apply to unassigned code points; ignore them
912+ table [char ][- 1 ].add (p )
913+
914+ with open_data (LINE_BREAK , version ) as file :
915+ for s in file :
916+ s = s .partition ('#' )[0 ]
917+ s = [i .strip () for i in s .split (';' )]
918+ if len (s ) < 2 or s [1 ] not in MANDATORY_LINE_BREAKS :
919+ continue
920+ if '..' not in s [0 ]:
921+ first = last = int (s [0 ], 16 )
922+ else :
923+ first , last = [int (c , 16 ) for c in s [0 ].split ('..' )]
924+ for char in range (first , last + 1 ):
925+ table [char ][- 1 ].add ('Line_Break' )
921926
922927 # We only want the quickcheck properties
923928 # Format: NF?_QC; Y(es)/N(o)/M(aybe)
@@ -928,31 +933,33 @@ def __init__(self, version,
928933 # for older versions, and no delta records will be created.
929934 quickchecks = [0 ] * 0x110000
930935 qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC' .split ()
931- for s in open_data (DERIVEDNORMALIZATION_PROPS , version ):
932- if '#' in s :
933- s = s [:s .index ('#' )]
934- s = [i .strip () for i in s .split (';' )]
935- if len (s ) < 2 or s [1 ] not in qc_order :
936- continue
937- quickcheck = 'MN' .index (s [2 ]) + 1 # Maybe or No
938- quickcheck_shift = qc_order .index (s [1 ])* 2
939- quickcheck <<= quickcheck_shift
940- if '..' not in s [0 ]:
941- first = last = int (s [0 ], 16 )
942- else :
943- first , last = [int (c , 16 ) for c in s [0 ].split ('..' )]
944- for char in range (first , last + 1 ):
945- assert not (quickchecks [char ]>> quickcheck_shift )& 3
946- quickchecks [char ] |= quickcheck
936+ with open_data (DERIVEDNORMALIZATION_PROPS , version ) as file :
937+ for s in file :
938+ if '#' in s :
939+ s = s [:s .index ('#' )]
940+ s = [i .strip () for i in s .split (';' )]
941+ if len (s ) < 2 or s [1 ] not in qc_order :
942+ continue
943+ quickcheck = 'MN' .index (s [2 ]) + 1 # Maybe or No
944+ quickcheck_shift = qc_order .index (s [1 ])* 2
945+ quickcheck <<= quickcheck_shift
946+ if '..' not in s [0 ]:
947+ first = last = int (s [0 ], 16 )
948+ else :
949+ first , last = [int (c , 16 ) for c in s [0 ].split ('..' )]
950+ for char in range (first , last + 1 ):
951+ assert not (quickchecks [char ]>> quickcheck_shift )& 3
952+ quickchecks [char ] |= quickcheck
947953 for i in range (0 , 0x110000 ):
948954 if table [i ] is not None :
949955 table [i ].append (quickchecks [i ])
950956
951- zip = zipfile .ZipFile (open_data (UNIHAN , version ))
952- if version == '3.2.0' :
953- data = zip .open ('Unihan-3.2.0.txt' ).read ()
954- else :
955- data = zip .open ('Unihan_NumericValues.txt' ).read ()
957+ with open_data (UNIHAN , version ) as file :
958+ zip = zipfile .ZipFile (file )
959+ if version == '3.2.0' :
960+ data = zip .open ('Unihan-3.2.0.txt' ).read ()
961+ else :
962+ data = zip .open ('Unihan_NumericValues.txt' ).read ()
956963 for line in data .decode ("utf-8" ).splitlines ():
957964 if not line .startswith ('U+' ):
958965 continue
0 commit comments