1
+ #!/usr/bin/env python3
2
+ """
3
+ Create a focused terminology dictionary for the most important Python terms.
4
+
5
+ This script extracts the most critical Python terminology for translation consistency.
6
+ """
7
+
8
+ import csv
9
+ from collections import defaultdict , Counter
10
+
11
+
12
# Vocabularies matched case-insensitively against the whole (stripped) term.
_CORE_CONCEPTS = frozenset({
    'class', 'function', 'method', 'module', 'package', 'object', 'type',
})
_BUILTIN_TYPES = frozenset({
    'int', 'str', 'list', 'dict', 'tuple', 'set', 'float', 'bool', 'complex',
})
_KEYWORD_CONSTANTS = frozenset({
    'none', 'true', 'false', 'return', 'import', 'def', 'async', 'await',
})
# These keywords make a term important, but the categoriser has no dedicated
# bucket for them, so they fall through to the later category rules.
# NOTE(review): possibly an oversight in the category list - confirm.
_EXTRA_KEYWORDS = frozenset({
    'lambda', 'yield', 'raise', 'try', 'except', 'finally', 'with', 'as',
})
_PRIORITY_TERMS = (
    _CORE_CONCEPTS | _BUILTIN_TYPES | _KEYWORD_CONSTANTS | _EXTRA_KEYWORDS
)

# Substrings looked up in the lower-cased term.
_CONCEPT_SUBSTRINGS = (
    'exception', 'error', 'iterator', 'generator', 'decorator', 'property',
    'classmethod', 'staticmethod', 'metaclass', 'inheritance', 'polymorphism',
)
# Case-sensitive substrings looked up in the term as written.
_TECHNICAL_PATTERNS = ('()', 'Error', 'Exception', 'Class')

_OUTPUT_FIELDS = [
    'source_term', 'translated_term', 'frequency', 'files_count',
    'priority', 'category', 'example_files',
]


def _is_important_term(source_term, frequency, files_count):
    """Return True if the term belongs in the focused dictionary."""
    lowered = source_term.lower()
    if lowered in _PRIORITY_TERMS:
        return True
    if any(concept in lowered for concept in _CONCEPT_SUBSTRINGS):
        return True
    # Widespread usage: appears in many files and often overall.
    if files_count >= 20 and frequency >= 10:
        return True
    # Inline code (backticks) or dunder names such as __init__.
    if '`' in source_term or (
        source_term.startswith('__') and source_term.endswith('__')
    ):
        return True
    return any(pattern in source_term for pattern in _TECHNICAL_PATTERNS)


def _categorize_term(source_term, files_count):
    """Return the ``(category, priority)`` pair for an important term."""
    lowered = source_term.lower()
    if lowered in _CORE_CONCEPTS:
        return 'Core Concepts', 'High'
    if lowered in _BUILTIN_TYPES:
        return 'Built-in Types', 'High'
    if lowered in _KEYWORD_CONSTANTS:
        return 'Keywords/Constants', 'High'
    if 'error' in lowered or 'exception' in lowered:
        return 'Exceptions', 'High'
    if '`' in source_term:
        return 'Code Elements', 'Medium'
    if files_count >= 50:
        return 'Common Terms', 'High'
    return 'Other', 'Medium'


def create_focused_dictionary(input_path="terminology_dictionary.csv",
                              output_path="focused_terminology_dictionary.csv"):
    """Create a focused dictionary with the most important terms.

    Reads the full terminology CSV, keeps only the rows judged important,
    sorts them by descending frequency, writes them to a new CSV with added
    ``priority`` and ``category`` columns, and prints a short summary.

    Args:
        input_path: CSV to read. It must provide the columns ``source_term``,
            ``translated_term``, ``frequency``, ``files_count`` and
            ``example_files``.
        output_path: CSV to write (overwritten if it already exists).

    Raises:
        OSError: If a file cannot be opened.
        KeyError: If a required column is missing from the input.
        ValueError: If ``frequency`` or ``files_count`` is not an integer.
    """
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(input_path, 'r', newline='', encoding='utf-8') as csvfile:
        important_terms = [
            row for row in csv.DictReader(csvfile)
            if _is_important_term(
                row['source_term'].strip(),
                int(row['frequency']),
                int(row['files_count']),
            )
        ]

    # Most common first; the sort is stable, so ties keep their input order.
    important_terms.sort(key=lambda row: int(row['frequency']), reverse=True)

    # Tally categories while writing so the printed breakdown always matches
    # what ends up in the output file.
    categories = Counter()

    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_OUTPUT_FIELDS)
        writer.writeheader()

        for term_data in important_terms:
            source_term = term_data['source_term'].strip()
            category, priority = _categorize_term(
                source_term, int(term_data['files_count'])
            )
            categories[category] += 1

            writer.writerow({
                'source_term': source_term,
                'translated_term': term_data['translated_term'],
                'frequency': term_data['frequency'],
                'files_count': term_data['files_count'],
                'priority': priority,
                'category': category,
                'example_files': term_data['example_files'],
            })

    print(f"Created focused terminology dictionary with {len(important_terms)} important terms")

    print("\n Category breakdown:")
    for category, count in categories.items():
        print(f" {category} : {count} terms")
137
+
138
+
139
# Build the focused dictionary only when this file is executed as a script;
# importing it as a module has no side effects.
if __name__ == "__main__":
    create_focused_dictionary()
0 commit comments