| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 from translate.filters import helpers
34 from translate.filters import decoration
35 from translate.filters import prefilters
36 from translate.filters import spelling
37 from translate.lang import factory
38 from translate.lang import data
39 # The import of xliff could fail if the user doesn't have lxml installed. For
40 # now we try to continue gracefully to help users who aren't interested in
41 # support for XLIFF or other XML formats.
42 try:
43 from translate.storage import xliff
44 except ImportError, e:
45 xliff = None
46 # The import of xliff can fail silently in the absence of lxml if another module
47 # already tried to import it unsuccessfully, so let's make 100% sure:
48 if not hasattr(xliff, "xliffunit"):
49 xliff = None
50 import re
51
52 # These are some regular expressions that are compiled for use in some tests
53
54 # printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't cover everything; we leave \w instead of specifying the exact letters, as
55 # this should capture printf types defined in other platforms.
56 # extended to support Python named format specifiers
# Layout of one match: "%", then an optional position ("1$") or Python key
# ("(name)"), then the "fullvar" part made of flags, width, precision, an
# optional length modifier and the conversion type.
# The length modifier is a real alternation (the pipes must not be escaped,
# otherwise they only match a literal "|"), with the two-letter forms first so
# that "%lld" ends on type "d" instead of stopping at the first "l".
printf_pat = re.compile(r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh|ll|h|l)?(?P<type>[\w%])))')
58
59 # The name of the XML tag
60 tagname_re = re.compile("<[\s]*([\w\/]*)")
61
62 # We allow escaped quotes, probably for old escaping style of OOo helpcontent
63 #TODO: remove escaped strings once usage is audited
64 property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))")
65
66 # The whole tag
67 tag_re = re.compile("<[^>]+>")
68
69 gconf_attribute_re = re.compile('"[a-z_]+?"')
70
72 """Returns the name of the XML/HTML tag in string"""
73 return tagname_re.match(string).groups(1)[0]
74
76 """Tests to see if pair == (a,b,c) is in list, but handles None entries in
77 list as wildcards (only allowed in positions "a" and "c"). We take a shortcut
78 by only considering "c" if "b" has already matched."""
79 a, b, c = pair
80 if (b, c) == (None, None):
81 #This is a tagname
82 return pair
83 for pattern in list:
84 x, y, z = pattern
85 if (x, y) in [(a, b), (None, b)]:
86 if z in [None, c]:
87 return pattern
88 return pair
89
91 """Returns all the properties in the XML/HTML tag string as
92 (tagname, propertyname, propertyvalue), but ignore those combinations
93 specified in ignore."""
94 properties = []
95 for string in strings:
96 tag = tagname(string)
97 properties += [(tag, None, None)]
98 #Now we isolate the attribute pairs.
99 pairs = property_re.findall(string)
100 for property, value, a, b in pairs:
101 #Strip the quotes:
102 value = value[1:-1]
103
104 canignore = False
105 if (tag, property, value) in ignore or \
106 intuplelist((tag,property,value), ignore) != (tag,property,value):
107 canignore = True
108 break
109 if not canignore:
110 properties += [(tag, property, value)]
111 return properties
112
113
115 """This exception signals that a Filter didn't pass, and gives an explanation
116 or a comment"""
118 if not isinstance(messages, list):
119 messages = [messages]
120 assert isinstance(messages[0], unicode) # Assumption: all of same type
121 joined = u", ".join(messages)
122 Exception.__init__(self, joined)
123 # Python 2.3 doesn't have .args
124 if not hasattr(self, "args"):
125 self.args = joined
126
128 """This exception signals that a Filter didn't pass, and the bad translation
129 might break an application (so the string will be marked fuzzy)"""
130 pass
131
132 #(tag, attribute, value) specifies a certain attribute which can be changed/
133 #ignored if it exists inside tag. In the case where there is a third element
134 #in the tuple, it indicates a property value that can be ignored if present
135 #(like defaults, for example)
136 #If a certain item is None, it indicates that it is relevant for all values of
137 #the property/tag that is specified as None. A non-None value of "value"
138 #indicates that the value of the attribute must be taken into account.
139 common_ignoretags = [(None, "xml-lang", None)]
140 common_canchangetags = [("img", "alt", None), (None, "title", None)]
141 # Actually the title tag is allowed on many tags in HTML (but probably not all)
142
144 """object representing the configuration of a checker"""
def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
             notranslatewords=None, musttranslatewords=None, validchars=None,
             punctuation=None, endpunctuation=None, ignoretags=None,
             canchangetags=None, criticaltests=None, credit_sources=None):
    """Build a configuration; every option left as None gets a default.

    List options default to a new empty list.  punctuation and endpunctuation
    default to the values of the target language object, ignoretags and
    canchangetags to the module-level common_ignoretags and
    common_canchangetags.  The source language object is always the one
    returned by factory.getlanguage('en').
    """
    normalize = data.normalized_unicode

    # Options that are plain lists
    self.accelmarkers = self._init_list(accelmarkers)
    self.varmatches = self._init_list(varmatches)
    self.criticaltests = self._init_list(criticaltests)
    self.credit_sources = self._init_list(credit_sources)

    # Language objects.  updatetargetlanguage() sets self.lang, which the
    # punctuation defaults below read, so it has to run first.
    self.targetlanguage = targetlanguage
    self.updatetargetlanguage(targetlanguage)
    self.sourcelang = factory.getlanguage('en')

    # Options with a language-specific or module-wide fallback
    user_punctuation = normalize(punctuation)
    self.punctuation = self._init_default(user_punctuation, self.lang.punctuation)
    user_endpunctuation = normalize(endpunctuation)
    self.endpunctuation = self._init_default(user_endpunctuation, self.lang.sentenceend)
    self.ignoretags = self._init_default(ignoretags, common_ignoretags)
    self.canchangetags = self._init_default(canchangetags, common_canchangetags)

    # Word lists are stored as dictionaries keyed on the normalized word
    # TODO: allow user configuration of untranslatable words
    untranslatable = [normalize(word) for word in self._init_list(notranslatewords)]
    self.notranslatewords = dict.fromkeys(untranslatable)
    compulsory = [normalize(word) for word in self._init_list(musttranslatewords)]
    self.musttranslatewords = dict.fromkeys(compulsory)

    # The valid-character map starts empty and is filled from validchars
    normalized_validchars = normalize(validchars)
    self.validcharsmap = {}
    self.updatevalidchars(normalized_validchars)
170
172 """initialise configuration paramaters that are lists
173
174 @type list: List
175 @param list: None (we'll initialise a blank list) or a list paramater
176 @rtype: List
177 """
178 if list is None:
179 list = []
180 return list
181
183 """initialise parameters that can have default options
184
185 @param param: the user supplied paramater value
186 @param default: default values when param is not specified
187 @return: the paramater as specified by the user of the default settings
188 """
189 if param is None:
190 return default
191 return param
192
194 """combines the info in otherconfig into this config object"""
195 self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
196 self.updatetargetlanguage(self.targetlanguage)
197 self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
198 self.varmatches.extend(otherconfig.varmatches)
199 self.notranslatewords.update(otherconfig.notranslatewords)
200 self.musttranslatewords.update(otherconfig.musttranslatewords)
201 self.validcharsmap.update(otherconfig.validcharsmap)
202 self.punctuation += otherconfig.punctuation
203 self.endpunctuation += otherconfig.endpunctuation
204 #TODO: consider also updating in the following cases:
205 self.ignoretags = otherconfig.ignoretags
206 self.canchangetags = otherconfig.canchangetags
207 self.criticaltests.extend(otherconfig.criticaltests)
208 self.credit_sources = otherconfig.credit_sources
209
211 """updates the map that eliminates valid characters"""
212 if validchars is None:
213 return True
214 validcharsmap = dict([(ord(validchar), None) for validchar in data.normalized_unicode(validchars)])
215 self.validcharsmap.update(validcharsmap)
216
218 """Updates the target language in the config to the given target language"""
219 self.lang = factory.getlanguage(langcode)
220
def cache_results(f):
    """Memoise the one-argument method C{f} in the instance's results cache.

    The wrapper keeps each result in C{self.results_cache} under the key
    C{(f.__name__, param1)}, so the instance must provide that dictionary and
    C{param1} must be hashable.

    @param f: a method taking C{self} and exactly one further argument
    @return: the caching wrapper; it carries the docstring and, where the
        interpreter allows it, the name of C{f}
    """
    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        value = f(self, param1)
        res_cache[key] = value
        return value

    # Keep the wrapped method recognisable: without this every cached method
    # reports itself as "cached_f" and has no docstring.
    cached_f.__doc__ = f.__doc__
    try:
        cached_f.__name__ = f.__name__
    except (TypeError, AttributeError):
        # Deliberately ignored: the name is cosmetic, and the file still
        # caters for old interpreters where it may not be assignable.
        pass
    return cached_f
232
234 """Parent Checker class which does the checking based on functions available
235 in derived classes."""
236 preconditions = {}
237
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Set up the configuration, the helper registry and the default filters.

    @param checkerconfig: configuration to use; a fresh CheckerConfig() is
        created when this is None
    @param excludefilters: passed on to getfilters()
    @param limitfilters: passed on to getfilters()
    @param errorhandler: stored as self.errorhandler
    """
    self.errorhandler = errorhandler
    if checkerconfig is None:
        checkerconfig = CheckerConfig()
    self.setconfig(checkerconfig)

    # Everything callable that UnitChecker itself exposes is a helper, not a
    # test, and is recorded here before the filters are collected.
    registry = self.helperfunctions = {}
    for name in dir(UnitChecker):
        member = getattr(self, name)
        if not callable(member):
            continue
        registry[name] = member

    self.defaultfilters = self.getfilters(excludefilters, limitfilters)
    self.results_cache = {}
253
255 """returns dictionary of available filters, including/excluding those in
256 the given lists"""
257 filters = {}
258 if limitfilters is None:
259 # use everything available unless instructed
260 limitfilters = dir(self)
261 if excludefilters is None:
262 excludefilters = {}
263 for functionname in limitfilters:
264 if functionname in excludefilters: continue
265 if functionname in self.helperfunctions: continue
266 if functionname == "errorhandler": continue
267 filterfunction = getattr(self, functionname, None)
268 if not callable(filterfunction): continue
269 filters[functionname] = filterfunction
270 return filters
271
273 """sets the accelerator list"""
274 self.config = config
275 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
276 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
277 for startmatch, endmatch in self.config.varmatches]
278 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone)
279 for startmatch, endmatch in self.config.varmatches]
280
282 """Sets the filename that a checker should use for evaluating suggestions."""
283 self.suggestion_store = store
284 if self.suggestion_store:
285 self.suggestion_store.require_index()
286
290 filtervariables = cache_results(filtervariables)
291
295 removevariables = cache_results(removevariables)
296
298 """filter out accelerators from str1"""
299 return helpers.multifilter(str1, self.accfilters, None)
300 filteraccelerators = cache_results(filteraccelerators)
301
303 """filter out accelerators from str1"""
304 return helpers.multifilter(str1, self.accfilters, acceptlist)
305
307 """replaces words with punctuation with their unpunctuated equivalents"""
308 return prefilters.filterwordswithpunctuation(str1)
309 filterwordswithpunctuation = cache_results(filterwordswithpunctuation)
310
314 filterxml = cache_results(filterxml)
315
317 """Runs the given test on the given unit.
318
319 Note that this can raise a FilterFailure as part of normal operation"""
320 return test(unit)
321
323 """run all the tests in this suite, return failures as testname, message_or_exception"""
324 self.results_cache = {}
325 failures = {}
326 ignores = self.config.lang.ignoretests[:]
327 functionnames = self.defaultfilters.keys()
328 priorityfunctionnames = self.preconditions.keys()
329 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
330 for functionname in priorityfunctionnames + otherfunctionnames:
331 if functionname in ignores:
332 continue
333 filterfunction = getattr(self, functionname, None)
334 # this filterfunction may only be defined on another checker if using TeeChecker
335 if filterfunction is None:
336 continue
337 filtermessage = filterfunction.__doc__
338 try:
339 filterresult = self.run_test(filterfunction, unit)
340 except FilterFailure, e:
341 filterresult = False
342 filtermessage = e.args[0]
343 except Exception, e:
344 if self.errorhandler is None:
345 raise ValueError("error in filter %s: %r, %r, %s" % \
346 (functionname, unit.source, unit.target, e))
347 else:
348 filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
349 if not filterresult:
350 # we test some preconditions that aren't actually a cause for failure
351 if functionname in self.defaultfilters:
352 failures[functionname] = filtermessage
353 if functionname in self.preconditions:
354 for ignoredfunctionname in self.preconditions[functionname]:
355 ignores.append(ignoredfunctionname)
356 self.results_cache = {}
357 return failures
358
360 """A checker that passes source and target strings to the checks, not the
361 whole unit.
362
363 This provides some speedup and simplifies testing."""
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Initialise through the parent class; all four options are forwarded
    unchanged and in the same order."""
    parent = super(TranslationChecker, self)
    parent.__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
366
368 """Runs the given test on the given unit.
369
370 Note that this can raise a FilterFailure as part of normal operation."""
371 if self.hasplural:
372 filtermessages = []
373 filterresult = True
374 for pluralform in unit.target.strings:
375 try:
376 if not test(self.str1, unicode(pluralform)):
377 filterresult = False
378 except FilterFailure, e:
379 filterresult = False
380 filtermessages.append( unicode(e.args) )
381 if not filterresult and filtermessages:
382 raise FilterFailure(filtermessages)
383 else:
384 return filterresult
385 else:
386 return test(self.str1, self.str2)
387
389 """Do some optimisation by caching some data of the unit for the benefit
390 of run_test()."""
391 self.str1 = data.normalized_unicode(unit.source) or u""
392 self.str2 = data.normalized_unicode(unit.target) or u""
393 self.hasplural = unit.hasplural()
394 self.locations = unit.getlocations()
395 return super(TranslationChecker, self).run_filters(unit)
396
398 """A Checker that controls multiple checkers."""
399 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
400 checkerclasses=None, errorhandler=None, languagecode=None):
401 """Construct a TeeChecker that drives one checker per class in checkerclasses.

@param checkerconfig: configuration handed to every checker; also kept as
    self.config when it is truthy, otherwise the first checker's config is used
@param excludefilters: passed to every checker and to self.getfilters()
@param limitfilters: stored on self, passed to every checker and to
    self.getfilters()
@param checkerclasses: classes to instantiate; None means [StandardChecker]
@param errorhandler: passed to every checker
@param languagecode: when truthy, the config of every checker is switched to
    this target language
"""
402 self.limitfilters = limitfilters
403 if checkerclasses is None:
404 checkerclasses = [StandardChecker]
# Every checker class is called with the same four keyword arguments.
405 self.checkers = [checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters, limitfilters=limitfilters, errorhandler=errorhandler) for checkerclass in checkerclasses]
406 if languagecode:
407 for checker in self.checkers:
408 checker.config.updatetargetlanguage(languagecode)
# NOTE(review): indentation is lost in this listing, so it is not visible
# whether the hook-up below happens only when languagecode is given - confirm.
# The checker appended here is taken as-is from the first checker's language
# object; it is not constructed with the arguments above.
409 # Let's hook up the language specific checker
410 lang_checker = self.checkers[0].config.lang.checker
411 if lang_checker:
412 self.checkers.append(lang_checker)
413
414 self.combinedfilters = self.getfilters(excludefilters, limitfilters)
# A falsy checkerconfig (e.g. None) falls back to the first checker's config.
415 self.config = checkerconfig or self.checkers[0].config
416
418 """returns dictionary of available filters, including/excluding those in
419 the given lists"""
420 if excludefilters is None:
421 excludefilters = {}
422 filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
423 self.combinedfilters = {}
424 for filters in filterslist:
425 self.combinedfilters.update(filters)
426 # TODO: move this somewhere more sensible (a checkfilters method?)
427 if limitfilters is not None:
428 for filtername in limitfilters:
429 if not filtername in self.combinedfilters:
430 import sys
431 print >> sys.stderr, "warning: could not find filter %s" % filtername
432 return self.combinedfilters
433
435 """run all the tests in the checker's suites"""
436 failures = {}
437 for checker in self.checkers:
438 failures.update(checker.run_filters(unit))
439 return failures
440
442 """Sets the filename that a checker should use for evaluating suggestions."""
443 for checker in self.checkers:
444 checker.setsuggestionstore(store)
445
446
448 """The basic test suite for source -> target translations."""
450 """checks whether a string has been translated at all"""
451 str2 = prefilters.removekdecomments(str2)
452 return not (len(str1.strip()) > 0 and len(str2) == 0)
453
455 """checks whether a translation is basically identical to the original string"""
456 str1 = self.filteraccelerators(self.removevariables(str1)).strip()
457 str2 = self.filteraccelerators(self.removevariables(str2)).strip()
458 if len(str1) < 2:
459 return True
460 # If the whole string is upperase, or nothing in the string can go
461 # towards uppercase, let's assume there is nothing translatable
462 # TODO: reconsider
463 if (str1.isupper() or str1.upper() == str1) and str1 == str2:
464 return True
465 if self.config.notranslatewords:
466 words1 = str1.split()
467 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
468 #currently equivalent to:
469 # if len(words1) == 1 and words1[0] in self.config.notranslatewords:
470 #why do we only test for one notranslate word?
471 return True
472 # we could also check for things like str1.isnumeric(), but the test
473 # above (str1.upper() == str1) makes this unnecessary
474 if str1.lower() == str2.lower():
475 raise FilterFailure(u"please translate")
476 return True
477
479 """checks whether a translation only contains spaces"""
480 len1 = len(str1.strip())
481 len2 = len(str2.strip())
482 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
483
485 """checks whether a translation is much shorter than the original string"""
486 len1 = len(str1.strip())
487 len2 = len(str2.strip())
488 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
489
491 """checks whether a translation is much longer than the original string"""
492 len1 = len(str1.strip())
493 len2 = len(str2.strip())
494 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
495
497 """checks whether escaping is consistent between the two strings"""
498 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
499 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word])
500 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word])
501 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
502 else:
503 return True
504
506 """checks whether newlines are consistent between the two strings"""
507 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")):
508 raise FilterFailure(u"line endings in original don't match line endings in translation")
509 else:
510 return True
511
513 """checks whether tabs are consistent between the two strings"""
514 if not helpers.countmatch(str1, str2, "\t"):
515 raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
516 else:
517 return True
518
520 """checks whether singlequoting is consistent between the two strings"""
521 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
522 str1 = self.config.lang.punctranslate(str1)
523 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
524 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
525
527 """checks whether doublequoting is consistent between the two strings"""
528 str1 = self.filteraccelerators(self.filtervariables(str1))
529 str1 = self.filterxml(str1)
530 str1 = self.config.lang.punctranslate(str1)
531 str2 = self.filteraccelerators(self.filtervariables(str2))
532 str2 = self.filterxml(str2)
533 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”"))
534
536 """checks for bad double-spaces by comparing to original"""
537 str1 = self.filteraccelerators(str1)
538 str2 = self.filteraccelerators(str2)
539 return helpers.countmatch(str1, str2, u" ")
540
542 """checks for bad spacing after punctuation"""
543 # Convert all nbsp to space, and just check spaces. Useful intermediate step to stricter nbsp checking?
544 str1 = self.filteraccelerators(self.filtervariables(str1))
545 str1 = self.config.lang.punctranslate(str1)
546 str1 = str1.replace(u"\u00a0", u" ")
547 if str1.find(u" ") == -1:
548 return True
549 str2 = self.filteraccelerators(self.filtervariables(str2))
550 str2 = str2.replace(u"\u00a0", u" ")
551 for puncchar in self.config.punctuation:
552 plaincount1 = str1.count(puncchar)
553 plaincount2 = str2.count(puncchar)
554 if not plaincount1 or plaincount1 != plaincount2:
555 continue
556 spacecount1 = str1.count(puncchar + u" ")
557 spacecount2 = str2.count(puncchar + u" ")
558 if spacecount1 != spacecount2:
559 # handle extra spaces that are because of transposed punctuation
560 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1:
561 continue
562 return False
563 return True
564
566 """checks whether printf format strings match"""
567 count1 = count2 = plural = None
568 # self.hasplural only set by run_filters, not always available
569 if 'hasplural' in self.__dict__:
570 plural = self.hasplural
571 for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
572 count2 = var_num2 + 1
573 str2key = match2.group('key')
574 if match2.group('ord'):
575 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
576 count1 = var_num1 + 1
577 if int(match2.group('ord')) == var_num1 + 1:
578 if match2.group('fullvar') != match1.group('fullvar'):
579 return 0
580 elif str2key:
581 str1key = None
582 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
583 count1 = var_num1 + 1
584 if match1.group('key') and str2key == match1.group('key'):
585 str1key = match1.group('key')
586 # '%.0s' "placeholder" in plural will match anything
587 if plural and match2.group('fullvar') == '.0s':
588 continue
589 if match1.group('fullvar') != match2.group('fullvar'):
590 return 0
591 if str1key == None:
592 return 0
593 else:
594 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
595 count1 = var_num1 + 1
596 # '%.0s' "placeholder" in plural will match anything
597 if plural and match2.group('fullvar') == '.0s':
598 continue
599 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
600 return 0
601
602 if count2 is None:
603 if list(printf_pat.finditer(str1)):
604 return 0
605
606 if (count1 or count2) and (count1 != count2):
607 return 0
608 return 1
609
611 """checks whether accelerators are consistent between the two strings"""
612 str1 = self.filtervariables(str1)
613 str2 = self.filtervariables(str2)
614 messages = []
615 for accelmarker in self.config.accelmarkers:
616 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
617 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
618 count1, countbad1 = counter1(str1)
619 count2, countbad2 = counter2(str2)
620 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
621 accel2, bad2 = getaccel(str2)
622 if count1 == count2:
623 continue
624 if count1 == 1 and count2 == 0:
625 if countbad2 == 1:
626 messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
627 else:
628 messages.append(u"accelerator %s is missing from translation" % accelmarker)
629 elif count1 == 0:
630 messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
631 elif count1 == 1 and count2 > count1:
632 messages.append(u"accelerator %s is repeated in translation" % accelmarker)
633 else:
634 messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
635 if messages:
636 if "accelerators" in self.config.criticaltests:
637 raise SeriousFilterFailure(messages)
638 else:
639 raise FilterFailure(messages)
640 return True
641
642 # def acceleratedvariables(self, str1, str2):
643 # """checks that no variables are accelerated"""
644 # messages = []
645 # for accelerator in self.config.accelmarkers:
646 # for variablestart, variableend in self.config.varmatches:
647 # error = accelerator + variablestart
648 # if str1.find(error) >= 0:
649 # messages.append(u"original has an accelerated variable")
650 # if str2.find(error) >= 0:
651 # messages.append(u"translation has an accelerated variable")
652 # if messages:
653 # raise FilterFailure(messages)
654 # return True
655
657 """checks whether variables of various forms are consistent between the two strings"""
658 messages = []
659 mismatch1, mismatch2 = [], []
660 varnames1, varnames2 = [], []
661 for startmarker, endmarker in self.config.varmatches:
662 varchecker = decoration.getvariables(startmarker, endmarker)
663 if startmarker and endmarker:
664 if isinstance(endmarker, int):
665 redecorate = lambda var: startmarker + var
666 else:
667 redecorate = lambda var: startmarker + var + endmarker
668 elif startmarker:
669 redecorate = lambda var: startmarker + var
670 else:
671 redecorate = lambda var: var
672 vars1 = varchecker(str1)
673 vars2 = varchecker(str2)
674 if vars1 != vars2:
675 # we use counts to compare so we can handle multiple variables
676 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
677 # filter variable names we've already seen, so they aren't matched by more than one filter...
678 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
679 varnames1.extend(vars1)
680 varnames2.extend(vars2)
681 vars1 = map(redecorate, vars1)
682 vars2 = map(redecorate, vars2)
683 mismatch1.extend(vars1)
684 mismatch2.extend(vars2)
685 if mismatch1:
686 messages.append(u"do not translate: %s" % u", ".join(mismatch1))
687 elif mismatch2:
688 messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
689 if messages and mismatch1:
690 raise SeriousFilterFailure(messages)
691 elif messages:
692 raise FilterFailure(messages)
693 return True
694
696 """checks that function names are not translated"""
697 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
698
700 """checks that emails are not translated"""
701 return helpers.funcmatch(str1, str2, decoration.getemails)
702
704 """checks that URLs are not translated"""
705 return helpers.funcmatch(str1, str2, decoration.geturls)
706
708 """checks whether numbers of various forms are consistent between the two strings"""
709 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
710
712 """checks whether whitespace at the beginning of the strings matches"""
713 return helpers.funcmatch(str1, str2, decoration.spacestart)
714
716 """checks whether whitespace at the end of the strings matches"""
717 str1 = self.config.lang.punctranslate(str1)
718 return helpers.funcmatch(str1, str2, decoration.spaceend)
719
721 """checks whether punctuation at the beginning of the strings match"""
722 str1 = self.filterxml(self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))))
723 str1 = self.config.lang.punctranslate(str1)
724 str2 = self.filterxml(self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))))
725 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
726
728 """checks whether punctuation at the end of the strings match"""
729 str1 = self.filtervariables(str1)
730 str1 = self.config.lang.punctranslate(str1)
731 str2 = self.filtervariables(str2)
732 str1 = str1.rstrip()
733 str2 = str2.rstrip()
734 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
735
737 """checks that strings that are purely punctuation are not changed"""
738 # this test is a subset of startandend
739 if (decoration.ispurepunctuation(str1)):
740 return str1 == str2
741 else:
742 return not decoration.ispurepunctuation(str2)
743
745 """checks that the number of brackets in both strings match"""
746 str1 = self.filtervariables(str1)
747 str2 = self.filtervariables(str2)
748 messages = []
749 missing = []
750 extra = []
751 for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
752 count1 = str1.count(bracket)
753 count2 = str2.count(bracket)
754 if count2 < count1:
755 missing.append(u"'%s'" % bracket)
756 elif count2 > count1:
757 extra.append(u"'%s'" % bracket)
758 if missing:
759 messages.append(u"translation is missing %s" % u", ".join(missing))
760 if extra:
761 messages.append(u"translation has extra %s" % u", ".join(extra))
762 if messages:
763 raise FilterFailure(messages)
764 return True
765
767 """checks that the number of sentences in both strings match"""
768 str1 = self.filteraccelerators(str1)
769 str2 = self.filteraccelerators(str2)
770 sentences1 = len(self.config.sourcelang.sentences(str1))
771 sentences2 = len(self.config.lang.sentences(str2))
772 if not sentences1 == sentences2:
773 raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sentences1, sentences2))
774 return True
775
777 """checks that options are not translated"""
778 str1 = self.filtervariables(str1)
779 for word1 in str1.split():
780 if word1 != u"--" and word1.startswith(u"--") and word1[-1].isalnum():
781 parts = word1.split(u"=")
782 if not parts[0] in str2:
783 raise FilterFailure(u"The option %s does not occur or is translated in the translation." % parts[0])
784 if len(parts) > 1 and parts[1] in str2:
785 raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": parts[0]})
786 return True
787
789 """checks that the message starts with the correct capitalisation"""
790 str1 = self.filteraccelerators(str1)
791 str2 = self.filteraccelerators(str2)
792 if len(str1) > 1 and len(str2) > 1:
793 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
794 if len(str1) == 0 and len(str2) == 0:
795 return True
796 if len(str1) == 0 or len(str2) == 0:
797 return False
798 return True
799
def simplecaps(self, str1, str2):
    """Check that the capitalisation of the two strings isn't wildly different.

    Both strings are passed through ``self.removevariables``; the numbers of
    upper-case and alphabetic characters are then compared with a few
    heuristics.  Returns True when the capitalisation style looks roughly
    the same.
    """
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably serves
    # no purpose to get sourcelang.sentenceend
    # NOTE(review): the substitution also replaces the character matched by
    # the [^...] class in front of " I " -- confirm that this is intended.
    pronoun_pattern = u"[^%s]( I )" % self.config.sourcelang.sentenceend
    str1 = re.sub(pronoun_pattern, u" i ", str1)
    count = helpers.filtercount
    capitals1 = count(str1, unicode.isupper)
    capitals2 = count(str2, unicode.isupper)
    alpha1 = count(str1, unicode.isalpha)
    alpha2 = count(str2, unicode.isalpha)
    # Capture the all caps case
    if capitals1 == alpha1:
        return capitals2 == alpha2
    # some heuristic tests to try and see that the style of capitals is vaguely the same
    if capitals1 in (0, 1):
        return capitals2 == capitals1
    length1 = len(str1)
    length2 = len(str2)
    # "//" spells out the integer division that "/" performs on ints here.
    if capitals1 < length1 // 10:
        return capitals2 <= length2 // 8
    if length1 < 10:
        return abs(capitals1 - capitals2) < 3
    if capitals1 > length1 * 6 // 10:
        return capitals2 > length2 * 6 // 10
    return abs(capitals1 - capitals2) < (length1 + length2) // 6
825
def acronyms(self, str1, str2):
    """Check that acronyms appearing in the source are unchanged in the translation.

    A word of the (filtered) source counts as an acronym when it is entirely
    upper case and longer than one character.  Variables found in the source
    and the keys of ``config.musttranslatewords`` are exempt.

    Returns True when every acronym also occurs in the filtered translation;
    raises FilterFailure listing the missing ones otherwise.
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed.extend(decoration.getvariables(startmatch, endmatch)(str1))
    allowed.extend(self.config.musttranslatewords.keys())
    str1 = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(str1)
    str2 = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    missing = [word for word in source_words
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in str2]
    if missing:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(missing))
    return True
847
def doublewords(self, str1, str2):
    """Check for words repeated back to back in the translation.

    The translation is passed through ``self.removevariables`` and
    ``self.filteraccelerators``, full stops are dropped, the text is
    lower-cased and split on whitespace.  Splitting on whitespace already
    treats newlines as separators, so no separate newline handling is needed.

    Returns True when no word immediately follows itself, unless that word
    is listed in ``config.lang.validdoublewords``.  Raises FilterFailure
    naming the first repeated word otherwise.
    """
    cleaned = self.filteraccelerators(self.removevariables(str2))
    previous = ""
    for word in cleaned.replace(u".", u"").lower().split():
        if word == previous and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        previous = word
    return True
858
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable also appear in the translation.

    Returns True immediately when ``config.notranslatewords`` is empty.
    Otherwise both strings go through ``self.filtervariables``, every
    character of ``config.punctuation`` is replaced by a space, accelerators
    are filtered and the strings are split into words.

    Raises FilterFailure listing each configured word that is in the source
    but not in the translation; returns True when there is none.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    str1 = self.filtervariables(str1)
    str2 = self.filtervariables(str2)
    # The punctuation set contains assorted quote characters; a single
    # apostrophe is perhaps problematic in words like "doesn't".
    for separator in self.config.punctuation:
        str1 = str1.replace(separator, u" ")
        str2 = str2.replace(separator, u" ")
    source_words = self.filteraccelerators(str1).split()
    target_words = self.filteraccelerators(str2).split()
    dropped = []
    for word in source_words:
        if word in protected and word not in target_words:
            dropped.append(word)
    if dropped:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(dropped)))
    return True
876
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable don't appear in
    the translation.

    Returns True immediately when ``config.musttranslatewords`` is empty.
    Otherwise both strings go through ``self.removevariables``, every
    character of ``config.punctuation`` is replaced by a space, accelerators
    are filtered and the strings are split into words.

    Raises FilterFailure listing each configured word that occurs in both
    the source and the translation; returns True when there is none.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # The punctuation set contains assorted quote characters; a single
    # apostrophe is perhaps problematic in words like "doesn't".
    for separator in self.config.punctuation:
        str1 = str1.replace(separator, u" ")
        str2 = str2.replace(separator, u" ")
    source_words = self.filteraccelerators(str1).split()
    target_words = self.filteraccelerators(str2).split()
    untranslated = []
    for word in source_words:
        if word in required and word in target_words:
            untranslated.append(word)
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
895
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the translation.

    Returns True immediately when ``config.validcharsmap`` is empty.
    Otherwise both strings are run through ``translate`` with that map
    (presumably it deletes the valid characters -- confirm against
    CheckerConfig).  Characters left over in the translation that are not
    also left over in the source are reported.

    Raises FilterFailure listing the offending characters with their code
    points; returns True when there is none.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    offenders = []
    for char in target_leftover:
        if char not in source_leftover:
            offenders.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offenders:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offenders)))
    return True
906
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-separated word of the accelerator-filtered source that
    starts with ``/`` is treated as a path.  Returns False as soon as
    ``helpers.countsmatch`` reports a mismatch for such a word between the
    original source and the translation, True otherwise.
    """
    for token in self.filteraccelerators(str1).split():
        if token.startswith(u"/") and not helpers.countsmatch(str1, str2, (token,)):
            return False
    return True
914
941
def kdecomments(self, str1, str2):
    """Check that no KDE style comments appear in the translation.

    Returns False when the translation starts with ``_:`` or contains
    ``_:`` directly after a newline, True otherwise.
    """
    has_comment = str2.startswith(u"_:") or u"\n_:" in str2
    return not has_comment
945
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Returns False when the conflict marker occurs anywhere in the
    translation, True otherwise.
    """
    return u"#-#-#-#-#" not in str2
949
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts the occurrences of ``(s)`` in the source and in the translation.
    For a target language with ``nplurals == 1`` the check passes only when
    the translation contains none.  Otherwise it passes when both counts
    are equal.
    """
    # Raw strings: "\(" is not a valid escape in an ordinary string literal.
    sourcepatterns = (r"\(s\)",)
    targetpatterns = (r"\(s\)",)
    sourcecount = sum(len(re.findall(pattern, str1)) for pattern in sourcepatterns)
    targetcount = sum(len(re.findall(pattern, str2)) for pattern in targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
965
def spellcheck(self, str1, str2):
    """Check for words in the translation that don't pass a spell check.

    Returns True without checking when no target language is configured or
    when ``spelling.available`` is false.  Words that the checker also flags
    in the source (checked as "en"), words in ``config.notranslatewords``
    and words that occur among their own suggestions are ignored.

    Raises FilterFailure with one message per remaining word; returns True
    when there is none.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    str1 = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    str2 = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    source_flagged = [word for word, index, suggestions in spelling.check(str1, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
        skip = (word in self.config.notranslatewords
                or word in source_flagged
                # hack to ignore hyphenisation rules
                or word in suggestions)
        if skip:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
991
def credits(self, str1, str2):
    """Check for messages containing translation credits instead of normal translations.

    Returns False when the source string is one of ``config.credit_sources``,
    True otherwise.
    """
    return str1 not in self.config.credit_sources
995
# Maps a precondition test name to the tuple of test names that depend on it.
# If the precondition filter is run and fails then the other tests listed are ignored
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # The "startcaps" entry below is disabled. It has caused
                 # problems since Python 2.6: startcaps is now seen as an
                 # important test to always execute, and it could now be run
                 # before it is blocked by a failing "untranslated" or "blank"
                 # test. This is probably due to a slightly different
                 # implementation of the internal dict handling since
                 # Python 2.6. We should never have relied on this ordering
                 # anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace":("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf") }
1046
1047 # code to actually run the tests (use unittest?)
1048
# Project configuration merged into every OpenOfficeChecker.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
        ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
        ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)


class OpenOfficeChecker(StandardChecker):
    """StandardChecker that has ``openofficeconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Update the caller's ``checkerconfig`` (or a fresh CheckerConfig
        when none is given) with ``openofficeconfig`` and pass it on to
        StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
1064
# Project configuration merged into every MozillaChecker.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"),
        ("$", None), ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)


class MozillaChecker(StandardChecker):
    """StandardChecker that has ``mozillaconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Update the caller's ``checkerconfig`` (or a fresh CheckerConfig
        when none is given) with ``mozillaconfig`` and pass it on to
        StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
1079
1086
# Project configuration merged into every DrupalChecker.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
)


class DrupalChecker(StandardChecker):
    """StandardChecker that has ``drupalconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Update the caller's ``checkerconfig`` (or a fresh CheckerConfig
        when none is given) with ``drupalconfig`` and pass it on to
        StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
1099
# Project configuration merged into every GnomeChecker.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)


class GnomeChecker(StandardChecker):
    """StandardChecker that has ``gnomeconfig`` merged into its configuration
    and adds the ``gconf`` test."""

    def __init__(self, **kwargs):
        """Update the caller's ``checkerconfig`` (or a fresh CheckerConfig
        when none is given) with ``gnomeconfig`` and pass it on to
        StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Check that gconf attributes have not been translated.

        Only runs for units with a location containing ``schemas.in``.
        Each match of ``gconf_attribute_re`` in the source, minus its first
        and last character, must occur in the translation.

        Raises FilterFailure listing the attributes that do not; returns
        True otherwise.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            translated = [attribute for attribute in attributes
                          if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True
1125
# Project configuration merged into every KdeChecker.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
)


class KdeChecker(StandardChecker):
    """StandardChecker that has ``kdeconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Update the caller's ``checkerconfig`` (or a fresh CheckerConfig
        when none is given) with ``kdeconfig`` and pass it on to
        StandardChecker."""
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
1142
# Project configuration merged into every CCLicenseChecker.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])


class CCLicenseChecker(StandardChecker):
    """StandardChecker that has ``cclicenseconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Update the caller's ``checkerconfig`` (or a fresh CheckerConfig
        when none is given) with ``cclicenseconfig`` and pass it on to
        StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
1152
# Maps a project style name to the checker class that handles it.
# "wx" has no checker of its own and reuses KdeChecker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
1162
1163
1165 """The standard checks for common checks on translation units."""
1169
1173
def nplurals(self, unit):
    """Check for the correct number of noun forms for plural translations.

    Units without plurals always pass.  For plural units the number of
    target strings must equal ``config.lang.nplurals``.
    """
    if not unit.hasplural():
        return True
    expected = self.config.lang.nplurals
    # if we don't have a valid nplurals value, don't run the test
    if not expected > 0:
        return True
    return len(unit.target.strings) == expected
1182
def hassuggestion(self, unit):
    """Check whether there is at least one suggested translation for this unit.

    Suggestions come from ``self.suggestion_store`` when one is set,
    otherwise from the alternative translations of an XLIFF unit.  The test
    passes (returns True) when there are no suggestions and fails (returns
    False) when there is at least one.
    """
    store = self.suggestion_store = getattr(self, 'suggestion_store', None)
    if store:
        suggestions = store.findunits(unit.source)
    elif xliff and isinstance(unit, xliff.xliffunit):
        # TODO: we probably want to filter them somehow
        suggestions = unit.getalttrans()
    else:
        suggestions = []
    return not suggestions
1193
1194
def runtests(str1, str2, ignorelist=()):
    """Run the StandardChecker filters on a (source, target) pair of strings.

    Both strings are normalised with ``data.normalized_unicode``.  Every
    failure is printed, and the failures object returned by ``run_filters``
    is returned to the caller.  Filters named in ``ignorelist`` are excluded.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        # A single parenthesised argument prints the same text whether print
        # is a statement or a function.
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], source, target))
    return failures
1207
def batchruntests(pairs):
    """Run the tests on a batch of (source, target) string pairs.

    Prints the failures of every pair (through ``runtests``), then a blank
    line and a summary of how many pairs passed.
    """
    numpairs = len(pairs)
    passed = 0
    for str1, str2 in pairs:
        # runtests() returns the failures it found, so a pair has passed
        # only when that result is empty.
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
1216
# Ad-hoc smoke test: when run as a script, push a fixed set of
# (source, target) pairs through batchruntests() and print the outcome.
if __name__ == '__main__':
    testset = [(r"simple", r"somple"),
            (r"\this equals \that", r"does \this equal \that?"),
            (r"this \'equals\' that", r"this 'equals' that"),
            (r" start and end! they must match.", r"start and end! they must match."),
            (r"check for matching %variables marked like %this", r"%this %variable is marked"),
            (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
            (r"check for mismatching %variables% too", r"how many %variable% are marked"),
            (r"%% %%", r"%%"),
            (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
            (r"simple lowercase", r"it is all lowercase"),
            (r"simple lowercase", r"It Is All Lowercase"),
            (r"Simple First Letter Capitals", r"First Letters"),
            (r"SIMPLE CAPITALS", r"First Letters"),
            (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
            (r"forgot to translate", r" ")
            ]
    batchruntests(testset)
1235
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed Mar 3 16:38:10 2010 | http://epydoc.sourceforge.net |