Package translate :: Package lang :: Module team
[hide private]
[frames] | no frames]

Source Code for Module translate.lang.team

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Module to guess the language ISO code based on the 'Language-Team' entry in 
 22  the header of a Gettext PO file.""" 
 23   
 24  import re 
 25   
 26  from translate.misc.typecheck import accepts, returns, IsOneOf 
 27  from translate.misc.typecheck.typeclasses import String 
 28   
 29  __all__ = ['LANG_TEAM_CONTACT_SNIPPETS', 'guess_language'] 
 30   
 31  LANG_TEAM_REGEX = ( 
 32     ("@li.org", "([a-z_A-Z]{2,})@li.org", ["LL", "XX", "TEAM"]), 
 33     ("translation-team", 
 34      "translation-team-([a-z_A-Z]+)@lists.sourceforge.net", None), 
 35     ("fedora-trans", "fedora-trans-([a-z_A-Z]+)@redhat.com", ["list"]), 
 36     ("ubuntu-l10n", "ubuntu-l10n-([a-z_A-Z]+)@lists.ubuntu.com", None), 
 37     ("translate-discuss", 
 38      "translate-discuss-([a-z_A-Z]+)@lists.sourceforge.net", None), 
 39     ("kde-i18n", "kde-i18n-([a-z_A-Z]+)@(?:lists\.|mail\.|)kde.org", ["doc"]), 
 40     ("kde-l10n", "kde-l10n-([a-z_A-Z]+)@kde.org", None), 
 41     ("fedoraproject", "trans-([a-z_A-Z]+)@lists.fedoraproject.org", None), 
 42     ("gnome.org", "gnome-([a-z_A-Z]+)-list@gnome.org", ["latin"]), 
 43  ) 
 44  """Data for regular expression based extraction.  The fieds are: prefilter 
 45  information, regex with single group that contains the language code, 
 46  postfilter.""" 
 47   
 48  LANG_TEAM_CONTACT_SNIPPETS = { 
 49      "af": ("i18n@af.org.za", "Petri Jooste",), 
 50      "am": ("@geez.org", ), 
 51      "ar": ("arabeyes.org", "Arabeyes", ), 
 52      "as": ("assam@mm.assam-glug.org", ), 
 53      "ast": ("@softastur.org", "launchpad.net/~ubuntu-l10n-ast", 
 54              "softast-xeneral@lists.sourceforge.net", "Softastur",), 
 55      "az": ("linuxaz@azerimal.net", "gnome@azitt.com", u"gnome@azətt.com",), 
 56      "az_IR": ("az-ir@lists.sharif.edu",), 
 57      "be": ("i18n@mova.org", "i18n@tut.by", "mozilla_byx@poczta.fm",), 
 58      "be@latin": ("translation-team-be-latin@lists", "be-latin.open-tran.eu",), 
 59      "bg": ("dict@fsa-bg.org", "dict@linux.zonebg.com", ), 
 60      "bn": ("gnome-translation@bengalinux.org", "core@bengalinux.org", 
 61             "ankur-bd-l10n@googlegroups.com", 
 62             "redhat-translation@bengalinux.org", ), 
 63      "bn_IN": ("anubad@lists.ankur.org.in", ), 
 64      "br": ("drouizig@drouizig.org", "brenux@free.fr", 
 65             "tradgnome@softcatala.net", "fedora@softcatala.org", ), 
 66      "bs": ("lokal@linux.org.ba", "lokal@lugbih.org", ), 
 67      "ca": ("@softcatala.org",), 
 68      "crh": ("tilde-birlik-tercime@lists.sourceforge.net", ), 
 69      "cs": ("fedora-cs-list@redhat.com", "cs-users@lists.fedoraproject.org", 
 70             "debian-l10n-czech@lists.debian.org", 
 71             "kde-czech-apps@lists.sourceforge.net", 
 72             "kde-czech-apps@lists.sf.net", "translations.cs@gnupg.cz"), 
 73      "cy": ("gnome-cy@lists.linux.org.uk", "gnome-cy@pengwyn.linux.org.uk", 
 74             "gnome-cy@www.linux.org", "gnome-cy@www.linux.org.uk", 
 75             "cy@pengwyn.linux.org.uk", ), 
 76      "da": ("dansk@dansk-gruppen.dk", "dansk@klid.dk", 
 77             "sslug-locale@sslug.dk", ), 
 78      "de": ("gnome-de@gnome.org", "debian-l10n-german@lists.debian.org", ), 
 79      "dz": ("pgeyleg@dit.gov.bt", "pgyeleg@dit.gov.bt", ), 
 80      "el": ("debian-l10n-greek@lists.debian.org", "i18ngr@lists.hellug.gr", 
 81             "i18n@hellug.gr", "nls@tux.hellug.gr", "team@gnome.gr", 
 82             "team@lists.gnome.gr", "users@el.openoffice.org", ), 
 83      "en_AU": ("trans@six-by-nine.com.au", ), 
 84      "en_CA": ("adamw@gnome.org", "adamw@freebsd.org", ), 
 85      "en_GB": ("kde-en-gb@kde.me.uk", ), 
 86      "en@shaw": ("ubuntu-l10n-en-shaw@launchpad.net", 
 87                  "ubuntu-l10n-en-shaw@lists.launchpad.net", ), 
 88      "eo": ("eo-tradukado@lists.tuxfamily.org", 
 89             "debian-l10n-esperanto@lists.debian.org", 
 90             "ubuntu-l10n-eo@lists.launchpad.net", 
 91             "eo-tradukado.tuxfamily.org", ), 
 92      "es": ("pgsql-es-ayuda@postgresql.org", 
 93             "debian-l10n-spanish@lists.debian.org", 
 94             "gnome-es@gnome.org", "traductores@es.gnome.org", ), 
 95      "et": ("gnome-et@linux.ee", "kde-et@linux.ee", "linux-ee@lists.eenet.ee", 
 96             "linux-et@lists.eenet.ee", "et-gnome@linux.ee", 
 97             "linux-ee@eenet.ee", ), 
 98      "eu": ("debian-l10n-basque@lists.debian.org", 
 99             "debian-l10n-eu@lists.debian.org", "itzulpena@euskalgnu.org", 
100             "gnome@euskalgnu.org", "librezale@librezale.org", 
101             "linux-eu@chanae.alphanet.ch", ), 
102      "fa": ("farsi@lists.sharif.edu", "Farsiweb.info", ), 
103      "fi": ("debian-l10n-finnish@lists.debian.org", 
104             "gnome-fi-laatu@lists.sourceforge.net", "laatu@lokalisointi.org", 
105             "lokalisointi-laatu@linux-aktivaattori.org", "laatu@gnome.fi", 
106             "yast-trans-fi@kotoistaminen.novell.fi", ), 
107      "fr": ("debian-l10n-french@lists.debian.org", "gnomefr@traduc.org", 
108             "kde-francophone@kde.org", "traduc@traduc.org", 
109             "pgsql-fr-generale@postgresql.org", "rpm-fr@livna.org", ), 
110      "ga": ("gaeilge-gnulinux@lists.sourceforge.net", 
111             "gaeilge-a@listserv.heanet.ie", ), 
112      "gl": ("trasno@ceu.fi.udc.es", "gnome@g11n.net", 
113             "gpul-traduccion@ceu.fi.udc.es", "proxecto@trasno.net", 
114             "trasno@gpul.org", ), 
115      "gu": ("indianoss-gujarati@lists.sourceforge.net", ), 
116      "he": ("debian-hebrew-common@lists.alioth.debian.org", 
117             "kde-il@yahoogroups.com", "fedora-he-list@redhat.com", 
118             "mdk-hebrew@iglu.org.il", ), 
119      "hi": ("indlinux-hindi-gnome@lists.sourceforge.net", 
120             "indlinux-hindi@lists.sourceforge.net", ), 
121      "hr": ("translator-shop.org", "lokalizacija@linux.hr", ), 
122      "hu": ("debian-l10n-hungarian@lists.debian.org", "gnome@fsf.hu", 
123             "gnome@gnome.hu", "magyar@lists.linux.hu", ), 
124      "id": ("@id.gnome.org", "@gnome.linux.or.id", "mdk-id@yahoogroups.com", 
125             "linux.or.id", "gnome@i15n.org"), 
126      "io": ("gnome-ido@lists.mterry.name", ), 
127      "is": ("gnome@techattack.nu", "kde-isl@mmedia.is", "kde-isl@molar.is", ), 
128      "it": ("debian-l10n-italian@lists.debian.org", "traduzioni@itpug.org", 
129             "fedora-trans-it@redhat.com", "tp@lists.linux.it", ), 
130      "ja": ("debian-doc@debian.or.jp", "debian-japanese@lists.debian.org", 
131             "gnome-translation@gnome.gr.jp", "translation@gnome.gr.jp", 
132             "jpug-doc@ml.postgresql.jp", ), 
133      "ka": ("geognome@googlegroups.com", 
134             "Ubuntu-Georgian-Translators@googlegroups.com", ), 
135      "kk": ("kk_KZ@googlegroups.com", ), 
136      "km": ("@khmeros.info", ), 
137      "kn": ("debian-l10n-kannada@lists.debian.org", ), 
138      "ko": ("gnome-kr-hackers@list.kldp.net", "gnome-kr-hackers@lists.kldp.net", 
139             "gnome-kr-translation@lists.kldp.net", "pgsql-kr@postgresql.or.kr", 
140             "hangul-hackers@lists.kldp.net", 
141             "debian-l10n-korean@lists.debian.org", 
142             "gnome-kr-translation@lists.sourceforge.net", ), 
143      "ks": ("ks-gnome-trans-commits@lists.code.indlinux.net", ), 
144      "ku": ("gnu-ku-wergerandin@lists.sourceforge.net", ), 
145      "ky": ("i18n-team-ky-kyrgyz@lists.sourceforge.net", "ky-li@mail.ru", ), 
146      "la": ("gnome-latin-list@gnome.org", ), 
147      "li": ("li@gnome.org", ), 
148      "lt": ("gimp-lt@lists.akl.lt", "gnome-lt@lists.akl.lt", 
149             "gnome-lt@lists.gnome.org", "komp_lt@konferencijos.lt", ), 
150      "lv": ("lata-l10n@googlegroups.com", "lata-i18n@groups.google.com", 
151             "locale@laka.lv", "ll10nt@os.lv", ), 
152      "mai": ("maithili.sf.net", ), 
153      "mg": ("i18n-malagasy-gnome@gnome.org", ), 
154      "mi": ("maori@nzlinux.org.nz", ), 
155      "mk": ("gnomk-main@lists.sourceforge.net", "lug@lists.linux.net.mk", 
156             "mkde-l10n@lists.sourceforge.net", 
157             "ossm-members@hedona.on.net.mk", ), 
158      "ml": ("smc-discuss@googlegroups.com", ), 
159      "mn": ("openmn-", "openmn.org", ), 
160      "ms": ("gabai-penyumbang@lists.sourceforge.net", 
161             "gabai-penyumbang@lists.sf.net", "kedidiemas@yahoogroups.com", ), 
162      "nb": ("i18n-nb@lister.ping.uio.no", ), 
163      "nds": ("nds-lowgerman@lists.sourceforge.net", ), 
164      "ne": ("info@mpp.org.np", ), 
165      "nl": ("debian-l10n-dutch@lists.debian.org", "vertaling@nl.gnome.org", 
166             "vertaling@vrijschrift.org", "nl@vrijschrift.org", 
167             "vertaling@nl.linux.org", "vertaling@nl.li.org", ), 
168      "nn": ("i18n-nn@lister.ping.uio.no", ), 
169      "nso": ("sepedi@translate.org.za", ), 
170      "or": ("oriya-group@lists.sarovar.org", "oriya-it@googlegroups.com", ), 
171      "pa": ("punjabi-l10n@users.sf.net", "fedora-pa-list@redhat.com", 
172             "punjabi-users@lists.sf.net", "punjabi-l10n@lists.sourceforge.net", 
173             "punlinux-i18n@lists.sourceforge.net", ), 
174      "pl": ("gnomepl@aviary.pl", "debian-l10n-polish@lists.debian.org", 
175             "gnome-l10n@lists.aviary.pl", "translators@gnomepl.org", ), 
176      "ps": ("pathanisation@googelgroups.com", ), 
177      "pt": ("fedora-trans-pt@redhat.org", "gnome_pt@yahoogroups.com", 
178             "traduz@debianpt.org", "traduz@debian.pt", ), 
179      "pt_BR": ("gnome-l10n-br@listas.cipsga.org.br", 
180                "gnome-pt_br-list@gnome.org", "fedora-docs-br@redhat.com", 
181                "fedora-trans-pt-br@redhat.com", "ldp-br@bazar.conectiva.com.br", 
182                "pgbr-dev@postgresql.org.br", 
183                "pgbr-dev@listas.postgresql.org.br", 
184                "debian-l10n-portuguese@lists.debian.org", ), 
185      "ro": ("fedora-ro@googlegroups.com", "gnomero-list@lists.sourceforge.net", 
186             "debian-l10n-romanian@lists.debian.org", ), 
187      "ru": ("pgsql-rus@yahoogroups.com", "debian-l10n-russian@lists.debian.org", 
188             "gnupg-ru@gnupg.org", ), 
189      "sk": ("sk-i18n@lists.linux.sk", "kde-sk@linux.sk", ), 
190      "sl": ("gnome-si@googlegroups.com", ), 
191      "sq": ("gnome-albanian-perkthyesit@lists.sourceforge.net", 
192             "debian-l10n-albanian@lists.debian.org", ), 
193      "sr": ("@prevod.org", "serbiangnome-lista@nongnu.org", ), 
194      "sv": ("debian-l10n-swedish@lists.debian.org", "tp-sv@listor.tp-sv.se", ), 
195      "ta": ("gnome-tamil-translation@googlegroups.com", 
196             "tamilinix@yahoogroups.com", "Ubuntu-l10n-tam@lists.ubuntu.com", 
197             "tamil-DI@yahoogroups.com", ), 
198      "te": ("localisation@swecha.org", 
199             "indlinux-telugu@lists.sourceforge.net", ), 
200      "th": ("l10n@opentle.org", "thai-l10n@googlegroup.com", 
201             "thailang@buraphalinux.org", "thai-l10n@googlegroups.com", 
202             "l10n.opentle.org", ), 
203      "tk": ("kakilikgroup@yahoo.com", ), 
204      "tl": ("debian-tl@banwa.upm.edu.ph", ), 
205      "tr": ("debian-l10n-turkish@lists.debian.org", "gnome-turk@gnome.org", 
206             "gnu-tr-u12a@lists.sourceforge.net", "turkce@pardus.org.tr", ), 
207      "tt": ("tatarish.l10n@gmail.com", ), 
208      "ug": ("gnome-uighur@yahoogroups.com", ), 
209      "uk": ("linux@linux.org.ua", ), 
210      "ur": ("l10n@urduweb.org", "urdu.scs.gift@gmail.com", ), 
211      "ve": ("venda@translate.org.za", ), 
212      "vi": ("gnomevi-list@lists.sourceforge.net", "vi-VN@googlegroups.com", ), 
213      "wa": ("linux-wa@", ), 
214      "xh": ("xh-translate@ubuntu.com", "xhosa@translate.org.za", 
215             "xhosa@ubuntu.com", ), 
216      "zh_CN": ("i18n-translation@lists.linux.net.cn", 
217                "i18n-zh@googlegroups.com", 
218                "translation-team-zh-cn@lists.sourceforge.net", 
219                "i18n-zh@googlegroup.com", ), 
220      "zh_TW": ("zh-l10n@lists.linux.org.tw", "chinese-l10n@googlegroups.com", 
221                "community@linuxhall.org", "zh-l10n@linux.org.tw", ), 
222      "zu": ("zulu@translate.org.za", ), 
223  } 
224  """Language codes with snippets of contact information that can be used to 
225  uniquely identify the language""" 
226   
# Maps a language code to names of that language (English, native spelling
# and observed variants).  guess_language() matches these case-sensitively
# as plain substrings of the team string.
# Every snippet must belong to exactly one code: Marathi is filed under its
# ISO 639-1 code "mr" ("mt" is Maltese), and Malayalam only under "ml".
LANG_TEAM_LANGUAGE_SNIPPETS = {
    "af": ("Afrikaans",),
    "am": ("Amharic",),
    "ang": ("Old English",),
    "ar": ("Arabic",),
    "as": ("Assamese",),
    "ast": ("Asturian",),
    "az": ("Azerbaijani", u"Azərbaycan"),
    "bg": ("Bulgarian",),
    "be@latin": ("Belarusian Latin",),
    "be": ("Belarusian", "Belorussian"),
    "bn_IN": ("Bengali (India)", "Bengali INDIA", "Bengali India"),
    "bn": ("Bangladeshi", "Bengali"),
    "br": ("Breton", "Britton"),
    "bs": ("Bosanski", "Bosnian"),
    "byn": ("Blin",),
    "ca": ("Catalan",),
    "ckb": ("Kurdish (Sorani)",),
    "crh": ("Crimean Tatar", "Crimean Turkish"),
    "cs": ("Czech",),
    "cy": ("Cymru", "Welsh"),
    "da": ("Danish", "Dansk"),
    "de": ("Deutsch", "German"),
    "dz": ("Dzongkha",),
    "el": ("Greek",),
    "en_GB": ("British English", "en_GB", "English (Great Britain)"),
    "eo": ("Esperanto",),
    "es": ("Spanish", "es_ES", u"Español"),
    "et": ("Eesti", "Estonian"),
    "eu": ("Basque", "Euskara"),
    "fa": ("Persian",),
    "fi": ("Finnish", "Suomi"),
    "fo": ("Faroese",),
    "fr": ("French", u"Français"),
    "fur": ("Friulian",),
    "ga": ("Irish",),
    "gez": ("Geez",),
    "gl": ("Galego", "Galician", "Gallegan", "gl_ES"),
    "gu": ("Gujarati",),
    "haw": ("Hawaiian",),
    "he": ("Hebrew",),
    "hi": ("Hindi",),
    "hr": ("Croatian",),
    "hu": ("Hungarian",),
    "hy": ("Armenian",),
    "ia": ("Interlingua",),
    "id": ("Bahasa Indonesia", "Indonesia", "Indonesian"),
    "ig": ("Igbo",),
    "is": ("Icelandic",),
    "it": ("Italian",),
    "ja": ("Japanese",),
    "ka": ("Georgian",),
    "kk": ("Kazakh",),
    "km": ("Khmer",),
    "kn": ("Kannada",),
    "ko": ("Korean", "Hangul"),
    "kok": ("Konkani",),
    "ks": ("Kashmiri",),
    "ku": ("Kurdish",),
    "ky": ("Kitghiz", "Kirghiz"),
    "lg": ("Luganda",),
    "li": ("Limburgish",),
    "lt": ("Lithuanian",),
    "lv": ("Latvian", "lv_LV", "Valoda", u"Latviešu"),
    "mg": ("Malagasy",),
    "mi": ("Maori",),
    "mk": ("Macedonian",),
    "ml": ("Malayalam",),
    "mn": ("Mongolian",),
    "mr": ("Marathi",),
    "ms": ("Malay", "Bahasa Melayu"),
    "mt": ("Maltese",),
    "my": ("Burmese",),
    "nb": ("Norwegian Bokmaal", u"Norsk bokmål", u"Norwegian Bokmål",
           u"Norwegian bokmål"),
    "nds": ("Low Saxon",),
    "nl": ("Dutch", "Nederlands"),
    "nn": ("Norwegian nynorsk", "Nynorsk"),
    "oc": ("Occitan",),
    "or": ("Oriya",),
    "pa": ("Punjabi", "Panjabi"),
    "pl": ("Polish",),
    "ps": ("Pashto", "Pushto"),
    "pt_BR": ("Brazilian Portuguese", u"Português/Brasil",
              u"Português do Brasil"),
    "pt": ("Portuguese",),
    "rm": ("Rhaeto-Romance",),
    "ro": ("Romania", "Romanian", u"Română"),
    "ru": ("Russian",),
    "si": ("Sinhala", "Sinhalese"),
    "sk": ("Slovak",),
    "sl": ("Slovene", "Slovenian"),
    "so": ("Somali",),
    "sq": ("Albanian",),
    "sr": ("Serbian",),
    "sv": ("Swedish",),
    "sw": ("Swahili",),
    "ta": ("Tamil",),
    "te": ("Telugu",),
    "tet": ("Tetum",),
    "tg": ("Tajik",),
    "th": ("Thai",),
    "ti": ("Tigrinya",),
    "tig": ("Tigre",),
    "tl": ("Tagalog",),
    "tr": ("Turkish", u"Türkçe", u"Türkiye"),
    "tt": ("Tatarish",),
    "ug": ("Uighur",),
    "uk": ("Ukrainian",),
    "ur": ("Urdu",),
    "uz": ("Uzbek",),
    "ve": ("Venda", u"Tshivenḓa", "Tshivenda"),
    "vi": ("Vietnamese",),
    "wa": ("Walloon",),
    "wal": ("Walamo",),
    "wo": ("Wolof",),
    "xh": ("Xhosa", "IsiXhosa", "isiXhosa"),
    "yi": ("Yiddish",),
    "yo": ("Yoruba",),
    "zh_CN": ("Chinese Simplified", "Chinese/Simplified",
              "Chinese (simplified)", "Simplified Chinese"),
    "zh_HK": ("Chinese (Hong Kong)",),
    "zh_TW": ("Chinese (traditional)", "Chinese/Traditional",
              "Traditional Chinese"),
}
"""Language codes with snippets of language names, including English, native
spelling and variants, that can be used to uniquely identify the language"""
354 355 356 -def _regex_guesser(prefilter, regex, string, postfilter=None):
357 """Use regular expressions to extract the language team 358 359 @param prefilter: simple filter to apply before attempting the regex 360 @param regex: regular expression with one group that will contain 361 the language code 362 @param string: the language team string that should be examined 363 @param postfilter: filter to apply to reject any potential matches 364 after they have been retreived by the regex 365 @return: ISO language code for the found language 366 """ 367 # TODO instead of a posfilter, have a dictionary of transform rules 368 # e.g. for debian-l10n-albanian a dict of {'russian': 'ru' would allow 369 # transformation. {'default': None} would ensure that anything we 370 # don't understand gets ignored. Or {'default': 'nothing'} means to 371 # nothing. 372 if prefilter in string: 373 found = re.search(regex, string) 374 if found: 375 regex_lang = found.groups()[0] 376 else: 377 return None 378 if postfilter is not None and regex_lang in postfilter: 379 return None 380 if regex_lang and regex_lang != 'en': 381 return regex_lang 382 return None
383
384 385 -def _nofilter(text):
386 """Return the supplied text unchanged""" 387 return text
388
389 390 -def _lower(text):
391 """Convert the supplied text to lowercase""" 392 return text.lower()
393
def _snippet_guesser(snippets_dict, string, filter_=_nofilter):
    """Guess the language based on a snippet of text in the language team
    string.

    All snippets of all languages are tried and the language that owns the
    longest matching snippet wins.  This makes the result independent of the
    dictionary's iteration order, so a specific snippet such as
    'Belarusian Latin' is never shadowed by a shorter one ('Belarusian')
    that happens to be visited first.  If several matching snippets share
    the greatest length, the one visited first is kept.  Empty snippets
    never match.

    @param snippets_dict: A dict of snippets that can be used to identify a
        language in the format {'lang': ('snippet1', 'snippet2'), 'lang2'...}
    @param string: The language string to be analysed
    @param filter_: a function to be applied to the string and snippets
        before examination
    @return: the key of C{snippets_dict} owning the longest snippet found in
        the string, or None if no snippet occurs in it
    """
    string = filter_(string)
    best_lang = None
    best_length = 0
    # .items() instead of the Python-2-only .iteritems(): same pairs,
    # available on both Python 2 and Python 3.
    for possible_lang, snippets in snippets_dict.items():
        for snippet in snippets:
            candidate = filter_(snippet)
            # Check the length first: it is cheaper than the substring scan
            # and lets shorter snippets be skipped once a long one matched.
            if len(candidate) > best_length and candidate in string:
                best_lang = possible_lang
                best_length = len(candidate)
    return best_lang
411
@accepts(unicode)
@returns(IsOneOf(String, type(None)))
def guess_language(team_string):
    """Guess the language of a PO file based on the Language-Team entry.

    Three strategies are tried in order and the first one that produces a
    result wins:
      1. the regular expressions in LANG_TEAM_REGEX,
      2. the contact snippets in LANG_TEAM_CONTACT_SNIPPETS, compared after
         lowercasing both sides,
      3. the language-name snippets in LANG_TEAM_LANGUAGE_SNIPPETS, compared
         as-is.

    @param team_string: the contents of the Language-Team header entry
    @return: the guessed language code, or None if nothing matched
    """
    # Start from None so that an empty LANG_TEAM_REGEX table cannot leave
    # `lang` unbound when the loop body never runs.
    lang = None
    for prefilter, regex, postfilter in LANG_TEAM_REGEX:
        lang = _regex_guesser(prefilter, regex, team_string, postfilter)
        if lang:
            break

    if not lang:
        lang = _snippet_guesser(LANG_TEAM_CONTACT_SNIPPETS, team_string,
                                _lower)

    if not lang:
        lang = _snippet_guesser(LANG_TEAM_LANGUAGE_SNIPPETS, team_string)

    # TODO Maybe clean everything and see if we have a language code only

    # Any falsy result is reported as None, as before.
    return lang or None
436 437 if __name__ == "__main__": 438 from sys import argv 439 from translate.storage import factory 440 for fname in argv[1:]: 441 store = factory.getobject(fname) 442 print fname, guess_language(store.parseheader().get('Language-Team', u"")) 443