Last active
June 12, 2019 11:22
-
-
Save flodolo/5051d8063c00b4d5d11ae373dbe7a8d1 to your computer and use it in GitHub Desktop.
Check for markup
#!/usr/bin/env python3 | |
import argparse | |
import codecs | |
import json | |
import logging | |
import os | |
import re | |
import six | |
import sys | |
logging.basicConfig() | |
try: | |
from compare_locales import parser | |
except ImportError as e: | |
print('FATAL: make sure that dependencies are installed') | |
print(e) | |
sys.exit(1) | |
class StringExtraction():
    '''
    Extract strings from localization files and report localized strings
    containing '<' when the matching reference (en-US) string does not.

    Typical usage: extract the reference locale first (setLocale('en-US'),
    setRepositoryPath(), extractStrings()), then, for each other locale,
    repeat the same calls followed by checkIssues().
    '''

    def __init__(self):
        '''Initialize object.'''

        # Set defaults
        self.supported_formats = [
            '.dtd',
        ]
        # Results of the last extraction
        self.file_list = []
        self.translations = {}
        # Reference data, filled by getReferenceStringsMinor(). Defined here
        # so that checkIssues() works even if no reference string was found.
        self.minor_strings = []
        self.reference_ids = set()

    def setLocale(self, locale):
        '''Set current locale. 'en-US' is treated as the reference locale.'''

        self.reference_locale = locale == 'en-US'
        self.locale = locale

    def setRepositoryPath(self, path):
        '''Set path to repository.'''

        # Strip trailing '/' from repository path
        self.repository_path = path.rstrip(os.path.sep)

    def extractFileList(self):
        '''
        Extract the sorted list of supported files, walking the repository
        path recursively (symbolic links are followed).
        '''

        # str.endswith() accepts a tuple: one test per file, and a file can
        # never be added twice even if several suffixes match.
        suffixes = tuple(self.supported_formats)

        self.file_list = []
        for root, dirs, files in os.walk(
                self.repository_path, followlinks=True):
            for file_name in files:
                if file_name.endswith(suffixes):
                    self.file_list.append(os.path.join(root, file_name))
        self.file_list.sort()

    def getRelativePath(self, file_name):
        '''
        Get the path of a file relative to the repository path.

        file_name is expected to start with the repository path: the
        repository path and the following separator are sliced off.
        '''

        return file_name[len(self.repository_path) + 1:]

    def extractStrings(self):
        '''
        Extract strings from all supported files.

        Fills self.translations, mapping '<relative path>:<entity>' to the
        entity's raw_val. For the reference locale, reference data is then
        stored once, after all files have been processed.
        '''

        # Create a list of files to analyze
        self.extractFileList()

        self.translations = {}
        for file_name in self.file_list:
            file_extension = os.path.splitext(file_name)[1]
            relative_path = self.getRelativePath(file_name)

            file_parser = parser.getParser(file_extension)
            file_parser.readFile(file_name)
            try:
                entities = file_parser.parse()
                for entity in entities:
                    # Ignore Junk
                    if isinstance(entity, parser.Junk):
                        continue
                    string_id = u'{0}:{1}'.format(
                        relative_path, six.text_type(entity))
                    self.translations[string_id] = entity.raw_val
            except Exception as e:
                # Best effort: report the broken file and keep going
                print('Error parsing file: {0}'.format(file_name))
                print(e)

        # Store reference strings. Done after the loop, so it runs exactly
        # once, sees every extracted string, and also runs when no supported
        # file was found.
        if self.reference_locale:
            self.getReferenceStringsMinor()

    def getReferenceStringsMinor(self):
        '''
        Store reference data from the current translations: the IDs of all
        strings (reference_ids) and the IDs of strings containing '<'
        (minor_strings).
        '''

        # Snapshot the IDs: self.translations is replaced when the next
        # locale is extracted, while reference data must stay untouched.
        self.reference_ids = set(self.translations)
        self.minor_strings = [
            string_id
            for string_id, translation in self.translations.items()
            if '<' in translation
        ]

    def checkIssues(self):
        '''
        Print the strings of the current locale that contain '<' while the
        reference string with the same ID does not. Strings missing from the
        reference are ignored. Nothing is printed if there are no issues.
        '''

        # Build the set once for constant-time lookups in the loop
        minor_strings = set(self.minor_strings)

        issues = []
        for string_id, translation in self.translations.items():
            if '<' not in translation:
                continue
            if string_id not in self.reference_ids:
                # Obsolete string
                continue
            if string_id not in minor_strings:
                issues.append('{}: {}'.format(string_id, translation))

        if issues:
            print('\n\nLocale: {}'.format(self.locale))
            print('\n'.join(issues))
def main(argv=None):
    '''
    Extract reference (en-US) strings, then check every locale folder for
    strings containing '<' where the reference string does not.

    argv is the list of command line arguments; when None, sys.argv[1:] is
    used. Every option defaults to the value that used to be hard-coded, so
    running the script without arguments behaves as before.
    '''

    arg_parser = argparse.ArgumentParser(
        description='Check localized strings for unexpected markup')
    arg_parser.add_argument(
        '--locales-path',
        default='/Users/flodolo/mozilla/mercurial/l10n_clones/locales',
        help='Folder containing one subfolder per locale')
    arg_parser.add_argument(
        '--reference-path',
        default='/Users/flodolo/mozilla/mercurial/gecko-strings-quarantine',
        help='Folder containing the reference en-US strings')
    arg_parser.add_argument(
        '--ignore', nargs='*', default=[], metavar='LOCALE',
        help='Locales to skip')
    args = arg_parser.parse_args(argv)

    repos_path = args.locales_path
    # Each locale is a folder: skip hidden entries and stray files
    locales = [
        x for x in os.listdir(repos_path)
        if not x.startswith('.') and os.path.isdir(os.path.join(repos_path, x))
    ]
    locales = sorted(set(locales) - set(args.ignore))

    # Initialize class
    extracted_strings = StringExtraction()

    # Extract strings for en-US, and keep them stored for comparison later
    print('Extracting reference en-US strings')
    extracted_strings.setLocale('en-US')
    extracted_strings.setRepositoryPath(args.reference_path)
    extracted_strings.extractStrings()

    # Check other locales
    print('Checking other locales ({})'.format(len(locales)))
    for locale in locales:
        locale_path = os.path.join(repos_path, locale)
        extracted_strings.setRepositoryPath(locale_path)
        extracted_strings.setLocale(locale)
        extracted_strings.extractStrings()
        extracted_strings.checkIssues()


if __name__ == '__main__':
    main()
#!/usr/bin/env bash

# Run the DTD markup check from the l10n clones folder, using its virtualenv.

# Stop if the folder is missing, instead of running the next commands from
# whatever the current directory happens to be.
cd /Users/flodolo/mozilla/mercurial/l10n_clones/ || exit 1

# Stop if the virtualenv cannot be activated, instead of silently falling
# back to the system Python.
source venv/bin/activate || exit 1

# Print the compare-locales version in use
compare-locales --version
python check_dtd_bug1539759.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment