Created
December 15, 2016 14:44
-
-
Save rhizoome/65fd734f84c4dcf13a6c324fcd4d3e28 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def match_licenses(self):
    """Match license text against loaded licenses.

    Works in two stages:

    1. Prefilter: every known license is scored with ``set_ratio`` against
       this object's lower-cased license text.  The acceptance threshold
       starts at 100 and is lowered by 3 per pass until at least ``margin``
       candidates pass or the threshold is no longer positive.
    2. Confirm: each candidate is re-scored with ``partial_ratio`` on the
       texts after ``clean_license.sub('', ...)``.  A score above 95 counts
       as a match.

    For each match, the name of every entry stored under that digest is
    added to ``self.licenses``, translated through ``_match_aliases`` when
    an alias exists.

    Reads ``self._known_licenses`` (digest -> sequence of objects exposing
    ``.text`` and ``.name``), ``self._cache``, ``self.license_hash_lower``,
    ``self.license_text_lower`` and ``self.name``.  Returns ``None``; the
    result is the mutation of ``self.licenses``.
    """
    known = self._known_licenses
    # Stop prefiltering once this many candidates pass (at most 20).
    margin = min(20, len(known) / 20)
    # Bound before the loop: with no known licenses ``margin`` is 0, the
    # loop body never runs, and the code below would otherwise hit an
    # unbound ``to_check``.
    to_check = []
    found = 0
    # Starts at 103 because it is decremented before first use, so the
    # first threshold actually applied is 100.
    test_ratio = 103
    while found < margin and test_ratio > 0:
        to_check = []
        test_ratio -= 3
        for digest, copies in known.items():
            ratio = set_ratio(
                self._cache,
                (self.license_hash_lower, digest),
                self.license_text_lower,
                copies[0].text.lower()
            )
            if ratio > test_ratio:
                to_check.append(digest)
        found = len(to_check)
    lg.debug(
        "Prefiltered %d of %d licenses",
        len(to_check),
        len(known)
    )
    # Loop-invariant: clean our own text once, not once per candidate.
    cleaned_text = clean_license.sub('', self.license_text_lower)
    for digest in to_check:
        copies = known[digest]
        # All entries under one digest are compared via the first entry.
        # NOTE(review): presumably they share the same text -- confirm.
        ratio = partial_ratio(
            self._cache,
            (self.license_hash_lower, digest),
            cleaned_text,
            clean_license.sub('', copies[0].text.lower())
        )
        lg.debug(
            "Check %s for match against %s (%02d)",
            self.name,
            copies[0].name,
            ratio
        )
        if ratio > 95:
            for license_copy in copies:
                name = license_copy.name
                # Record the alias when one is defined, else the raw name.
                self.licenses.add(_match_aliases.get(name, name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment