Skip to content

Instantly share code, notes, and snippets.

@rhizoome
Created December 15, 2016 14:44
Show Gist options
  • Save rhizoome/65fd734f84c4dcf13a6c324fcd4d3e28 to your computer and use it in GitHub Desktop.
Save rhizoome/65fd734f84c4dcf13a6c324fcd4d3e28 to your computer and use it in GitHub Desktop.
def match_licenses(self):
    """Match license text against loaded licenses.

    The match runs in two stages:

    1. Prefilter: every entry of ``self._known_licenses`` is scored with
       ``set_ratio`` and the digests scoring above a threshold are kept.
       The threshold starts at 100 and is lowered by 3 per pass for as
       long as fewer than ``margin`` candidates were found and the
       threshold is still positive.
    2. Confirm: each candidate is scored with ``partial_ratio`` on the
       texts after ``clean_license`` matches have been removed. For a
       score above 95 the name of every license stored under that digest
       is added to ``self.licenses``, replaced by its ``_match_aliases``
       entry when one exists.

    Returns nothing; results are accumulated in ``self.licenses``.
    """
    known = self._known_licenses
    # Target number of prefiltered candidates: a twentieth of the known
    # licenses, capped at 20.
    margin = min(20, len(known) / 20)
    # Bound before the loop: when margin is 0 (e.g. no known licenses) the
    # loop body never runs, and the code below must still see a list
    # instead of failing on an unbound local.
    to_check = []
    found = 0
    test_ratio = 103  # lowered by 3 before first use, so passes start at 100
    while found < margin and test_ratio > 0:
        to_check = []
        test_ratio -= 3
        # NOTE(review): every pass re-scores all licenses; presumably
        # set_ratio memoises in self._cache under the given key, which
        # would make the repeated passes cheap -- confirm.
        for digest, copies in known.items():
            ratio = set_ratio(
                self._cache,
                (self.license_hash_lower, digest),
                self.license_text_lower,
                copies[0].text.lower(),
            )
            if ratio > test_ratio:
                to_check.append(digest)
        found = len(to_check)
    lg.debug(
        "Prefiltered %d of %d licenses",
        len(to_check),
        len(known)
    )
    if not to_check:
        # Nothing survived the prefilter, so there is nothing to confirm.
        return
    # Loop-invariant: clean our own text once instead of once per candidate.
    own_text = clean_license.sub('', self.license_text_lower)
    for digest in to_check:
        copies = known[digest]
        # NOTE(review): this passes the same cache and key as the set_ratio
        # call above; verify the helpers keep their cached results apart.
        ratio = partial_ratio(
            self._cache,
            (self.license_hash_lower, digest),
            own_text,
            clean_license.sub('', copies[0].text.lower()),
        )
        lg.debug(
            "Check %s for match against %s (%02d)",
            self.name,
            copies[0].name,
            ratio
        )
        if ratio > 95:
            # All entries under one digest are recorded; only the first
            # one's text was used for scoring.
            for license_copy in copies:
                name = license_copy.name
                self.licenses.add(_match_aliases.get(name, name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment