tomassedovic · September 13, 2018 06:48 · tomassedovic · Sep 10, 2018
diff --git a/eu_content_id.py b/eu_content_id.py
 # Copyright 2018 by Tomas Sedovic, all rights reserved
 # Contact <[email protected]> for licensing options.

 # NOTE: p=0.05 is good enough for medical research, should be fine here too:
 def content_id(content, library=(), false_positive_percent=5):
     """Return a randomly chosen index into ``library``, or None.

     Despite the name, ``content`` is never inspected: a "match" is reported
     purely at random, with a probability of ``false_positive_percent``
     percent, and the reported index is drawn uniformly from ``library``.

     Args:
         content: Accepted for interface compatibility; not examined.
         library: Sequence of known works. An empty library never matches.
         false_positive_percent: Chance (in percent) of reporting a match.
             Values outside 0..100 are clamped to that range.

     Returns:
         An int index into ``library`` when a match is reported, otherwise
         None.
     """
     import random

     # Divide by a float so the rate stays fractional under Python 2 as well.
     rate = max(0.0, min(1.0, false_positive_percent / 100.0))
     # random.random() lies in [0.0, 1.0), so a strict comparison makes 0%
     # never match and 100% always match; "<=" could still match at 0%.
     found_in_library = random.random() < rate
     if library and found_in_library:
         return random.randrange(len(library))
     return None


 # Usage:
 library = ["Avengers", "Windows 10", "Helter Skelter", "Harry Potter"]
 # NOTE: the library can also be a list of {"name": name, "data": full contents of the work} dicts.
 # You can also supply hashed contents instead to keep the library smaller. The algorithm is very flexible.
 >>> for _ in range(10): print(content_id(3.14159265358979323, library, 10))
 ...
 None
 None
 None
 None
 None
 None
 3
 None
 None
 None
	# Copyright 2018 by Tomas Sedovic, all rights reserved
	# Contact <[email protected]> for licensing options.

	# NOTE: p=0.05 is good enough for medical research, should be fine here too:
	def content_id(content, library=(), false_positive_percent=5):
		"""Return a randomly chosen index into ``library``, or None.

		Despite the name, ``content`` is never inspected: a "match" is reported
		purely at random, with a probability of ``false_positive_percent``
		percent, and the reported index is drawn uniformly from ``library``.

		Args:
		    content: Accepted for interface compatibility; not examined.
		    library: Sequence of known works. An empty library never matches.
		    false_positive_percent: Chance (in percent) of reporting a match.
		        Values outside 0..100 are clamped to that range.

		Returns:
		    An int index into ``library`` when a match is reported, otherwise
		    None.
		"""
		import random

		# Divide by a float so the rate stays fractional under Python 2 as well.
		rate = max(0.0, min(1.0, false_positive_percent / 100.0))
		# random.random() lies in [0.0, 1.0), so a strict comparison makes 0%
		# never match and 100% always match; "<=" could still match at 0%.
		found_in_library = random.random() < rate
		if library and found_in_library:
			return random.randrange(len(library))
		return None


	# Usage:
	library = ["Avengers", "Windows 10", "Helter Skelter", "Harry Potter"]
	# NOTE: the library can also be a list of {"name": name, "data": full contents of the work} dicts.
	# You can also supply hashed contents instead to keep the library smaller. The algorithm is very flexible.
	>>> for _ in range(10): print(content_id(3.14159265358979323, library, 10))
	...
	None
	None
	None
	None
	None
	None
	3
	None
	None
	None