Skip to content

Instantly share code, notes, and snippets.

@Norod
Last active March 28, 2023 08:34
Show Gist options
  • Save Norod/c5e6174e6e9c1274eaac92b6251fbb10 to your computer and use it in GitHub Desktop.
Save Norod/c5e6174e6e9c1274eaac92b6251fbb10 to your computer and use it in GitHub Desktop.
Input Hebrew text with diacritics (Nikud / Niqqud) and output the same text without them.
#pip install hebrew
import sys
from hebrew import Hebrew
def open_input_file_name(file_name):
    """Open *file_name* for reading as UTF-8 text and return the file object.

    The encoding is fixed to UTF-8 rather than left to the platform default,
    so Hebrew text decodes the same way on every system.

    On failure an error message is printed and the process exits with
    status 1; the function does not return in that case.
    """
    try:
        return open(file_name, "r", encoding="utf-8")
    except FileNotFoundError:
        print("Error: Input file does not exist")
        sys.exit(1)
    except OSError as err:
        # Permission problems, a directory passed as a file, etc.
        print("Error: Cannot open input file:", err)
        sys.exit(1)
def open_output_file_name(file_name):
    """Open *file_name* for writing as UTF-8 text and return the file object.

    An existing file is truncated (mode "w"). The encoding is fixed to UTF-8
    rather than left to the platform default.

    On failure an error message is printed and the process exits with
    status 1; the function does not return in that case.
    """
    try:
        return open(file_name, "w", encoding="utf-8")
    except OSError as err:
        # Catch only I/O failures; a bare except would also trap
        # KeyboardInterrupt and SystemExit.
        print("Unexpected error:", err)
        sys.exit(1)
def remove_nikud(input_file, output_file):
    """Copy *input_file* to *output_file* line by line without niqqud.

    Each line is wrapped in ``Hebrew``, passed through ``no_niqqud()``, and
    the resulting ``.string`` is written to *output_file*.

    Returns the number of lines read from *input_file*.
    """
    count = 0
    for count, raw_line in enumerate(input_file, start=1):
        stripped = Hebrew(raw_line).no_niqqud()
        output_file.write(stripped.string)
    return count
# Main
def main():
    """Command-line entry point: ``script INPUT_FILE OUTPUT_FILE``.

    Reads the input and output file names from ``sys.argv``, writes the
    input text without niqqud to the output file, and prints the number of
    lines processed. Prints an error and exits with status 1 when an
    argument is missing or when both names refer to the same file.
    """
    import os  # local import: only needed for the same-file guard below

    print("Remove Nikud")
    print("============")
    # First argument is the input file name
    if len(sys.argv) < 2:
        print("Error: No input file name")
        sys.exit(1)
    input_file_name = sys.argv[1]
    # The with-blocks close both files on every exit path, including
    # sys.exit() and exceptions raised while processing.
    with open_input_file_name(input_file_name) as input_file:
        # Second argument is the output file name
        if len(sys.argv) < 3:
            print("Error: No output file name")
            sys.exit(1)
        output_file_name = sys.argv[2]
        # Opening the output in "w" mode truncates it; if it is the same
        # file as the input, the text would be destroyed before being read.
        if os.path.exists(output_file_name) and os.path.samefile(
            input_file_name, output_file_name
        ):
            print("Error: Input and output files must be different")
            sys.exit(1)
        with open_output_file_name(output_file_name) as output_file:
            lines_processed = remove_nikud(input_file, output_file)
    print("Lines processed: " + str(lines_processed))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment