pennz · July 24, 2020 01:03
diff --git a/kg_get_entities.py b/kg_get_entities.py
 def get_entities(sent):
     """Extract a (subject, object) entity pair from a sentence.

     Fork of https://gist.github.com/prateekjoshi565/241aee30cc95aaf54a3533c2ec0f0b40#file-kg_extract_entities-py
     with extra comments and two fixes (substring tests on the dependency
     label, and no doubled spaces inside the assembled entities).

     The sentence is parsed with the module-level callable ``nlp``, which is
     defined outside this function (presumably a loaded spaCy pipeline --
     confirm against the caller). Only ``tok.dep_`` and ``tok.text`` of each
     produced token are used.

     Args:
         sent: The sentence to analyse; passed straight to ``nlp``.

     Returns:
         A two-element list ``[subject, object]``. Either element is ``""``
         when no token with a matching dependency label was seen. When several
         subjects or objects occur, the last one wins.
     """
     ## chunk 1: state carried across tokens
     ent1 = ""
     ent2 = ""

     prv_tok_dep = ""   # dependency tag of the previous non-punctuation token
     prv_tok_text = ""  # text of the previous non-punctuation token

     prefix = ""
     modifier = ""

     for tok in nlp(sent):
         ## chunk 2: collect compound / modifier words preceding an entity
         # Punctuation is skipped entirely, so it never becomes the
         # "previous token" either.
         if tok.dep_ == "punct":
             continue

         if tok.dep_ == "compound":
             prefix = tok.text
             # Two compounds in a row are glued together.
             if prv_tok_dep == "compound":
                 prefix = prv_tok_text + " " + tok.text

         if tok.dep_.endswith("mod"):
             modifier = tok.text
             # A compound directly before the modifier is kept with it.
             if prv_tok_dep == "compound":
                 modifier = prv_tok_text + " " + tok.text

         ## chunk 3: subject found -> build ent1 and clear the tmp variables
         # Substring test instead of `find(...) == True`: the latter only
         # matched when "subj" started at index 1 (e.g. "nsubj") and missed
         # labels that start with the substring itself.
         if "subj" in tok.dep_:
             # Join only the non-empty parts so an empty prefix/modifier does
             # not leave a doubled space inside the entity.
             ent1 = " ".join(
                 part for part in (modifier, prefix, tok.text) if part
             )
             prefix = ""
             modifier = ""
             # prv_tok_dep / prv_tok_text are not reset here: they are
             # unconditionally overwritten in chunk 5 below.

         ## chunk 4: object found -> build ent2
         # prefix/modifier are intentionally left untouched here, as in the
         # original implementation.
         if "obj" in tok.dep_:
             ent2 = " ".join(
                 part for part in (modifier, prefix, tok.text) if part
             )

         ## chunk 5: remember this token for the next iteration
         prv_tok_dep = tok.dep_
         prv_tok_text = tok.text

     return [ent1.strip(), ent2.strip()]
	def get_entities(sent):
	    """Extract a (subject, object) entity pair from a sentence.

	    Fork of https://gist.github.com/prateekjoshi565/241aee30cc95aaf54a3533c2ec0f0b40#file-kg_extract_entities-py
	    with extra comments and two fixes (substring tests on the dependency
	    label, and no doubled spaces inside the assembled entities).

	    The sentence is parsed with the module-level callable ``nlp``, which is
	    defined outside this function (presumably a loaded spaCy pipeline --
	    confirm against the caller). Only ``tok.dep_`` and ``tok.text`` of each
	    produced token are used.

	    Args:
	        sent: The sentence to analyse; passed straight to ``nlp``.

	    Returns:
	        A two-element list ``[subject, object]``. Either element is ``""``
	        when no token with a matching dependency label was seen. When several
	        subjects or objects occur, the last one wins.
	    """
	    ## chunk 1: state carried across tokens
	    ent1 = ""
	    ent2 = ""

	    prv_tok_dep = ""   # dependency tag of the previous non-punctuation token
	    prv_tok_text = ""  # text of the previous non-punctuation token

	    prefix = ""
	    modifier = ""

	    for tok in nlp(sent):
	        ## chunk 2: collect compound / modifier words preceding an entity
	        # Punctuation is skipped entirely, so it never becomes the
	        # "previous token" either.
	        if tok.dep_ == "punct":
	            continue

	        if tok.dep_ == "compound":
	            prefix = tok.text
	            # Two compounds in a row are glued together.
	            if prv_tok_dep == "compound":
	                prefix = prv_tok_text + " " + tok.text

	        if tok.dep_.endswith("mod"):
	            modifier = tok.text
	            # A compound directly before the modifier is kept with it.
	            if prv_tok_dep == "compound":
	                modifier = prv_tok_text + " " + tok.text

	        ## chunk 3: subject found -> build ent1 and clear the tmp variables
	        # Substring test instead of `find(...) == True`: the latter only
	        # matched when "subj" started at index 1 (e.g. "nsubj") and missed
	        # labels that start with the substring itself.
	        if "subj" in tok.dep_:
	            # Join only the non-empty parts so an empty prefix/modifier does
	            # not leave a doubled space inside the entity.
	            ent1 = " ".join(
	                part for part in (modifier, prefix, tok.text) if part
	            )
	            prefix = ""
	            modifier = ""
	            # prv_tok_dep / prv_tok_text are not reset here: they are
	            # unconditionally overwritten in chunk 5 below.

	        ## chunk 4: object found -> build ent2
	        # prefix/modifier are intentionally left untouched here, as in the
	        # original implementation.
	        if "obj" in tok.dep_:
	            ent2 = " ".join(
	                part for part in (modifier, prefix, tok.text) if part
	            )

	        ## chunk 5: remember this token for the next iteration
	        prv_tok_dep = tok.dep_
	        prv_tok_text = tok.text

	    return [ent1.strip(), ent2.strip()]
No results found