NewscatcherAPI · July 26, 2023 01:13
diff --git a/company_acquisition_func.py b/company_acquisition_func.py
 def is_company_acquisition(headline_doc):
     """Return True when the headline looks like a company acquisition.

     A headline qualifies when at least one token has the lemma ``'acquire'``
     and the document carries at least two entities labelled ``'ORG'``.
     Comparing lemmas instead of the surface text is presumably meant to match
     inflected forms of the verb as well -- confirm against the tagger in use.

     Args:
         headline_doc: parsed headline; iterated for tokens exposing
             ``lemma_`` and queried for ``ents`` whose items expose ``label_``.

     Returns:
         bool: ``False`` for an empty document, for a document without the
         ``'acquire'`` lemma, or with fewer than two ``'ORG'`` entities.
     """
     # The entity check is only reached when the verb is present.
     if not any(token.lemma_ == 'acquire' for token in headline_doc):
         return False
     org_count = sum(1 for ent in headline_doc.ents if ent.label_ == 'ORG')
     return org_count >= 2
diff --git a/detect_acquisition.py b/detect_acquisition.py
 def detect_acquisition(headline_doc):
     """Print the acquisition found in ``headline_doc``, or that none was found.

     Prints ``"<headline> --> <acquirer> acquires <acquired>"`` when the
     headline passes ``is_company_acquisition`` and both ``find_acquired`` and
     ``find_acquirer`` produce a non-empty result.  In every other case it
     prints ``"<headline> --> no acquisition detected"``, so each headline
     yields exactly one line of output.  Returns ``None``.
     """
     headline = str(headline_doc)
     if is_company_acquisition(headline_doc):
         # Run each extractor once and reuse its result for the message.
         acquired = find_acquired(headline_doc)
         acquirer = find_acquirer(headline_doc)
         # Truthiness covers both "nothing found" conventions: None and [].
         if acquired and acquirer:
             print(f"{headline} --> {acquirer} acquires {acquired}")
             return
     print(headline + " --> " + "no acquisition detected")
diff --git a/displacy_example.py b/displacy_example.py
 # Parse one sample headline and serve its dependency tree with displaCy.
 doc = nlp(
     "PayPal decided to acquire Paidy Inc. and Microsoft"
 )
 spacy.displacy.serve(doc, style="dep")
diff --git a/find_acquired.py b/find_acquired.py
 def find_acquired(headline_doc):
     """Return the noun chunks naming the organisation(s) being acquired.

     A token marks an acquisition target when its entity type is ``'ORG'``,
     its syntactic head has the lemma ``'acquire'`` and its dependency label
     is ``'attr'`` or ``'dobj'``.  ``'ORG'`` tokens among its right-hand
     children with the label ``'conj'`` count as further targets.

     Args:
         headline_doc: parsed headline; iterated token by token and queried
             for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
             against callers).

     Returns:
         list: the matching noun chunks, de-duplicated, each target followed
         by its coordinated targets; empty when nothing matches.
     """
     acquired_list = []

     def collect_chunks(org_token):
         # Chunks come from the document that was passed in, not from a
         # module-level ``doc``, so the result always belongs to this headline.
         for chunk in headline_doc.noun_chunks:
             if org_token in chunk and chunk not in acquired_list:
                 acquired_list.append(chunk)

     for token in headline_doc:
         is_target = (
             token.ent_type_ == 'ORG'
             and token.head.lemma_ == 'acquire'
             and token.dep_ in ('attr', 'dobj')
         )
         if not is_target:
             continue
         collect_chunks(token)
         for tright in token.rights:
             # Exact comparison: ``in ('conj')`` would be a substring test
             # against the plain string 'conj'.
             if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
                 collect_chunks(tright)
     return acquired_list
diff --git a/find_acquirer.py b/find_acquirer.py
 def find_acquirer(headline_doc):
     """Return the noun chunks naming the acquiring organisation(s).

     A token marks an acquirer when its entity type is ``'ORG'`` and its
     dependency label is ``'nsubj'`` or ``'ROOT'``.  ``'ORG'`` tokens among its
     right-hand children with the label ``'conj'`` count as further acquirers.

     Args:
         headline_doc: parsed headline; iterated token by token and queried
             for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
             against callers).

     Returns:
         The matching noun chunks as a de-duplicated list, each acquirer
         followed by its coordinated acquirers, or ``None`` when nothing
         matches.
     """
     acquirer_list = []

     def collect_chunks(org_token):
         # Chunks come from the document that was passed in, not from a
         # module-level ``doc``, so the result always belongs to this headline.
         for chunk in headline_doc.noun_chunks:
             if org_token in chunk and chunk not in acquirer_list:
                 acquirer_list.append(chunk)

     for token in headline_doc:
         if token.ent_type_ != 'ORG' or token.dep_ not in ('nsubj', 'ROOT'):
             continue
         collect_chunks(token)
         for tright in token.rights:
             # Exact comparison: ``in ('conj')`` would be a substring test
             # against the plain string 'conj'.
             if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
                 collect_chunks(tright)
     # Callers test the result against None, so an empty list is not returned.
     return acquirer_list or None
diff --git a/full_script.py b/full_script.py
 import spacy
 nlp = spacy.load('en_core_web_lg')


 # 2. preset headlines used to exercise the detector
 headlines = [
     "PayPal to acquire Paidy Inc.",
     "Pump solutions and water technology company Grundfos has entered into an agreement to acquire Mechanical Equipment Company (MECO) for an undisclosed sum.",
     "Cavaliers acquire Lauri Markkanen",
     "Mastercard to acquire CipherTrace",
     "TransUnion to acquire Neustar and Newstar",
     "Microsoft enters into agreement to acquire Myhotelshop to help hotels optimize guest acquisition",
     "CDK to help car buyers acquire insurance",
     "Tyber Medical reaches agreement to acquire CatapultMD",
     "Rebels acquire forward Stevenson from Blades",
     "Google to acquire 1.3M-SF Manhattan office for $2.1B",
     "PayPal decided to acquire Paidy Inc. and Microsoft",
 ]

 # Parse every headline once, keeping the results in headline order.
 docs = list(map(nlp, headlines))

 # Print the (text, label) pair of each entity found in every headline.
 for doc in docs:
     print([(entity.text, entity.label_) for entity in doc.ents])

 def is_company_acquisition(headline_doc):
     """Return True when the headline looks like a company acquisition.

     A headline qualifies when at least one token has the lemma ``'acquire'``
     and the document carries at least two entities labelled ``'ORG'``.
     Comparing lemmas instead of the surface text is presumably meant to match
     inflected forms of the verb as well -- confirm against the tagger in use.

     Args:
         headline_doc: parsed headline; iterated for tokens exposing
             ``lemma_`` and queried for ``ents`` whose items expose ``label_``.

     Returns:
         bool: ``False`` for an empty document, for a document without the
         ``'acquire'`` lemma, or with fewer than two ``'ORG'`` entities.
     """
     # The entity check is only reached when the verb is present.
     if not any(token.lemma_ == 'acquire' for token in headline_doc):
         return False
     org_count = sum(1 for ent in headline_doc.ents if ent.label_ == 'ORG')
     return org_count >= 2



 # 4. find the dependency
 # 4.1 the acquired company must be a direct dependent of the verb "acquire"
 def find_acquired(headline_doc):
     """Return the noun chunks naming the organisation(s) being acquired.

     A token marks an acquisition target when its entity type is ``'ORG'``,
     its syntactic head has the lemma ``'acquire'`` and its dependency label
     is ``'attr'`` or ``'dobj'``.  ``'ORG'`` tokens among its right-hand
     children with the label ``'conj'`` count as further targets.

     Args:
         headline_doc: parsed headline; iterated token by token and queried
             for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
             against callers).

     Returns:
         The matching noun chunks as a de-duplicated list, each target
         followed by its coordinated targets, or ``None`` when nothing
         matches.
     """
     acquired_list = []

     def collect_chunks(org_token):
         # Chunks come from the document that was passed in, not from a
         # module-level ``doc``, so the result always belongs to this headline.
         for chunk in headline_doc.noun_chunks:
             if org_token in chunk and chunk not in acquired_list:
                 acquired_list.append(chunk)

     for token in headline_doc:
         is_target = (
             token.ent_type_ == 'ORG'
             and token.head.lemma_ == 'acquire'
             and token.dep_ in ('attr', 'dobj')
         )
         if not is_target:
             continue
         collect_chunks(token)
         for tright in token.rights:
             # Exact comparison: ``in ('conj')`` would be a substring test
             # against the plain string 'conj'.
             if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
                 collect_chunks(tright)
     # Callers test the result against None, so an empty list is not returned.
     return acquired_list or None

 # 5. find the acquirer
 def find_acquirer(headline_doc):
     """Return the noun chunks naming the acquiring organisation(s).

     A token marks an acquirer when its entity type is ``'ORG'`` and its
     dependency label is ``'nsubj'`` or ``'ROOT'``.  ``'ORG'`` tokens among its
     right-hand children with the label ``'conj'`` count as further acquirers.

     Args:
         headline_doc: parsed headline; iterated token by token and queried
             for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
             against callers).

     Returns:
         The matching noun chunks as a de-duplicated list, each acquirer
         followed by its coordinated acquirers, or ``None`` when nothing
         matches.
     """
     acquirer_list = []

     def collect_chunks(org_token):
         # Chunks come from the document that was passed in, not from a
         # module-level ``doc``, so the result always belongs to this headline.
         for chunk in headline_doc.noun_chunks:
             if org_token in chunk and chunk not in acquirer_list:
                 acquirer_list.append(chunk)

     for token in headline_doc:
         if token.ent_type_ != 'ORG' or token.dep_ not in ('nsubj', 'ROOT'):
             continue
         collect_chunks(token)
         for tright in token.rights:
             # Exact comparison: ``in ('conj')`` would be a substring test
             # against the plain string 'conj'.
             if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
                 collect_chunks(tright)
     # Callers test the result against None, so an empty list is not returned.
     return acquirer_list or None


 # 6. combine the checks to report who is acquiring whom
 def detect_acquisition(headline_doc):
     """Print the acquisition found in ``headline_doc``, or that none was found.

     Prints ``"<headline> --> <acquirer> acquires <acquired>"`` when the
     headline passes ``is_company_acquisition`` and both ``find_acquired`` and
     ``find_acquirer`` produce a non-empty result.  In every other case it
     prints ``"<headline> --> no acquisition detected"``, so each headline
     yields exactly one line of output.  Returns ``None``.
     """
     headline = str(headline_doc)
     if is_company_acquisition(headline_doc):
         # Run each extractor once and reuse its result for the message.
         acquired = find_acquired(headline_doc)
         acquirer = find_acquirer(headline_doc)
         # Truthiness covers both "nothing found" conventions: None and [].
         if acquired and acquirer:
             print(f"{headline} --> {acquirer} acquires {acquired}")
             return
     print(headline + " --> " + "no acquisition detected")

 # showcase: parse each preset headline and print the detection result
 for headline in headlines:
    # NOTE(review): the parsed headline is bound to the module-level name
    # `doc`; any helper that reads a global `doc` relies on this binding,
    # so keep the name when editing this loop.
    doc = nlp(headline)
    detect_acquisition(doc)
diff --git a/load_testing_headlines.py b/load_testing_headlines.py
 # Preset headlines used to exercise the detector.
 headlines = [
     "PayPal to acquire Paidy Inc.",
     "Pump solutions and water technology company Grundfos has entered into an agreement to acquire Mechanical Equipment Company (MECO) for an undisclosed sum.",
     "Cavaliers acquire Lauri Markkanen",
     "Mastercard to acquire CipherTrace",
     "TransUnion to acquire Neustar and Newstar",
     "Microsoft enters into agreement to acquire Myhotelshop to help hotels optimize guest acquisition",
     "CDK to help car buyers acquire insurance",
     "Tyber Medical reaches agreement to acquire CatapultMD",
     "Rebels acquire forward Stevenson from Blades",
     "Google to acquire 1.3M-SF Manhattan office for $2.1B",
     "PayPal decided to acquire Paidy Inc. and Microsoft",
 ]

 # Parse every headline once, keeping the results in headline order.
 docs = list(map(nlp, headlines))

 # Print the (text, label) pair of each entity found in every headline.
 for doc in docs:
     print([(entity.text, entity.label_) for entity in doc.ents])
diff --git a/newscatcher_sdk_test.py b/newscatcher_sdk_test.py
 from newscatcherapi import NewsCatcherApiClient
 # API client; 'YOUR_API_KEY' is a placeholder to be replaced with a real key.
 newscatcherapi = NewsCatcherApiClient(x_api_key='YOUR_API_KEY')

 # Query for articles with "acquire" in the title; every argument is handed
 # to ``get_search`` exactly as written here.
 acquisition_articles = newscatcherapi.get_search(
     q='acquire',
     search_in='title',
     lang='en',
     from_='24 hours ago',
     sources='prnewswire.com, businesswire.com',
     page_size=100,
     page=1,
 )

 # Print the title of every article in the response.
 for article in acquisition_articles['articles']:
     print(article['title'])
diff --git a/package_load.py b/package_load.py
 import spacy
 import time
 nlp = spacy.load('en_core_web_lg')
diff --git a/print_results.py b/print_results.py
 # Parse each headline, print the detection result, then pause briefly.
 for headline in headlines:
    # NOTE(review): the parsed headline is bound to the module-level name
    # `doc`; any helper that reads a global `doc` relies on this binding,
    # so keep the name when editing this loop.
    doc = nlp(headline)
    detect_acquisition(doc)
    # Wait half a second before handling the next headline.
    time.sleep(0.5)
	def is_company_acquisition(headline_doc):
	    """Return True when the headline looks like a company acquisition.

	    A headline qualifies when at least one token has the lemma ``'acquire'``
	    and the document carries at least two entities labelled ``'ORG'``.
	    Comparing lemmas instead of the surface text is presumably meant to
	    match inflected forms of the verb as well -- confirm against the tagger.

	    Args:
	        headline_doc: parsed headline; iterated for tokens exposing
	            ``lemma_`` and queried for ``ents`` whose items expose ``label_``.

	    Returns:
	        bool: ``False`` for an empty document, for a document without the
	        ``'acquire'`` lemma, or with fewer than two ``'ORG'`` entities.
	    """
	    # The entity check is only reached when the verb is present.
	    if not any(token.lemma_ == 'acquire' for token in headline_doc):
	        return False
	    org_count = sum(1 for ent in headline_doc.ents if ent.label_ == 'ORG')
	    return org_count >= 2
	def detect_acquisition(headline_doc):
	    """Print the acquisition found in ``headline_doc``, or that none was found.

	    Prints ``"<headline> --> <acquirer> acquires <acquired>"`` when the
	    headline passes ``is_company_acquisition`` and both ``find_acquired``
	    and ``find_acquirer`` produce a non-empty result.  In every other case
	    it prints ``"<headline> --> no acquisition detected"``, so each headline
	    yields exactly one line of output.  Returns ``None``.
	    """
	    headline = str(headline_doc)
	    if is_company_acquisition(headline_doc):
	        # Run each extractor once and reuse its result for the message.
	        acquired = find_acquired(headline_doc)
	        acquirer = find_acquirer(headline_doc)
	        # Truthiness covers both "nothing found" conventions: None and [].
	        if acquired and acquirer:
	            print(f"{headline} --> {acquirer} acquires {acquired}")
	            return
	    print(headline + " --> " + "no acquisition detected")
	# Parse one sample headline and serve its dependency tree with displaCy.
	doc = nlp(
	    "PayPal decided to acquire Paidy Inc. and Microsoft"
	)
	spacy.displacy.serve(doc, style="dep")
	def find_acquired(headline_doc):
	    """Return the noun chunks naming the organisation(s) being acquired.

	    A token marks an acquisition target when its entity type is ``'ORG'``,
	    its syntactic head has the lemma ``'acquire'`` and its dependency label
	    is ``'attr'`` or ``'dobj'``.  ``'ORG'`` tokens among its right-hand
	    children with the label ``'conj'`` count as further targets.

	    Args:
	        headline_doc: parsed headline; iterated token by token and queried
	            for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
	            against callers).

	    Returns:
	        list: the matching noun chunks, de-duplicated, each target followed
	        by its coordinated targets; empty when nothing matches.
	    """
	    acquired_list = []

	    def collect_chunks(org_token):
	        # Chunks come from the document that was passed in, not from a
	        # module-level ``doc``, so the result belongs to this headline.
	        for chunk in headline_doc.noun_chunks:
	            if org_token in chunk and chunk not in acquired_list:
	                acquired_list.append(chunk)

	    for token in headline_doc:
	        is_target = (
	            token.ent_type_ == 'ORG'
	            and token.head.lemma_ == 'acquire'
	            and token.dep_ in ('attr', 'dobj')
	        )
	        if not is_target:
	            continue
	        collect_chunks(token)
	        for tright in token.rights:
	            # Exact comparison: ``in ('conj')`` would be a substring test
	            # against the plain string 'conj'.
	            if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
	                collect_chunks(tright)
	    return acquired_list
	def find_acquirer(headline_doc):
	    """Return the noun chunks naming the acquiring organisation(s).

	    A token marks an acquirer when its entity type is ``'ORG'`` and its
	    dependency label is ``'nsubj'`` or ``'ROOT'``.  ``'ORG'`` tokens among
	    its right-hand children with the label ``'conj'`` count as further
	    acquirers.

	    Args:
	        headline_doc: parsed headline; iterated token by token and queried
	            for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
	            against callers).

	    Returns:
	        The matching noun chunks as a de-duplicated list, each acquirer
	        followed by its coordinated acquirers, or ``None`` when nothing
	        matches.
	    """
	    acquirer_list = []

	    def collect_chunks(org_token):
	        # Chunks come from the document that was passed in, not from a
	        # module-level ``doc``, so the result belongs to this headline.
	        for chunk in headline_doc.noun_chunks:
	            if org_token in chunk and chunk not in acquirer_list:
	                acquirer_list.append(chunk)

	    for token in headline_doc:
	        if token.ent_type_ != 'ORG' or token.dep_ not in ('nsubj', 'ROOT'):
	            continue
	        collect_chunks(token)
	        for tright in token.rights:
	            # Exact comparison: ``in ('conj')`` would be a substring test
	            # against the plain string 'conj'.
	            if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
	                collect_chunks(tright)
	    # Callers test the result against None, so an empty list is not returned.
	    return acquirer_list or None
	import spacy
	nlp = spacy.load('en_core_web_lg')


	# 2. preset headlines used to exercise the detector
	headlines = [
	    "PayPal to acquire Paidy Inc.",
	    "Pump solutions and water technology company Grundfos has entered into an agreement to acquire Mechanical Equipment Company (MECO) for an undisclosed sum.",
	    "Cavaliers acquire Lauri Markkanen",
	    "Mastercard to acquire CipherTrace",
	    "TransUnion to acquire Neustar and Newstar",
	    "Microsoft enters into agreement to acquire Myhotelshop to help hotels optimize guest acquisition",
	    "CDK to help car buyers acquire insurance",
	    "Tyber Medical reaches agreement to acquire CatapultMD",
	    "Rebels acquire forward Stevenson from Blades",
	    "Google to acquire 1.3M-SF Manhattan office for $2.1B",
	    "PayPal decided to acquire Paidy Inc. and Microsoft",
	]

	# Parse every headline once, keeping the results in headline order.
	docs = [nlp(headline) for headline in headlines]

	# Print the (text, label) pair of each entity found in every headline.
	# The loop body is indented again; flattened, the loop did not parse.
	for doc in docs:
	    print([(ent.text, ent.label_) for ent in doc.ents])

	def is_company_acquisition(headline_doc):
	    """Return True when the headline looks like a company acquisition.

	    A headline qualifies when at least one token has the lemma ``'acquire'``
	    and the document carries at least two entities labelled ``'ORG'``.
	    Comparing lemmas instead of the surface text is presumably meant to
	    match inflected forms of the verb as well -- confirm against the tagger.

	    Args:
	        headline_doc: parsed headline; iterated for tokens exposing
	            ``lemma_`` and queried for ``ents`` whose items expose ``label_``.

	    Returns:
	        bool: ``False`` for an empty document, for a document without the
	        ``'acquire'`` lemma, or with fewer than two ``'ORG'`` entities.
	    """
	    # The entity check is only reached when the verb is present.
	    if not any(token.lemma_ == 'acquire' for token in headline_doc):
	        return False
	    org_count = sum(1 for ent in headline_doc.ents if ent.label_ == 'ORG')
	    return org_count >= 2



	# 4. find the dependency
	# 4.1 the acquired company must be a direct dependent of the verb "acquire"
	def find_acquired(headline_doc):
	    """Return the noun chunks naming the organisation(s) being acquired.

	    A token marks an acquisition target when its entity type is ``'ORG'``,
	    its syntactic head has the lemma ``'acquire'`` and its dependency label
	    is ``'attr'`` or ``'dobj'``.  ``'ORG'`` tokens among its right-hand
	    children with the label ``'conj'`` count as further targets.

	    Args:
	        headline_doc: parsed headline; iterated token by token and queried
	            for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
	            against callers).

	    Returns:
	        The matching noun chunks as a de-duplicated list, each target
	        followed by its coordinated targets, or ``None`` when nothing
	        matches.
	    """
	    acquired_list = []

	    def collect_chunks(org_token):
	        # Chunks come from the document that was passed in, not from a
	        # module-level ``doc``, so the result belongs to this headline.
	        for chunk in headline_doc.noun_chunks:
	            if org_token in chunk and chunk not in acquired_list:
	                acquired_list.append(chunk)

	    for token in headline_doc:
	        is_target = (
	            token.ent_type_ == 'ORG'
	            and token.head.lemma_ == 'acquire'
	            and token.dep_ in ('attr', 'dobj')
	        )
	        if not is_target:
	            continue
	        collect_chunks(token)
	        for tright in token.rights:
	            # Exact comparison: ``in ('conj')`` would be a substring test
	            # against the plain string 'conj'.
	            if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
	                collect_chunks(tright)
	    # Callers test the result against None, so an empty list is not returned.
	    return acquired_list or None

	# 5. find the acquirer
	def find_acquirer(headline_doc):
	    """Return the noun chunks naming the acquiring organisation(s).

	    A token marks an acquirer when its entity type is ``'ORG'`` and its
	    dependency label is ``'nsubj'`` or ``'ROOT'``.  ``'ORG'`` tokens among
	    its right-hand children with the label ``'conj'`` count as further
	    acquirers.

	    Args:
	        headline_doc: parsed headline; iterated token by token and queried
	            for ``noun_chunks`` (presumably a spaCy ``Doc`` -- confirm
	            against callers).

	    Returns:
	        The matching noun chunks as a de-duplicated list, each acquirer
	        followed by its coordinated acquirers, or ``None`` when nothing
	        matches.
	    """
	    acquirer_list = []

	    def collect_chunks(org_token):
	        # Chunks come from the document that was passed in, not from a
	        # module-level ``doc``, so the result belongs to this headline.
	        for chunk in headline_doc.noun_chunks:
	            if org_token in chunk and chunk not in acquirer_list:
	                acquirer_list.append(chunk)

	    for token in headline_doc:
	        if token.ent_type_ != 'ORG' or token.dep_ not in ('nsubj', 'ROOT'):
	            continue
	        collect_chunks(token)
	        for tright in token.rights:
	            # Exact comparison: ``in ('conj')`` would be a substring test
	            # against the plain string 'conj'.
	            if tright.ent_type_ == 'ORG' and tright.dep_ == 'conj':
	                collect_chunks(tright)
	    # Callers test the result against None, so an empty list is not returned.
	    return acquirer_list or None


	# 6. combine the checks to report who is acquiring whom
	def detect_acquisition(headline_doc):
	    """Print the acquisition found in ``headline_doc``, or that none was found.

	    Prints ``"<headline> --> <acquirer> acquires <acquired>"`` when the
	    headline passes ``is_company_acquisition`` and both ``find_acquired``
	    and ``find_acquirer`` produce a non-empty result.  In every other case
	    it prints ``"<headline> --> no acquisition detected"``, so each headline
	    yields exactly one line of output.  Returns ``None``.
	    """
	    headline = str(headline_doc)
	    if is_company_acquisition(headline_doc):
	        # Run each extractor once and reuse its result for the message.
	        acquired = find_acquired(headline_doc)
	        acquirer = find_acquirer(headline_doc)
	        # Truthiness covers both "nothing found" conventions: None and [].
	        if acquired and acquirer:
	            print(f"{headline} --> {acquirer} acquires {acquired}")
	            return
	    print(headline + " --> " + "no acquisition detected")

	# showcase: parse each preset headline and print the detection result
	# (the loop body is indented again; flattened, the loop did not parse)
	for headline in headlines:
	    # NOTE(review): the parsed headline is bound to the module-level name
	    # `doc`; any helper that reads a global `doc` relies on this binding,
	    # so keep the name when editing this loop.
	    doc = nlp(headline)
	    detect_acquisition(doc)
	from newscatcherapi import NewsCatcherApiClient
	# API client; 'YOUR_API_KEY' is a placeholder to be replaced with a real key.
	newscatcherapi = NewsCatcherApiClient(x_api_key='YOUR_API_KEY')

	# Query for articles with "acquire" in the title; every argument is handed
	# to ``get_search`` exactly as written here.
	acquisition_articles = newscatcherapi.get_search(
	    q='acquire',
	    search_in='title',
	    lang='en',
	    from_='24 hours ago',
	    sources='prnewswire.com, businesswire.com',
	    page_size=100,
	    page=1,
	)

	# Print the title of every article in the response.
	# The loop body is indented again; flattened, the loop did not parse.
	for article in acquisition_articles['articles']:
	    print(article['title'])