Last active
August 9, 2020 18:23
-
-
Save parajain/8ea763737aa99719e8d32df1a6cc2d16 to your computer and use it in GitHub Desktop.
Chunking example using FlairNLP https://github.com/flairNLP/flair/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flair.data import Sentence | |
from flair.models import SequenceTagger | |
import sys | |
class FlairChunker:
    """Thin wrapper around a flair ``SequenceTagger`` used for chunking."""

    def __init__(self):
        """Load the tagger identified by the model name ``'chunk'``."""
        self.chunker = SequenceTagger.load('chunk')

    def get_chunk_spans(self, s: str):
        """Tag the text *s* and return the spans found in it.

        The text is wrapped in a flair ``Sentence``, the tagger annotates
        that sentence in place, and the spans registered under the ``'np'``
        label type are returned.

        NOTE(review): the sample output published with this script shows
        NP, VP and PP labels coming back from this call, so ``'np'`` looks
        like the name of the label layer rather than a filter for noun
        phrases only -- confirm against the installed flair version.
        """
        sentence = Sentence(s)
        # predict() writes its annotations onto `sentence`; its return
        # value is not used here.
        self.chunker.predict(sentence)
        return sentence.get_spans('np')
def main():
    """Chunk a hard-coded demo sentence and print every span found."""
    # The input is fixed for demonstration purposes. To process a file
    # instead, read one sentence per line from the path given in
    # sys.argv[1] and assign the resulting lines to `sentences`.
    sentences = ["The cat sat on the mat the dog chewed ."]

    # Build the chunker once, outside the loop, so the model is loaded a
    # single time regardless of how many sentences are processed.
    flairchunker = FlairChunker()

    for s in sentences:
        print('\n Input sentence: ', s)
        for entity in flairchunker.get_chunk_spans(s):
            print(entity)
# Run the demo only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output:
Input sentence: The cat sat on the mat the dog chewed .
Span [1,2]: "The cat" [− Labels: NP (0.9999)]
Span [3]: "sat" [− Labels: VP (1.0)]
Span [4]: "on" [− Labels: PP (0.9956)]
Span [5,6]: "the mat" [− Labels: NP (0.9998)]
Span [7,8]: "the dog" [− Labels: NP (0.9985)]
Span [9]: "chewed" [− Labels: VP (1.0)]