kingjr · January 30, 2023 13:54
diff --git a/parser.py b/parser.py
 #pip install benepar
 #pip install protobuf==3.20.0

 import spacy
 import numpy as np

 class Parser():
    """Base class wrapping a spaCy pipeline that handles one sentence at a time.

    The pipeline is stored on ``self.nlp``; :meth:`process` runs it and
    returns the single sentence found in the input.
    """

    def __init__(self, model='fr_core_news_sm'):
        """Load a spaCy pipeline, downloading its package first if needed.

        Parameters
        ----------
        model : str
            Name of the spaCy pipeline package to load. Defaults to the
            small French model, which used to be hard-coded.
        """
        if not spacy.util.is_package(model):
            spacy.cli.download(model)

        self.nlp = spacy.load(model)

    def process(self, sentence):
        """Run the pipeline on ``sentence`` and return its only sentence.

        Parameters
        ----------
        sentence : str
            Text expected to be segmented into exactly one sentence.

        Returns
        -------
        The single element of ``doc.sents``.

        Raises
        ------
        ValueError
            If the pipeline finds zero sentences or more than one.
        """
        doc = self.nlp(sentence)
        # Materialise the sentences once instead of rebuilding the list.
        sents = list(doc.sents)
        # Explicit raise rather than ``assert``: assertions are stripped
        # under ``python -O``, which would silently drop extra sentences.
        if len(sents) != 1:
            raise ValueError(
                f"expected exactly one sentence, got {len(sents)}: "
                f"{sentence!r}"
            )
        return sents[0]
        
 class DependencyParser(Parser):
    """Count, for each word, how many dependency subtrees end at that word."""

    def parse(self, sentence):
        """Return ``(token, n_closing)`` pairs for a single sentence.

        ``n_closing`` is the number of tokens whose whole subtree (the token
        plus all of its descendants) becomes fully read at this token, i.e.
        it is complete here but was not at the previous token.

        Parameters
        ----------
        sentence : str
            Text that the pipeline segments into exactly one sentence.

        Returns
        -------
        list of (token, integer) tuples, one per token, in sentence order.
        """
        sent = self.process(sentence)

        # closeds[k]: number of subtrees fully contained in the tokens read
        # up to and including the k-th one. The position is the token's own
        # ``.i`` so that it lives in the same index space ``_is_closed``
        # compares against; a 1-based ``range`` looked one token ahead.
        closeds = [
            sum(self._is_closed(word, current.i) for word in sent)
            for current in sent
        ]

        # Differencing against a leading 0 keeps every count on the token
        # where the subtree ends (appending the running total instead put
        # the whole sentence length on the last token).
        closing = np.diff(closeds, prepend=0)
        return list(zip(sent, closing))

    def _is_closed(self, node, position):
        """Tell whether ``node`` and all its descendants have index <= ``position``."""
        if node.i > position:
            return False
        # The recursive call rejects any child located after ``position``,
        # so no separate check on ``child.i`` is needed.
        return all(
            self._is_closed(child, position) for child in node.children
        )

 class ConstituentParser(Parser):
    """Count, for each word, how many brackets of the parse close after it."""

    def __init__(self):
        """Load the base spaCy pipeline and append the benepar component."""
        import os

        # NOTE(review): this variable is presumably only honoured if it is
        # set before protobuf gets imported, so set it ahead of
        # ``import benepar`` rather than after - confirm.
        os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
        import benepar

        super().__init__()
        ben_model = 'benepar_fr2'
        benepar.download(ben_model)
        # spaCy 2 takes a component instance; spaCy 3 takes the registered
        # factory name plus a config (both forms are from the benepar README).
        if spacy.__version__.startswith('2'):
            self.nlp.add_pipe(benepar.BeneparComponent(ben_model))
        else:
            self.nlp.add_pipe('benepar', config={'model': ben_model})

    def parse(self, sentence):
        """Return ``(word, n_closing)`` pairs read off the bracketed parse.

        The parse string is cut at every ``(``. A chunk with two
        whitespace-separated fields is a ``TAG word)...`` leaf; the word is
        the second field without its trailing ``)`` and ``n_closing`` is the
        number of ``)`` that follow it (this includes the bracket of the
        word's own tag). For example ``(NP (DET les) (NC chats))`` gives
        ``[('les', 1), ('chats', 2)]``.

        Parameters
        ----------
        sentence : str
            Text that the pipeline segments into exactly one sentence.

        Returns
        -------
        list of (str, int) tuples, one per leaf of the parse string.
        """
        sent = self.process(sentence)
        labels = []
        for chunk in sent._.parse_string.split('('):
            fields = chunk.split()
            # Chunks with a single field are phrase labels such as ``NP``;
            # empty chunks come from the text before the first ``(``.
            if len(fields) > 1:
                leaf = fields[1]
                labels.append((leaf.strip(')'), leaf.count(')')))
        return labels

 # Demo: build both parsers. Each constructor loads a spaCy pipeline (and
 # downloads it when the package is missing); the constituent parser also
 # downloads the benepar model.
 const_parser = ConstituentParser()

 dep_parser = DependencyParser()

 sentence = "les petits chats de Mamie suivent la souris verte."

 # Each call prints a list of (word, count) pairs for the sentence.
 print(const_parser.parse(sentence))

 print(dep_parser.parse(sentence))
	#pip install benepar
	#pip install protobuf==3.20.0

	import spacy
	import numpy as np

	class Parser():
	    """Base class wrapping a spaCy pipeline that handles one sentence at a time.

	    The pipeline is stored on ``self.nlp``; :meth:`process` runs it and
	    returns the single sentence found in the input.
	    """

	    def __init__(self, model='fr_core_news_sm'):
	        """Load a spaCy pipeline, downloading its package first if needed.

	        Parameters
	        ----------
	        model : str
	            Name of the spaCy pipeline package to load. Defaults to the
	            small French model, which used to be hard-coded.
	        """
	        if not spacy.util.is_package(model):
	            spacy.cli.download(model)

	        self.nlp = spacy.load(model)

	    def process(self, sentence):
	        """Run the pipeline on ``sentence`` and return its only sentence.

	        Parameters
	        ----------
	        sentence : str
	            Text expected to be segmented into exactly one sentence.

	        Returns
	        -------
	        The single element of ``doc.sents``.

	        Raises
	        ------
	        ValueError
	            If the pipeline finds zero sentences or more than one.
	        """
	        doc = self.nlp(sentence)
	        # Materialise the sentences once instead of rebuilding the list.
	        sents = list(doc.sents)
	        # Explicit raise rather than ``assert``: assertions are stripped
	        # under ``python -O``, which would silently drop extra sentences.
	        if len(sents) != 1:
	            raise ValueError(
	                f"expected exactly one sentence, got {len(sents)}: "
	                f"{sentence!r}"
	            )
	        return sents[0]

	class DependencyParser(Parser):
	    """Count, for each word, how many dependency subtrees end at that word."""

	    def parse(self, sentence):
	        """Return ``(token, n_closing)`` pairs for a single sentence.

	        ``n_closing`` is the number of tokens whose whole subtree (the token
	        plus all of its descendants) becomes fully read at this token, i.e.
	        it is complete here but was not at the previous token.

	        Parameters
	        ----------
	        sentence : str
	            Text that the pipeline segments into exactly one sentence.

	        Returns
	        -------
	        list of (token, integer) tuples, one per token, in sentence order.
	        """
	        sent = self.process(sentence)

	        # closeds[k]: number of subtrees fully contained in the tokens read
	        # up to and including the k-th one. The position is the token's own
	        # ``.i`` so that it lives in the same index space ``_is_closed``
	        # compares against; a 1-based ``range`` looked one token ahead.
	        closeds = [
	            sum(self._is_closed(word, current.i) for word in sent)
	            for current in sent
	        ]

	        # Differencing against a leading 0 keeps every count on the token
	        # where the subtree ends (appending the running total instead put
	        # the whole sentence length on the last token).
	        closing = np.diff(closeds, prepend=0)
	        return list(zip(sent, closing))

	    def _is_closed(self, node, position):
	        """Tell whether ``node`` and all its descendants have index <= ``position``."""
	        if node.i > position:
	            return False
	        # The recursive call rejects any child located after ``position``,
	        # so no separate check on ``child.i`` is needed.
	        return all(
	            self._is_closed(child, position) for child in node.children
	        )

	class ConstituentParser(Parser):
	    """Count, for each word, how many brackets of the parse close after it."""

	    def __init__(self):
	        """Load the base spaCy pipeline and append the benepar component."""
	        import os

	        # NOTE(review): this variable is presumably only honoured if it is
	        # set before protobuf gets imported, so set it ahead of
	        # ``import benepar`` rather than after - confirm.
	        os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
	        import benepar

	        super().__init__()
	        ben_model = 'benepar_fr2'
	        benepar.download(ben_model)
	        # spaCy 2 takes a component instance; spaCy 3 takes the registered
	        # factory name plus a config (both forms are from the benepar README).
	        if spacy.__version__.startswith('2'):
	            self.nlp.add_pipe(benepar.BeneparComponent(ben_model))
	        else:
	            self.nlp.add_pipe('benepar', config={'model': ben_model})

	    def parse(self, sentence):
	        """Return ``(word, n_closing)`` pairs read off the bracketed parse.

	        The parse string is cut at every ``(``. A chunk with two
	        whitespace-separated fields is a ``TAG word)...`` leaf; the word is
	        the second field without its trailing ``)`` and ``n_closing`` is the
	        number of ``)`` that follow it (this includes the bracket of the
	        word's own tag). For example ``(NP (DET les) (NC chats))`` gives
	        ``[('les', 1), ('chats', 2)]``.

	        Parameters
	        ----------
	        sentence : str
	            Text that the pipeline segments into exactly one sentence.

	        Returns
	        -------
	        list of (str, int) tuples, one per leaf of the parse string.
	        """
	        sent = self.process(sentence)
	        labels = []
	        for chunk in sent._.parse_string.split('('):
	            fields = chunk.split()
	            # Chunks with a single field are phrase labels such as ``NP``;
	            # empty chunks come from the text before the first ``(``.
	            if len(fields) > 1:
	                leaf = fields[1]
	                labels.append((leaf.strip(')'), leaf.count(')')))
	        return labels

	# Demo: build both parsers. Each constructor loads a spaCy pipeline (and
	# downloads it when the package is missing); the constituent parser also
	# downloads the benepar model.
	const_parser = ConstituentParser()

	dep_parser = DependencyParser()

	sentence = "les petits chats de Mamie suivent la souris verte."

	# Each call prints a list of (word, count) pairs for the sentence.
	print(const_parser.parse(sentence))

	print(dep_parser.parse(sentence))