GCP Natural Language API Japanese test
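The three samples below assume the google-cloud-language client library is installed (pip install google-cloud-language) and that Application Default Credentials are available, typically via the GOOGLE_APPLICATION_CREDENTIALS environment variable. A minimal pre-flight check, not part of the original gist, might look like this:

# Minimal sketch (assumption, not part of the gist): verify that a
# service-account key is configured before calling the API.
import os

key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if key_path is None:
    print("Set GOOGLE_APPLICATION_CREDENTIALS before running these samples "
          "(or rely on other Application Default Credentials).")
else:
    print("Using service-account key:", key_path)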
from google.cloud import language_v1


def sample_analyze_syntax(text_content):
    """Analyze the syntax (tokens, part of speech, dependencies) of Japanese text."""
    client = language_v1.LanguageServiceClient()

    # Plain-text Japanese document.
    type_ = language_v1.Document.Type.PLAIN_TEXT
    language = "ja"
    document = {"content": text_content, "type_": type_, "language": language}

    # Offsets in the response are calculated against UTF-8 encoding.
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_syntax(
        request={"document": document, "encoding_type": encoding_type}
    )

    print("Language of the text: {}".format(response.language))

    for token in response.tokens:
        text = token.text
        print("Token text: {}".format(text.content))
        print("Location of this token in overall document: {}".format(text.begin_offset))

        part_of_speech = token.part_of_speech
        print("Part of Speech tag: {}".format(
            language_v1.PartOfSpeech.Tag(part_of_speech.tag).name
        ))
        # print("Voice: {}".format(language_v1.PartOfSpeech.Voice(part_of_speech.voice).name))
        # print("Tense: {}".format(language_v1.PartOfSpeech.Tense(part_of_speech.tense).name))
        # print("Lemma: {}".format(token.lemma))

        dependency_edge = token.dependency_edge
        print("Head token index: {}".format(dependency_edge.head_token_index))
        print("Label: {}".format(
            language_v1.DependencyEdge.Label(dependency_edge.label).name
        ))
        print()


# Japanese news excerpt used as test input (President Zelensky of Ukraine
# announcing a speech to the UN Security Council on the 5th, and the
# possibility that more civilians were killed by Russian forces beyond
# those found near Kyiv).
text = """ウクライナのゼレンスキー大統領は、5日に国連安全保障理事会で演説すると表明した。首都キーウ(キエフ)近郊で多数の民間人の遺体が見つかった問題について、他の地域でさらに多くの民間人がロシア軍に殺害された可能性があるとの見方を示した。"""
sample_analyze_syntax(text)
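The syntax sample above prints each token in isolation, but dependency_edge.head_token_index also lets you pair every token with its syntactic head. The helper below is a minimal sketch, not part of the gist (dependency_pairs is a hypothetical name), reusing the same request shape:

# Hypothetical helper: pair each token with its syntactic head using
# head_token_index from the analyze_syntax response. Root tokens point
# at themselves.
from google.cloud import language_v1


def dependency_pairs(text_content, language="ja"):
    client = language_v1.LanguageServiceClient()
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": language,
    }
    response = client.analyze_syntax(
        request={"document": document,
                 "encoding_type": language_v1.EncodingType.UTF8}
    )
    tokens = response.tokens
    return [
        (t.text.content, tokens[t.dependency_edge.head_token_index].text.content)
        for t in tokens
    ]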
from google.cloud import language_v1


def analyze_entities(text_content):
    """
    Analyze entities in a string of Japanese text.

    Args:
      text_content: The text content to analyze.
    """
    client = language_v1.LanguageServiceClient()

    # Plain-text Japanese document.
    type_ = language_v1.Document.Type.PLAIN_TEXT
    language = "ja"
    document = {"content": text_content, "type_": type_, "language": language}
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_entities(
        request={"document": document, "encoding_type": encoding_type}
    )

    for entity in response.entities:
        print("Representative name for the entity: {}".format(entity.name))
        print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name))
        print("Salience score: {}".format(entity.salience))

        # Entity metadata such as wikipedia_url and mid, when available.
        for metadata_name, metadata_value in entity.metadata.items():
            print("{}: {}".format(metadata_name, metadata_value))

        # Every mention of the entity in the input text.
        for mention in entity.mentions:
            print("Mention text: {}".format(mention.text.content))
            print("Mention type: {}".format(
                language_v1.EntityMention.Type(mention.type_).name
            ))

    print("Language of the text: {}".format(response.language))


def main():
    # Japanese news excerpt used as test input (Russia's full-scale invasion
    # of Ukraine; residents shaken by repeated explosions attributed to
    # Russian attacks).
    text = """ロシアが全土侵攻を始めたウクライナ。現地ではロシアの攻撃のものとみられる爆発音が相次ぎ、住民は恐怖で心を震わせた。"""
    analyze_entities(text)


if __name__ == "__main__":
    main()
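Because analyze_entities returns both a type and a salience score for every entity, a common follow-up is to keep only the most salient named entities. The sketch below is an assumption-labeled helper (extract_salient_entities is a hypothetical name), filtering on the same response fields the sample prints:

# Hypothetical helper: keep only PERSON, LOCATION and ORGANIZATION entities
# whose salience is above a threshold.
from google.cloud import language_v1


def extract_salient_entities(text_content, min_salience=0.01, language="ja"):
    client = language_v1.LanguageServiceClient()
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": language,
    }
    response = client.analyze_entities(
        request={"document": document,
                 "encoding_type": language_v1.EncodingType.UTF8}
    )
    wanted = {
        language_v1.Entity.Type.PERSON,
        language_v1.Entity.Type.LOCATION,
        language_v1.Entity.Type.ORGANIZATION,
    }
    return [
        (e.name, language_v1.Entity.Type(e.type_).name, e.salience)
        for e in response.entities
        if e.type_ in wanted and e.salience >= min_salience
    ]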
from google.cloud import language_v1


def sample_analyze_sentiment(text_content):
    """
    Analyze sentence-level sentiment in a string of Japanese text.

    Args:
      text_content: The text content to analyze.
    """
    client = language_v1.LanguageServiceClient()

    # Plain-text Japanese document.
    type_ = language_v1.Document.Type.PLAIN_TEXT
    language = "ja"
    document = {"content": text_content, "type_": type_, "language": language}
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_sentiment(
        request={"document": document, "encoding_type": encoding_type}
    )

    for sentence in response.sentences:
        print("Sentence text: {}".format(sentence.text.content))
        print("Sentence sentiment score: {}".format(sentence.sentiment.score))
        print("Sentence sentiment magnitude: {}".format(sentence.sentiment.magnitude))

    print("Language of the text: {}".format(response.language))


def main():
    import argparse

    parser = argparse.ArgumentParser()
    # Default input: a short Japanese literary passage used for testing.
    parser.add_argument(
        "--text_content",
        type=str,
        default="""可能性という言葉を無限定に使ってはいけない。
我々という存在を規定するのは、我々がもつ可能性ではなく、我々がもつ不可能性である。
ほんの些細な決断の違いで 私の運命は変わる。
無数の私が生まれる。""",
    )
    args = parser.parse_args()
    sample_analyze_sentiment(args.text_content)


if __name__ == "__main__":
    main()
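analyze_sentiment also returns an overall document_sentiment alongside the per-sentence scores printed above. The snippet below is a minimal sketch, not part of the gist (document_sentiment_of is a hypothetical name), showing how to read it from the same kind of request:

# Minimal sketch: read the document-level sentiment from an
# analyze_sentiment response.
from google.cloud import language_v1


def document_sentiment_of(text_content, language="ja"):
    client = language_v1.LanguageServiceClient()
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": language,
    }
    response = client.analyze_sentiment(
        request={"document": document,
                 "encoding_type": language_v1.EncodingType.UTF8}
    )
    sentiment = response.document_sentiment
    # score is in [-1.0, 1.0]; magnitude reflects the overall strength of emotion.
    return sentiment.score, sentiment.magnitude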