Skip to content

Instantly share code, notes, and snippets.

@ssisaias
Created July 2, 2018 02:39
Show Gist options
  • Save ssisaias/d6dd83361d6fa64c0341427b1f6f3f22 to your computer and use it in GitHub Desktop.
Save ssisaias/d6dd83361d6fa64c0341427b1f6f3f22 to your computer and use it in GitHub Desktop.
import requests
import time
import datetime
import sys
import json
import re
# Change the file name here. The file is generated by ytCommentMiner.
INPUT_PATH = 'videoei2-RjJDBHc2017-11-20-1200.json'
OUTPUT_PATH = 'output.txt'

# Substrings deleted from every comment before it is written out.
# Dropping "\n" and "\t" keeps each comment on a single output line;
# the "✌" symbol and commas are stripped from the text as well.
REMOVED_SUBSTRINGS = ("\n", "\t", "✌", ",")


def clean_text(text):
    """Return *text* with every entry of REMOVED_SUBSTRINGS deleted."""
    for unwanted in REMOVED_SUBSTRINGS:
        text = text.replace(unwanted, "")
    return text


def iter_comment_texts(json_data):
    """Yield the original text of each top-level comment, then its replies.

    Args:
        json_data: Parsed dump with an "items" list; each item carries
            ``snippet.topLevelComment.snippet.textOriginal`` and
            ``snippet.totalReplyCount``, plus an optional
            ``replies.comments`` list.

    Yields:
        The ``textOriginal`` string of every comment and reply, in file order.
    """
    for comment in json_data["items"]:
        snippet = comment["snippet"]
        yield snippet["topLevelComment"]["snippet"]["textOriginal"]
        if snippet["totalReplyCount"] > 0:
            # A thread can report replies without embedding them, so a
            # missing "replies" object is treated as "no replies available"
            # instead of raising KeyError.
            for reply in comment.get("replies", {}).get("comments", []):
                yield reply["snippet"]["textOriginal"]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Flatten a comment dump into a text file with one comment per line.

    Args:
        input_path: Path of the JSON dump to read.
        output_path: Path of the text file to (over)write.

    Returns:
        The number of comments and replies written (also printed to stdout).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If an item lacks the expected comment fields.
    """
    # Read explicitly as UTF-8 so non-ASCII comment text does not depend on
    # the platform's default encoding.
    with open(input_path, encoding='utf-8') as dump_file:
        json_data = json.load(dump_file)

    contador = 0
    with open(output_path, 'w', encoding='utf-8') as out_file:
        # HEADER LINE -- Keep it for SentiStrength
        out_file.write("\n")
        # NOTE: an earlier revision labelled each comment interactively
        # (0 = neutral, 1 = offensive); now every comment is written as-is.
        for text in iter_comment_texts(json_data):
            contador += 1
            out_file.write(clean_text(text) + "\n")

    print(contador)
    return contador


if __name__ == "__main__":
    main()

# Can be used to count the words and check the distinct words in the output:
# grep -wo '[[:alnum:]]\+' output.txt | sort | uniq -cd | less
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment