This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import pandas as pd | |
| from textblob import TextBlob | |
def sentiment_polarity(string: str) -> float:
    """Return the TextBlob polarity score for ``string``.

    Args:
        string: Text to analyse.

    Returns:
        The polarity component (first field) of TextBlob's sentiment result.
    """
    # ``sentiment`` is a (polarity, subjectivity) named tuple, so the named
    # field is the same value as index 0.
    return TextBlob(string).sentiment.polarity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def serialise_file(document, format):
    """Read a document from disk and return its text as one string.

    Args:
        document: Path of the file to read.
        format: Format tag for the file. Only ``'txt'`` is implemented.

    Returns:
        For ``'txt'``, the file contents with every newline replaced by a
        single space.

    Raises:
        NotImplementedError: If ``format`` is ``'docx'``; that branch is still
            a placeholder.
        ValueError: If ``format`` is any other value.
    """
    if format == 'txt':
        with open(document, 'r') as file:
            # Flatten the text onto one line by turning newlines into spaces.
            return file.read().replace('\n', ' ')
    if format == 'docx':
        # The original branch held only a comment (and lacked its colon),
        # which is a syntax error. Fail loudly until the parser is written.
        raise NotImplementedError('docx parsing is not implemented yet')
    # Reject unknown formats explicitly instead of silently returning None.
    raise ValueError(f'unsupported format: {format!r}')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from docx import Document | |
class DocParser:
    """Parse a document with whatever callable ``get_format`` selects for it."""

    def parse(self, document):
        """Look up the handler for ``document`` and apply it.

        Args:
            document: Passed unchanged to ``get_format`` and then to the
                callable that ``get_format`` returns.

        Returns:
            Whatever the selected callable returns for ``document``.
        """
        # ``get_format`` is resolved as a module-level name, not a method.
        return get_format(document)(document)
| def get_format(document): | |
| format = os.path.splitext(document)[-1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import io | |
| from docx import Document | |
| from pdfminer3.layout import LAParams, LTTextBox | |
| from pdfminer3.pdfpage import PDFPage | |
| from pdfminer3.pdfinterp import PDFResourceManager | |
| from pdfminer3.pdfinterp import PDFPageInterpreter | |
| from pdfminer3.converter import PDFPageAggregator | |
| from pdfminer3.converter import TextConverter |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import parse_file as dp | |
# Paths to the test files, one per file extension.
txt_path = 'test_txt.txt'
docx_path = 'test_docx.docx'
pdf_path = 'test_pdf.pdf'
html_path = 'test_html.html'
pptx_path = 'test_pptx.pptx'

# All test paths collected in one list, in the order defined above.
file_paths = [
    txt_path,
    docx_path,
    pdf_path,
    html_path,
    pptx_path,
]
OlderNewer