Created
November 1, 2024 20:45
-
-
Save rodrigorgs/20bba5fc5c999c1b22159136f3957b44 to your computer and use it in GitHub Desktop.
Converte extrato do PicPay (PDF) para CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html>
<head>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
<script>
// Point pdf.js at its worker script; the URL uses the same 2.10.377 release as the pdf.min.js build loaded by the <script> tag above.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.worker.min.js'; | |
/**
 * Matches one statement row in the flattened text of a page:
 * group 1 = date and time, group 2 = description, group 3 = raw value.
 * Built once (it is loop-invariant) and only ever used through
 * `String.prototype.matchAll`, which works on a private copy of the regex,
 * so the shared `g` flag never leaks `lastIndex` state between pages.
 */
const PICPAY_ROW_PATTERN = new RegExp(
  /(\d{2}\/\d{2}\/\d{4} \d{2}:\d{2}:\d{2})/.source // Date and time
  + /\s+(.+?)\s+/.source // Description
  + /\s+((?:- )?R\$\s[\d.,]+)\s+/.source, // Value
  'g',
);

/**
 * Extracts every statement row found in the text of a single page.
 *
 * @param {string} pageText - Text items of one page joined with single spaces.
 * @returns {string[]} One tab-separated line per row, in the order the rows
 *   appear in the text: `<date time>\t<description>\t<value>`.
 */
function extractStatementRows(pageText) {
  return Array.from(
    pageText.matchAll(PICPAY_ROW_PATTERN),
    ([, dateTime, description, rawValue]) => {
      // Drop the "R$" currency marker but keep a leading minus sign, then
      // remove the first run of whitespace that is left ("- 10,00" -> "-10,00").
      const value = rawValue.replace(/(-)?\s*R\$/, '$1').replace(/\s+/, '');
      return `${dateTime}\t${description}\t${value}`;
    },
  );
}

/**
 * Reads the PDF selected in the `#pdfUpload` input, extracts the statement
 * rows from every page and writes them to the `#csvOutput` element.
 *
 * Output is one line per row with the columns separated by tabs. Rows are
 * collected in document order and then reversed as a whole; presumably the
 * statement lists the newest transaction first, so the result is
 * chronological — confirm against a real export.
 *
 * Shows an alert and returns without doing anything when no file is selected.
 * When the file cannot be read or parsed, shows an alert and rethrows the
 * underlying error so it is still reported in the console.
 *
 * @returns {Promise<void>}
 */
async function convertPdfToCsv() {
  const fileInput = document.getElementById('pdfUpload');
  const output = document.getElementById('csvOutput');

  if (!fileInput.files.length) {
    alert('Please upload a PDF file.');
    return;
  }

  const csvLines = [];
  try {
    const file = fileInput.files[0];
    const pdfData = await file.arrayBuffer();
    const pdf = await pdfjsLib.getDocument({ data: pdfData }).promise;

    // pdf.js pages are numbered from 1.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const textContent = await page.getTextContent();
      const pageText = textContent.items.map((item) => item.str).join(' ');
      csvLines.push(...extractStatementRows(pageText));
    }
  } catch (err) {
    // Without this the click would appear to do nothing when the PDF is
    // unreadable; the error is rethrown so the rejection is not hidden.
    alert('Could not read the PDF file.');
    throw err;
  }

  csvLines.reverse();
  output.textContent = csvLines.join('\n');
}
</script>
</head>
<body>
<input type="file" id="pdfUpload" accept=".pdf">
<button onclick="convertPdfToCsv()">Convert to CSV</button>
<pre id="csvOutput"></pre>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment