Skip to content

Instantly share code, notes, and snippets.

@rodrigorgs
Created November 13, 2024 00:01
Show Gist options
  • Save rodrigorgs/7b40b33806008dabf84ba6e6f5ed0722 to your computer and use it in GitHub Desktop.
Save rodrigorgs/7b40b33806008dabf84ba6e6f5ed0722 to your computer and use it in GitHub Desktop.
Converte extrato de cartão do Banco do Brasil (PDF) para CSV
<html>
<head>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
<script>
// Tell pdf.js where to load its worker script from. The URL points at the
// same 2.10.377 release as the pdf.min.js build included above.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.worker.min.js';
/**
 * Extract the table rows found in the flattened text of one PDF page.
 *
 * A row is recognised as: date (dd/MM), description, two-letter country code,
 * value. Rows whose value starts with '-' are skipped (presumably payments or
 * credits rather than purchases - confirm against a real statement).
 *
 * @param {string} pageText - All text items of a page joined by single spaces.
 * @returns {string[]} One `date<TAB>description<TAB>value` line per kept row,
 *   in the order the rows appear in the text. The country code is matched but
 *   not included in the output.
 */
function extractTransactionLines(pageText) {
  // A fresh global regex per call, so no `lastIndex` state leaks between pages.
  // The value must be followed by whitespace OR the end of the text; requiring
  // whitespace alone would drop a row whose value is the last token on a page.
  const rowPattern =
    /(\d{2}\/\d{2})\s+(.+?)\s+([A-Z]{2})\s+(-?[\d.,]+)(?=\s|$)/g;
  const lines = [];
  let match;
  // With the `g` flag each exec() resumes after the previous match, so the
  // text does not have to be re-sliced after every row.
  while ((match = rowPattern.exec(pageText)) !== null) {
    const [, date, description, , value] = match;
    if (!value.startsWith('-')) {
      lines.push(`${date}\t${description}\t${value}`);
    }
  }
  return lines;
}

/**
 * Read the PDF selected in the `#pdfUpload` file input, extract the table rows
 * from every page and write them to the `#csvOutput` element.
 *
 * The output is tab-separated (one row per line) and the collected rows are
 * written in reverse order of discovery. If no file is selected an alert is
 * shown and nothing else happens.
 *
 * @returns {Promise<void>} Resolves once the output element has been updated;
 *   rejects if the file cannot be read or parsed by pdf.js.
 */
async function convertPdfToCsv() {
  const fileInput = document.getElementById('pdfUpload');
  const output = document.getElementById('csvOutput');
  if (!fileInput.files.length) {
    alert('Please upload a PDF file.');
    return;
  }

  const pdfData = await fileInput.files[0].arrayBuffer();
  const pdf = await pdfjsLib.getDocument({ data: pdfData }).promise;

  const csvLines = [];
  // pdf.js pages are numbered starting at 1.
  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
    const page = await pdf.getPage(pageNumber);
    const textContent = await page.getTextContent();
    const pageText = textContent.items.map((item) => item.str).join(' ');
    csvLines.push(...extractTransactionLines(pageText));
  }

  // Emit the rows in the opposite order from how they appear in the PDF.
  csvLines.reverse();
  output.textContent = csvLines.join('\n');
}
</script>
</head>
<body>
<input type="file" id="pdfUpload" accept=".pdf">
<button onclick="convertPdfToCsv()">Convert to CSV</button>
<pre id="csvOutput"></pre>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment