This gist has SQL scripts to pull data into PostgreSQL and perform a demo "similarity" search.
CREATE TABLE iab_assets (
id SERIAL PRIMARY KEY,
iab_code TEXT,
title TEXT,
origin TEXT,| #!/usr/bin/env python3 | |
| import os | |
| import subprocess | |
| from pathlib import Path | |
| from concurrent.futures import ProcessPoolExecutor, as_completed | |
| import multiprocessing | |
| # Input and output directories | |
| INPUT_DIR = "data" # change to your directory of PDFs | |
| OUTPUT_DIR = "ocr_output" |
| use PHPMailer\PHPMailer\PHPMailer; | |
| use PHPMailer\PHPMailer\SMTP; | |
| use PHPMailer\PHPMailer\Exception; | |
| require './vendor/autoload.php'; | |
| $dotenv = Dotenv\Dotenv::createImmutable(__DIR__ . '/'); | |
| $dotenv->load(); |
| #!/bin/bash | |
| # | |
| # Pull updated IPs from StopForumSpam from | |
| # https://www.stopforumspam.com/downloads. Note that different files have | |
| # different download limits. Generally you can download the last day hourly and | |
| # everything else you are limited to twice daily. | |
| # | |
| # URL: https://github.com/waynegraham/UpdateBlockedIPs | |
| # Author: Wayne Graham | |
| # License: MIT |
| import os | |
| import shutil | |
def create_text_file(jpeg_file):
    """Create an empty text file named after the given JPEG file.

    Only the final extension of ``jpeg_file`` is replaced with ``.txt``, so
    dots elsewhere in the path (``./scans/a.jpg``, ``my.photo.jpeg``) are
    kept. An existing file with the resulting name is truncated.

    Args:
        jpeg_file: Path of the JPEG file whose name the text file mirrors.
    """
    # splitext drops only the last extension; splitting on the first "."
    # would truncate names such as "my.photo.jpeg" or "./scans/a.jpg".
    text_file_name = os.path.splitext(jpeg_file)[0] + ".txt"
    # The context manager closes the handle even if the write raises.
    with open(text_file_name, "w") as text_file:
        text_file.write("")
mkdir /var/www/blocked_ips)
listed_ip_30_all (see https://www.mediawiki.org/wiki/Extension:StopForumSpam#IP_blocking)
LocalSettings.php points to the extracted file
/var/www/mediawiki/extensions/StopForumSpam/maintenance/updateDenyList.php to load into APCu cache
| # If you come from bash you might have to change your $PATH. | |
| export PATH=$HOME/bin:$HOME/.rbenv/bin:/usr/local/bin:$PATH | |
| # Path to your oh-my-zsh installation. | |
| export ZSH="/Users/wgraham/.oh-my-zsh" | |
| # Set name of the theme to load --- if set to "random", it will | |
| # load a random theme each time oh-my-zsh is loaded, in which case, | |
| # to know which specific one was loaded, run: echo $RANDOM_THEME | |
| # See https://github.com/ohmyzsh/ohmyzsh/wiki/Themes |
#! /bin/bash
# Convert every .doc file in the current directory to PDF.
# textutil first writes a .docx copy next to the original, then pandoc
# renders that copy to a PDF with the same base name.
for file in *.doc; do
  # With no matches the glob stays literal ("*.doc"); skip it.
  [ -e "$file" ] || continue
  # Skip pandoc when the .docx could not be produced.
  textutil -convert docx "$file" || continue
  # Account for the new `x` in `docx`
  pandoc -o "${file%doc}pdf" "${file}x"
done