Skip to content

Instantly share code, notes, and snippets.

View pcbje's full-sized avatar

Petter Christian Bjelland pcbje

View GitHub Profile
@pcbje
pcbje / seeds.txt
Last active December 2, 2024 15:36
source_id|seed|result-locator|article-locator|date-locator
norway-mfa|https://www.regjeringen.no/no/sok/id86008/?sortby=1&page=1&documenttype=aktuelt/nyheter&term=venezuela|.listItem a|main|.date
us-mfa|https://www.state.gov/countries-areas-archive/venezuela/|.collection-result a|main|.article-meta__publish-date
fr-mfa|https://www.diplomatie.gouv.fr/en/country-files/venezuela/events/|.article_colonne a|article|header h1
uk-mfa|https://www.gov.uk/search/all?order=updated-newest&world_locations%5B%5D=venezuela|
040701002.02.1.0001
130201012.03.1.0001
190208003.01.1.0001
130101006.02.1.0001
171501003.01.1.0001
020201015.01.1.0001
122003002.01.1.0001
190210001.02.1.0001
130501031.01.1.0001
130701075.01.1.0001
import os
import re
def extract_count(line):
lower_line = line.lower()
numbers = re.findall(' (cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve)(?:$|,| )', lower_line)
if len(numbers) != 4:
return None
text = ''
package filter
import (
"fmt"
"log"
)
var bytehash = []uint32{
0x12bd9527, 0xf4140cea, 0x987bd6e1, 0x79079850, 0xafbfd539, 0xd350ce0a,
0x82973931, 0x9fc32b9c, 0x28003b88, 0xc30c13aa, 0x6b678c34, 0x5844ef1d,
version: '2'
volumes:
postgres_data:
driver: local
app_data:
driver: local
services:
postgres:
class Makecluster(object):
def __init__(self):
self.clusters = {}
self.edges = {}
self.reverse_index = {}
self.clusterIndex = 0
def getClusters(self):
return self.clusters
makecluster = Makecluster()
makecluster.addPair(1, 2)
makecluster.addPair(2, 3)
makecluster.addPair(4, 5)
print (makecluster.getClusters())
#!/usr/bin/python
# -*- coding: utf-8 -*-
import base64
import hashlib
from datetime import datetime
import struct
import logging
import json
# License: GPLv3
# https://github.com/aboSamoor/polyglot/blob/master/LICENSE
import polyglot
from polyglot.text import Text, Word
from flask import Flask, request
app = Flask(__name__)
# sudo yum install libicu libicu-devel.x86_64 gcc-c++ python3-devel
# pip3 install morfessor pycld2 pyicu numpy six polyglot Flask
tika:
image: logicalspark/docker-tikaserver
cpu_shares: 4
ports:
- "9998:9998"
elasticsearch:
image: elasticsearch
cpu_shares: 4
command: elasticsearch -Des.http.cors.enabled=true -Dhttp.cors.allow-origin=*