This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
###################################################### | |
# | |
# Ce script récupère une liste de noms et vérifie | |
# d'abord s'ils existent dans Wikipedia.fr, puis | |
# dans Wikipedia.nl | |
# | |
###################################################### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import json | |
with open("test.json", "r") as infile: | |
data = json.load(infile) | |
def transform_to_addcolumn(data): | |
data_trans = dict(data) | |
data_trans["op"] = "core/column-addition" | |
data_trans["expression"] = ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import json | |
import sys | |
with open(sys.argv[1], "r") as infile: | |
data = json.load(infile) | |
outfile = open(sys.argv[1]+".txt", 'w') | |
count = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(data.table) | |
library(stringr) | |
#dossier contenant les fichiers | |
setwd("C:/Users/ettor/Desktop/Eurovoc Topicmodeling/presidencies") | |
#on merge les trois | |
files <- list.files(path = getwd(), | |
pattern = ".txt") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Get key collision fingerprints | |
#' | |
#' Given a character vector as input, get the key collision fingerprint for | |
#' each element. | |
#' | |
#' Operations in order: | |
#' | |
#'-remove leading and trailing whitespace | |
#'-change all characters to their lowercase representation | |
#'-remove all punctuation and control characters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML) | |
library(dplyr) | |
library(stringr) | |
library(readr) | |
library(readxl) | |
library(tidyr) | |
#liste des fichiers XML du corpus JRC Acquis version anglaise (http://optima.jrc.it/Acquis/JRC-Acquis.3.0/corpus/jrc-en.tgz) | |
liste <- | |
list.files( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def call_counter(func):
    """Decorate ``func`` so that the number of calls made to it is recorded.

    The returned wrapper forwards every positional and keyword argument to
    ``func`` and returns its result unchanged. The running total is exposed
    as the ``calls`` attribute of the wrapper and starts at 0.

    The wrapper also carries over the metadata of ``func`` (name, docstring,
    module, qualified name) so that introspection and debugging output refer
    to the decorated function rather than to the internal wrapper.
    """
    # Local import: the file-level import section is not visible from here.
    import functools

    @functools.wraps(func)
    def helper(*args, **kwargs):
        helper.calls += 1
        return func(*args, **kwargs)

    # Initialise after wraps(): wraps() merges func.__dict__ into the wrapper,
    # which would otherwise carry over a stale ``calls`` value when ``func``
    # is itself an already-counted function.
    helper.calls = 0
    return helper
memo = {} | |
@call_counter | |
def levenshtein(s, t): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
sys.path.append(r'D:\jython2.7.0\Lib\site-packages') | |
from pattern.fr import parsetree | |
sentences = parsetree(value, relations=True, lemmata=True) | |
liste = [] | |
for s in sentences: | |
for chunk in s.chunks: | |
for w in chunk.words: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
sys.path.append(r'D:\jython2.7.0\Lib\site-packages') | |
from SPARQLWrapper import SPARQLWrapper, JSON | |
from langdetect import detect | |
dbpedia_version = "http://dbpedia.org/sparql" | |
#TEST | |
value = "comptoir" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from unidecode import unidecode | |
with open(r"C:\Users\Boulot\Desktop\prenoms.txt", 'r') as f: | |
prenoms = [name.strip().lower() for name in f] | |
CHARS = "abcdefghijklmnopqrstuvwxyzéèàçüûùABCDEFGHIJKLMNOPQRSTUVWXYZ- " | |
family_joint = ["d'", "de", "du", "der", "den", "vander", "vanden", "van", "le"] | |
#TEST |
OlderNewer