This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import igraph | |
clusters_const = 2 | |
graph = igraph.read("2_big_graph_clusters_ncol.txt", format="ncol", directed=False, names=True) | |
clusters = graph.community_edge_betweenness(clusters=2, directed=False) | |
splitter = clusters.as_clustering(clusters_const).membership | |
vs = igraph.VertexSeq(graph) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ru.stachek66.mwe.ml.spark; | |
import org.apache.spark.SparkConf; | |
import org.apache.spark.SparkContext; | |
import org.apache.spark.api.java.JavaPairRDD; | |
import org.apache.spark.api.java.JavaRDD; | |
import org.apache.spark.mllib.classification.NaiveBayes; | |
import org.apache.spark.mllib.classification.NaiveBayesModel; | |
import org.apache.spark.mllib.feature.HashingTF; | |
import org.apache.spark.mllib.feature.IDF; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import collections | |
import math | |
map = {i: [] for i in xrange(50000)} | |
for file in os.listdir("/home/alexeyev/projects/bee/"): | |
# files filter | |
if file.startswith("sol_0.7"): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
"""" | |
Скрипт для извлечения имён людей из коллекции текстов о музеях | |
""" | |
import os | |
from docx import Document | |
from natasha import Combinator | |
from natasha.grammars import Person |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib | |
text_one = open("1.txt", "r").read() | |
text_two = open("2.txt", "r").read() | |
sm = difflib.SequenceMatcher(isjunk=None, a=text_one, b=text_two) | |
mbs = sm.get_matching_blocks() | |
for mb in mbs: | |
if mb.size > 10: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
""" | |
The MIT License (MIT) | |
Copyright (c) 2020 Anton Alekseev | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import autosklearn | |
# from autosklearn.classification import AutoSklearnClassifier | |
# # print("Available CLASSIFICATION metrics autosklearn.metrics.*:") | |
# # print("\t*" + "\n\t*".join(autosklearn.metrics.CLASSIFICATION_METRICS)) | |
# print("Y labels:", set(y)) | |
# | |
# automl = autosklearn.classification.AutoSklearnClassifier( | |
# n_jobs=3, | |
# ensemble_size=1, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
i=1 | |
END=100000 | |
mkdir export | |
while [ $i -le $END ]; do | |
echo "Dealing with company #"$i | |
mkdir export/$i |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
We do not recommend using this script for any purposes other than learning to use Selenium; | |
for batched machine translation via Google Translate using 'document' translation feature | |
is arguably the most suitable. For regular translations one should use the Cloud API. | |
""" | |
import time | |
from selenium.common.exceptions import TimeoutException |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from difflib import SequenceMatcher | |
t0 = open("text0.txt", "r+").read().strip().replace("\n", " ").replace(" ", " ") | |
t1 = open("text1.txt", "r+").read().strip().replace("\n", " ").replace(" ", " ") | |
matcher = SequenceMatcher(a=t0, b=t1) | |
ratio = matcher.ratio() | |
mbs = matcher.get_matching_blocks() |
OlderNewer