Skip to content

Instantly share code, notes, and snippets.

View maria-aguilera's full-sized avatar

Maria Aguilera García maria-aguilera

View GitHub Profile
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
@maria-aguilera
maria-aguilera / xml-to-dict.py
Created July 31, 2024 10:36 — forked from audy/xml-to-dict.py
Convert nested XML data into dictionaries
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
from pprint import pprint
import code
tree = ET.parse('biosample_result.xml')
def expand_blob(blob, attributes={}):
'''
import xml.etree.ElementTree as ET
import json
def parse_element(element, namespace, indent=""):
result = {}
if element.attrib:
result['attributes'] = element.attrib
if element.text and element.text.strip():
@maria-aguilera
maria-aguilera / xmlparser.py
Created July 31, 2024 10:25 — forked from nanvel/xmlparser.py
Lightweight xml parser
from xml import sax
class XMLParser(sax.handler.ContentHandler):
def __init__(self, keys=[], lists=[], *args, **kwargs):
"""
:param keys: list of data keys that have to be available in data
:param lists: list of nodes that have to be represented as lists
// XPath CheatSheet
// To test XPath in your Chrome Debugger: $x('/html/body')
// http://www.jittuu.com/2012/2/14/Testing-XPath-In-Chrome/
// 0. XPath Examples.
// More: http://xpath.alephzarro.com/content/cheatsheet.html
'//hr[@class="edge" and position()=1]' // every first hr of 'edge' class
@maria-aguilera
maria-aguilera / example.py
Created July 31, 2024 09:04 — forked from Averroes/example.py
parsing simple xml data
from urllib.request import urlopen
from xml.etree.ElementTree import parse
# Download the RSS feed and parse it
u = urlopen('http://planet.python.org/rss20.xml')
doc = parse(u)
# Extract and output tags of interest
for item in doc.iterfind('channel/item'):
title = item.findtext('title')
@maria-aguilera
maria-aguilera / process_wikipedia.py
Created January 4, 2023 22:19 — forked from snakers4/process_wikipedia.py
Post-process Wikipedia files produced by wikiextractor
import os
import re
import sys
import glob
import nltk
import gensim
import numpy as np
import pandas as pd
from tqdm import tqdm
from uuid import uuid4
import matplotlib
import seaborn as sns
import pandas as pd
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import DBSCAN
from matplotlib import cm
from datetime import datetime
from dateutil import parser
import json
from sklearn.datasets import load_wine
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.DataFrame(load_wine()["data"],columns=load_wine()["feature_names"])
data.head()
@maria-aguilera
maria-aguilera / mpl_pandas_plot_tools.py
Created November 16, 2022 16:02 — forked from vignesh-saptarishi/mpl_pandas_plot_tools.py
Utility functions for visualization using pandas dataframes and matplotlib
import numpy
import pandas
import matplotlib.pyplot as plt
import seaborn as sns
from ggplot import *
plt.style.use('ggplot')
def get_histogram_xy(data, bins=10):
"""Returns x,y coordinates for Histogram data.