Skip to content

Instantly share code, notes, and snippets.

@denis-bz
Created June 21, 2022 16:01
Show Gist options
  • Save denis-bz/6a363b3f353f9cc3310b1be40710a22d to your computer and use it in GitHub Desktop.
Save denis-bz/6a363b3f353f9cc3310b1be40710a22d to your computer and use it in GitHub Desktop.
One-line queries of the SuiteSparse matrix collection 21 Jun 2022
#!/usr/bin/env python3
""" Usage:
python3 ssquery.py '(posdef == 1) & (1000 <= rows <= 20000)' posdef.csv
reads http://sparse.tamu.edu/files/ssstats.csv
queries it
writes "posdef.csv" like
# SuiteSparse (posdef == 1) & (1000 <= rows <= 20000)
id,group,name,rows,cols,nnz,real,bool,is2d3d,posdef,symm,nsymm,kind
29156,Boeing,msc01050,1050,1050,26198,1,0,1,1,1.0,1.0,structural
...
554466,JGD_Trefethen,Trefethen_20000,20000,20000,554466,1,0,0,1,1.0,1.0,combinatorial
Then look at the query csv with your favorite tools
(awk grep sort editor spreadsheet),
download matrices with e.g. https://github.com/drdarshan/ssgetpy
More examples:
'(symm == 1) & (posdef == 0)' tmp.csv (tmp.csv is the default output file)
'(symm == 1) & (nnz / rows <= 4)'
Note that ids are not unique: 48 50 76 149 238 306 ...
"""
# Keywords: sparse-matrix SuiteSparse MatrixMarket python pandas pandas-query
# why ? flexible queries with python pandas
# wibni: a column "year"
# wibni: a csv / wiki of eigval info -- emin, emax, enear0
import os
import pandas as pd # https://pandas.pydata.org/docs/user_guide
#...............................................................................
# Default input of load_ssinfo: the SuiteSparse statistics csv.
url = "http://sparse.tamu.edu/files/ssstats.csv"
# Cache used by ssquery: None until its first call stores the DataFrame
# returned by load_ssinfo() here.
_ssinfo = None # load_ssinfo -> this pandas DataFrame -> ssquery
def ssquery( q = "(posdef == 1) & (1000 <= rows <= 20000)" ) -> pd.DataFrame:
    """ Run a pandas query string on the SuiteSparse statistics table.

    The table is read with load_ssinfo() on the first call only, then kept
    in the module-level _ssinfo and reused.
    q is passed unchanged to DataFrame.query, see
    https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
    Returns the matching rows as a DataFrame.
    """
    global _ssinfo  # a class would do too, but is more trouble than it's worth
    table = _ssinfo
    if table is None:
        table = _ssinfo = load_ssinfo()
    return table.query( q )
def load_ssinfo( csvin=url, **keywords ) -> pd.DataFrame:
    """ Read the SuiteSparse statistics csv into a cleaned-up DataFrame.

    csvin: url or path of the csv, by default the module-level `url`;
        _url_localfile() substitutes a local file of the same name if any.
    **keywords: passed to pandas.read_csv; they override the settings below.
    Returns a DataFrame indexed by "id", with
        square matrices only (rows == cols),
        rows whose kind contains "duplicate" dropped,
        kind names shortened and blanks replaced by "-",
        sorted by rows (stable sort).
    """
    # The docstring above is a plain literal on purpose:
    # an expression such as `"... %s" % url` is not stored as __doc__.
    csvin = _url_localfile( csvin )
    read_args = dict(
        names=["group", "name", "rows", "cols", "nnz", "real", "bool",
               "is2d3d", "posdef", "symm", "nsymm", "kind", "id"],
        index_col="id",
        skiprows=2,  # two leading non-data lines, e.g. 2893 / 08-Oct-2020 17:09:58
        sep=",",  # whitespace: "\\s+"
        comment="#",
    )
    read_args.update( keywords )  # caller wins, instead of a duplicate-keyword TypeError
    df = pd.read_csv( csvin, **read_args )

    square = df["rows"] == df["cols"]
    # na=False: a missing kind counts as "not a duplicate" and keeps the mask boolean
    duplicate = df["kind"].str.contains( "duplicate", regex=False, na=False )
    # .copy(): own the filtered data, so the assignment below cannot raise
    # SettingWithCopyWarning
    df = df.loc[ square & ~duplicate ].copy()

    kind = df["kind"]
    # literal replacements, in this order; regex=False pins the meaning
    # across pandas versions
    for old, new in (
        (" problem", ""),
        ("computational fluid dynamics", "cfd"),
        (" ", "-"),
    ):
        kind = kind.str.replace( old, new, regex=False )
    df["kind"] = kind
    return df.sort_values( "rows", kind="stable" )
def write_csv( csvout: str, df: pd.DataFrame, header="", **keywords ):
    """ df.to_csv with "# header" comment lines in front.

    csvout: path of the csv file, created or overwritten.
    df: the DataFrame to write.
    header: free text; each of its lines is written as "# line " before the
        csv proper, so that pd.read_csv( ..., comment="#" ) skips it.
        Empty: no comment lines.
    **keywords: passed on to df.to_csv.
    Prints a one-line summary, returns None.
    """
    nr, nc = df.shape
    print( "\nwrite_csv %s: %d rows x %d columns" % (
        csvout, nr, nc ))
    # newline="": the to_csv docs ask for this when passing a file object,
    # else rows end in \r\r\n on Windows.
    # utf-8: what pd.read_csv expects by default, whatever the locale.
    with open( csvout, "w", newline="", encoding="utf-8" ) as f:
        if header:  # good csvs should have headers
            # one "# " per line, so a multi-line header stays a comment;
            # os.linesep matches the default line ending of to_csv
            for line in str( header ).splitlines():
                f.write( "# %s %s" % (line, os.linesep) )
        df.to_csv( f, **keywords )
def _url_localfile( url: str ) -> str:
    """ Prefer a local file over a download: tail if isfile else url.

    Returns
        url itself, if it names an existing file;
        else its tail, the part after the last path separator,
        if a file of that name exists in the current directory;
        else url unchanged.
    """
    if os.path.isfile( url ):
        return url
    tail = os.path.split( url )[1]  # after last /
    # idea: look in more places, findfile( tail, dirs=". data $data ~/.data" )
    return tail if os.path.isfile( tail ) else url
# Version stamp of this script: a date, then what looks like the author's
# contact -- NOTE(review): confirm before relying on the format.
__version__ = "2022-06-21 June denis-bz-py t-online.de"
#...............................................................................
if __name__ == "__main__":  # python ssquery.py 'query' [out.csv]
    import sys

    argv = sys.argv[1:]

    # -h, --help or any abbreviation starting with --h: print the usage and quit
    if argv and argv[0].startswith( ("-h", "--h") ):
        print( __doc__ )
        sys.exit()

    # positional args, each with a default --
    query = argv[0] if len( argv ) >= 1 \
        else "(posdef == 1) & (1000 <= rows <= 20000)"
        # "posdef" alone ??
    csvout = argv[1] if len( argv ) >= 2 \
        else "tmp.csv"

    #...........................................................................
    querydf = ssquery( query )
    print( "\n-- query '%s' \n%s \n" % (query, querydf) )

    if csvout:  # an empty name, '', means print only
        write_csv( csvout, querydf, header="SuiteSparse " + query )
        # to read it back:
        # pd.read_csv( csvout, index_col="id", sep=",", comment="#" )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment