Created
June 21, 2022 16:01
-
-
Save denis-bz/6a363b3f353f9cc3310b1be40710a22d to your computer and use it in GitHub Desktop.
One-line queries of the SuiteSparse matrix collection 21 Jun 2022
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
""" Usage:
    python3 ssquery.py '(posdef == 1) & (1000 <= rows <= 20000)' posdef.csv
reads http://sparse.tamu.edu/files/ssstats.csv
queries it
writes "posdef.csv" like
    # SuiteSparse (posdef == 1) & (1000 <= rows <= 20000)
    id,group,name,rows,cols,nnz,real,bool,is2d3d,posdef,symm,nsymm,kind
    29156,Boeing,msc01050,1050,1050,26198,1,0,1,1,1.0,1.0,structural
    ...
    554466,JGD_Trefethen,Trefethen_20000,20000,20000,554466,1,0,0,1,1.0,1.0,combinatorial

Then look at the query csv with your favorite tools
    (awk grep sort editor spreadsheet),
download matrices with e.g. https://github.com/drdarshan/ssgetpy

More examples:
    '(symm == 1) & (posdef == 0)' tmp.csv    (tmp.csv is the default output file)
    '(symm == 1) & (nnz / rows <= 4)'

Note that ids are not unique: 48 50 76 149 238 306 ...
"""
# Keywords: sparse-matrix SuiteSparse MatrixMarket python pandas pandas-query
# why ? flexible queries with python pandas
# wibni (wouldn't it be nice if): a column "year"
# wibni: a csv / wiki of eigval info -- emin, emax, enear0
import os

import pandas as pd  # https://pandas.pydata.org/docs/user_guide
#...............................................................................
# Default data source: the SuiteSparse statistics csv read by load_ssinfo().
url = "http://sparse.tamu.edu/files/ssstats.csv"

# Module-level cache: None until ssquery() first calls load_ssinfo(),
# afterwards the loaded pandas DataFrame that every ssquery() call queries.
_ssinfo = None  # load_ssinfo -> this pandas DataFrame -> ssquery
def ssquery(q: str = "(posdef == 1) & (1000 <= rows <= 20000)") -> pd.DataFrame:
    """Return the rows of the SuiteSparse info table that satisfy query `q`.

    The table is loaded with load_ssinfo() on the first call and kept in the
    module-level `_ssinfo`, so later calls reuse it without reading again.

    Parameters:
        q: a pandas query expression over the table's columns, see
           https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html

    Returns:
        the DataFrame produced by `_ssinfo.query( q )`.

    NOTE(review): `q` is handed to DataFrame.query as-is; pass only query
    strings you trust.
    """
    global _ssinfo  # class ? mttiw
    if _ssinfo is None:
        _ssinfo = load_ssinfo()  # lazy, one-time load
    return _ssinfo.query(q)
def load_ssinfo(csvin=url, **keywords) -> pd.DataFrame:
    """Read http://sparse.tamu.edu/files/ssstats.csv or a local .csv into a DataFrame.

    `csvin` is first passed through _url_localfile(), so a local copy of the
    file is read instead of the url when one is found.

    After reading, the table is
      - restricted to square matrices (rows == cols),
      - stripped of rows whose `kind` contains "duplicate",
      - given shortened `kind` labels (" problem" removed,
        "computational fluid dynamics" -> "cfd", blanks -> "-"),
      - stably sorted by `rows`.

    Parameters:
        csvin: url or path of the csv; defaults to the module-level `url`.
        **keywords: extra options for pandas.read_csv; they take precedence
            over the defaults set below (names, index_col, skiprows, sep,
            comment).  Overriding `names` / `index_col` must still leave the
            columns "rows", "cols" and "kind" for the steps above.

    Returns:
        the cleaned DataFrame, indexed by the column named "id".

    NOTE(review): the last csv column is labelled "id" here, but the module
    docstring says these values are not unique and its sample row shows
    id == nnz, so it may be an entry count rather than an identifier --
    confirm against the ssstats.csv format before relying on it as a key.
    """
    csvin = _url_localfile(csvin)
    options = dict(
        names="group name rows cols nnz real bool is2d3d posdef symm nsymm kind id ".split(),
        index_col="id",
        # skip the two leading non-data lines, e.g. "2893" and
        # "08-Oct-2020 17:09:58" (presumably a count and a timestamp)
        skiprows=2,
        sep=",",  # whitespace: "\\s+"
        comment="#",
    )
    options.update(keywords)  # caller-supplied options win over the defaults
    df = pd.read_csv(csvin, **options)

    df = df[df["rows"] == df["cols"]]  # square only
    # na=False: a missing kind counts as "not a duplicate" instead of
    # poisoning the boolean mask
    is_duplicate = df["kind"].str.contains("duplicate", regex=False, na=False)
    df = df[~is_duplicate]  # drop

    # literal (regex=False) replacements, independent of the pandas default
    short_kind = (df["kind"].str.replace(" problem", "", regex=False)
                  .str.replace("computational fluid dynamics", "cfd", regex=False)
                  .str.replace(" ", "-", regex=False))
    # assign() builds a new frame, so no write into a filtered view
    df = df.assign(kind=short_kind)

    df = df.sort_values("rows", kind="stable")
    # print( "read %s %s \n" % ( csvin, df ))
    return df
def write_csv(csvout: str, df: pd.DataFrame, header="", **keywords):
    """Write `df` to the file `csvout` with df.to_csv, after an optional "#" header.

    Prints a one-line summary (file name, number of rows and columns) first.

    Parameters:
        csvout: path of the csv file to (over)write.
        df: the DataFrame to write.
        header: if non-empty, each of its lines is written before the csv
            data as "# <line> ", so readers using comment="#" skip it.
        **keywords: passed on to df.to_csv.

    Returns:
        None.
    """
    nr, nc = df.shape
    print("\nwrite_csv %s: %d rows x %d columns" % (
        csvout, nr, nc))
    # newline="": pandas asks for file objects given to to_csv to be opened
    # this way, so that line endings are not translated a second time.
    with open(csvout, "w", newline="") as f:
        if header:
            # good csvs should have headers
            # + date pwd: pandasutil.py
            # one "# " per line, so a multi-line header stays a comment
            for line in str(header).splitlines():
                f.write("# %s \n" % line)
        df.to_csv(f, **keywords)
def _url_localfile( url ) -> "tail if isfile else url": | |
if os.path.isfile( url ): | |
return url | |
tail = os.path.split( url )[1] # after last / | |
# findfile( tail, dirs=". data $data ~/.data" ) | |
return tail if os.path.isfile( tail ) \ | |
else url | |
__version__ = "2022-06-21 June denis-bz-py t-online.de" | |
#............................................................................... | |
if __name__ == "__main__":  # python ssquery.py 'query' runs the following --
    # Command line:  ssquery.py [query [csvout]]
    #   -h / --h...  print the module docstring and exit
    #   query        pandas query string, default below
    #   csvout       output csv file, default "tmp.csv"; "" writes no file
    import sys

    # default args --
    query = "(posdef == 1) & (1000 <= rows <= 20000)"
        # "posdef" alone ??
    csvout = "tmp.csv"

    args = sys.argv[1:]
    if args:
        if args[0].startswith(("-h", "--h")):
            print(__doc__)
            sys.exit()
        query = args.pop(0)
    if args:  # a second argument, if any, is the output file
        csvout = args.pop(0)

    #...........................................................................
    querydf = ssquery(query)
    print("\n-- query '%s' \n%s \n" % (
        query, querydf))
    if csvout:
        write_csv(csvout, querydf, header="SuiteSparse " + query)
        # to read it back:
        # test = pd.read_csv( csvout, index_col="id", sep=",", comment="#" )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment