FrankSpierings · March 11, 2020 14:25
diff --git a/sps-grab-SPSPeople-info.py b/sps-grab-SPSPeople-info.py
 import requests
 import json


 # Page through the search results of the postquery endpoint and dump every
 # page of rows as a JSON file in /tmp/.
 url = 'https://server/_api/search/postquery'

 # X-RequestDigest required for Anti XSRF.
 # NOTE(review): the value below embeds a timestamp, so it is presumably a
 # captured, time-limited digest - replace it with a current one before running.
 headers = {
 	'X-RequestDigest':'0x4CF763CCDD37BC34BCEBC236D8C3A14D2B0852792487F30E57C9C46F668D31D461ED7C61FBA3D52DA19ACC6F797EED95B243A414D7A1BA6C736BDBD50A0D0A83,09 Mar 2020 13:32:05 -0000',
 	'Content-Type': 'application/json;odata=verbose',
 	'Accept': 'application/json;odata=verbose',
 }

 # Arguments: SelectProperties result from sps-properties.py
 args = {
 	"request": {
 		"Querytext": "-ContentClass=urn:content-class:SPSPeople",
 		"SelectProperties": {
 			"results": ["Path", "Other"]
 		},
 		"RowLimit": 500,
 		"RowsPerPage": 500,
 		"StartRow": 0,
 		"TrimDuplicates": True,
 		"SourceId": "5dc9f503-801e-4ced-8a2c-5d1237132419"
 	}
 }

 rowrequest = 0    # StartRow of the page currently being fetched
 totalrows = None  # We don't know the total amount until the first response
 failures = 0      # consecutive non-200 responses for the current page
 max_failures = 5  # give up instead of re-posting a failing request forever

 while True:
 	print('{0}/{1}'.format(rowrequest, totalrows))
 	args['request']['StartRow'] = rowrequest
 	r = requests.post(url, headers=headers, json=args)

 	if r.status_code != 200:
 		# Retry the same page, but only a bounded number of times.
 		failures += 1
 		if failures >= max_failures:
 			raise SystemExit('Giving up after {0} failed requests (last HTTP status {1})'.format(failures, r.status_code))
 		continue
 	failures = 0

 	data = r.json()
 	relevant = data['d']['postquery']['PrimaryQueryResult']['RelevantResults']

 	# How many rows exist in total, and how many did this page return
 	totalrows = relevant['TotalRows']
 	rowcount = relevant['RowCount']

 	# Convert the Key/Value cell lists into plain dicts. Leave out empty properties.
 	output = [
 		{cell['Key']: cell['Value'] for cell in item['Cells']['results'] if cell['Value'] is not None and cell['Value'] != ''}
 		for item in relevant['Table']['Rows']['results']
 	]

 	# Write the output (per-page) to the /tmp/ directory; the with-block closes the file.
 	with open('/tmp/SPSPeople-{0:08d}.json'.format(rowrequest), 'w') as fh:
 		json.dump(output, fh, indent=4)

 	# Stop on the last page, or on an empty page (otherwise StartRow would
 	# never advance and the loop would not terminate).
 	if rowcount == 0 or (rowrequest + rowcount) >= totalrows:
 		break
 	rowrequest += rowcount
diff --git a/sps-properties.py b/sps-properties.py
 # https://social.technet.microsoft.com/wiki/contents/articles/51949.sharepoint-2013-search-query-apis.aspx
 # https://web.archive.org/web/20140205043809/http://blogs.technet.com/b/searchguys/archive/2013/12/11/how-to-all-managed-properties-of-a-document.aspx
 import requests

 # For every content class, query each SourceId with the managedproperties
 # refiner and collect the refinement names that come back.
 url = "https://server/_api/search/query"

 # SourceId values to query. A list literal avoids the stray per-line
 # whitespace that splitting a multi-line string on '\n' leaves on the entries.
 sourceids = [
 	'e7ec8cee-ded8-43c9-beb5-436b54b31e84',
 	'5dc9f503-801e-4ced-8a2c-5d1237132419',
 	'e1327b9c-2b8c-4b23-99c9-3730cb29c3f7',
 	'48fec42e-4a92-48ce-8363-c2703a40e67d',
 	'5c069288-1d17-454a-8ac6-9c642a065f48',
 	'b09a7990-05ea-4af9-81ef-edfab16c4e31',
 	'203fba36-2763-4060-9931-911ac8c0583b',
 	'8413cd39-2156-4e00-b54d-11efd9abdb89',
 	'78b793ce-7956-4669-aa3b-451fc5defebf',
 	'5e34578e-4d08-4edc-8bf3-002acf3cdbcc',
 	'38403c8c-3975-41a8-826e-717f2d41568a',
 	'97c71db1-58ce-4891-8b64-585bc2326c12',
 	'ba63bbae-fa9c-42c0-b027-9a878f16557c',
 	'ec675252-14fa-4fbe-84dd-8d098ed74181',
 	'9479bf85-e257-4318-b5a8-81a180f5faa1',
 ]

 # ContentClass values to query.
 contentclasses = [
 	'urn:content-class:SPSSearchQuery',
 	'urn:content-class:SPSListing:News',
 	'urn:content-class:SPSPeople',
 	'urn:content-classes:SPSCategory',
 	'urn:content-classes:SPSListing',
 	'urn:content-classes:SPSPersonListing',
 	'urn:content-classes:SPSTextListing',
 	'urn:content-classes:SPSSiteListing',
 	'urn:content-classes:SPSSiteRegistry',
 ]

 # Querytext and SourceId are overwritten inside the loops below.
 params = {
 	"Querytext" : "'ContentClass=urn:content-class:SPSPeople'",
 	"SelectProperties" : "'Path'",
 	"SourceId" : "'5dc9f503-801e-4ced-8a2c-5d1237132419'",
 	"refiners" : "'managedproperties(filter=5000/0/*)'",
 }

 headers = {'Accept': 'application/json'}


 # Maps content class -> set of refinement names found for it.
 contentproperties = {}
 for contentclass in contentclasses:
 	params['Querytext'] = "'ContentClass={0}'".format(contentclass)
 	properties = set()
 	for sourceid in sourceids:
 		# Select the source BEFORE sending the request, so that every id in
 		# the list (including the last one) is actually queried.
 		params['SourceId'] = "'{0}'".format(sourceid)
 		r = requests.get(url, params=params, headers=headers)
 		if r.status_code != 200:
 			# Skip sources the server refuses instead of parsing an error body.
 			continue
 		data = r.json()

 		refinementresults = (data.get('PrimaryQueryResult') or {}).get('RefinementResults')
 		if not refinementresults:
 			continue
 		refiners = refinementresults.get('Refiners') or []
 		if not refiners:
 			continue
 		for refinement in refiners[0]['Entries']:
 			properties.add(refinement['RefinementName'])
 	if properties:
 		contentproperties[contentclass] = properties
	import requests
	import json


	# Page through the search results of the postquery endpoint and dump every
	# page of rows as a JSON file in /tmp/.
	url = 'https://server/_api/search/postquery'

	# X-RequestDigest required for Anti XSRF.
	# NOTE(review): the value below embeds a timestamp, so it is presumably a
	# captured, time-limited digest - replace it with a current one before running.
	headers = {
		'X-RequestDigest':'0x4CF763CCDD37BC34BCEBC236D8C3A14D2B0852792487F30E57C9C46F668D31D461ED7C61FBA3D52DA19ACC6F797EED95B243A414D7A1BA6C736BDBD50A0D0A83,09 Mar 2020 13:32:05 -0000',
		'Content-Type': 'application/json;odata=verbose',
		'Accept': 'application/json;odata=verbose',
	}

	# Arguments: SelectProperties result from get-properties.py
	args = {
		"request": {
			"Querytext": "-ContentClass=urn:content-class:SPSPeople",
			"SelectProperties": {
				"results": ["Path", "Other"]
			},
			"RowLimit": 500,
			"RowsPerPage": 500,
			"StartRow": 0,
			"TrimDuplicates": True,
			"SourceId": "5dc9f503-801e-4ced-8a2c-5d1237132419"
		}
	}

	rowrequest = 0    # StartRow of the page currently being fetched
	totalrows = None  # We don't know the total amount until the first response
	failures = 0      # consecutive non-200 responses for the current page
	max_failures = 5  # give up instead of re-posting a failing request forever

	while True:
		print('{0}/{1}'.format(rowrequest, totalrows))
		args['request']['StartRow'] = rowrequest
		r = requests.post(url, headers=headers, json=args)

		if r.status_code != 200:
			# Retry the same page, but only a bounded number of times.
			failures += 1
			if failures >= max_failures:
				raise SystemExit('Giving up after {0} failed requests (last HTTP status {1})'.format(failures, r.status_code))
			continue
		failures = 0

		data = r.json()
		relevant = data['d']['postquery']['PrimaryQueryResult']['RelevantResults']

		# How many rows exist in total, and how many did this page return
		totalrows = relevant['TotalRows']
		rowcount = relevant['RowCount']

		# Convert the Key/Value cell lists into plain dicts. Leave out empty properties.
		output = [
			{cell['Key']: cell['Value'] for cell in item['Cells']['results'] if cell['Value'] is not None and cell['Value'] != ''}
			for item in relevant['Table']['Rows']['results']
		]

		# Write the output (per-page) to the /tmp/ directory; the with-block closes the file.
		with open('/tmp/SPSPeople-{0:08d}.json'.format(rowrequest), 'w') as fh:
			json.dump(output, fh, indent=4)

		# Stop on the last page, or on an empty page (otherwise StartRow would
		# never advance and the loop would not terminate).
		if rowcount == 0 or (rowrequest + rowcount) >= totalrows:
			break
		rowrequest += rowcount
	# https://social.technet.microsoft.com/wiki/contents/articles/51949.sharepoint-2013-search-query-apis.aspx
	# https://web.archive.org/web/20140205043809/http://blogs.technet.com/b/searchguys/archive/2013/12/11/how-to-all-managed-properties-of-a-document.aspx
	import requests

	# For every content class, query each SourceId with the managedproperties
	# refiner and collect the refinement names that come back.
	url = "https://server/_api/search/query"

	# SourceId values to query. A list literal avoids the stray per-line
	# whitespace that splitting a multi-line string on '\n' leaves on the entries.
	sourceids = [
		'e7ec8cee-ded8-43c9-beb5-436b54b31e84',
		'5dc9f503-801e-4ced-8a2c-5d1237132419',
		'e1327b9c-2b8c-4b23-99c9-3730cb29c3f7',
		'48fec42e-4a92-48ce-8363-c2703a40e67d',
		'5c069288-1d17-454a-8ac6-9c642a065f48',
		'b09a7990-05ea-4af9-81ef-edfab16c4e31',
		'203fba36-2763-4060-9931-911ac8c0583b',
		'8413cd39-2156-4e00-b54d-11efd9abdb89',
		'78b793ce-7956-4669-aa3b-451fc5defebf',
		'5e34578e-4d08-4edc-8bf3-002acf3cdbcc',
		'38403c8c-3975-41a8-826e-717f2d41568a',
		'97c71db1-58ce-4891-8b64-585bc2326c12',
		'ba63bbae-fa9c-42c0-b027-9a878f16557c',
		'ec675252-14fa-4fbe-84dd-8d098ed74181',
		'9479bf85-e257-4318-b5a8-81a180f5faa1',
	]

	# ContentClass values to query.
	contentclasses = [
		'urn:content-class:SPSSearchQuery',
		'urn:content-class:SPSListing:News',
		'urn:content-class:SPSPeople',
		'urn:content-classes:SPSCategory',
		'urn:content-classes:SPSListing',
		'urn:content-classes:SPSPersonListing',
		'urn:content-classes:SPSTextListing',
		'urn:content-classes:SPSSiteListing',
		'urn:content-classes:SPSSiteRegistry',
	]

	# Querytext and SourceId are overwritten inside the loops below.
	params = {
		"Querytext" : "'ContentClass=urn:content-class:SPSPeople'",
		"SelectProperties" : "'Path'",
		"SourceId" : "'5dc9f503-801e-4ced-8a2c-5d1237132419'",
		"refiners" : "'managedproperties(filter=5000/0/*)'",
	}

	headers = {'Accept': 'application/json'}


	# Maps content class -> set of refinement names found for it.
	contentproperties = {}
	for contentclass in contentclasses:
		params['Querytext'] = "'ContentClass={0}'".format(contentclass)
		properties = set()
		for sourceid in sourceids:
			# Select the source BEFORE sending the request, so that every id in
			# the list (including the last one) is actually queried.
			params['SourceId'] = "'{0}'".format(sourceid)
			r = requests.get(url, params=params, headers=headers)
			if r.status_code != 200:
				# Skip sources the server refuses instead of parsing an error body.
				continue
			data = r.json()

			refinementresults = (data.get('PrimaryQueryResult') or {}).get('RefinementResults')
			if not refinementresults:
				continue
			refiners = refinementresults.get('Refiners') or []
			if not refiners:
				continue
			for refinement in refiners[0]['Entries']:
				properties.add(refinement['RefinementName'])
		if properties:
			contentproperties[contentclass] = properties