Skip to content

Instantly share code, notes, and snippets.

@Podbrushkin
Created October 8, 2024 06:40
Show Gist options
  • Save Podbrushkin/a1bdc2f152f3a9f93fe8e47e6fde834e to your computer and use it in GitHub Desktop.
Save Podbrushkin/a1bdc2f152f3a9f93fe8e47e6fde834e to your computer and use it in GitHub Desktop.
Neo4j DB Anthologies

Лучшие образцы русской литературы (1849–1900): антологии избранной поэзии и прозы, литературные сборники и альманахи, сборники для легкого чтения, антологии для народа, антологии для женщин - Библиографические данные (pushdom.ru)

# Download the bibliographic table in its originally uploaded format and parse it
# into one object per row.
$objs = Invoke-RestMethod -Uri 'https://dataverse.pushdom.ru/api/access/datafile/4099?format=original' |
	ConvertFrom-Csv

# Import statement, executed once per row (UNWIND $objs):
#   - MERGE a Person keyed by author_std
#   - MERGE an Edition keyed by every listed bibliographic property
#   - MERGE a Work keyed by text_std, genre and first_publication, then set its comments
#   - MERGE the AUTHOR relationship between the person and the work
#   - CREATE one PUBLISHED_IN relationship per row, carrying the row's text-level fields
$cypher = @'
UNWIND $objs as obj 
MERGE (p:Person {
	name: obj.author_std
	})
MERGE (e:Edition {
	book_id: obj.book_id,
	year: obj.year,
	censorship: obj.censorship,
	title: obj.title,
	city: obj.city,
	publisher: obj.publisher,
	publishing_house: obj.publishing_house,
	pages: obj.pages,
	size: obj.size,
	illustrations: obj.illustrations,
	introduction: obj.introduction,
	volume: obj.volume
})
MERGE (w:Work {
	text_std: obj.text_std,
	genre: obj.genre,
	first_publication: obj.first_publication
})
SET w.comments = obj.comments
MERGE (p) -[:AUTHOR]-> (w)
CREATE (w) -[:PUBLISHED_IN {
	text_id: obj.text_id,
	text: obj.text,
	subtitle: obj.subtitle,
	author: obj.author
}]-> (e)
'@

# Send the statement with all rows bound to the $objs Cypher parameter.
Invoke-CypherNeo4j -cypher $cypher -params @{ objs = $objs }

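The same import without the Invoke-CypherNeo4j helper (reuses the $cypher statement defined above; $cred must hold the Neo4j credentials):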

# Download the table in Dataverse's default (tab-separated) form and parse it.
Invoke-RestMethod -Uri 'https://dataverse.pushdom.ru/api/access/datafile/4099' -OutFile anthoLit.tsv
$objs = Import-Csv -Delimiter "`t" -Path .\anthoLit.tsv

# Build the transaction payload: one statement ($cypher, the import statement defined
# earlier) with all rows bound to the $objs parameter.
$bodyJson = @{ statements = @( @{ statement = $cypher; parameters = @{ objs = $objs } } ) } |
	ConvertTo-Json -Depth 99

# The data is Cyrillic, so send the body as explicit UTF-8 bytes and declare the charset
# instead of relying on the web cmdlets' default request encoding.
$bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($bodyJson)

# $cred must already contain the Neo4j credentials (e.g. $cred = Get-Credential).
# The response is re-serialised with the built-in ConvertTo-Json so it can be read in full.
Invoke-RestMethod -Uri 'http://localhost:7474/db/neo4j/tx/commit' -Method Post `
	-ContentType 'application/json; charset=utf-8' -Body $bodyBytes `
	-Credential $cred -AllowUnencryptedAuthentication |
	ConvertTo-Json -Depth 99

Query

// Number of works per author.
MATCH (p)-[:AUTHOR]->(w)
RETURN p.name,COUNT(w)

// Top authors: distinct editions each author appears in, most published first.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
RETURN p.name AS author, COUNT(DISTINCT e) AS editions
ORDER BY editions DESC

// Top works: distinct editions each work appears in, most published first.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
RETURN p.name AS author,w.text_std,  COUNT(DISTINCT e) AS editions
ORDER BY editions DESC

// Same per-work edition count, with first publication and genre included.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
RETURN p.name AS author, w.text_std, w.first_publication, w.genre, COUNT(DISTINCT e) AS editions

// Number of works per first_publication value.
MATCH (w:Work) RETURN w.first_publication, COUNT(w)

// Per edition title: distinct authors, distinct works, and the list of (author, title) pairs.
// Rows are grouped by e.title, so editions sharing a title are aggregated together.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
RETURN e.title AS title, COUNT(DISTINCT p) as NumAuthors, COUNT(DISTINCT w) as NumWorks,
COLLECT(DISTINCT {author: p.name, title: w.text_std} ) AS works
# Show the per-title summary in a grid; for the rows selected there, open a second
# grid listing their `works` entries.
@'
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
RETURN e.title AS title, COUNT(DISTINCT p) as NumAuthors, COUNT(DISTINCT w) as NumWorks, COLLECT(DISTINCT {author: p.name, title: w.text_std}) AS works
'@ |
	ForEach-Object { Invoke-CypherNeo4j $_ } |
	Out-GridView -PassThru |
	ForEach-Object -MemberName works |
	Out-GridView



# Genres with the number of works in each.
$cypherGenre = @'
MATCH (w:Work)
RETURN w.genre AS genre, COUNT(w)
'@

# Works of a single genre (bound to the $genre Cypher parameter) with their edition counts.
$cypher = @'
MATCH (p)-[:AUTHOR]->(w {genre: $genre })-[:PUBLISHED_IN]->(e)
RETURN p.name AS author, w.text_std, w.first_publication, w.genre, COUNT(DISTINCT e) AS editions
'@

# Pick one or more genres in the first grid; the works of every picked genre are
# then listed in a second grid.
Invoke-CypherNeo4j -cypher $cypherGenre |
	Out-GridView -PassThru |
	ForEach-Object { Invoke-CypherNeo4j -cypher $cypher -params @{ genre = $_.genre } } |
	Out-GridView

More queries

// Query 1 - per edition title: number of distinct authors and distinct works.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
RETURN e.title AS title, COUNT(DISTINCT p) as NumAuthors, COUNT(DISTINCT w) as NumWorks
// Query 2 - the five authors with the most works.
MATCH (p)-[:AUTHOR]->(w)
RETURN p.name, COUNT(w) as NumWorks 
ORDER BY NumWorks DESC
LIMIT 5
// Query 3 - per author: up to five work titles (rows are pre-sorted by publication
// count, descending) and the author's total number of publications.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
WITH p, w, count(e) as pubsPerWork
ORDER BY p, pubsPerWork DESC
RETURN p.name, collect(w.text_std)[..5] as TopWorks, sum(pubsPerWork) AS pubsPerAuthor
ORDER BY pubsPerAuthor DESC
// Query 4 - same as query 3, but each title is rendered as "title (count)".
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
WITH p, w, count(e) as pubsPerWork
ORDER BY p, pubsPerWork DESC
RETURN p.name, collect(w.text_std+' ('+pubsPerWork+')')[..5] as TopWorks, sum(pubsPerWork) AS pubsPerAuthor
ORDER BY pubsPerAuthor DESC
// Query 5 - same as query 3, but each entry is a map {title, count}.
MATCH (p)-[:AUTHOR]->(w)-[:PUBLISHED_IN]->(e)
WITH p, w, count(e) as pubsPerWork
ORDER BY p, pubsPerWork DESC
RETURN p.name, collect({title: w.text_std, count:pubsPerWork})[..5] as TopWorks, sum(pubsPerWork) AS pubsPerAuthor
ORDER BY pubsPerAuthor DESC

Invoke-CypherNeo4j

function Invoke-CypherNeo4j {
	<#
	.SYNOPSIS
	Runs a Cypher statement against the local Neo4j HTTP transaction endpoint
	(http://localhost:7474/db/neo4j/tx/commit) and returns the result rows as objects.

	.PARAMETER cypher
	The Cypher statement text. Accepts pipeline input; every piped statement is
	sent as its own request.

	.PARAMETER params
	Optional hashtable of Cypher parameters, sent as the statement's "parameters".

	.PARAMETER raw
	Return the unparsed JSON response text instead of row objects.

	.OUTPUTS
	With -raw: the response body as a string.
	If the response reports errors: the contents of its "errors" array.
	Otherwise: one [pscustomobject] per result row, with one property per returned column.

	.NOTES
	Credentials are read from $cred; when it is not set, the user is prompted once
	and the answer is stored in $Global:cred for later calls.
	#>
	[CmdletBinding()]
	param(
		[Parameter(ValueFromPipeline)]$cypher, 
		$params,
		[switch]$raw = $false
	)

	# A process block is required so that every statement arriving through the
	# pipeline is executed, not just the last one.
	process {
		if ($null -eq $cred) { $Global:cred = Get-Credential }

		$bodyJson = @{ statements = @( @{ statement = $cypher; parameters = $params } ) } | ConvertTo-Json -Depth 99
		# Send explicit UTF-8 bytes and declare the charset so non-ASCII text in the
		# statement or its parameters does not depend on the cmdlet's default encoding.
		$bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($bodyJson)
		$respJson = Invoke-WebRequest http://localhost:7474/db/neo4j/tx/commit -Method Post -ContentType 'application/json; charset=utf-8' -Body $bodyBytes -Credential $cred -AllowUnencryptedAuthentication | ForEach-Object Content

		if ($raw) { return $respJson }

		$resp = $respJson | ConvertFrom-Json -Depth 99
		if ($resp.errors.Count -ne 0) { return $resp.errors }

		# Column names are the same for every row; resolve them once.
		$columns = [array]($resp.results.columns)
		$resp.results.data | ForEach-Object {
			# Pair each value of the row with its column name, preserving column order.
			$obj = [ordered]@{}
			for ($i = 0; $i -lt $_.row.count; $i++) {
				$obj[$columns.get($i)] = $_.row[$i]
			}
			[pscustomobject]$obj
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment