Skip to content

Instantly share code, notes, and snippets.

@radityopw
Created May 27, 2024 10:21
Show Gist options
  • Save radityopw/7db9a8f9a5d35d5bda15c6058e3e8749 to your computer and use it in GitHub Desktop.
Save radityopw/7db9a8f9a5d35d5bda15c6058e3e8749 to your computer and use it in GitHub Desktop.
import stackoverflow xml files to postgresql
CREATE TABLE data_load (
data text
);
CREATE TABLE users(
id int PRIMARY KEY,
reputation int,
CreationDate text,
DisplayName text,
LastAccessDate timestamp,
Location text,
AboutMe text,
views int,
UpVotes int,
DownVotes int,
AccountId int
);
CREATE TABLE posts (
id int PRIMARY KEY,
PostTypeId int,
CreationDate timestamp,
score int,
viewcount int,
body text,
OwnerUserId int,
LastActivityDate text,
Title text,
Tags text,
AnswerCount int,
CommentCount int,
ContentLicense text
);
CREATE TABLE badges (
id int PRIMARY KEY,
userId int,
Name text,
dt timestamp,
class int,
tagbased boolean
);
CREATE TABLE comments (
id int,
postId int,
score int,
text text,
creationdate timestamp,
userid int,
contentlicense text
);
CREATE TABLE posthistory (
id int,
PostHistoryTypeId int,
postId int,
RevisionGUID text,
CreationDate timestamp,
userID int,
text text,
ContentLicense text
);
CREATE TABLE postlinks (
id int,
creationdate timestamp,
postId int,
relatedPostId int,
LinkTypeId int
);
CREATE TABLE tags (
id int,
tagname text,
count int,
ExcerptPostId int,
WikiPostId int,
IsRequired boolean,
IsModeratorOnly boolean
);
CREATE TABLE votes(
id int,
postid int,
votetypeid int,
creationdate timestamp
);
-- tags
TRUNCATE TABLE data_load;
COPY data_load FROM 'C:\workspaces\stackoverflow-postgresql\Tags.xml';
TRUNCATE TABLE tags;
INSERT INTO public.tags(
id, tagname, count, excerptpostid, wikipostid, IsModeratorOnly, IsRequired)
SELECT
(xpath('//@Id', x))[1]::text::int AS id
,(xpath('//@TagName', x))[1]::text::text AS TagName
,(xpath('//@Count', x))[1]::text::int AS Count
,(xpath('//@ExcerptPostId', x))[1]::text::int AS ExcerptPostId
,(xpath('//@WikiPostId', x))[1]::text::int AS WikiPostId
,(xpath('//@IsModeratorOnly', x))[1]::text::boolean
,(xpath('//@IsRequired', x))[1]::text::boolean
FROM data_load , unnest(xpath('//row', data::xml)) x
WHERE data ilike '%<row %';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment