Skip to content

Instantly share code, notes, and snippets.

@bellerbrock
Created February 2, 2022 17:55
Show Gist options
  • Save bellerbrock/ef10474b2f457e2877ec94a71b583ab6 to your computer and use it in GitHub Desktop.
Save bellerbrock/ef10474b2f457e2877ec94a71b583ab6 to your computer and use it in GitHub Desktop.
# Retrieve NCSU crosses that are missing female_parent stock_relationship
SELECT stock.stock_id, stock.uniquename FROM stock
LEFT JOIN stock_relationship sr ON(
stock.stock_id = sr.object_id
AND sr.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'female_parent')
)
JOIN nd_experiment_stock s ON(stock.stock_id = s.stock_id)
JOIN nd_experiment_project p ON(s.nd_experiment_id = p.nd_experiment_id)
JOIN project t ON(p.project_id = t.project_id)
JOIN project_relationship pr ON(
t.project_id = pr.subject_project_id AND pr.type_id = (select cvterm_id from cvterm where name = 'breeding_program_trial_relationship')
)
JOIN project b ON(pr.object_project_id = b.project_id)
WHERE stock.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'cross')
AND sr.stock_relationship_id IS NULL
AND b.name = 'NCSU'
ORDER BY 2;
#Fix single cross with specific ids (adding female and male parent relationships to 19GRKN_BeauregardXCaromex)
BEGIN;
INSERT INTO stock_relationship (subject_id, object_id, type_id) VALUES (469241,467194,76437);
INSERT INTO stock_relationship (subject_id, object_id, type_id) VALUES (468206,467194,76438);
#check that fixes look good
SELECT o.uniquename, cvterm.name, s.uniquename
FROM stock o JOIN stock_relationship sr ON(o.stock_id = sr.object_id)
JOIN cvterm ON(sr.type_id = cvterm_id)
JOIN stock s ON(sr.subject_id = s.stock_id)
WHERE o.stock_id = 467194;
#should return
uniquename | name | uniquename
---------------------------+---------------+------------
19GRKN_BeauregardXCaromex | male_parent | Caromex
19GRKN_BeauregardXCaromex | female_parent | Beauregard
(2 rows)
#then
COMMIT;
# extract parents for remaining parents as strings
SELECT
stock.stock_id,
stock.uniquename,
REGEXP_REPLACE(
split_part(stock.uniquename, '_', 2),
'(.+)[xX]([^xX]*$)',
'\1') AS female_parent,
coalesce((REGEXP_MATCH(
split_part(stock.uniquename, '_', 2),
'.+[xX](?<=[xX])([^xX]*$)'))[1], NULL) as male_parent
FROM stock
LEFT JOIN stock_relationship sr ON(
stock.stock_id = sr.object_id
AND sr.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'female_parent')
)
JOIN nd_experiment_stock s ON(stock.stock_id = s.stock_id)
JOIN nd_experiment_project p ON(s.nd_experiment_id = p.nd_experiment_id)
JOIN project t ON(p.project_id = t.project_id)
JOIN project_relationship pr ON(
t.project_id = pr.subject_project_id AND pr.type_id = (select cvterm_id from cvterm where name = 'breeding_program_trial_relationship')
)
JOIN project b ON(pr.object_project_id = b.project_id)
WHERE stock.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'cross')
AND sr.stock_relationship_id IS NULL
AND b.name = 'NCSU'
ORDER BY 2;
# Fix batch of missing NCSU biparental cross female parent relationships
BEGIN;
INSERT INTO stock_relationship (subject_id, object_id, type_id, value)
SELECT
f.stock_id,
stock.stock_id,
(select cvterm_id from cvterm where name = 'female_parent') as type,
'biparental'
FROM stock
LEFT JOIN stock_relationship sr ON(
stock.stock_id = sr.object_id
AND sr.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'female_parent')
)
JOIN nd_experiment_stock s ON(stock.stock_id = s.stock_id)
JOIN nd_experiment_project p ON(s.nd_experiment_id = p.nd_experiment_id)
JOIN project t ON(p.project_id = t.project_id)
JOIN project_relationship pr ON(
t.project_id = pr.subject_project_id AND pr.type_id = (select cvterm_id from cvterm where name = 'breeding_program_trial_relationship')
)
JOIN project b ON(pr.object_project_id = b.project_id)
JOIN stock f ON(f.uniquename = REGEXP_REPLACE(
split_part(stock.uniquename, '_', 2),
'(.+)[xX]([^xX]*$)',
'\1'))
WHERE stock.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'cross')
AND sr.stock_relationship_id IS NULL
AND b.name = 'NCSU'
ORDER BY 2;
# tweak needed to fix these because female_parents names had been edited to add underscores
stock_id | uniquename | female_parent | male_parent
----------+----------------------------+---------------+-------------
252791 | 17DIV19_NASPOT5xBeauregard | NASPOT5 | Beauregard
252794 | 17DIV22_Yanshu1xBeauregard | Yanshu1 | Beauregard
252813 | 17DIV41_NASPOT5xHernandez | NASPOT5 | Hernandez
252816 | 17DIV44_Yanshu1xHernandez | Yanshu1 | Hernandez
BEGIN;
INSERT INTO stock_relationship (subject_id, object_id, type_id, value)
SELECT
f.stock_id,
stock.stock_id,
(select cvterm_id from cvterm where name = 'female_parent') as type,
'biparental'
FROM stock
LEFT JOIN stock_relationship sr ON(
stock.stock_id = sr.object_id
AND sr.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'female_parent')
)
JOIN nd_experiment_stock s ON(stock.stock_id = s.stock_id)
JOIN nd_experiment_project p ON(s.nd_experiment_id = p.nd_experiment_id)
JOIN project t ON(p.project_id = t.project_id)
JOIN project_relationship pr ON(
t.project_id = pr.subject_project_id AND pr.type_id = (select cvterm_id from cvterm where name = 'breeding_program_trial_relationship')
)
JOIN project b ON(pr.object_project_id = b.project_id)
LEFT JOIN stock f ON(f.uniquename =
substring(REGEXP_REPLACE(
split_part(stock.uniquename, '_', 2),
'(.+)[xX]([^xX]*$)',
'\1') from '^[A-Za-z]+') || '_' ||
substring(REGEXP_REPLACE(
split_part(stock.uniquename, '_', 2),
'(.+)[xX]([^xX]*$)',
'\1') from '[0-9]+$')
)
WHERE stock.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'cross')
AND sr.stock_relationship_id IS NULL
AND b.name = 'NCSU'
ORDER BY 2;
#Fix batch of missing NCSU biparental cross male parent relationships
BEGIN;
INSERT INTO stock_relationship (subject_id, object_id, type_id)
SELECT
m.stock_id,
stock.stock_id,
(select cvterm_id from cvterm where name = 'male_parent') as type
FROM stock
LEFT JOIN stock_relationship sr ON(
stock.stock_id = sr.object_id
AND sr.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'male_parent')
)
JOIN nd_experiment_stock s ON(stock.stock_id = s.stock_id)
JOIN nd_experiment_project p ON(s.nd_experiment_id = p.nd_experiment_id)
JOIN project t ON(p.project_id = t.project_id)
JOIN project_relationship pr ON(
t.project_id = pr.subject_project_id AND pr.type_id = (select cvterm_id from cvterm where name = 'breeding_program_trial_relationship')
)
JOIN project b ON(pr.object_project_id = b.project_id)
JOIN stock m ON(m.uniquename =
CASE WHEN stock.uniquename LIKE '%x' THEN
coalesce((REGEXP_MATCH(
split_part(stock.uniquename, '_', 2),
'.+[X](?<=[X])([^X]*$)'))[1], NULL)
ELSE coalesce((REGEXP_MATCH(
split_part(stock.uniquename, '_', 2),
'.+[xX](?<=[xX])([^xX]*$)'))[1], NULL)
END
)
WHERE stock.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'cross')
AND sr.stock_relationship_id IS NULL
AND b.name = 'NCSU'
ORDER BY 3;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment