Created
March 25, 2018 07:09
-
-
Save fo40225/8d4814e08e9fb8a4c4227c98c16c8e9d to your computer and use it in GitHub Desktop.
使用SQL Server過濾出TaiwanBioBank中UCSC exon區域的資料
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
USE [master]; | |
GO | |
CREATE DATABASE [TaiwanBioBank] | |
CONTAINMENT = NONE | |
ON PRIMARY ( | |
NAME = N'TaiwanBioBank' | |
, FILENAME = N'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\TaiwanBioBank.mdf' | |
, SIZE = 8192 KB | |
, FILEGROWTH = 65536 KB | |
) | |
LOG ON ( | |
NAME = N'TaiwanBioBank_log' | |
, FILENAME = N'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\TaiwanBioBank_log.ldf' | |
, SIZE = 8192 KB | |
, FILEGROWTH = 65536 KB | |
); | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET COMPATIBILITY_LEVEL = 130; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET ANSI_NULL_DEFAULT OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET ANSI_NULLS OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET ANSI_PADDING OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET ANSI_WARNINGS OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET ARITHABORT OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET AUTO_CLOSE OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET AUTO_SHRINK OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET AUTO_CREATE_STATISTICS ON(INCREMENTAL = OFF) | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET AUTO_UPDATE_STATISTICS ON; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET CURSOR_CLOSE_ON_COMMIT OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET CURSOR_DEFAULT GLOBAL; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET CONCAT_NULL_YIELDS_NULL OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET NUMERIC_ROUNDABORT OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET QUOTED_IDENTIFIER OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET RECURSIVE_TRIGGERS OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET DISABLE_BROKER; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET AUTO_UPDATE_STATISTICS_ASYNC OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET DATE_CORRELATION_OPTIMIZATION OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET PARAMETERIZATION SIMPLE; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET READ_COMMITTED_SNAPSHOT OFF; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET READ_WRITE; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET RECOVERY FULL; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET MULTI_USER; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET PAGE_VERIFY CHECKSUM; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET TARGET_RECOVERY_TIME = 60 SECONDS; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET DELAYED_DURABILITY = DISABLED; | |
GO | |
USE [TaiwanBioBank]; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION SET MAXDOP = 0; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET MAXDOP = PRIMARY; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET LEGACY_CARDINALITY_ESTIMATION = PRIMARY; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION SET PARAMETER_SNIFFING = ON; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET PARAMETER_SNIFFING = PRIMARY; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET QUERY_OPTIMIZER_HOTFIXES = PRIMARY; | |
GO | |
USE [TaiwanBioBank]; | |
GO | |
IF NOT EXISTS ( | |
SELECT [name] | |
FROM [sys].[filegroups] | |
WHERE [is_default] = 1 | |
AND [name] = N'PRIMARY' | |
) | |
BEGIN | |
ALTER DATABASE [TaiwanBioBank] MODIFY FILEGROUP [PRIMARY] DEFAULT; | |
END; | |
GO | |
ALTER DATABASE [TaiwanBioBank] SET RECOVERY SIMPLE; | |
GO | |
CREATE TABLE [dbo].[TaiwanBioBank_population_allelefreq.1517] ( | |
[Chrom] NVARCHAR(5) NOT NULL | |
, [Position] [INT] NOT NULL | |
, [dbSNPid] NVARCHAR(256) NOT NULL | |
, [Ref ] NVARCHAR(512) NOT NULL | |
, [AA] [INT] NOT NULL | |
, [AC] [INT] NOT NULL | |
, [AG] [INT] NOT NULL | |
, [AT] [INT] NOT NULL | |
, [CC] [INT] NOT NULL | |
, [CG] [INT] NOT NULL | |
, [CT] [INT] NOT NULL | |
, [GG] [INT] NOT NULL | |
, [GT] [INT] NOT NULL | |
, [TT] [INT] NOT NULL | |
, [Others] [INT] NOT NULL | |
, [AlleleFreq] NVARCHAR(MAX) NOT NULL | |
, [VarType] NVARCHAR(4) NOT NULL | |
) | |
ON [PRIMARY]; | |
GO | |
DECLARE @bulk_cmd NVARCHAR(4000); | |
SET @bulk_cmd = 'BULK INSERT [TaiwanBioBank_population_allelefreq.1517] | |
FROM ''C:\Users\User\Downloads\TaiwanBioBank_population_allelefreq.1517.tsv'' | |
WITH ( | |
FIRSTROW = 2, | |
FIELDTERMINATOR = ''\t'', | |
ROWTERMINATOR = '''+CHAR(10)+''' | |
)'; | |
EXEC (@bulk_cmd); | |
GO | |
CREATE NONCLUSTERED INDEX [IX_TaiwanBioBank_population_allelefreq.1517] | |
ON [dbo].[TaiwanBioBank_population_allelefreq.1517] ( | |
[Position] ASC | |
, [Chrom] ASC | |
, [Ref ] ASC | |
) | |
INCLUDE ( | |
[AlleleFreq] | |
) | |
WITH ( | |
PAD_INDEX = OFF | |
, STATISTICS_NORECOMPUTE = OFF | |
, SORT_IN_TEMPDB = OFF | |
, DROP_EXISTING = OFF | |
, ONLINE = OFF | |
, ALLOW_ROW_LOCKS = ON | |
, ALLOW_PAGE_LOCKS = ON | |
) | |
ON [PRIMARY]; | |
GO | |
CREATE TABLE [dbo].[Whole exome-UCSC_Main_on_Human__refGene_(genome)] ( | |
[chrom] NVARCHAR(32) NOT NULL | |
, [chromStart] [INT] NOT NULL | |
, [chromEnd] [INT] NOT NULL | |
, [name] NVARCHAR(64) NOT NULL | |
, [score] [INT] NOT NULL | |
, [strand] NVARCHAR(2) NOT NULL | |
) | |
ON [PRIMARY]; | |
GO | |
DECLARE @bulk_cmd NVARCHAR(4000); | |
SET @bulk_cmd = 'BULK INSERT [Whole exome-UCSC_Main_on_Human__refGene_(genome)] | |
FROM ''C:\Users\User\Downloads\Whole exome-[UCSC_Main_on_Human__refGene_(genome)].bed'' | |
WITH ( | |
FIRSTROW = 1, | |
FIELDTERMINATOR = ''\t'', | |
ROWTERMINATOR = '''+CHAR(10)+''' | |
)'; | |
EXEC (@bulk_cmd); | |
GO | |
CREATE NONCLUSTERED INDEX [IX_Whole exome-UCSC_Main_on_Human__refGene_(genome)] | |
ON [dbo].[Whole exome-UCSC_Main_on_Human__refGene_(genome)] ( | |
[chromStart] ASC | |
, [chromEnd] ASC | |
, [chrom] ASC | |
) | |
WITH ( | |
PAD_INDEX = OFF | |
, STATISTICS_NORECOMPUTE = OFF | |
, SORT_IN_TEMPDB = OFF | |
, DROP_EXISTING = OFF | |
, ONLINE = OFF | |
, ALLOW_ROW_LOCKS = ON | |
, ALLOW_PAGE_LOCKS = ON | |
) | |
ON [PRIMARY]; | |
GO | |
CREATE TABLE [dbo].[TaiwanBioBankWholeExome] ( | |
[Chrom] NVARCHAR(5) NOT NULL | |
, [Position] [INT] NOT NULL | |
, [Ref] NVARCHAR(512) NOT NULL | |
, [Alt] NVARCHAR(MAX) NOT NULL | |
, [VarType] NVARCHAR(3) NOT NULL | |
, [Rate] FLOAT NOT NULL | |
) | |
ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]; | |
GO | |
CREATE CLUSTERED INDEX [CX_TaiwanBioBankWholeExome] | |
ON [dbo].[TaiwanBioBankWholeExome] ( | |
[Chrom] ASC | |
, [Position] ASC | |
, [VarType] ASC | |
, [Rate] DESC | |
) | |
WITH ( | |
PAD_INDEX = OFF | |
, STATISTICS_NORECOMPUTE = OFF | |
, SORT_IN_TEMPDB = OFF | |
, DROP_EXISTING = OFF | |
, ONLINE = OFF | |
, ALLOW_ROW_LOCKS = ON | |
, ALLOW_PAGE_LOCKS = ON | |
) | |
ON [PRIMARY]; | |
GO | |
INSERT INTO [dbo].[TaiwanBioBankWholeExome] | |
SELECT DISTINCT | |
[a].[Chrom] | |
, [a].[Position] | |
, [a].[Ref] | |
, SUBSTRING(value, 1, CHARINDEX(':', value)-1) AS [Alt] | |
, [a].[VarType] | |
, CAST(SUBSTRING(value, CHARINDEX(':', value)+1, LEN(value)-CHARINDEX(':', value)) AS FLOAT) AS [Rate] | |
FROM [dbo].[TaiwanBioBank_population_allelefreq.1517] AS [a] | |
CROSS APPLY [STRING_SPLIT]([a].[AlleleFreq], ' ') | |
INNER JOIN [dbo].[Whole exome-UCSC_Main_on_Human__refGene_(genome)] AS [b] | |
ON [a].[Chrom] = [b].[chrom] | |
AND [a].[Position] >= [b].[chromStart] | |
AND [a].[Position] < [b].[chromEnd]; | |
GO | |
DBCC SHRINKFILE(TaiwanBioBank, 1); | |
DBCC SHRINKFILE(TaiwanBioBank_log, 1); | |
GO |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment