Skip to content

Instantly share code, notes, and snippets.

@fo40225
Created March 25, 2018 07:09
Show Gist options
  • Save fo40225/8d4814e08e9fb8a4c4227c98c16c8e9d to your computer and use it in GitHub Desktop.
Save fo40225/8d4814e08e9fb8a4c4227c98c16c8e9d to your computer and use it in GitHub Desktop.
使用SQL Server過濾出TaiwanBioBank中UCSC exon區域的資料
USE [master];
GO
CREATE DATABASE [TaiwanBioBank]
CONTAINMENT = NONE
ON PRIMARY (
NAME = N'TaiwanBioBank'
, FILENAME = N'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\TaiwanBioBank.mdf'
, SIZE = 8192 KB
, FILEGROWTH = 65536 KB
)
LOG ON (
NAME = N'TaiwanBioBank_log'
, FILENAME = N'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\TaiwanBioBank_log.ldf'
, SIZE = 8192 KB
, FILEGROWTH = 65536 KB
);
GO
ALTER DATABASE [TaiwanBioBank] SET COMPATIBILITY_LEVEL = 130;
GO
ALTER DATABASE [TaiwanBioBank] SET ANSI_NULL_DEFAULT OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET ANSI_NULLS OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET ANSI_PADDING OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET ANSI_WARNINGS OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET ARITHABORT OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET AUTO_CLOSE OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET AUTO_SHRINK OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET AUTO_CREATE_STATISTICS ON(INCREMENTAL = OFF)
GO
ALTER DATABASE [TaiwanBioBank] SET AUTO_UPDATE_STATISTICS ON;
GO
ALTER DATABASE [TaiwanBioBank] SET CURSOR_CLOSE_ON_COMMIT OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET CURSOR_DEFAULT GLOBAL;
GO
ALTER DATABASE [TaiwanBioBank] SET CONCAT_NULL_YIELDS_NULL OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET NUMERIC_ROUNDABORT OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET QUOTED_IDENTIFIER OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET RECURSIVE_TRIGGERS OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET DISABLE_BROKER;
GO
ALTER DATABASE [TaiwanBioBank] SET AUTO_UPDATE_STATISTICS_ASYNC OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET DATE_CORRELATION_OPTIMIZATION OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET PARAMETERIZATION SIMPLE;
GO
ALTER DATABASE [TaiwanBioBank] SET READ_COMMITTED_SNAPSHOT OFF;
GO
ALTER DATABASE [TaiwanBioBank] SET READ_WRITE;
GO
ALTER DATABASE [TaiwanBioBank] SET RECOVERY FULL;
GO
ALTER DATABASE [TaiwanBioBank] SET MULTI_USER;
GO
ALTER DATABASE [TaiwanBioBank] SET PAGE_VERIFY CHECKSUM;
GO
ALTER DATABASE [TaiwanBioBank] SET TARGET_RECOVERY_TIME = 60 SECONDS;
GO
ALTER DATABASE [TaiwanBioBank] SET DELAYED_DURABILITY = DISABLED;
GO
USE [TaiwanBioBank];
GO
ALTER DATABASE SCOPED CONFIGURATION SET MAXDOP = 0;
GO
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET MAXDOP = PRIMARY;
GO
ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
GO
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET LEGACY_CARDINALITY_ESTIMATION = PRIMARY;
GO
ALTER DATABASE SCOPED CONFIGURATION SET PARAMETER_SNIFFING = ON;
GO
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET PARAMETER_SNIFFING = PRIMARY;
GO
ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
GO
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET QUERY_OPTIMIZER_HOTFIXES = PRIMARY;
GO
USE [TaiwanBioBank];
GO
IF NOT EXISTS (
SELECT [name]
FROM [sys].[filegroups]
WHERE [is_default] = 1
AND [name] = N'PRIMARY'
)
BEGIN
ALTER DATABASE [TaiwanBioBank] MODIFY FILEGROUP [PRIMARY] DEFAULT;
END;
GO
ALTER DATABASE [TaiwanBioBank] SET RECOVERY SIMPLE;
GO
CREATE TABLE [dbo].[TaiwanBioBank_population_allelefreq.1517] (
[Chrom] NVARCHAR(5) NOT NULL
, [Position] [INT] NOT NULL
, [dbSNPid] NVARCHAR(256) NOT NULL
, [Ref ] NVARCHAR(512) NOT NULL
, [AA] [INT] NOT NULL
, [AC] [INT] NOT NULL
, [AG] [INT] NOT NULL
, [AT] [INT] NOT NULL
, [CC] [INT] NOT NULL
, [CG] [INT] NOT NULL
, [CT] [INT] NOT NULL
, [GG] [INT] NOT NULL
, [GT] [INT] NOT NULL
, [TT] [INT] NOT NULL
, [Others] [INT] NOT NULL
, [AlleleFreq] NVARCHAR(MAX) NOT NULL
, [VarType] NVARCHAR(4) NOT NULL
)
ON [PRIMARY];
GO
DECLARE @bulk_cmd NVARCHAR(4000);
SET @bulk_cmd = 'BULK INSERT [TaiwanBioBank_population_allelefreq.1517]
FROM ''C:\Users\User\Downloads\TaiwanBioBank_population_allelefreq.1517.tsv''
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = ''\t'',
ROWTERMINATOR = '''+CHAR(10)+'''
)';
EXEC (@bulk_cmd);
GO
CREATE NONCLUSTERED INDEX [IX_TaiwanBioBank_population_allelefreq.1517]
ON [dbo].[TaiwanBioBank_population_allelefreq.1517] (
[Position] ASC
, [Chrom] ASC
, [Ref ] ASC
)
INCLUDE (
[AlleleFreq]
)
WITH (
PAD_INDEX = OFF
, STATISTICS_NORECOMPUTE = OFF
, SORT_IN_TEMPDB = OFF
, DROP_EXISTING = OFF
, ONLINE = OFF
, ALLOW_ROW_LOCKS = ON
, ALLOW_PAGE_LOCKS = ON
)
ON [PRIMARY];
GO
CREATE TABLE [dbo].[Whole exome-UCSC_Main_on_Human__refGene_(genome)] (
[chrom] NVARCHAR(32) NOT NULL
, [chromStart] [INT] NOT NULL
, [chromEnd] [INT] NOT NULL
, [name] NVARCHAR(64) NOT NULL
, [score] [INT] NOT NULL
, [strand] NVARCHAR(2) NOT NULL
)
ON [PRIMARY];
GO
DECLARE @bulk_cmd NVARCHAR(4000);
SET @bulk_cmd = 'BULK INSERT [Whole exome-UCSC_Main_on_Human__refGene_(genome)]
FROM ''C:\Users\User\Downloads\Whole exome-[UCSC_Main_on_Human__refGene_(genome)].bed''
WITH (
FIRSTROW = 1,
FIELDTERMINATOR = ''\t'',
ROWTERMINATOR = '''+CHAR(10)+'''
)';
EXEC (@bulk_cmd);
GO
CREATE NONCLUSTERED INDEX [IX_Whole exome-UCSC_Main_on_Human__refGene_(genome)]
ON [dbo].[Whole exome-UCSC_Main_on_Human__refGene_(genome)] (
[chromStart] ASC
, [chromEnd] ASC
, [chrom] ASC
)
WITH (
PAD_INDEX = OFF
, STATISTICS_NORECOMPUTE = OFF
, SORT_IN_TEMPDB = OFF
, DROP_EXISTING = OFF
, ONLINE = OFF
, ALLOW_ROW_LOCKS = ON
, ALLOW_PAGE_LOCKS = ON
)
ON [PRIMARY];
GO
CREATE TABLE [dbo].[TaiwanBioBankWholeExome] (
[Chrom] NVARCHAR(5) NOT NULL
, [Position] [INT] NOT NULL
, [Ref] NVARCHAR(512) NOT NULL
, [Alt] NVARCHAR(MAX) NOT NULL
, [VarType] NVARCHAR(3) NOT NULL
, [Rate] FLOAT NOT NULL
)
ON [PRIMARY] TEXTIMAGE_ON [PRIMARY];
GO
CREATE CLUSTERED INDEX [CX_TaiwanBioBankWholeExome]
ON [dbo].[TaiwanBioBankWholeExome] (
[Chrom] ASC
, [Position] ASC
, [VarType] ASC
, [Rate] DESC
)
WITH (
PAD_INDEX = OFF
, STATISTICS_NORECOMPUTE = OFF
, SORT_IN_TEMPDB = OFF
, DROP_EXISTING = OFF
, ONLINE = OFF
, ALLOW_ROW_LOCKS = ON
, ALLOW_PAGE_LOCKS = ON
)
ON [PRIMARY];
GO
INSERT INTO [dbo].[TaiwanBioBankWholeExome]
SELECT DISTINCT
[a].[Chrom]
, [a].[Position]
, [a].[Ref]
, SUBSTRING(value, 1, CHARINDEX(':', value)-1) AS [Alt]
, [a].[VarType]
, CAST(SUBSTRING(value, CHARINDEX(':', value)+1, LEN(value)-CHARINDEX(':', value)) AS FLOAT) AS [Rate]
FROM [dbo].[TaiwanBioBank_population_allelefreq.1517] AS [a]
CROSS APPLY [STRING_SPLIT]([a].[AlleleFreq], ' ')
INNER JOIN [dbo].[Whole exome-UCSC_Main_on_Human__refGene_(genome)] AS [b]
ON [a].[Chrom] = [b].[chrom]
AND [a].[Position] >= [b].[chromStart]
AND [a].[Position] < [b].[chromEnd];
GO
DBCC SHRINKFILE(TaiwanBioBank, 1);
DBCC SHRINKFILE(TaiwanBioBank_log, 1);
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment