Warning: these are my personal, messy, ad-hoc notes. They are not intended for production installation. The instructions by @kalinialab are probably better.
- CentOS 7.0
- CUDA 11.2
- Without root account
AF_ROOT=/path/to/somewhere
cd $AF_ROOT
mkdir tools
TOOLS=$AF_ROOT/tools
git clone https://github.com/deepmind/alphafold.git
First, manually install aria2c to somewhere in PATH.
Fix bugs in the download scripts.
--- a/scripts/download_mgnify.sh
+++ b/scripts/download_mgnify.sh
@@ -35,7 +35,7 @@ SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018
aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"
pushd "${ROOT_DIR}"
-gunzip "${ROOT_DIR}/${BASENAME}"
+gunzip "${BASENAME}"
popd
index 061818c..36e1ef5 100755
--- a/scripts/download_uniref90.sh
+++ b/scripts/download_uniref90.sh
@@ -37,5 +37,5 @@ BASENAME=$(basename "${SOURCE_URL}")
aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"
pushd "${ROOT_DIR}"
-gunzip "${ROOT_DIR}/${BASENAME}"
+gunzip "${BASENAME}"
popd
Also add `-x 8` to the `aria2c` commands in the download scripts if you want parallel downloads.
Run download scripts.
mkdir DBs
$AF_ROOT/alphafold/scripts/download_bfd.sh DBs &
$AF_ROOT/alphafold/scripts/download_pdb70.sh DBs &
$AF_ROOT/alphafold/scripts/download_uniclust30.sh DBs &
$AF_ROOT/alphafold/scripts/download_alphafold_params.sh DBs &
$AF_ROOT/alphafold/scripts/download_mgnify.sh DBs &
$AF_ROOT/alphafold/scripts/download_pdb_mmcif.sh DBs &
$AF_ROOT/alphafold/scripts/download_uniref90.sh DBs &
wget https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
mv stereo_chemical_props.txt $AF_ROOT/alphafold/alphafold/common
cd $TOOLS
git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git hh-suite-3.3.0
cd hh-suite-3.3.0
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=$TOOLS/hh-suite ..
make -j12
make install
cd $TOOLS
wget http://eddylab.org/software/hmmer/hmmer-3.3.2.tar.gz
tar xzvf hmmer-3.3.2.tar.gz
cd hmmer-3.3.2
./configure --prefix=$TOOLS/hmmer
make -j12
make install
cd $TOOLS
wget https://github.com/TimoLassmann/kalign/archive/refs/tags/v3.3.1.tar.gz
tar xzvf v3.3.1.tar.gz
cd kalign-3.3.1/
./autogen.sh
./configure --prefix=$TOOLS/kalign # needs GCC >= 4.9; I used 6.2.0
make -j12
make install
Warning: after `pip install`, `conda` becomes unhappy with module version conflicts and won't let you install more packages.
conda create -p $TOOLS/alphafold-conda python=3.8
conda activate $TOOLS/alphafold-conda
conda install -c conda-forge openmm=7.5.1 cudatoolkit==11.2 pdbfixer pip
pip3 install --upgrade pip
vim $AF_ROOT/alphafold/requirements.txt # comment out docker
pip3 install -r ../alphafold/requirements.txt
# My system has CUDA 11.2 but JAX has only up to 11.1
pip3 install --upgrade jax jaxlib==0.1.69+cuda111 -f https://storage.googleapis.com/jax-releases/jax_releases.html
cd $TOOLS/alphafold-conda/lib/python3.8/site-packages
patch -p0 < $AF_ROOT/alphafold/docker/openmm.patch
# Download cuDNN 8.2.1 and put lib*.so* into LD_LIBRARY_PATH
mkdir $TOOLS/cudnn
cp /path/to/cudnn/lib/* $TOOLS/cudnn
bash run.sh --fasta_paths=/absolute/path/to/sequence.fasta # You MUST use an absolute path!
This patch allows you to skip the slow, non-GPU-accelerated MSA calculation when the GPU part has failed.
You can also use it to run the MSA step on a CPU-only node, kill the job (or add `exit` to the code), and then continue on a GPU node.
--- a/run_alphafold.py
+++ b/run_alphafold.py
@@ -112,15 +112,20 @@ def predict_structure(
# Get features.
t_0 = time.time()
- feature_dict = data_pipeline.process(
- input_fasta_path=fasta_path,
- msa_output_dir=msa_output_dir)
- timings['features'] = time.time() - t_0
-
- # Write out features as a pickled dictionary.
features_output_path = os.path.join(output_dir, 'features.pkl')
- with open(features_output_path, 'wb') as f:
- pickle.dump(feature_dict, f, protocol=4)
+ if os.path.exists(features_output_path):
+ feature_dict = pickle.load(open(features_output_path, 'rb'))
+
+ else:
+ feature_dict = data_pipeline.process(
+ input_fasta_path=fasta_path,
+ msa_output_dir=msa_output_dir)
+
+ # Write out features as a pickled dictionary.
+ with open(features_output_path, 'wb') as f:
+ pickle.dump(feature_dict, f, protocol=4)
+
+ timings['features'] = time.time() - t_0
relaxed_pdbs = {}
plddts = {}
This allows you to run AlphaFold2 from arbitrary directories, without changing the current directory to the program directory.
--- a/alphafold/common/residue_constants.py
+++ b/alphafold/common/residue_constants.py
@@ -402,8 +402,9 @@ def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]],
residue_virtual_bonds: dict that maps resname --> list of Bond tuples
residue_bond_angles: dict that maps resname --> list of BondAngle tuples
"""
- stereo_chemical_props_path = (
- 'alphafold/common/stereo_chemical_props.txt')
+ import os.path
+ stereo_chemical_props_path = (os.path.join(os.path.dirname(__file__ ),
+ 'stereo_chemical_props.txt'))
with open(stereo_chemical_props_path, 'rt') as f:
stereo_chemical_props = f.read()
lines_iter = iter(stereo_chemical_props.splitlines())
When installing kalign, `./autogen.sh` is required before `./configure`:
cd $TOOLS
wget https://github.com/TimoLassmann/kalign/archive/refs/tags/v3.3.1.tar.gz
tar xzvf v3.3.1.tar.gz
cd kalign-3.3.1/
+ ./autogen.sh
./configure --prefix=$TOOLS/kalign # needs GCC >= 4.9; I used 6.2.0
make -j12
make install
Also, `libtoolize` is required for `./autogen.sh`. Type `yum -y install libtool` to install it on CentOS 7.