|
% Usage: |
|
% * download the full book from: http://infolab.stanford.edu/~ullman/mmds/book0n.pdf |
|
% * store this file as 'out.tex' and compile with 'pdflatex out.tex'
|
% * rename the output file, e.g. to

%   'Mining of Massive Datasets - Jure Leskovec, Anand Rajaraman, Jeffrey Ullman.pdf'
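
% For example, a minimal sketch of those three steps (assuming a Unix-like
% shell with wget and pdflatex available; adjust paths and names to taste):
%
%   wget http://infolab.stanford.edu/~ullman/mmds/book0n.pdf
%   pdflatex out.tex
%   mv out.pdf 'Mining of Massive Datasets - Jure Leskovec, Anand Rajaraman, Jeffrey Ullman.pdf'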
|
|
|
\documentclass{article} |
|
\usepackage[utf8]{inputenc} |
|
|
|
\usepackage{geometry} |
|
|
|
\usepackage{pdfpages} |
|
\usepackage[ |
|
pdfpagelabels=true, |
|
pdftitle={Mining of Massive Datasets}, |
|
pdfauthor={Jure Leskovec, Anand Rajaraman, Jeffrey D. Ullman}, |
|
pdfsubject={Data Mining},

pdfkeywords={data mining, machine learning},
|
unicode=true, |
|
]{hyperref} |
|
\usepackage{bookmark} |
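
% The bookmark package provides \bookmark with an explicit page= target, so an
% outline can be attached to the pages pulled in by \includepdf below without
% any sectioning commands of our own.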
|
|
|
\begin{document} |
|
|
|
|
|
\pagenumbering{arabic} |
|
\setcounter{page}{1} |
|
\includepdf[pages=1-]{book0n.pdf} |
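
% Note on the entries below: page= values are absolute pages of book0n.pdf,
% not the book's printed page numbers; in this copy, printed page N sits on
% PDF page N+20 (e.g. Chapter 1 starts on printed page 1 = PDF page 21).
% Levels: 0 = chapter, 1 = section, 2 = subsection; exercise subsections are
% placed at level 3 so they fold under the preceding subsection.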
|
|
|
\bookmark[page=1,level=0]{Cover} |
|
\bookmark[page=3,level=0]{Preface} |
|
\bookmark[page=7,level=0]{Table of Contents} |
|
|
|
\bookmark[page=21,level=0]{1 Data Mining} |
|
\bookmark[page=21,level=1]{1.1 What is Data Mining?} |
|
\bookmark[page=21,level=2]{1.1.1 Modeling} |
|
\bookmark[page=22,level=2]{1.1.2 Statistical Modeling} |
|
\bookmark[page=22,level=2]{1.1.3 Machine Learning} |
|
\bookmark[page=23,level=2]{1.1.4 Computational Approaches to Modeling} |
|
\bookmark[page=24,level=2]{1.1.5 Summarization} |
|
\bookmark[page=25,level=2]{1.1.6 Feature Extraction} |
|
\bookmark[page=25,level=1]{1.2 Statistical Limits on Data Mining} |
|
\bookmark[page=26,level=2]{1.2.1 Total Information Awareness} |
|
\bookmark[page=26,level=2]{1.2.2 Bonferroni’s Principle} |
|
\bookmark[page=27,level=2]{1.2.3 An Example of Bonferroni’s Principle} |
|
\bookmark[page=28,level=3]{1.2.4 Exercises for Section 1.2} |
|
\bookmark[page=28,level=1]{1.3 Things Useful to Know} |
|
\bookmark[page=29,level=2]{1.3.1 Importance of Words in Documents} |
|
\bookmark[page=30,level=2]{1.3.2 Hash Functions} |
|
\bookmark[page=31,level=2]{1.3.3 Indexes} |
|
\bookmark[page=33,level=2]{1.3.4 Secondary Storage} |
|
\bookmark[page=33,level=2]{1.3.5 The Base of Natural Logarithms} |
|
\bookmark[page=34,level=2]{1.3.6 Power Laws} |
|
\bookmark[page=36,level=3]{1.3.7 Exercises for Section 1.3} |
|
\bookmark[page=37,level=1]{1.4 Outline of the Book} |
|
\bookmark[page=39,level=1]{1.5 Summary of Chapter 1} |
|
\bookmark[page=39,level=1]{1.6 References for Chapter 1} |
|
\bookmark[page=41,level=0]{2 MapReduce and the New Software Stack} |
|
\bookmark[page=42,level=1]{2.1 Distributed File Systems} |
|
\bookmark[page=42,level=2]{2.1.1 Physical Organization of Compute Nodes} |
|
\bookmark[page=44,level=2]{2.1.2 Large-Scale File-System Organization} |
|
\bookmark[page=45,level=1]{2.2 MapReduce} |
|
\bookmark[page=45,level=2]{2.2.1 The Map Tasks} |
|
\bookmark[page=46,level=2]{2.2.2 Grouping by Key} |
|
\bookmark[page=47,level=2]{2.2.3 The Reduce Tasks} |
|
\bookmark[page=47,level=2]{2.2.4 Combiners} |
|
\bookmark[page=48,level=2]{2.2.5 Details of MapReduce Execution} |
|
\bookmark[page=50,level=2]{2.2.6 Coping With Node Failures} |
|
\bookmark[page=50,level=3]{2.2.7 Exercises for Section 2.2} |
|
\bookmark[page=50,level=1]{2.3 Algorithms Using MapReduce} |
|
\bookmark[page=51,level=2]{2.3.1 Matrix-Vector Multiplication by MapReduce} |
|
\bookmark[page=52,level=2]{2.3.2 If the Vector v Cannot Fit in Main Memory} |
|
\bookmark[page=53,level=2]{2.3.3 Relational-Algebra Operations} |
|
\bookmark[page=55,level=2]{2.3.4 Computing Selections by MapReduce} |
|
\bookmark[page=56,level=2]{2.3.5 Computing Projections by MapReduce} |
|
\bookmark[page=56,level=2]{2.3.6 Union, Intersection, and Difference by MapReduce} |
|
\bookmark[page=57,level=2]{2.3.7 Computing Natural Join by MapReduce} |
|
\bookmark[page=58,level=2]{2.3.8 Grouping and Aggregation by MapReduce} |
|
\bookmark[page=58,level=2]{2.3.9 Matrix Multiplication} |
|
\bookmark[page=59,level=2]{2.3.10 Matrix Multiplication with One MapReduce Step} |
|
\bookmark[page=60,level=3]{2.3.11 Exercises for Section 2.3} |
|
\bookmark[page=61,level=1]{2.4 Extensions to MapReduce} |
|
\bookmark[page=62,level=2]{2.4.1 Workflow Systems} |
|
\bookmark[page=63,level=2]{2.4.2 Spark} |
|
\bookmark[page=66,level=2]{2.4.3 Spark Implementation} |
|
\bookmark[page=68,level=2]{2.4.4 TensorFlow} |
|
\bookmark[page=69,level=2]{2.4.5 Recursive Extensions to MapReduce} |
|
\bookmark[page=71,level=2]{2.4.6 Bulk-Synchronous Systems} |
|
\bookmark[page=73,level=3]{2.4.7 Exercises for Section 2.4} |
|
\bookmark[page=73,level=1]{2.5 The Communication-Cost Model} |
|
\bookmark[page=73,level=2]{2.5.1 Communication Cost for Task Networks} |
|
\bookmark[page=75,level=2]{2.5.2 Wall-Clock Time} |
|
\bookmark[page=76,level=2]{2.5.3 Multiway Joins} |
|
\bookmark[page=79,level=3]{2.5.4 Exercises for Section 2.5} |
|
\bookmark[page=81,level=1]{2.6 Complexity Theory for MapReduce} |
|
\bookmark[page=81,level=2]{2.6.1 Reducer Size and Replication Rate} |
|
\bookmark[page=82,level=2]{2.6.2 An Example: Similarity Joins} |
|
\bookmark[page=84,level=2]{2.6.3 A Graph Model for MapReduce Problems} |
|
\bookmark[page=85,level=2]{2.6.4 Mapping Schemas} |
|
\bookmark[page=87,level=2]{2.6.5 When Not All Inputs Are Present} |
|
\bookmark[page=88,level=2]{2.6.6 Lower Bounds on Replication Rate} |
|
\bookmark[page=89,level=2]{2.6.7 Case Study: Matrix Multiplication} |
|
\bookmark[page=93,level=3]{2.6.8 Exercises for Section 2.6} |
|
\bookmark[page=94,level=1]{2.7 Summary of Chapter 2} |
|
\bookmark[page=97,level=1]{2.8 References for Chapter 2} |
|
\bookmark[page=101,level=0]{3 Finding Similar Items} |
|
\bookmark[page=102,level=1]{3.1 Applications of Set Similarity} |
|
\bookmark[page=102,level=2]{3.1.1 Jaccard Similarity of Sets} |
|
\bookmark[page=102,level=2]{3.1.2 Similarity of Documents} |
|
\bookmark[page=104,level=2]{3.1.3 Collaborative Filtering as a Similar-Sets Problem} |
|
\bookmark[page=106,level=3]{3.1.4 Exercises for Section 3.1} |
|
\bookmark[page=106,level=1]{3.2 Shingling of Documents} |
|
\bookmark[page=106,level=2]{3.2.1 k-Shingles} |
|
\bookmark[page=107,level=2]{3.2.2 Choosing the Shingle Size} |
|
\bookmark[page=107,level=2]{3.2.3 Hashing Shingles} |
|
\bookmark[page=108,level=2]{3.2.4 Shingles Built from Words} |
|
\bookmark[page=108,level=3]{3.2.5 Exercises for Section 3.2} |
|
\bookmark[page=109,level=1]{3.3 Similarity-Preserving Summaries of Sets} |
|
\bookmark[page=109,level=2]{3.3.1 Matrix Representation of Sets} |
|
\bookmark[page=110,level=2]{3.3.2 Minhashing} |
|
\bookmark[page=111,level=2]{3.3.3 Minhashing and Jaccard Similarity} |
|
\bookmark[page=111,level=2]{3.3.4 Minhash Signatures} |
|
\bookmark[page=112,level=2]{3.3.5 Computing Minhash Signatures in Practice} |
|
\bookmark[page=114,level=2]{3.3.6 Speeding Up Minhashing} |
|
\bookmark[page=115,level=2]{3.3.7 Speedup Using Hash Functions} |
|
\bookmark[page=118,level=3]{3.3.8 Exercises for Section 3.3} |
|
\bookmark[page=119,level=1]{3.4 Locality-Sensitive Hashing for Documents} |
|
\bookmark[page=120,level=2]{3.4.1 LSH for Minhash Signatures} |
|
\bookmark[page=121,level=2]{3.4.2 Analysis of the Banding Technique} |
|
\bookmark[page=123,level=2]{3.4.3 Combining the Techniques} |
|
\bookmark[page=124,level=3]{3.4.4 Exercises for Section 3.4} |
|
\bookmark[page=124,level=1]{3.5 Distance Measures} |
|
\bookmark[page=125,level=2]{3.5.1 Definition of a Distance Measure} |
|
\bookmark[page=125,level=2]{3.5.2 Euclidean Distances} |
|
\bookmark[page=126,level=2]{3.5.3 Jaccard Distance} |
|
\bookmark[page=127,level=2]{3.5.4 Cosine Distance} |
|
\bookmark[page=128,level=2]{3.5.5 Edit Distance} |
|
\bookmark[page=129,level=2]{3.5.6 Hamming Distance} |
|
\bookmark[page=129,level=3]{3.5.7 Exercises for Section 3.5} |
|
\bookmark[page=131,level=1]{3.6 The Theory of Locality-Sensitive Functions} |
|
\bookmark[page=132,level=2]{3.6.1 Locality-Sensitive Functions} |
|
\bookmark[page=132,level=2]{3.6.2 Locality-Sensitive Families for Jaccard Distance} |
|
\bookmark[page=133,level=2]{3.6.3 Amplifying a Locality-Sensitive Family} |
|
\bookmark[page=136,level=3]{3.6.4 Exercises for Section 3.6} |
|
\bookmark[page=136,level=1]{3.7 LSH Families for Other Distance Measures} |
|
\bookmark[page=137,level=2]{3.7.1 LSH Families for Hamming Distance} |
|
\bookmark[page=137,level=2]{3.7.2 Random Hyperplanes and the Cosine Distance} |
|
\bookmark[page=139,level=2]{3.7.3 Sketches} |
|
\bookmark[page=139,level=2]{3.7.4 LSH Families for Euclidean Distance} |
|
\bookmark[page=141,level=2]{3.7.5 More LSH Families for Euclidean Spaces} |
|
\bookmark[page=141,level=3]{3.7.6 Exercises for Section 3.7} |
|
\bookmark[page=142,level=1]{3.8 Applications of Locality-Sensitive Hashing} |
|
\bookmark[page=143,level=2]{3.8.1 Entity Resolution} |
|
\bookmark[page=143,level=2]{3.8.2 An Entity-Resolution Example} |
|
\bookmark[page=144,level=2]{3.8.3 Validating Record Matches} |
|
\bookmark[page=145,level=2]{3.8.4 Matching Fingerprints} |
|
\bookmark[page=146,level=2]{3.8.5 An LSH Family for Fingerprint Matching}
|
\bookmark[page=148,level=2]{3.8.6 Similar News Articles} |
|
\bookmark[page=149,level=3]{3.8.7 Exercises for Section 3.8} |
|
\bookmark[page=150,level=1]{3.9 Methods for High Degrees of Similarity} |
|
\bookmark[page=150,level=2]{3.9.1 Finding Identical Items} |
|
\bookmark[page=151,level=2]{3.9.2 Representing Sets as Strings} |
|
\bookmark[page=151,level=2]{3.9.3 Length-Based Filtering} |
|
\bookmark[page=152,level=2]{3.9.4 Prefix Indexing} |
|
\bookmark[page=153,level=2]{3.9.5 Using Position Information} |
|
\bookmark[page=155,level=2]{3.9.6 Using Position and Length in Indexes} |
|
\bookmark[page=157,level=3]{3.9.7 Exercises for Section 3.9} |
|
\bookmark[page=158,level=1]{3.10 Summary of Chapter 3} |
|
\bookmark[page=161,level=1]{3.11 References for Chapter 3} |
|
\bookmark[page=163,level=0]{4 Mining Data Streams} |
|
\bookmark[page=163,level=1]{4.1 The Stream Data Model} |
|
\bookmark[page=164,level=2]{4.1.1 A Data-Stream-Management System} |
|
\bookmark[page=165,level=2]{4.1.2 Examples of Stream Sources} |
|
\bookmark[page=166,level=2]{4.1.3 Stream Queries} |
|
\bookmark[page=167,level=2]{4.1.4 Issues in Stream Processing} |
|
\bookmark[page=168,level=1]{4.2 Sampling Data in a Stream} |
|
\bookmark[page=168,level=2]{4.2.1 A Motivating Example} |
|
\bookmark[page=169,level=2]{4.2.2 Obtaining a Representative Sample} |
|
\bookmark[page=169,level=2]{4.2.3 The General Sampling Problem} |
|
\bookmark[page=170,level=2]{4.2.4 Varying the Sample Size} |
|
\bookmark[page=170,level=3]{4.2.5 Exercises for Section 4.2} |
|
\bookmark[page=171,level=1]{4.3 Filtering Streams} |
|
\bookmark[page=171,level=2]{4.3.1 A Motivating Example} |
|
\bookmark[page=172,level=2]{4.3.2 The Bloom Filter} |
|
\bookmark[page=172,level=2]{4.3.3 Analysis of Bloom Filtering} |
|
\bookmark[page=173,level=3]{4.3.4 Exercises for Section 4.3} |
|
\bookmark[page=174,level=1]{4.4 Counting Distinct Elements in a Stream} |
|
\bookmark[page=174,level=2]{4.4.1 The Count-Distinct Problem} |
|
\bookmark[page=175,level=2]{4.4.2 The Flajolet-Martin Algorithm} |
|
\bookmark[page=176,level=2]{4.4.3 Combining Estimates} |
|
\bookmark[page=176,level=2]{4.4.4 Space Requirements} |
|
\bookmark[page=177,level=3]{4.4.5 Exercises for Section 4.4} |
|
\bookmark[page=177,level=1]{4.5 Estimating Moments} |
|
\bookmark[page=177,level=2]{4.5.1 Definition of Moments} |
|
\bookmark[page=178,level=2]{4.5.2 The Alon-Matias-Szegedy Algorithm for Second Moments} |
|
\bookmark[page=179,level=2]{4.5.3 Why the Alon-Matias-Szegedy Algorithm Works} |
|
\bookmark[page=180,level=2]{4.5.4 Higher-Order Moments} |
|
\bookmark[page=180,level=2]{4.5.5 Dealing With Infinite Streams} |
|
\bookmark[page=181,level=3]{4.5.6 Exercises for Section 4.5} |
|
\bookmark[page=182,level=1]{4.6 Counting Ones in a Window} |
|
\bookmark[page=183,level=2]{4.6.1 The Cost of Exact Counts} |
|
\bookmark[page=183,level=2]{4.6.2 The Datar-Gionis-Indyk-Motwani Algorithm} |
|
\bookmark[page=185,level=2]{4.6.3 Storage Requirements for the DGIM Algorithm} |
|
\bookmark[page=185,level=2]{4.6.4 Query Answering in the DGIM Algorithm} |
|
\bookmark[page=186,level=2]{4.6.5 Maintaining the DGIM Conditions} |
|
\bookmark[page=187,level=2]{4.6.6 Reducing the Error} |
|
\bookmark[page=188,level=2]{4.6.7 Extensions to the Counting of Ones} |
|
\bookmark[page=189,level=3]{4.6.8 Exercises for Section 4.6} |
|
\bookmark[page=189,level=1]{4.7 Decaying Windows} |
|
\bookmark[page=189,level=2]{4.7.1 The Problem of Most-Common Elements} |
|
\bookmark[page=190,level=2]{4.7.2 Definition of the Decaying Window} |
|
\bookmark[page=191,level=2]{4.7.3 Finding the Most Popular Elements} |
|
\bookmark[page=192,level=1]{4.8 Summary of Chapter 4} |
|
\bookmark[page=193,level=1]{4.9 References for Chapter 4} |
|
\bookmark[page=195,level=0]{5 Link Analysis} |
|
\bookmark[page=195,level=1]{5.1 PageRank} |
|
\bookmark[page=196,level=2]{5.1.1 Early Search Engines and Term Spam} |
|
\bookmark[page=197,level=2]{5.1.2 Definition of PageRank} |
|
\bookmark[page=201,level=2]{5.1.3 Structure of the Web} |
|
\bookmark[page=202,level=2]{5.1.4 Avoiding Dead Ends} |
|
\bookmark[page=205,level=2]{5.1.5 Spider Traps and Taxation} |
|
\bookmark[page=207,level=2]{5.1.6 Using PageRank in a Search Engine} |
|
\bookmark[page=207,level=3]{5.1.7 Exercises for Section 5.1} |
|
\bookmark[page=209,level=1]{5.2 Efficient Computation of PageRank} |
|
\bookmark[page=210,level=2]{5.2.1 Representing Transition Matrices} |
|
\bookmark[page=211,level=2]{5.2.2 PageRank Iteration Using MapReduce} |
|
\bookmark[page=211,level=2]{5.2.3 Use of Combiners to Consolidate the Result Vector} |
|
\bookmark[page=212,level=2]{5.2.4 Representing Blocks of the Transition Matrix} |
|
\bookmark[page=213,level=2]{5.2.5 Other Efficient Approaches to PageRank Iteration} |
|
\bookmark[page=215,level=3]{5.2.6 Exercises for Section 5.2} |
|
\bookmark[page=215,level=1]{5.3 Topic-Sensitive PageRank} |
|
\bookmark[page=215,level=2]{5.3.1 Motivation for Topic-Sensitive PageRank}
|
\bookmark[page=216,level=2]{5.3.2 Biased Random Walks} |
|
\bookmark[page=217,level=2]{5.3.3 Using Topic-Sensitive PageRank} |
|
\bookmark[page=218,level=2]{5.3.4 Inferring Topics from Words} |
|
\bookmark[page=219,level=3]{5.3.5 Exercises for Section 5.3} |
|
\bookmark[page=219,level=1]{5.4 Link Spam} |
|
\bookmark[page=219,level=2]{5.4.1 Architecture of a Spam Farm} |
|
\bookmark[page=221,level=2]{5.4.2 Analysis of a Spam Farm} |
|
\bookmark[page=222,level=2]{5.4.3 Combating Link Spam} |
|
\bookmark[page=222,level=2]{5.4.4 TrustRank} |
|
\bookmark[page=223,level=2]{5.4.5 Spam Mass} |
|
\bookmark[page=223,level=3]{5.4.6 Exercises for Section 5.4} |
|
\bookmark[page=224,level=1]{5.5 Hubs and Authorities} |
|
\bookmark[page=224,level=2]{5.5.1 The Intuition Behind HITS} |
|
\bookmark[page=225,level=2]{5.5.2 Formalizing Hubbiness and Authority} |
|
\bookmark[page=228,level=3]{5.5.3 Exercises for Section 5.5} |
|
\bookmark[page=228,level=1]{5.6 Summary of Chapter 5} |
|
\bookmark[page=232,level=1]{5.7 References for Chapter 5} |
|
\bookmark[page=233,level=0]{6 Frequent Itemsets} |
|
\bookmark[page=234,level=1]{6.1 The Market-Basket Model} |
|
\bookmark[page=234,level=2]{6.1.1 Definition of Frequent Itemsets} |
|
\bookmark[page=236,level=2]{6.1.2 Applications of Frequent Itemsets} |
|
\bookmark[page=237,level=2]{6.1.3 Association Rules} |
|
\bookmark[page=239,level=2]{6.1.4 Finding Association Rules with High Confidence} |
|
\bookmark[page=239,level=3]{6.1.5 Exercises for Section 6.1} |
|
\bookmark[page=241,level=1]{6.2 Market Baskets and the A-Priori Algorithm} |
|
\bookmark[page=241,level=2]{6.2.1 Representation of Market-Basket Data} |
|
\bookmark[page=242,level=2]{6.2.2 Use of Main Memory for Itemset Counting} |
|
\bookmark[page=244,level=2]{6.2.3 Monotonicity of Itemsets} |
|
\bookmark[page=245,level=2]{6.2.4 Tyranny of Counting Pairs} |
|
\bookmark[page=245,level=2]{6.2.5 The A-Priori Algorithm} |
|
\bookmark[page=246,level=2]{6.2.6 A-Priori for All Frequent Itemsets} |
|
\bookmark[page=249,level=3]{6.2.7 Exercises for Section 6.2} |
|
\bookmark[page=250,level=1]{6.3 Handling Larger Datasets in Main Memory} |
|
\bookmark[page=250,level=2]{6.3.1 The Algorithm of Park, Chen, and Yu} |
|
\bookmark[page=252,level=2]{6.3.2 The Multistage Algorithm} |
|
\bookmark[page=254,level=2]{6.3.3 The Multihash Algorithm} |
|
\bookmark[page=256,level=3]{6.3.4 Exercises for Section 6.3} |
|
\bookmark[page=258,level=1]{6.4 Limited-Pass Algorithms} |
|
\bookmark[page=258,level=2]{6.4.1 The Simple, Randomized Algorithm} |
|
\bookmark[page=259,level=2]{6.4.2 Avoiding Errors in Sampling Algorithms} |
|
\bookmark[page=260,level=2]{6.4.3 The Algorithm of Savasere, Omiecinski, and Navathe} |
|
\bookmark[page=261,level=2]{6.4.4 The SON Algorithm and MapReduce} |
|
\bookmark[page=262,level=2]{6.4.5 Toivonen’s Algorithm} |
|
\bookmark[page=263,level=2]{6.4.6 Why Toivonen’s Algorithm Works} |
|
\bookmark[page=264,level=3]{6.4.7 Exercises for Section 6.4} |
|
\bookmark[page=264,level=1]{6.5 Counting Frequent Items in a Stream} |
|
\bookmark[page=265,level=2]{6.5.1 Sampling Methods for Streams} |
|
\bookmark[page=266,level=2]{6.5.2 Frequent Itemsets in Decaying Windows} |
|
\bookmark[page=267,level=2]{6.5.3 Hybrid Methods} |
|
\bookmark[page=267,level=3]{6.5.4 Exercises for Section 6.5} |
|
\bookmark[page=268,level=1]{6.6 Summary of Chapter 6} |
|
\bookmark[page=270,level=1]{6.7 References for Chapter 6} |
|
\bookmark[page=273,level=0]{7 Clustering} |
|
\bookmark[page=273,level=1]{7.1 Introduction to Clustering Techniques} |
|
\bookmark[page=273,level=2]{7.1.1 Points, Spaces, and Distances} |
|
\bookmark[page=275,level=2]{7.1.2 Clustering Strategies} |
|
\bookmark[page=276,level=2]{7.1.3 The Curse of Dimensionality} |
|
\bookmark[page=277,level=3]{7.1.4 Exercises for Section 7.1} |
|
\bookmark[page=277,level=1]{7.2 Hierarchical Clustering} |
|
\bookmark[page=278,level=2]{7.2.1 Hierarchical Clustering in a Euclidean Space} |
|
\bookmark[page=280,level=2]{7.2.2 Efficiency of Hierarchical Clustering} |
|
\bookmark[page=281,level=2]{7.2.3 Alternative Rules for Controlling Hierarchical Clustering} |
|
\bookmark[page=284,level=2]{7.2.4 Hierarchical Clustering in Non-Euclidean Spaces} |
|
\bookmark[page=285,level=3]{7.2.5 Exercises for Section 7.2} |
|
\bookmark[page=286,level=1]{7.3 K-means Algorithms} |
|
\bookmark[page=287,level=2]{7.3.1 K-Means Basics} |
|
\bookmark[page=287,level=2]{7.3.2 Initializing Clusters for K-Means} |
|
\bookmark[page=288,level=2]{7.3.3 Picking the Right Value of k} |
|
\bookmark[page=289,level=2]{7.3.4 The Algorithm of Bradley, Fayyad, and Reina} |
|
\bookmark[page=291,level=2]{7.3.5 Processing Data in the BFR Algorithm} |
|
\bookmark[page=294,level=3]{7.3.6 Exercises for Section 7.3} |
|
\bookmark[page=294,level=1]{7.4 The CURE Algorithm} |
|
\bookmark[page=295,level=2]{7.4.1 Initialization in CURE} |
|
\bookmark[page=296,level=2]{7.4.2 Completion of the CURE Algorithm} |
|
\bookmark[page=297,level=3]{7.4.3 Exercises for Section 7.4} |
|
\bookmark[page=298,level=1]{7.5 Clustering in Non-Euclidean Spaces} |
|
\bookmark[page=298,level=2]{7.5.1 Representing Clusters in the GRGPF Algorithm} |
|
\bookmark[page=299,level=2]{7.5.2 Initializing the Cluster Tree} |
|
\bookmark[page=300,level=2]{7.5.3 Adding Points in the GRGPF Algorithm} |
|
\bookmark[page=301,level=2]{7.5.4 Splitting and Merging Clusters} |
|
\bookmark[page=302,level=3]{7.5.5 Exercises for Section 7.5} |
|
\bookmark[page=302,level=1]{7.6 Clustering for Streams and Parallelism} |
|
\bookmark[page=303,level=2]{7.6.1 The Stream-Computing Model} |
|
\bookmark[page=303,level=2]{7.6.2 A Stream-Clustering Algorithm} |
|
\bookmark[page=304,level=2]{7.6.3 Initializing Buckets} |
|
\bookmark[page=304,level=2]{7.6.4 Merging Buckets} |
|
\bookmark[page=307,level=2]{7.6.5 Answering Queries} |
|
\bookmark[page=307,level=2]{7.6.6 Clustering in a Parallel Environment} |
|
\bookmark[page=308,level=3]{7.6.7 Exercises for Section 7.6} |
|
\bookmark[page=308,level=1]{7.7 Summary of Chapter 7} |
|
\bookmark[page=312,level=1]{7.8 References for Chapter 7} |
|
\bookmark[page=313,level=0]{8 Advertising on the Web} |
|
\bookmark[page=313,level=1]{8.1 Issues in On-Line Advertising} |
|
\bookmark[page=313,level=2]{8.1.1 Advertising Opportunities} |
|
\bookmark[page=314,level=2]{8.1.2 Direct Placement of Ads} |
|
\bookmark[page=315,level=2]{8.1.3 Issues for Display Ads} |
|
\bookmark[page=316,level=1]{8.2 On-Line Algorithms} |
|
\bookmark[page=316,level=2]{8.2.1 On-Line and Off-Line Algorithms} |
|
\bookmark[page=317,level=2]{8.2.2 Greedy Algorithms} |
|
\bookmark[page=318,level=2]{8.2.3 The Competitive Ratio} |
|
\bookmark[page=318,level=3]{8.2.4 Exercises for Section 8.2} |
|
\bookmark[page=319,level=1]{8.3 The Matching Problem} |
|
\bookmark[page=319,level=2]{8.3.1 Matches and Perfect Matches} |
|
\bookmark[page=320,level=2]{8.3.2 The Greedy Algorithm for Maximal Matching} |
|
\bookmark[page=321,level=2]{8.3.3 Competitive Ratio for Greedy Matching} |
|
\bookmark[page=322,level=3]{8.3.4 Exercises for Section 8.3} |
|
\bookmark[page=322,level=1]{8.4 The Adwords Problem} |
|
\bookmark[page=323,level=2]{8.4.1 History of Search Advertising} |
|
\bookmark[page=323,level=2]{8.4.2 Definition of the Adwords Problem} |
|
\bookmark[page=324,level=2]{8.4.3 The Greedy Approach to the Adwords Problem} |
|
\bookmark[page=325,level=2]{8.4.4 The Balance Algorithm} |
|
\bookmark[page=326,level=2]{8.4.5 A Lower Bound on Competitive Ratio for Balance} |
|
\bookmark[page=328,level=2]{8.4.6 The Balance Algorithm with Many Bidders} |
|
\bookmark[page=329,level=2]{8.4.7 The Generalized Balance Algorithm} |
|
\bookmark[page=330,level=2]{8.4.8 Final Observations About the Adwords Problem} |
|
\bookmark[page=331,level=3]{8.4.9 Exercises for Section 8.4} |
|
\bookmark[page=331,level=1]{8.5 Adwords Implementation} |
|
\bookmark[page=332,level=2]{8.5.1 Matching Bids and Search Queries} |
|
\bookmark[page=332,level=2]{8.5.2 More Complex Matching Problems} |
|
\bookmark[page=333,level=2]{8.5.3 A Matching Algorithm for Documents and Bids} |
|
\bookmark[page=335,level=1]{8.6 Summary of Chapter 8} |
|
\bookmark[page=337,level=1]{8.7 References for Chapter 8} |
|
\bookmark[page=339,level=0]{9 Recommendation Systems} |
|
\bookmark[page=339,level=1]{9.1 A Model for Recommendation Systems} |
|
\bookmark[page=340,level=2]{9.1.1 The Utility Matrix} |
|
\bookmark[page=341,level=2]{9.1.2 The Long Tail} |
|
\bookmark[page=341,level=2]{9.1.3 Applications of Recommendation Systems} |
|
\bookmark[page=343,level=2]{9.1.4 Populating the Utility Matrix} |
|
\bookmark[page=344,level=1]{9.2 Content-Based Recommendations} |
|
\bookmark[page=344,level=2]{9.2.1 Item Profiles} |
|
\bookmark[page=345,level=2]{9.2.2 Discovering Features of Documents} |
|
\bookmark[page=346,level=2]{9.2.3 Obtaining Item Features From Tags} |
|
\bookmark[page=347,level=2]{9.2.4 Representing Item Profiles} |
|
\bookmark[page=348,level=2]{9.2.5 User Profiles} |
|
\bookmark[page=349,level=2]{9.2.6 Recommending Items to Users Based on Content} |
|
\bookmark[page=350,level=2]{9.2.7 Classification Algorithms} |
|
\bookmark[page=352,level=3]{9.2.8 Exercises for Section 9.2} |
|
\bookmark[page=353,level=1]{9.3 Collaborative Filtering} |
|
\bookmark[page=354,level=2]{9.3.1 Measuring Similarity} |
|
\bookmark[page=356,level=2]{9.3.2 The Duality of Similarity} |
|
\bookmark[page=357,level=2]{9.3.3 Clustering Users and Items} |
|
\bookmark[page=359,level=3]{9.3.4 Exercises for Section 9.3} |
|
\bookmark[page=360,level=1]{9.4 Dimensionality Reduction} |
|
\bookmark[page=360,level=2]{9.4.1 UV-Decomposition} |
|
\bookmark[page=361,level=2]{9.4.2 Root-Mean-Square Error} |
|
\bookmark[page=362,level=2]{9.4.3 Incremental Computation of a UV-Decomposition} |
|
\bookmark[page=364,level=2]{9.4.4 Optimizing an Arbitrary Element} |
|
\bookmark[page=366,level=2]{9.4.5 Building a Complete UV-Decomposition Algorithm} |
|
\bookmark[page=368,level=3]{9.4.6 Exercises for Section 9.4} |
|
\bookmark[page=369,level=1]{9.5 The Netflix Challenge} |
|
\bookmark[page=370,level=1]{9.6 Summary of Chapter 9} |
|
\bookmark[page=372,level=1]{9.7 References for Chapter 9} |
|
\bookmark[page=375,level=0]{10 Mining Social-Network Graphs} |
|
\bookmark[page=376,level=1]{10.1 Social Networks as Graphs} |
|
\bookmark[page=376,level=2]{10.1.1 What is a Social Network?} |
|
\bookmark[page=376,level=2]{10.1.2 Social Networks as Graphs} |
|
\bookmark[page=378,level=2]{10.1.3 Varieties of Social Networks} |
|
\bookmark[page=379,level=2]{10.1.4 Graphs With Several Node Types} |
|
\bookmark[page=380,level=3]{10.1.5 Exercises for Section 10.1} |
|
\bookmark[page=381,level=1]{10.2 Clustering of Social-Network Graphs} |
|
\bookmark[page=381,level=2]{10.2.1 Distance Measures for Social-Network Graphs} |
|
\bookmark[page=381,level=2]{10.2.2 Applying Standard Clustering Methods} |
|
\bookmark[page=383,level=2]{10.2.3 Betweenness} |
|
\bookmark[page=383,level=2]{10.2.4 The Girvan-Newman Algorithm} |
|
\bookmark[page=386,level=2]{10.2.5 Using Betweenness to Find Communities} |
|
\bookmark[page=387,level=3]{10.2.6 Exercises for Section 10.2} |
|
\bookmark[page=388,level=1]{10.3 Direct Discovery of Communities} |
|
\bookmark[page=389,level=2]{10.3.1 Finding Cliques} |
|
\bookmark[page=389,level=2]{10.3.2 Complete Bipartite Graphs} |
|
\bookmark[page=390,level=2]{10.3.3 Finding Complete Bipartite Subgraphs} |
|
\bookmark[page=391,level=2]{10.3.4 Why Complete Bipartite Graphs Must Exist} |
|
\bookmark[page=393,level=3]{10.3.5 Exercises for Section 10.3} |
|
\bookmark[page=394,level=1]{10.4 Partitioning of Graphs} |
|
\bookmark[page=394,level=2]{10.4.1 What Makes a Good Partition?} |
|
\bookmark[page=395,level=2]{10.4.2 Normalized Cuts} |
|
\bookmark[page=395,level=2]{10.4.3 Some Matrices That Describe Graphs} |
|
\bookmark[page=397,level=2]{10.4.4 Eigenvalues of the Laplacian Matrix} |
|
\bookmark[page=399,level=2]{10.4.5 Alternative Partitioning Methods} |
|
\bookmark[page=400,level=3]{10.4.6 Exercises for Section 10.4} |
|
\bookmark[page=401,level=1]{10.5 Finding Overlapping Communities} |
|
\bookmark[page=401,level=2]{10.5.1 The Nature of Communities} |
|
\bookmark[page=401,level=2]{10.5.2 Maximum-Likelihood Estimation} |
|
\bookmark[page=403,level=2]{10.5.3 The Affiliation-Graph Model} |
|
\bookmark[page=405,level=2]{10.5.4 Discrete Optimization of Community Assignments} |
|
\bookmark[page=406,level=2]{10.5.5 Avoiding the Use of Discrete Membership Changes} |
|
\bookmark[page=408,level=3]{10.5.6 Exercises for Section 10.5} |
|
\bookmark[page=409,level=1]{10.6 Simrank} |
|
\bookmark[page=410,level=2]{10.6.1 Random Walkers on a Social Graph} |
|
\bookmark[page=411,level=2]{10.6.2 Random Walks with Restart} |
|
\bookmark[page=413,level=2]{10.6.3 Approximate Simrank} |
|
\bookmark[page=415,level=2]{10.6.4 Why Approximate Simrank Works} |
|
\bookmark[page=416,level=2]{10.6.5 Application of Simrank to Finding Communities} |
|
\bookmark[page=417,level=3]{10.6.6 Exercises for Section 10.6} |
|
\bookmark[page=419,level=1]{10.7 Counting Triangles} |
|
\bookmark[page=419,level=2]{10.7.1 Why Count Triangles?} |
|
\bookmark[page=420,level=2]{10.7.2 An Algorithm for Finding Triangles} |
|
\bookmark[page=421,level=2]{10.7.3 Optimality of the Triangle-Finding Algorithm} |
|
\bookmark[page=421,level=2]{10.7.4 Finding Triangles Using MapReduce} |
|
\bookmark[page=423,level=2]{10.7.5 Using Fewer Reduce Tasks} |
|
\bookmark[page=424,level=3]{10.7.6 Exercises for Section 10.7} |
|
\bookmark[page=425,level=1]{10.8 Neighborhood Properties of Graphs} |
|
\bookmark[page=425,level=2]{10.8.1 Directed Graphs and Neighborhoods} |
|
\bookmark[page=426,level=2]{10.8.2 The Diameter of a Graph} |
|
\bookmark[page=427,level=2]{10.8.3 Transitive Closure and Reachability} |
|
\bookmark[page=428,level=2]{10.8.4 Reachability Via MapReduce} |
|
\bookmark[page=429,level=2]{10.8.5 Seminaive Evaluation} |
|
\bookmark[page=430,level=2]{10.8.6 Linear Transitive Closure} |
|
\bookmark[page=431,level=2]{10.8.7 Transitive Closure by Recursive Doubling} |
|
\bookmark[page=433,level=2]{10.8.8 Smart Transitive Closure} |
|
\bookmark[page=435,level=2]{10.8.9 Comparison of Methods} |
|
\bookmark[page=437,level=2]{10.8.10 Transitive Closure by Graph Reduction}

\bookmark[page=438,level=2]{10.8.11 Approximating the Sizes of Neighborhoods}

\bookmark[page=440,level=3]{10.8.12 Exercises for Section 10.8}
|
\bookmark[page=442,level=1]{10.9 Summary of Chapter 10} |
|
\bookmark[page=445,level=1]{10.10 References for Chapter 10}
|
\bookmark[page=449,level=0]{11 Dimensionality Reduction} |
|
\bookmark[page=450,level=1]{11.1 Eigenvalues and Eigenvectors of Symmetric Matrices} |
|
\bookmark[page=450,level=2]{11.1.1 Definitions} |
|
\bookmark[page=451,level=2]{11.1.2 Computing Eigenvalues and Eigenvectors} |
|
\bookmark[page=452,level=2]{11.1.3 Finding Eigenpairs by Power Iteration} |
|
\bookmark[page=455,level=2]{11.1.4 The Matrix of Eigenvectors} |
|
\bookmark[page=455,level=3]{11.1.5 Exercises for Section 11.1} |
|
\bookmark[page=456,level=1]{11.2 Principal-Component Analysis} |
|
\bookmark[page=457,level=2]{11.2.1 An Illustrative Example} |
|
\bookmark[page=460,level=2]{11.2.2 Using Eigenvectors for Dimensionality Reduction} |
|
\bookmark[page=461,level=2]{11.2.3 The Matrix of Distances} |
|
\bookmark[page=462,level=3]{11.2.4 Exercises for Section 11.2} |
|
\bookmark[page=462,level=1]{11.3 Singular-Value Decomposition} |
|
\bookmark[page=462,level=2]{11.3.1 Definition of SVD} |
|
\bookmark[page=464,level=2]{11.3.2 Interpretation of SVD} |
|
\bookmark[page=466,level=2]{11.3.3 Dimensionality Reduction Using SVD} |
|
\bookmark[page=467,level=2]{11.3.4 Why Zeroing Low Singular Values Works} |
|
\bookmark[page=469,level=2]{11.3.5 Querying Using Concepts} |
|
\bookmark[page=470,level=2]{11.3.6 Computing the SVD of a Matrix} |
|
\bookmark[page=471,level=3]{11.3.7 Exercises for Section 11.3} |
|
\bookmark[page=472,level=1]{11.4 CUR Decomposition} |
|
\bookmark[page=473,level=2]{11.4.1 Definition of CUR} |
|
\bookmark[page=474,level=2]{11.4.2 Choosing Rows and Columns Properly} |
|
\bookmark[page=475,level=2]{11.4.3 Constructing the Middle Matrix} |
|
\bookmark[page=476,level=2]{11.4.4 The Complete CUR Decomposition} |
|
\bookmark[page=477,level=2]{11.4.5 Eliminating Duplicate Rows and Columns} |
|
\bookmark[page=478,level=3]{11.4.6 Exercises for Section 11.4} |
|
\bookmark[page=478,level=1]{11.5 Summary of Chapter 11} |
|
\bookmark[page=480,level=1]{11.6 References for Chapter 11} |
|
\bookmark[page=483,level=0]{12 Large-Scale Machine Learning} |
|
\bookmark[page=484,level=1]{12.1 The Machine-Learning Model} |
|
\bookmark[page=484,level=2]{12.1.1 Training Sets} |
|
\bookmark[page=484,level=2]{12.1.2 Some Illustrative Examples} |
|
\bookmark[page=487,level=2]{12.1.3 Approaches to Machine Learning} |
|
\bookmark[page=488,level=2]{12.1.4 Machine-Learning Architecture} |
|
\bookmark[page=491,level=3]{12.1.5 Exercises for Section 12.1} |
|
\bookmark[page=491,level=1]{12.2 Perceptrons} |
|
\bookmark[page=491,level=2]{12.2.1 Training a Perceptron with Zero Threshold} |
|
\bookmark[page=495,level=2]{12.2.2 Convergence of Perceptrons} |
|
\bookmark[page=495,level=2]{12.2.3 The Winnow Algorithm} |
|
\bookmark[page=497,level=2]{12.2.4 Allowing the Threshold to Vary} |
|
\bookmark[page=499,level=2]{12.2.5 Multiclass Perceptrons} |
|
\bookmark[page=500,level=2]{12.2.6 Transforming the Training Set} |
|
\bookmark[page=501,level=2]{12.2.7 Problems With Perceptrons} |
|
\bookmark[page=502,level=2]{12.2.8 Parallel Implementation of Perceptrons} |
|
\bookmark[page=504,level=3]{12.2.9 Exercises for Section 12.2} |
|
\bookmark[page=505,level=1]{12.3 Support-Vector Machines} |
|
\bookmark[page=506,level=2]{12.3.1 The Mechanics of an SVM} |
|
\bookmark[page=507,level=2]{12.3.2 Normalizing the Hyperplane} |
|
\bookmark[page=509,level=2]{12.3.3 Finding Optimal Approximate Separators} |
|
\bookmark[page=512,level=2]{12.3.4 SVM Solutions by Gradient Descent} |
|
\bookmark[page=515,level=2]{12.3.5 Stochastic Gradient Descent} |
|
\bookmark[page=516,level=2]{12.3.6 Parallel Implementation of SVM} |
|
\bookmark[page=517,level=3]{12.3.7 Exercises for Section 12.3} |
|
\bookmark[page=517,level=1]{12.4 Learning from Nearest Neighbors} |
|
\bookmark[page=518,level=2]{12.4.1 The Framework for Nearest-Neighbor Calculations} |
|
\bookmark[page=518,level=2]{12.4.2 Learning with One Nearest Neighbor} |
|
\bookmark[page=519,level=2]{12.4.3 Learning One-Dimensional Functions} |
|
\bookmark[page=522,level=2]{12.4.4 Kernel Regression} |
|
\bookmark[page=522,level=2]{12.4.5 Dealing with High-Dimensional Euclidean Data} |
|
\bookmark[page=524,level=2]{12.4.6 Dealing with Non-Euclidean Distances} |
|
\bookmark[page=524,level=3]{12.4.7 Exercises for Section 12.4} |
|
\bookmark[page=525,level=1]{12.5 Decision Trees} |
|
\bookmark[page=526,level=2]{12.5.1 Using a Decision Tree} |
|
\bookmark[page=527,level=2]{12.5.2 Impurity Measures} |
|
\bookmark[page=528,level=2]{12.5.3 Designing a Decision-Tree Node} |
|
\bookmark[page=529,level=2]{12.5.4 Selecting a Test Using a Numerical Feature} |
|
\bookmark[page=531,level=2]{12.5.5 Selecting a Test Using a Categorical Feature} |
|
\bookmark[page=533,level=2]{12.5.6 Parallel Design of Decision Trees} |
|
\bookmark[page=534,level=2]{12.5.7 Node Pruning} |
|
\bookmark[page=535,level=2]{12.5.8 Decision Forests} |
|
\bookmark[page=536,level=3]{12.5.9 Exercises for Section 12.5} |
|
\bookmark[page=537,level=1]{12.6 Comparison of Learning Methods} |
|
\bookmark[page=538,level=1]{12.7 Summary of Chapter 12} |
|
\bookmark[page=540,level=1]{12.8 References for Chapter 12} |
|
\bookmark[page=543,level=0]{13 Neural Nets and Deep Learning} |
|
\bookmark[page=544,level=1]{13.1 Introduction to Neural Nets} |
|
\bookmark[page=545,level=2]{13.1.1 Neural Nets, in General} |
|
\bookmark[page=547,level=2]{13.1.2 Interconnections Among Nodes} |
|
\bookmark[page=547,level=2]{13.1.3 Convolutional Neural Networks} |
|
\bookmark[page=548,level=2]{13.1.4 Design Issues for Neural Nets} |
|
\bookmark[page=548,level=3]{13.1.5 Exercises for Section 13.1} |
|
\bookmark[page=549,level=1]{13.2 Dense Feedforward Networks} |
|
\bookmark[page=549,level=2]{13.2.1 Linear Algebra Notation} |
|
\bookmark[page=551,level=2]{13.2.2 Activation Functions} |
|
\bookmark[page=552,level=2]{13.2.3 The Sigmoid} |
|
\bookmark[page=552,level=2]{13.2.4 The Hyperbolic Tangent} |
|
\bookmark[page=553,level=2]{13.2.5 Softmax} |
|
\bookmark[page=554,level=2]{13.2.6 Rectified Linear Unit}
|
\bookmark[page=555,level=2]{13.2.7 Loss Functions} |
|
\bookmark[page=556,level=2]{13.2.8 Regression Loss} |
|
\bookmark[page=557,level=2]{13.2.9 Classification Loss} |
|
\bookmark[page=558,level=3]{13.2.10 Exercises for Section 13.2}
|
\bookmark[page=559,level=1]{13.3 Backpropagation and Gradient Descent} |
|
\bookmark[page=560,level=2]{13.3.1 Compute Graphs} |
|
\bookmark[page=561,level=2]{13.3.2 Gradients, Jacobians, and the Chain Rule} |
|
\bookmark[page=562,level=2]{13.3.3 The Backpropagation Algorithm} |
|
\bookmark[page=565,level=2]{13.3.4 Iterating Gradient Descent} |
|
\bookmark[page=566,level=2]{13.3.5 Tensors} |
|
\bookmark[page=568,level=3]{13.3.6 Exercises for Section 13.3} |
|
\bookmark[page=568,level=1]{13.4 Convolutional Neural Networks} |
|
\bookmark[page=569,level=2]{13.4.1 Convolutional Layers} |
|
\bookmark[page=572,level=2]{13.4.2 Convolution and Cross-Correlation} |
|
\bookmark[page=573,level=2]{13.4.3 Pooling Layers} |
|
\bookmark[page=573,level=2]{13.4.4 CNN Architecture} |
|
\bookmark[page=575,level=2]{13.4.5 Implementation and Training} |
|
\bookmark[page=576,level=3]{13.4.6 Exercises for Section 13.4} |
|
\bookmark[page=577,level=1]{13.5 Recurrent Neural Networks} |
|
\bookmark[page=579,level=2]{13.5.1 Training RNN’s} |
|
\bookmark[page=581,level=2]{13.5.2 Vanishing and Exploding Gradients} |
|
\bookmark[page=582,level=2]{13.5.3 Long Short-Term Memory (LSTM)} |
|
\bookmark[page=584,level=3]{13.5.4 Exercises for Section 13.5} |
|
\bookmark[page=585,level=1]{13.6 Regularization} |
|
\bookmark[page=585,level=2]{13.6.1 Norm Penalties} |
|
\bookmark[page=586,level=2]{13.6.2 Dropout} |
|
\bookmark[page=586,level=2]{13.6.3 Early Stopping} |
|
\bookmark[page=587,level=2]{13.6.4 Dataset Augmentation} |
|
\bookmark[page=587,level=1]{13.7 Summary of Chapter 13} |
|
\bookmark[page=589,level=1]{13.8 References for Chapter 13} |
|
|
|
|
|
\end{document} |