@paul-english
Created November 7, 2024 20:24
Some bookmarks & research saved from PathVisions 2024
@misc{kalra_yottixel_2019,
title = {Yottixel -- {An} {Image} {Search} {Engine} for {Large} {Archives} of {Histopathology} {Whole} {Slide} {Images}},
url = {http://arxiv.org/abs/1911.08748},
doi = {10.48550/arXiv.1911.08748},
abstract = {With the emergence of digital pathology, searching for similar images in large archives has gained considerable attention. Image retrieval can provide pathologists with unprecedented access to the evidence embodied in already diagnosed and treated cases from the past. This paper proposes a search engine specialized for digital pathology, called Yottixel, a portmanteau for "one yotta pixel," alluding to the big-data nature of histopathology images. The most impressive characteristic of Yottixel is its ability to represent whole slide images (WSIs) in a compact manner. Yottixel can perform millions of searches in real-time with a high search accuracy and low storage profile. Yottixel uses an intelligent indexing algorithm capable of representing WSIs with a mosaic of patches by converting them into a small number of methodically extracted barcodes, called "Bunch of Barcodes" (BoB), the most prominent performance enabler of Yottixel. The performance of the prototype platform is qualitatively tested using 300 WSIs from the University of Pittsburgh Medical Center (UPMC) and 2,020 WSIs from The Cancer Genome Atlas Program (TCGA) provided by the National Cancer Institute. Both datasets amount to more than 4,000,000 patches of 1000x1000 pixels. We report three sets of experiments that show that Yottixel can accurately retrieve organs and malignancies, and its semantic ordering shows good agreement with the subjective evaluation of human observers.},
urldate = {2024-11-07},
publisher = {arXiv},
author = {Kalra, S. and Choi, C. and Shah, S. and Pantanowitz, L. and Tizhoosh, H. R.},
month = nov,
year = {2019},
note = {arXiv:1911.08748},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Electrical Engineering and Systems Science - Image and Video Processing},
file = {Preprint PDF:C\:\\Users\\paul\\Zotero\\storage\\57YK94JL\\Kalra et al. - 2019 - Yottixel -- An Image Search Engine for Large Archi.pdf:application/pdf;Snapshot:C\:\\Users\\paul\\Zotero\\storage\\6CI7B6SG\\1911.html:text/html},
}
@misc{campanella_clinical_2024,
title = {A {Clinical} {Benchmark} of {Public} {Self}-{Supervised} {Pathology} {Foundation} {Models}},
url = {http://arxiv.org/abs/2407.06508},
doi = {10.48550/arXiv.2407.06508},
abstract = {The use of self-supervised learning (SSL) to train pathology foundation models has increased substantially in the past few years. Notably, several models trained on large quantities of clinical data have been made publicly available in recent months. This will significantly enhance scientific research in computational pathology and help bridge the gap between research and clinical deployment. With the increase in availability of public foundation models of different sizes, trained using different algorithms on different datasets, it becomes important to establish a benchmark to compare the performance of such models on a variety of clinically relevant tasks spanning multiple organs and diseases. In this work, we present a collection of pathology datasets comprising clinical slides associated with clinically relevant endpoints including cancer diagnoses and a variety of biomarkers generated during standard hospital operation from two medical centers. We leverage these datasets to systematically assess the performance of public pathology foundation models and provide insights into best practices for training new foundation models and selecting appropriate pretrained models.},
urldate = {2024-11-07},
publisher = {arXiv},
author = {Campanella, Gabriele and Chen, Shengjia and Verma, Ruchika and Zeng, Jennifer and Stock, Aryeh and Croken, Matt and Veremis, Brandon and Elmas, Abdulkadir and Huang, Kuan-lin and Kwan, Ricky and Houldsworth, Jane and Schoenfeld, Adam J. and Vanderbilt, Chad},
month = jul,
year = {2024},
note = {arXiv:2407.06508},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Electrical Engineering and Systems Science - Image and Video Processing},
file = {Preprint PDF:C\:\\Users\\paul\\Zotero\\storage\\N5NJ2G4M\\Campanella et al. - 2024 - A Clinical Benchmark of Public Self-Supervised Pat.pdf:application/pdf;Snapshot:C\:\\Users\\paul\\Zotero\\storage\\RJJYU2TS\\2407.html:text/html},
}
@article{lu_visual-language_2024,
title = {A visual-language foundation model for computational pathology},
volume = {30},
copyright = {2024 The Author(s), under exclusive licence to Springer Nature America, Inc.},
issn = {1546-170X},
url = {https://www.nature.com/articles/s41591-024-02856-4},
doi = {10.1038/s41591-024-02856-4},
abstract = {The accelerated adoption of digital pathology and advances in deep learning have enabled the development of robust models for various pathology tasks across a diverse array of diseases and patient cohorts. However, model training is often difficult due to label scarcity in the medical domain, and a model’s usage is limited by the specific task and disease for which it is trained. Additionally, most models in histopathology leverage only image data, a stark contrast to how humans teach each other and reason about histopathologic entities. We introduce CONtrastive learning from Captions for Histopathology (CONCH), a visual-language foundation model developed using diverse sources of histopathology images, biomedical text and, notably, over 1.17 million image–caption pairs through task-agnostic pretraining. Evaluated on a suite of 14 diverse benchmarks, CONCH can be transferred to a wide range of downstream tasks involving histopathology images and/or text, achieving state-of-the-art performance on histology image classification, segmentation, captioning, and text-to-image and image-to-text retrieval. CONCH represents a substantial leap over concurrent visual-language pretrained systems for histopathology, with the potential to directly facilitate a wide array of machine learning-based workflows requiring minimal or no further supervised fine-tuning.},
language = {en},
number = {3},
urldate = {2024-11-07},
journal = {Nature Medicine},
author = {Lu, Ming Y. and Chen, Bowen and Williamson, Drew F. K. and Chen, Richard J. and Liang, Ivy and Ding, Tong and Jaume, Guillaume and Odintsov, Igor and Le, Long Phi and Gerber, Georg and Parwani, Anil V. and Zhang, Andrew and Mahmood, Faisal},
month = mar,
year = {2024},
note = {Publisher: Nature Publishing Group},
keywords = {Machine learning, Medical research, Pathology},
pages = {863--874},
}
@article{lu_multimodal_2024,
title = {A multimodal generative {AI} copilot for human pathology},
volume = {634},
copyright = {2024 The Author(s)},
issn = {1476-4687},
url = {https://www.nature.com/articles/s41586-024-07618-3},
doi = {10.1038/s41586-024-07618-3},
abstract = {Computational pathology has witnessed considerable progress in the development of both task-specific predictive models and task-agnostic self-supervised vision encoders. However, despite the explosive growth of generative artificial intelligence (AI), there have been few studies on building general-purpose multimodal AI assistants and copilots tailored to pathology. Here we present PathChat, a vision-language generalist AI assistant for human pathology. We built PathChat by adapting a foundational vision encoder for pathology, combining it with a pretrained large language model and fine-tuning the whole system on over 456,000 diverse visual-language instructions consisting of 999,202 question and answer turns. We compare PathChat with several multimodal vision-language AI assistants and GPT-4V, which powers the commercially available multimodal general-purpose AI assistant ChatGPT-4. PathChat achieved state-of-the-art performance on multiple-choice diagnostic questions from cases with diverse tissue origins and disease models. Furthermore, using open-ended questions and human expert evaluation, we found that overall PathChat produced more accurate and pathologist-preferable responses to diverse queries related to pathology. As an interactive vision-language AI copilot that can flexibly handle both visual and natural language inputs, PathChat may potentially find impactful applications in pathology education, research and human-in-the-loop clinical decision-making.},
language = {en},
number = {8033},
urldate = {2024-11-07},
journal = {Nature},
author = {Lu, Ming Y. and Chen, Bowen and Williamson, Drew F. K. and Chen, Richard J. and Zhao, Melissa and Chow, Aaron K. and Ikemura, Kenji and Kim, Ahrong and Pouli, Dimitra and Patel, Ankush and Soliman, Amr and Chen, Chengkuan and Ding, Tong and Wang, Judy J. and Gerber, Georg and Liang, Ivy and Le, Long Phi and Parwani, Anil V. and Weishaupt, Luca L. and Mahmood, Faisal},
month = oct,
year = {2024},
note = {Publisher: Nature Publishing Group},
keywords = {Data integration, Image processing, Machine learning, Pathology},
pages = {466--473},
file = {Full Text PDF:C\:\\Users\\paul\\Zotero\\storage\\WT43U9YV\\Lu et al. - 2024 - A multimodal generative AI copilot for human patho.pdf:application/pdf},
}
@article{chen_towards_2024,
title = {Towards a general-purpose foundation model for computational pathology},
volume = {30},
copyright = {2024 The Author(s), under exclusive licence to Springer Nature America, Inc.},
issn = {1546-170X},
url = {https://www.nature.com/articles/s41591-024-02857-3},
doi = {10.1038/s41591-024-02857-3},
abstract = {Quantitative evaluation of tissue images is crucial for computational pathology (CPath) tasks, requiring the objective characterization of histopathological entities from whole-slide images (WSIs). The high resolution of WSIs and the variability of morphological features present significant challenges, complicating the large-scale annotation of data for high-performance applications. To address this challenge, current efforts have proposed the use of pretrained image encoders through transfer learning from natural image datasets or self-supervised learning on publicly available histopathology datasets, but have not been extensively developed and evaluated across diverse tissue types at scale. We introduce UNI, a general-purpose self-supervised model for pathology, pretrained using more than 100 million images from over 100,000 diagnostic H\&E-stained WSIs ({\textgreater}77 TB of data) across 20 major tissue types. The model was evaluated on 34 representative CPath tasks of varying diagnostic difficulty. In addition to outperforming previous state-of-the-art models, we demonstrate new modeling capabilities in CPath such as resolution-agnostic tissue classification, slide classification using few-shot class prototypes, and disease subtyping generalization in classifying up to 108 cancer types in the OncoTree classification system. UNI advances unsupervised representation learning at scale in CPath in terms of both pretraining data and downstream evaluation, enabling data-efficient artificial intelligence models that can generalize and transfer to a wide range of diagnostically challenging tasks and clinical workflows in anatomic pathology.},
language = {en},
number = {3},
urldate = {2024-11-07},
journal = {Nature Medicine},
author = {Chen, Richard J. and Ding, Tong and Lu, Ming Y. and Williamson, Drew F. K. and Jaume, Guillaume and Song, Andrew H. and Chen, Bowen and Zhang, Andrew and Shao, Daniel and Shaban, Muhammad and Williams, Mane and Oldenburg, Lukas and Weishaupt, Luca L. and Wang, Judy J. and Vaidya, Anurag and Le, Long Phi and Gerber, Georg and Sahai, Sharifa and Williams, Walt and Mahmood, Faisal},
month = mar,
year = {2024},
note = {Publisher: Nature Publishing Group},
keywords = {Biomedical engineering, Machine learning, Pathology},
pages = {850--862},
}
@article{lu_data-efficient_2021,
title = {Data-efficient and weakly supervised computational pathology on whole-slide images},
volume = {5},
copyright = {2021 The Author(s), under exclusive licence to Springer Nature Limited},
issn = {2157-846X},
url = {https://www.nature.com/articles/s41551-020-00682-w},
doi = {10.1038/s41551-020-00682-w},
abstract = {Deep-learning methods for computational pathology require either manual annotation of gigapixel whole-slide images (WSIs) or large datasets of WSIs with slide-level labels and typically suffer from poor domain adaptation and interpretability. Here we report an interpretable weakly supervised deep-learning method for data-efficient WSI processing and learning that only requires slide-level labels. The method, which we named clustering-constrained-attention multiple-instance learning (CLAM), uses attention-based learning to identify subregions of high diagnostic value to accurately classify whole slides and instance-level clustering over the identified representative regions to constrain and refine the feature space. By applying CLAM to the subtyping of renal cell carcinoma and non-small-cell lung cancer as well as the detection of lymph node metastasis, we show that it can be used to localize well-known morphological features on WSIs without the need for spatial labels, that it overperforms standard weakly supervised classification algorithms and that it is adaptable to independent test cohorts, smartphone microscopy and varying tissue content.},
language = {en},
number = {6},
urldate = {2024-11-07},
journal = {Nature Biomedical Engineering},
author = {Lu, Ming Y. and Williamson, Drew F. K. and Chen, Tiffany Y. and Chen, Richard J. and Barbieri, Matteo and Mahmood, Faisal},
month = jun,
year = {2021},
note = {Publisher: Nature Publishing Group},
keywords = {Image processing, Machine learning, Pathology},
pages = {555--570},
file = {Accepted Version:C\:\\Users\\paul\\Zotero\\storage\\P46NNBH8\\Lu et al. - 2021 - Data-efficient and weakly supervised computational.pdf:application/pdf},
}
@misc{noauthor_home_nodate,
title = {Home {Page}},
url = {https://applikate.com/},
abstract = {Direct-to-digital histopathology. Applikate Technologies is a hardware and software company that has developed the first automated histology platform with integrated digital imaging (CHiMP). Its patented technology stains and images whole tissue samples using clearing histology and high-resolution multiphoton microscopy, without physical slicing, automating the pathology workflow, cutting processing time, lowering labor and materials costs, improving image quality, and reducing the risk of misdiagnosis.},
language = {en-US},
urldate = {2024-11-07},
journal = {Applikate},
file = {Snapshot:C\:\\Users\\paul\\Zotero\\storage\\37HHVNAM\\applikate.com.html:text/html},
}
@misc{noauthor_modella_nodate,
title = {Modella {AI}},
url = {https://www.modella.ai/index.html},
urldate = {2024-11-07},
}
@misc{noauthor_sierra_nodate,
title = {Sierra colour reference slide},
url = {https://ffei.ai/sierra-colour-reference-slide/},
language = {en-US},
urldate = {2024-11-07},
journal = {FFEI.AI},
}
@misc{noauthor_american_nodate,
title = {American {Society} of {Cytopathology}},
url = {https://cytopathology.org/},
urldate = {2024-11-07},
}
@misc{noauthor_society_nodate,
title = {Society for {Imaging} {Informatics} in {Medicine} - {You} {Belong} at {SIIM}},
url = {https://siim.org/},
abstract = {You Belong at SIIM},
language = {en-US},
urldate = {2024-11-07},
journal = {SIIM},
}
@misc{noauthor_mahmood_nodate,
title = {Mahmood {Lab} – {Computational} {Pathology} – {Computational} and {Quantitative} {Pathology} at {Harvard}},
url = {https://faisal.ai/},
language = {en-US},
urldate = {2024-11-07},
}
@inproceedings{molnar_ihcscoregan_2024,
title = {{IHCScoreGAN}: {An} unsupervised generative adversarial network for end-to-end {Ki67} scoring for clinical breast cancer diagnosis},
shorttitle = {{IHCScoreGAN}},
url = {https://openreview.net/forum?id=U3vfFn9WQ7&noteId=2GdZUzEQgC},
abstract = {Ki67 is a biomarker whose activity is routinely measured and scored by pathologists through immunohistochemistry (IHC) staining, which informs clinicians of patient prognosis and guides treatment. Currently, most clinical laboratories rely on a tedious, inconsistent manual scoring process to quantify the percentage of Ki67-positive cells. While many works have shown promise for Ki67 quantification using computational approaches, the current state-of-the-art methods have limited real-world feasibility: they either require large datasets of meticulous cell-level ground truth labels to train, or they provide pre-trained weights that may not generalize well to in-house data. To overcome these challenges, we propose IHCScoreGAN, the first unsupervised deep learning framework for end-to-end Ki67 scoring without the need for any ground truth labels. IHCScoreGAN only requires IHC image samples and unpaired synthetic data, yet it learns to generate colored cell segmentation masks while simultaneously predicting cell center point and biomarker expressions for Ki67 scoring, made possible through our novel dual-branch generator structure. We validated our framework on a large cohort of 2,136 clinically signed-out cases, yielding an accuracy of 0.97 and an F1-score of 0.95 and demonstrating substantially better performance than a pre-trained state-of-the-art supervised model. By removing ground truth requirements, our unsupervised technique constitutes an important step towards easily-trained Ki67 scoring solutions which can train on out-of-domain data in an unsupervised manner.},
language = {en},
urldate = {2024-11-07},
author = {Molnar, Carl and Tavolara, Thomas E. and Garcia, Christopher A. and McClintock, David S. and Zarella, Mark D. and Han, Wenchao},
month = feb,
year = {2024},
file = {Full Text PDF:C\:\\Users\\paul\\Zotero\\storage\\M9IZHMEV\\Molnar et al. - 2024 - IHCScoreGAN An unsupervised generative adversaria.pdf:application/pdf},
}
@misc{rasool_grasooltcga-path-report_2024,
title = {grasool/tcga-path-report},
url = {https://github.com/grasool/tcga-path-report},
urldate = {2024-11-07},
author = {Rasool, Ghulam},
month = jun,
year = {2024},
note = {original-date: 2024-05-23T11:54:43Z},
}
@misc{noauthor_imaging-researchpath-foundation_nodate,
title = {imaging-research/path-foundation at master · {Google}-{Health}/imaging-research},
url = {https://github.com/Google-Health/imaging-research/tree/master/path-foundation},
abstract = {Contribute to Google-Health/imaging-research development by creating an account on GitHub.},
language = {en},
urldate = {2024-11-07},
journal = {GitHub},
}