Created
December 23, 2020 18:07
-
-
Save farhan-raza/021115af6976cc4014ad1b08f492213c to your computer and use it in GitHub Desktop.
Split HTML Webpage into Multiple Files Programmatically using C#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Splits the HTML page "Product.html" (located under dataDir1) into multiple files:
// every <p> element found in the page is written to its own file named
// "doc_<index>.html" in the same location.
string content = File.ReadAllText(dataDir1 + "Product.html");

// Create an instance of HTML document from the loaded markup (second argument is
// left empty, as in the original snippet).
// The document is wrapped in `using` so it is disposed once the split is finished;
// previously only the per-paragraph output documents were disposed.
using (var document = new HTMLDocument(content, ""))
{
    // Find all paragraph elements inside the document by using a CSS selector query.
    var elements = document.QuerySelectorAll("p");

    for (int i = 0; i < elements.Length; i++)
    {
        // Create an empty document to export the current paragraph into.
        using (var copyTo = new HTMLDocument())
        {
            // Append the paragraph to the newly created document.
            // NOTE(review): the node is attached directly under the root element
            // (DocumentElement), not under a body element - confirm this is the
            // intended output structure.
            copyTo.DocumentElement.AppendChild(elements[i]);

            // Save the document; the loop index keeps each output file name unique.
            copyTo.Save(dataDir1 + "doc_" + i + ".html", HTMLSaveFormat.HTML);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment