Last active
December 19, 2021 10:27
-
-
Save sunmeat/3c6848306e6ebe8eb97a66eca472ec4b to your computer and use it in GitHub Desktop.
stack usage example (html parser)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FileWorker.java: | |
package com.alex.stack.htmlparser; | |
import java.io.BufferedReader; | |
import java.io.FileReader; | |
import java.io.File; | |
import java.io.FileWriter; | |
import java.io.IOException; | |
import java.util.Vector; | |
/**
 * A {@link File} with convenience helpers for writing and reading its text content.
 *
 * <p>Readers and writers are opened with the constructors that take no charset argument.
 * Note that {@link #toString()} returns the file's text rather than its path.
 */
class FileWorker extends File {

    /**
     * @param name path name handed to the {@link File} constructor
     */
    public FileWorker(String name) {
        super(name);
    }

    /**
     * Empties the file by opening it for writing in non-append mode.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened or closed
     */
    public void erase() {
        try (FileWriter truncating = new FileWriter(this, false)) {
            truncating.write("");
        } catch (IOException failure) {
            throw new RuntimeException(failure);
        }
    }

    /**
     * Appends {@code string} to the end of the file.
     *
     * @param string text to append
     * @return {@code true} when the text was written, {@code false} when an {@link IOException} occurred
     */
    public boolean write(String string) {
        boolean written = true;
        try (FileWriter appender = new FileWriter(this, true)) {
            appender.write(string);
        } catch (IOException failure) {
            written = false;
        }
        return written;
    }

    /**
     * Appends {@code string} followed by a {@code "\n"} terminator.
     *
     * @param string text to append before the line terminator
     * @return the result of {@link #write(String)}
     */
    public boolean writeln(String string) {
        String terminated = string + "\n";
        return write(terminated);
    }

    /**
     * Reads the file line by line.
     *
     * @return every line of the file, each with {@code "\n"} appended
     * @throws RuntimeException wrapping the {@link IOException} if reading fails
     */
    public Vector<String> readLines() {
        return readTerminatedLines();
    }

    /**
     * Reads the whole file into one buffer.
     *
     * @return the lines of the file joined together, each terminated by {@code "\n"}
     * @throws RuntimeException wrapping the {@link IOException} if reading fails
     */
    public StringBuilder readAll() {
        StringBuilder text = new StringBuilder();
        for (String row : readTerminatedLines()) {
            text.append(row);
        }
        return text;
    }

    /**
     * @return the text content of the file, as produced by {@link #readAll()}
     */
    @Override
    public String toString() {
        return String.valueOf(readAll());
    }

    /** Collects the file's lines, re-adding a {@code "\n"} terminator to each one. */
    private Vector<String> readTerminatedLines() {
        Vector<String> rows = new Vector<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(this))) {
            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                rows.add(row + "\n");
            }
        } catch (IOException failure) {
            throw new RuntimeException(failure);
        }
        return rows;
    }
}
==================================================================================================================== | |
UniversalStack.java: | |
/**
 * A simple last-in-first-out stack backed by a {@link Vector}.
 *
 * <p>The {@code repeatEnabled} flag controls whether an element equal to one already on the
 * stack may be pushed again.
 *
 * @param <T> type of the stored elements
 */
class UniversalStack<T> {
    /**
     * {@code true} lets equal elements be stacked repeatedly; {@code false} makes
     * {@link #push(Object)} silently ignore an element that is already present.
     */
    boolean repeatEnabled = false;

    private final Vector<T> items = new Vector<>(0);

    /** Creates an empty stack with {@code repeatEnabled} left at its default of {@code false}. */
    public UniversalStack() {
    }

    /**
     * Creates an empty stack.
     *
     * @param repeatEnabled whether equal elements may appear on the stack more than once
     */
    public UniversalStack(boolean repeatEnabled) {
        this.repeatEnabled = repeatEnabled;
    }

    /** Removes every element. */
    public void clear() {
        items.clear();
    }

    /**
     * @return {@code true} when the stack holds no elements
     */
    public boolean isEmpty() {
        return items.isEmpty();
    }

    /**
     * @return number of elements currently on the stack
     */
    public int getCount() {
        return items.size();
    }

    /**
     * Pushes {@code data} on top of the stack.
     *
     * <p>When repeats are disabled and an equal element is already stored, the call does
     * nothing. (The check used to be inverted, rejecting duplicates only when repeats were
     * enabled.)
     *
     * @param data element to push; may be {@code null}
     */
    public void push(T data) {
        if (!repeatEnabled && contains(data)) {
            return;
        }
        items.add(data);
    }

    /**
     * Removes and returns the top element.
     *
     * @return the removed element, or {@code null} when the stack is empty
     */
    public T pop() {
        if (isEmpty()) {
            return null;
        }
        return items.remove(items.size() - 1);
    }

    /**
     * Returns the top element without removing it.
     *
     * @return the top element, or {@code null} when the stack is empty
     */
    public T peek() {
        return isEmpty() ? null : items.lastElement();
    }

    /**
     * Same result as {@link #peek()}; kept for existing callers.
     *
     * @return the top element, or {@code null} when the stack is empty
     */
    public T peekEqual() {
        return peek();
    }

    /**
     * Checks whether an element equal to {@code data} is stored.
     *
     * @param data element to look for; {@code null} matches a stored {@code null}
     * @return {@code true} when a stored element equals {@code data}
     */
    public boolean contains(T data) {
        for (T item : items) {
            // Null-safe comparison: a stored null must not blow up the search.
            boolean same = (item == null) ? data == null : item.equals(data);
            if (same) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the elements from bottom to top, one per line, each followed by {@code "\n"}
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        for (T item : items) {
            // String.valueOf renders a null element as "null" instead of throwing.
            result.append(String.valueOf(item)).append("\n");
        }
        return result.toString();
    }
}
==================================================================================================================== | |
HTMLTags.java: | |
class HTMLTags { | |
private UniversalStack<HTMLTag> tagsStack = new UniversalStack<HTMLTag>(true); | |
private Vector<HTMLTag> dualTags = new Vector<>(); | |
private Vector<HTMLTag> singleTags = new Vector<>(); | |
public HTMLTags() { | |
FileWorker dualTagsFile = new FileWorker("C:\\1\\dualtags.txt"); // !!! MAYBE YOU NEED TO CHANGE PATH TO FILES !!! | |
FileWorker singleTagsFile = new FileWorker("C:\\1\\singletags.txt"); | |
Vector<String> tagsList; | |
tagsList = dualTagsFile.readLines(); | |
for (String tagsList1 : tagsList) { | |
dualTags.add(new HTMLTag(tagsList1)); | |
} | |
tagsList = singleTagsFile.readLines(); | |
for (String tagsList1 : tagsList) { | |
singleTags.add(new HTMLTag(tagsList1, true)); | |
} | |
} | |
public boolean dualTagsContains(String tag) { | |
for (HTMLTag dualTag : dualTags) { | |
if (dualTag.getName().equals(new HTMLTag(tag).getName())) { | |
return true; | |
} | |
} | |
return false; | |
} | |
public boolean singleTagsContains(String tag) { | |
for (HTMLTag singleTag : singleTags) { | |
if (singleTag.getName().equals(new HTMLTag(tag).getName())) { | |
return true; | |
} | |
} | |
return false; | |
} | |
public String parseTags(String htmlCode) { | |
String tag; | |
tagsStack.clear(); | |
htmlCode = htmlCode.toLowerCase(); | |
try { | |
while (htmlCode.contains("<") && htmlCode.contains(">")) { | |
try { | |
tag = htmlCode.substring(htmlCode.indexOf("<"), htmlCode.indexOf(">") + 1); | |
} catch (IndexOutOfBoundsException e) { | |
throw new Exception("Неправильно расставлены скобки '<' и '>'"); | |
} | |
htmlCode = htmlCode.replaceFirst(tag, "[xxx]"); | |
if (dualTagsContains(tag)) { | |
HTMLTag tempTag = new HTMLTag(tag); | |
if (tempTag.getOpeningTag().equals((tag.contains(" ")) ? tag.substring(0, tag.indexOf(" ")) + ">" : tag)) { | |
tagsStack.push(tempTag); | |
} else if (tempTag.getClosingTag().equals(tag)) { | |
if (tagsStack.peek().getName().equals(tempTag.getName())) { | |
tagsStack.pop(); | |
} else { | |
throw new Exception("Вначале необходимо открыть тег, а лишь потом закрывать: " + tempTag); | |
} | |
} else { | |
throw new Exception("Недопустимый тег: " + tag); | |
} | |
} else if (singleTagsContains(tag)) { | |
if (tag.charAt(1) == '/') { | |
throw new Exception("Закрывающий тег не нужен. Это тег-одиночка: " + new HTMLTag(tag, true)); | |
} else { | |
} | |
} else { | |
throw new Exception("Такого тега не существует: " + tag); | |
} | |
} | |
if (htmlCode.contains("<") || htmlCode.contains(">")) { | |
throw new Exception("Остались лишние скобки '<' или '>'"); | |
} | |
if (tagsStack.getCount() > 0) { | |
throw new Exception("Не все теги были закрыты: \n" + tagsStack); | |
} | |
return "HTML код правильный"; | |
} catch (Exception e) { | |
return e.getMessage(); | |
} | |
} | |
@Override | |
public String toString() { | |
return " Двойные теги: \n" + dualTags.toString() | |
+ " \n Одиночные теги: \n" + singleTags.toString(); | |
} | |
public static class HTMLTag { | |
private String tag; | |
private boolean single = false; | |
public HTMLTag(String tag) { | |
setTag(tag); | |
} | |
public HTMLTag(String tag, boolean single) { | |
setTag(tag, single); | |
} | |
public boolean setTag(String tag) { | |
return setTag(tag, false); | |
} | |
public boolean setTag(String tag, boolean single) { | |
String tempTag; | |
if (tag.contains("<") && tag.contains(">")) { | |
tempTag = tag.substring(tag.indexOf("<") + 1, tag.indexOf(">")); | |
if (tempTag.contains(" ")) { | |
tempTag = tempTag.substring(0, tempTag.indexOf(" ")); | |
} | |
if (tempTag.charAt(0) == '/') { | |
tempTag = tempTag.replace("/", ""); | |
} | |
this.tag = tempTag; | |
this.single = single; | |
return true; | |
} else { | |
return false; | |
} | |
} | |
public String getName() { | |
return tag; | |
} | |
public String getOpeningTag() { | |
return "<" + tag + ">"; | |
} | |
public String getClosingTag() { | |
return "</" + tag + ">"; | |
} | |
@Override | |
public String toString() { | |
if (single) { | |
return getOpeningTag(); | |
} else { | |
return getOpeningTag() + "..." + getClosingTag(); | |
} | |
} | |
} | |
} | |
==================================================================================================================== | |
Program.java: | |
class Program { // change class name! | |
public static void main(String[] args) { | |
String htmlFileName = "C:\\1\\some page.html"; // !!! PATH TO HTML-FILE !!! | |
FileWorker htmlFile = new FileWorker(htmlFileName); | |
System.out.println(htmlFile); | |
HTMLTags htmlTags = new HTMLTags(); | |
StringBuilder htmlCode = htmlFile.readAll(); | |
System.out.println(htmlTags.parseTags(htmlCode.toString())); | |
} | |
} | |
==================================================================================================================== | |
dualtags.txt: | |
<html>...</html> | |
<head>...</head> | |
<title>...</title> | |
<body>...</body> | |
<strong>...</strong> | |
<p>...</p> | |
<h1>...</h1> | |
<h2>...</h2> | |
<h3>...</h3> | |
<h4>...</h4> | |
<h5>...</h5> | |
<h6>...</h6> | |
<blockquote>...</blockquote> | |
<q>...</q> | |
<address>...</address> | |
<pre>...</pre> | |
<b>...</b> | |
<i>...</i> | |
<tt>...</tt> | |
<font>...</font> | |
<ol>...</ol> | |
<ul>...</ul> | |
<li>...</li> | |
<dl>...</dl> | |
<dt>...</dt> | |
<dd>...</dd> | |
<table>...</table> | |
<td>...</td> | |
<tr>...</tr> | |
<th>...</th> | |
<thead>...</thead> | |
<caption>...</caption> | |
<a>...</a> | |
======================================================================================================================== | |
singletags.txt: | |
<base> | |
<meta> | |
<br> | |
<hr> | |
<img> | |
======================================================================================================================== | |
some page.html: | |
<html> | |
<head> | |
<meta http-equiv="Content-Type" content="text/html" /> | |
<title>Пример страницы с таблицей №1.</title> | |
</head> | |
<body> | |
<table width="100%" border="1" cellspacing="1" cellpadding="1" bgcolor=""> | |
<tr> | |
<th height="91" colspan=3 align="center" bgcolor="#66FF00"><strong>ПРИМЕР НАПИСАНИЯ ТАБЛИЦ</strong></th> | |
</tr> | |
<tr> | |
<th height="47" colspan=3 align="center" bgcolor="#BCEE68"> | |
<table width="50%" border="2" cellspacing="1" bgcolor="#4876FF"> | |
<tr> | |
<td align="center"><a href="#" style="text-decoration: none"><font color="#FFFFFF">страница 1</font></a></td> | |
<td align="center"><a href="#" style="text-decoration: none"><font color="#FFFFFF">страница 2</font></a></td> | |
<td align="center"><a href="#" style="text-decoration: none"><font color="#FFFFFF">страница 3</font></a></td> | |
<td align="center"><a href="#" style="text-decoration: none"><font color="#FFFFFF">страница 4</font></a></td> | |
</tr> | |
</table> | |
</th> | |
</tr> | |
<tr> | |
<th width="17%" height="498" align="center" bgcolor="#BFEFFF">левый блок</th> | |
<th width="65%" align="center" background="images/bek_564.jpg"> | |
<table width="90%" border="2" cellspacing="1" bordercolor="#0000CC"> | |
<caption>Пример таблицы для тренировок.</caption> | |
<tr> | |
<td colspan="3" align="center">ячейка 1</td> | |
</tr> | |
<tr> | |
<td align="center">ячейка 2</td> | |
<td align="center">ячейка 3</td> | |
<td align="center">ячейка 4</td> | |
</tr> | |
<tr> | |
<td align="center" colspan="2" rowspan="2">ячейка 5</td> | |
<td align="center">ячейка 6</td> | |
</tr> | |
<tr> | |
<td align="center">ячейка 7</td> | |
</tr> | |
<tr> | |
<td align="center" colspan="2">ячейка8</td> | |
<td align="center">ячейка 9</td> | |
</tr> | |
<tr> | |
<td align="center" colspan="3">ячейка 10</td> | |
</tr> | |
<tr> | |
<td align="center">ячейка 11</td> | |
<td align="center" rowspan="3" colspan="3">ячейка 12</td> | |
</tr> | |
<tr> | |
<td align="center">ячейка 13</td> | |
</tr> | |
<tr> | |
<td align="center">ячейка 14</td> | |
</tr> | |
<tr> | |
<td align="center" colspan="3">ячейка 15</td> | |
</tr> | |
</table> | |
<br> | |
<p align="center"><a href="http://vk.com/sunmeat">about author</a></p> | |
<br> | |
</th> | |
<th width="18%" align="center" bgcolor="#98F5FF">правый блок</th> | |
</tr> | |
<tr> | |
<th height="49" colspan=3 align="center" bgcolor="#1E90FF">НИЖНИЙ БЛОК</th> | |
</tr> | |
</table> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment