annotate src/fschmidt/html/Html.java @ 68:00520880ad02

add fschmidt source
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 05 Oct 2025 17:24:15 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
68
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
1 /*
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2 Copyright (c) 2008 Franklin Schmidt <fschmidt@gmail.com>
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 Permission is hereby granted, free of charge, to any person obtaining a copy
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 of this software and associated documentation files (the "Software"), to deal
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 in the Software without restriction, including without limitation the rights
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 copies of the Software, and to permit persons to whom the Software is
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 furnished to do so, subject to the following conditions:
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11 The above copyright notice and this permission notice shall be included in
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 all copies or substantial portions of the Software.
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20 THE SOFTWARE.
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 */
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23 package fschmidt.html;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 import java.io.InputStreamReader;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26 import java.util.ArrayList;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27 import java.util.Iterator;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
28 import java.util.Arrays;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29 import java.util.Set;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30 import java.util.HashSet;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 import org.slf4j.Logger;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 import org.slf4j.LoggerFactory;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33 import fschmidt.util.java.HtmlUtils;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
34
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 public final class Html extends ArrayList<Object> {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37 private static final Logger logger = LoggerFactory.getLogger(Html.class);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39 public static final String TEXTAREA = "textarea";
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40 public static final String SCRIPT = "script";
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 public static final String STYLE = "style";
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43 private int startingLine = 0;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 private boolean removeBadTags = false;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45 private Set<String> containerTags = new HashSet<String>(Arrays.asList(SCRIPT,STYLE));
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 public Html() {}
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 public Html(String text) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 parse(text);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53 public Set<String> containerTags() {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 return containerTags;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
57 public void setStartingLine(int startingLine) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
58 this.startingLine = startingLine;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
59 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
60
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
61 public void removeBadTags(boolean removeBadTags) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
62 this.removeBadTags = removeBadTags;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
63 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
64
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
65 public void parse(String text) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
66 int len = text.length();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
67 int i = 0;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
68 int i2Prev = 0;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
69 int line = startingLine;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
70 outer:
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
71 while( i < len ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
72 int i2 = text.indexOf('<',i);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
73 while( i2 != -1 && i2+1 < len ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
74 char c = text.charAt(i2+1);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
75 if( Character.isLetter(c) || c=='/' || c=='!' )
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
76 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
77 i2 = text.indexOf('<',i2+1);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
78 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
79 if( i2 == -1 ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
80 add( text.substring(i) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
81 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
82 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
83 if( i < i2 )
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
84 add( text.substring(i,i2) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
85 if( text.startsWith("<!--",i2) ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
86 i = text.indexOf("-->",i2+4);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
87 if( i == -1 ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
88 add( text.substring(i2) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
89 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
90 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
91 add( new HtmlComment( text.substring(i2+4,i) ) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
92 i += 3;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
93 } else if( text.startsWith("<![CDATA[",i2) ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
94 i = text.indexOf("]]>",i2+9);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
95 if( i == -1 ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
96 add( text.substring(i2) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
97 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
98 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
99 add( new HtmlCdata( text.substring(i2+9,i) ) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
100 i += 3;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
101 } else {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
102 i = text.indexOf('>',i2);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
103 if( i == -1 ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
104 add( text.substring(i2) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
105 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
106 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
107 line += lines(text,i2Prev,i2);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
108 i2Prev = i2;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
109 String tagText = text.substring(i2+1,i);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
110 try {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
111 HtmlTag tag = new HtmlTag(tagText);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
112 tag.lineNumber = line;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
113 String tagName = tag.getName().toLowerCase();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
114 if( containerTags.contains(tagName) ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
115 i2 = i;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
116 String endTagName = '/' + tagName;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
117 while(true) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
118 i2 = text.indexOf('<',i2+1);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
119 if( i2 == -1 )
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
120 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
121 int i3 = text.indexOf('>',i2);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
122 if( i3 == -1 )
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
123 break;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
124 int j = i2+1;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
125 while( j<i3 && !Character.isWhitespace(text.charAt(j)) ) j++;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
126 String s = text.substring(i2+1,j);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
127 if( s.equalsIgnoreCase(endTagName) ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
128 HtmlTag tag2 = new HtmlTag( text.substring(i2+1,i3) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
129 line += lines(text,i2Prev,i2);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
130 tag2.lineNumber = line;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
131 i2Prev = i2;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
132 String text2 = text.substring(i+1,i2);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
133 HtmlTextContainer textContainer =
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
134 tagName.equals(TEXTAREA) ?
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
135 new HtmlTextarea(tag,text2,tag2)
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
136 : tagName.equals(SCRIPT) ?
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
137 new HtmlScript(tag,text2,tag2)
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
138 : tagName.equals(STYLE) ?
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
139 new HtmlStyle(tag,text2,tag2)
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
140 :
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
141 new HtmlTextContainer(tag,text2,tag2)
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
142 ;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
143 add( textContainer );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
144 i = i3 + 1;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
145 continue outer;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
146 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
147 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
148 logger.warn("unclosed "+tagName);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
149 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
150 i += 1;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
151 add( tag );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
152 } catch(HtmlTag.BadTag e) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
153 // logger.debug("bad tag",e);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
154 i += 1;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
155 if( !removeBadTags ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
156 add( "&lt;" );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
157 add( HtmlUtils.htmlEncode(tagText) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
158 add( "&gt;" );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
159 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
160 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
161 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
162 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
163 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
164
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
165 @Override public String toString() {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
166 StringBuilder buf = new StringBuilder();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
167 for( Object o : this ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
168 buf.append( o );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
169 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
170 return buf.toString();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
171 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
172
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
173 private static int lines(String text,int start,int end) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
174 int n = 0;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
175 int i = start - 1;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
176 while(true) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
177 i = text.indexOf('\n',i+1);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
178 if( i == -1 || i >= end )
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
179 return n;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
180 n++;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
181 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
182 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
183
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
184 public Html flatten() {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
185 Html html = new Html();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
186 flattenTo(html);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
187 return html;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
188 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
189
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
190 void flattenTo(Html html) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
191 for( Object obj : this ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
192 if( obj instanceof HtmlNode ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
193 ((HtmlNode)obj).flattenTo(html);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
194 } else {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
195 html.add(obj);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
196 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
197 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
198 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
199
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
200 public Html deepen() {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
201 Iterator iter = iterator();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
202 Html html = deepen(iter);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
203 if( iter.hasNext() )
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
204 throw new RuntimeException("unmatched end tag:\n"+html);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
205 return html;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
206 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
207
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
208 private static Html deepen(Iterator iter) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
209 Html html = new Html();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
210 while( iter.hasNext() ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
211 Object obj = iter.next();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
212 if( obj instanceof HtmlTag && !(obj instanceof HtmlNode) ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
213 HtmlTag tag = (HtmlTag)obj;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
214 if( !tag.isEmpty() ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
215 String name = tag.getName();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
216 if( name.startsWith("/") ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
217 html.add(tag);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
218 return html;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
219 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
220 Html children = deepen(iter);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
221 HtmlTag endTag = (HtmlTag)children.get(children.size()-1);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
222 if( endTag.getName().equals("/"+name) ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
223 children.remove(children.size()-1);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
224 html.add( new HtmlNode(tag,children) );
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
225 continue;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
226 } else {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
227 html.add(tag);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
228 html.addAll(children);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
229 return html;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
230 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
231 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
232 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
233 html.add(obj);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
234 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
235 return html;
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
236 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
237
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
238 public static void main(String[] args) throws Exception {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
239 /*
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
240 String page = fschmidt.util.java.IoUtils.readPage("http://www.yahoo.com/");
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
241 Html html = new Html(page);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
242 String s = html.toString();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
243 System.out.print(s);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
244 // System.out.println(html.size());
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
245 */
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
246 String page = fschmidt.util.java.IoUtils.readAll(new InputStreamReader(System.in));
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
247 Html html = new Html(page);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
248 for( Iterator i=html.iterator(); i.hasNext(); ) {
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
249 Object o = i.next();
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
250 System.out.println(o.getClass().getName()+" - "+o);
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
251 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
252 }
00520880ad02 add fschmidt source
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
253 }