View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.myfaces.renderkit.html.util;
20  
21  import junit.framework.TestCase;
22  import org.apache.myfaces.renderkit.html.util.CallbackListener;
23  import org.apache.myfaces.renderkit.html.util.ReducedHTMLParser;
24  
25  /**
26   * Unit test for the ReducedHTMLParser class which detects tags within an HTML document.
27   */
28  public class ReducedHTMLParserTest extends TestCase
29  {
30      public static class ParseCallbackListener implements CallbackListener
31      {
32          int beforeHeadStart = -1;
33          int afterHeadStart = -1;
34          int beforeHeadEnd = -1;
35          int afterHeadEnd = -1;
36          int beforeBodyStart = -1;
37          int afterBodyStart = -1;
38          int beforeBodyEnd = -1;
39          int afterBodyEnd = -1;
40  
41          public void openedStartTag(int charIndex, int tagIdentifier)
42          {
43              if (tagIdentifier == ReducedHTMLParser.HEAD_TAG)
44              {
45                  beforeHeadStart = charIndex;
46              }
47              else if (tagIdentifier == ReducedHTMLParser.BODY_TAG)
48              {
49                  beforeBodyStart = charIndex;
50              }
51          }
52  
53          public void closedStartTag(int charIndex, int tagIdentifier)
54          {
55              if (tagIdentifier == ReducedHTMLParser.HEAD_TAG)
56              {
57                  afterHeadStart = charIndex;
58              }
59              else if (tagIdentifier == ReducedHTMLParser.BODY_TAG)
60              {
61                  afterBodyStart = charIndex;
62              }
63          }
64  
65          public void openedEndTag(int charIndex, int tagIdentifier)
66          {
67              if (tagIdentifier == ReducedHTMLParser.HEAD_TAG)
68              {
69                  beforeHeadEnd = charIndex;
70              }
71              else if (tagIdentifier == ReducedHTMLParser.BODY_TAG)
72              {
73                  beforeBodyEnd = charIndex;
74              }
75          }
76  
77          public void closedEndTag(int charIndex, int tagIdentifier)
78          {
79              if (tagIdentifier == ReducedHTMLParser.HEAD_TAG)
80              {
81                  afterHeadEnd = charIndex;
82              }
83              else if (tagIdentifier == ReducedHTMLParser.BODY_TAG)
84              {
85                  afterBodyEnd = charIndex;
86              }
87          }
88  
89          public void attribute(int charIndex, int tagIdentifier, String key, String value)
90          {
91          }
92      }
93  
94      public void testIsFinished1()
95      {
96          CharSequence seq = "";
97          CallbackListener listener = new ParseCallbackListener();
98          ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
99          assertTrue("Empty sequence is finished", parser.isFinished());
100     }
101 
102     public void testIsFinished2()
103     {
104         CharSequence seq = "xx yy";
105         CallbackListener listener = new ParseCallbackListener();
106         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
107 
108         assertFalse("Sequence is finished", parser.isFinished());
109         parser.consumeNonWhitespace();
110         assertFalse("Sequence is finished", parser.isFinished());
111         parser.consumeWhitespace();
112         assertFalse("Sequence is finished", parser.isFinished());
113         parser.consumeNonWhitespace();
114         assertTrue("Sequence is finished", parser.isFinished());
115     }
116 
117     public void testConsumeWhitespace()
118     {
119         CharSequence seq = "  \t  \r\n   xx    yy  ";
120         CallbackListener listener = new ParseCallbackListener();
121         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
122 
123         // test that one call consumes all available whitespace
124         // and that all sorts of whitespace are consumed.
125         assertFalse("Sequence is finished", parser.isFinished());
126         parser.consumeWhitespace();
127         String word1 = parser.consumeNonWhitespace();
128         assertEquals("xx found", "xx", word1);
129 
130         // test that multiple calls don't consume anything but whitespace
131         parser.consumeWhitespace();
132         parser.consumeWhitespace();
133         parser.consumeWhitespace();
134         String word2 = parser.consumeNonWhitespace();
135         assertEquals("yy found", "yy", word2);
136 
137         // test that no failure occurs from consuming whitespace at the
138         // end of the sequence
139         assertFalse("Sequence is finished", parser.isFinished());
140         parser.consumeWhitespace();
141         parser.consumeWhitespace();
142         assertTrue("Sequence is finished", parser.isFinished());
143     }
144 
145     public void testConsumeNonWhitespace()
146     {
147         CharSequence seq = "xx yy zz";
148         CallbackListener listener = new ParseCallbackListener();
149         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
150 
151         String word1 = parser.consumeNonWhitespace();
152         assertEquals("xx found", "xx", word1);
153 
154         // test that a call against whitespace returns null
155         String noWord = parser.consumeNonWhitespace();
156         assertNull("ConsumeNonWhitespace when whitespace is present", noWord);
157 
158         // test that no exception is generated for multiple calls
159         parser.consumeNonWhitespace();
160         parser.consumeNonWhitespace();
161 
162         parser.consumeWhitespace();
163         String word2 = parser.consumeNonWhitespace();
164         assertEquals("yy found", "yy", word2);
165 
166         // test word that is at end of sequence
167         parser.consumeWhitespace();
168         String word3 = parser.consumeNonWhitespace();
169         assertEquals("zz found", "zz", word3);
170 
171         // test that isFinished is set
172         assertTrue("Sequence is finished", parser.isFinished());
173 
174         // test that no failure occurs from consuming nonwhitespace at the
175         // end of the sequence
176         noWord = parser.consumeNonWhitespace();
177         assertNull("ConsumeNonWhitespace at end of sequence", noWord);
178     }
179 
180     public void testConsumeMatch()
181     {
182         CharSequence seq = "xx <!-- yy --> zz";
183         CallbackListener listener = new ParseCallbackListener();
184         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
185 
186         // test non-match
187         assertFalse("Match non-matching pattern", parser.consumeMatch("ffff"));
188 
189         // test valid match. Also verifies that previous match failure didn't
190         // move the parsing offset.
191         assertTrue("Match matching pattern", parser.consumeMatch("xx"));
192 
193         // this won't match until whitespace removed
194         assertFalse("Match non-matching pattern", parser.consumeMatch("<!--"));
195         parser.consumeWhitespace();
196         assertTrue("Match matching pattern", parser.consumeMatch("<!--"));
197 
198         // repeat
199         assertFalse("Match non-matching pattern", parser.consumeMatch("yy"));
200         parser.consumeWhitespace();
201         assertTrue("Match matching pattern", parser.consumeMatch("yy"));
202 
203         parser.consumeWhitespace();
204         assertTrue("Match matching pattern", parser.consumeMatch("-->"));
205 
206         // match at end of sequence
207         parser.consumeWhitespace();
208         assertTrue("Match matching pattern", parser.consumeMatch("zz"));
209 
210         // check no exception on matching on finished sequence
211         assertFalse("Match non-matching pattern", parser.consumeMatch("aa"));
212     }
213 
214     public void testConsumeElementName()
215     {
216         CharSequence seq = "  foo  t:foo t:FooBar t:foo_bar element-name/>";
217         CallbackListener listener = new ParseCallbackListener();
218         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
219 
220         // test that consumeElementName will automatically skip any leading whitespace
221         String name1 = parser.consumeElementName();
222         assertEquals("Element name matched", "foo", name1);
223 
224         String name2 = parser.consumeElementName();
225         assertEquals("Element name matched", "t:foo", name2);
226 
227         String name3 = parser.consumeElementName();
228         assertEquals("Element name matched", "t:FooBar", name3);
229 
230         String name4 = parser.consumeElementName();
231         assertEquals("Element name matched", "t:foo_bar", name4);
232 
233         String name5 = parser.consumeElementName();
234         assertEquals("Element name matched", "element-name", name5);
235     }
236 
237     public void testConsumeStringBasic()
238     {
239         CharSequence s1 = "'string1' \"string2\"";
240         CallbackListener listener = new ParseCallbackListener();
241         ReducedHTMLParser parser = new ReducedHTMLParser(s1, listener);
242 
243         // Note that the consumeString method always expects the leading quote to
244         // have been consumed already..
245 
246         // test single-quote delimited
247         parser.consumeMatch("'");
248         String str1 = parser.consumeString('\'');
249         assertEquals("String correctly parsed", "string1", str1);
250 
251         // test double-quote delimited
252         parser.consumeWhitespace();
253         parser.consumeMatch("\"");
254         String str2 = parser.consumeString('\"');
255         assertEquals("String correctly parsed", "string2", str2);
256     }
257 
258     public void testConsumeStringEscapedQuote()
259     {
260         char quoteMark = '\'';
261 
262         // build literal sequence 'don\'t quote me' not-in-the-string
263         StringBuffer buf = new StringBuffer();
264         buf.append(quoteMark);
265         buf.append("don\\'t quote me");
266         buf.append(quoteMark);
267         buf.append(" not-in-the-string");
268 
269         CallbackListener listener = new ParseCallbackListener();
270         ReducedHTMLParser parser = new ReducedHTMLParser(buf, listener);
271 
272         // Note that the consumeString method always expects the leading quote to
273         // have been consumed already..
274 
275         parser.consumeMatch("'");
276         String str1 = parser.consumeString('\'');
277         assertEquals("String correctly parsed", "don't quote me", str1);
278     }
279 
280     public void testConsumeStringEscapedNonQuote()
281     {
282         char quoteMark = '"';
283 
284         // build literal sequence 'don\'t quote me' not-in-the-string
285         StringBuffer buf = new StringBuffer();
286         buf.append(quoteMark);
287         buf.append("don\\'t quote me");
288         buf.append(quoteMark);
289         buf.append(" not-in-the-string");
290 
291         CallbackListener listener = new ParseCallbackListener();
292         ReducedHTMLParser parser = new ReducedHTMLParser(buf, listener);
293 
294         // Note that the consumeString method always expects the leading quote to
295         // have been consumed already..
296 
297         parser.consumeMatch("\"");
298         String str1 = parser.consumeString('"');
299         assertEquals("String correctly parsed", "don\\'t quote me", str1);
300     }
301     
302     public void testConsumeStringEscapedEscape()
303     {
304         char quoteMark = '\'';
305         char backSlash = '\\';
306 
307         // build literal sequence 'don\\'t escape me' not-in-the-string
308         // The double-backslash should be treated as a single backslash
309         // which does *not* escape the following quote.
310         StringBuffer buf = new StringBuffer();
311         buf.append(quoteMark);
312         buf.append("don");
313         buf.append(backSlash);
314         buf.append(backSlash);
315         buf.append(quoteMark);
316         buf.append("t escape me");
317         buf.append(quoteMark);
318 
319         CallbackListener listener = new ParseCallbackListener();
320         ReducedHTMLParser parser = new ReducedHTMLParser(buf, listener);
321 
322         // Note that the consumeString method always expects the leading quote to
323         // have been consumed already..
324 
325         parser.consumeMatch("'");
326         String str1 = parser.consumeString('\'');
327         assertEquals("String correctly parsed", "don" + backSlash, str1);
328     }
329 
330     public void testConsumeAttrValue()
331     {
332         CharSequence seq = "  bare 'quoted 1' \"quoted 2\" bare2 ";
333         CallbackListener listener = new ParseCallbackListener();
334         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
335 
336         String val1 = parser.consumeAttrValue();
337         assertEquals("Attr value matched", "bare", val1);
338 
339         String val2 = parser.consumeAttrValue();
340         assertEquals("Attr value matched", "quoted 1", val2);
341 
342         String val3 = parser.consumeAttrValue();
343         assertEquals("Attr value matched", "quoted 2", val3);
344 
345         String val4 = parser.consumeAttrValue();
346         assertEquals("Attr value matched", "bare2", val4);
347     }
348 
349     public void testConsumeExcept()
350     {
351         CharSequence seq = "abc$$#dd  ee#ff-gghh ii";
352         CallbackListener listener = new ParseCallbackListener();
353         ReducedHTMLParser parser = new ReducedHTMLParser(seq, listener);
354 
355         parser.consumeExcept("#e");
356         String val1 = parser.consumeNonWhitespace();
357         assertEquals("ConsumeExcept skipped expected chars", "#dd", val1);
358 
359         parser.consumeExcept("z-");
360         String val2 = parser.consumeNonWhitespace();
361         assertEquals("ConsumeExcept skipped expected chars", "-gghh", val2);
362 
363         // check that consumeExcept will reach end of buffer ok if none of
364         // the desired chars are found
365         assertFalse(parser.isFinished());
366         parser.consumeExcept("z");
367         assertTrue(parser.isFinished());
368 
369         // check that calling consumeExcept is safe at end-of-buffer
370         parser.consumeExcept("z");
371     }
372 
373     // test parsing completes when a lessthan is not followed by an element name,
374     // and there is just whitespace up to end of the input.
375     public void testParseBadTagNoElementName1()
376     {
377         String s = "xxxx \n\n <# \n\n";
378         CallbackListener listener = new ParseCallbackListener();
379         ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
380 
381         parser.parse();
382         assertTrue(parser.isFinished());
383     }
384 
385     // test parsing completes when a lessthan is not followed by an element name,
386     public void testParseBadTagNoElementName2()
387     {
388         String s = "xxxx \n\n <# \n\n hi there";
389         CallbackListener listener = new ParseCallbackListener();
390         ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
391 
392         parser.parse();
393         assertTrue(parser.isFinished());
394     }
395 
396     // test parsing completes when an invalid char is found where an attribute name
397     // is expected.
398     public void testParseBadTagInvalidAttributeName()
399     {
400         String s = "<foo )/>";
401         CallbackListener listener = new ParseCallbackListener();
402         ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
403 
404         parser.parse();
405         assertTrue(parser.isFinished());
406     }
407 
408     // test CDATA sections are handled
409     public void testParseCDATA()
410     {
411         String s = "xx<head> <![CDATA[ <head> ]]> <body>";
412         ParseCallbackListener listener = new ParseCallbackListener();
413         ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
414 
415         parser.parse();
416         assertTrue(parser.isFinished());
417         assertEquals("CDATA works", 8, listener.afterHeadStart);
418         assertEquals("CDATA works", 30, listener.beforeBodyStart);
419     }
420 
421     // test PI sections are handled
422     public void testParsePI()
423     {
424         String s = "<?xml version=\"1.0\"?> xx<head> ";
425         ParseCallbackListener listener = new ParseCallbackListener();
426         ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
427 
428         parser.parse();
429         assertTrue(parser.isFinished());
430         assertEquals("PI works", 30, listener.afterHeadStart);
431     }
432 
433     // Test script element support; the spec states that a <script> or
434     // <style> tag can contain anything except "/>"
435     public void testScript()
436     {
437         String s1 = "<head>";
438         String s2 = "<script type='text/javascript'>"
439                     + "if (1<2) alert('foo');\n"
440                     + "if (1>2) alert('bar');\n"
441                     + "</script>";
442         String s3 = "</head>";
443         String s4 = "<body>";
444         String s5 = "</body>";
445 
446         StringBuffer buf = new StringBuffer();
447         buf.append(s1);
448         buf.append(s2);
449         buf.append(s3);
450         buf.append(s4);
451         buf.append(s5);
452 
453         ParseCallbackListener listener = new ParseCallbackListener();
454         ReducedHTMLParser parser = new ReducedHTMLParser(buf.toString(), listener);
455 
456         parser.parse();
457         assertTrue(parser.isFinished());
458         assertEquals("Script works", s1.length(), listener.afterHeadStart);
459         int beforeHeadEnd = s1.length() + s2.length();
460         assertEquals("Script works", beforeHeadEnd, listener.beforeHeadEnd);
461         int beforeBodyStart = beforeHeadEnd + s3.length();
462         assertEquals("Script works", beforeBodyStart, listener.beforeBodyStart);
463         int beforeBodyEnd = beforeBodyStart + s4.length();
464         assertEquals("Script works", beforeBodyEnd, listener.beforeBodyEnd);
465     }
466 
467     // test the full parse method
468     public void testParse()
469     {
470         String s0 = "<!DOCTYPE PUBLIC \"sss\" \"http:foo\">\n";
471         String s1 = "<html><head>";
472         String s2 = "\n<!-- a comment --><title>foo</title>";
473         String s3 = "</head>";
474         String s4 = "< body onclick='zz'>";
475         String s5 = "  bodytext ";
476         // if comments aren't correctly parsed, then this will cause the
477         // head/body start positions to get corrupted.
478         String s6 = "  <!-- <head> <body> -->";
479         // if xml attr strings aren't correctly parsed, then this will cause
480         // the head/body start positions to get corrupted
481         String s7 = "<t:foo a1='<head>' a2='<body>'/>";
482         String s8 = "</body> </html>";
483 
484         StringBuffer buf = new StringBuffer();
485         buf.append(s0);
486         buf.append(s1);
487         buf.append(s2);
488         buf.append(s3);
489         buf.append(s4);
490         buf.append(s5);
491         buf.append(s6);
492         buf.append(s7);
493         buf.append(s8);
494 
495         ParseCallbackListener listener = new ParseCallbackListener();
496         ReducedHTMLParser parser = new ReducedHTMLParser(buf, listener);
497 
498         parser.parse();
499 
500         // check that listener has correctly computed the offset to the char just
501         // before the </head> tag starts.
502         int afterHeadStart = s0.length() + s1.length();
503         assertEquals("Pos after <head> tag ", afterHeadStart, listener.afterHeadStart);
504 
505         int beforeBodyStart = afterHeadStart + s2.length() + s3.length();
506         assertEquals("Pos before <body> tag", beforeBodyStart, listener.beforeBodyStart);
507 
508         int afterBodyStart = beforeBodyStart + s4.length();
509         assertEquals("Pos after <body> tag", afterBodyStart, listener.afterBodyStart);
510     }
511 }