This Bugzilla instance is a read-only archive of historic NetBeans bug reports. To report a bug in NetBeans please follow the project's instructions for reporting issues.

View | Details | Raw Unified | Return to bug 117506
Collapse All | Expand All

(-)a/nbbuild/antsrc/org/netbeans/nbbuild/CheckLinks.java (-3 / +202 lines)
Lines 45-57 Link Here
45
import java.net.*;
45
import java.net.*;
46
import java.util.*;
46
import java.util.*;
47
import java.util.regex.*;
47
import java.util.regex.*;
48
49
import org.apache.tools.ant.BuildException;
48
import org.apache.tools.ant.BuildException;
50
import org.apache.tools.ant.FileScanner;
49
import org.apache.tools.ant.FileScanner;
51
import org.apache.tools.ant.Project;
50
import org.apache.tools.ant.Project;
52
import org.apache.tools.ant.Task;
51
import org.apache.tools.ant.Task;
53
import org.apache.tools.ant.taskdefs.MatchingTask;
52
import org.apache.tools.ant.taskdefs.MatchingTask;
54
55
import org.apache.tools.ant.types.Mapper;
53
import org.apache.tools.ant.types.Mapper;
56
54
57
// XXX in Ant 1.6, permit <xmlcatalog> entries to make checking of "external" links
55
// XXX in Ant 1.6, permit <xmlcatalog> entries to make checking of "external" links
Lines 120-125 Link Here
120
        return m;
118
        return m;
121
    }
119
    }
122
120
121
    @Override
123
    public void execute () throws BuildException {
122
    public void execute () throws BuildException {
124
        if (basedir == null) throw new BuildException ("Must specify the basedir attribute");
123
        if (basedir == null) throw new BuildException ("Must specify the basedir attribute");
125
        FileScanner scanner = getDirectoryScanner (basedir);
124
        FileScanner scanner = getDirectoryScanner (basedir);
Lines 155-160 Link Here
155
    
154
    
156
    private static Pattern hrefOrAnchor = Pattern.compile("<(a|img)(\\s+shape=\"rect\")?\\s+(href|name|src)=\"([^\"#]*)(#[^\"]+)?\"(\\s+shape=\"rect\")?\\s*/?>", Pattern.CASE_INSENSITIVE);
155
    private static Pattern hrefOrAnchor = Pattern.compile("<(a|img)(\\s+shape=\"rect\")?\\s+(href|name|src)=\"([^\"#]*)(#[^\"]+)?\"(\\s+shape=\"rect\")?\\s*/?>", Pattern.CASE_INSENSITIVE);
157
    private static Pattern lineBreak = Pattern.compile("^", Pattern.MULTILINE);
156
    private static Pattern lineBreak = Pattern.compile("^", Pattern.MULTILINE);
157
158
    /**
159
     * The state of the Check Links process.
160
     */
161
    protected static class State {
162
        /**
163
         * Ant task to associate with the CheckLinks
164
         */
165
        protected Task task;
166
        /**
167
         * Global ClassLoader for the NetBeans module under test.
168
         * May be <code>null</code> if it called from
169
         * <code>CheckHelpSets</code> at the development time.
170
         *
171
         * @see CheckHelpSetsBin#globalClassLoadercreateGlobalClassLoader(File dir, String [] files)
172
         */
173
        protected ClassLoader globalClassLoader;
174
        /**
175
         * ClassLoader map for the NetBeans module under test.
176
         * May be <code>null</code> if it called from
177
         * <code>CheckHelpSets</code> at the development time.
178
         *
179
         * @see CheckHelpSetsBin#createClassLoaderMap (File dir, String [] files)
180
         */
181
        protected Map classLoaderMap;
182
        /**
183
         * The id string obtained from the javax.help.Map.ID .
184
         */
185
        protected String referrer;
186
        /**
187
         * The referrer file path (or full URL if not file:)
188
         * It is the empty string for the external calls from both
189
         * <code>CheckHelpSets</code> and <code>CheckHelpSetsBin</code>.
190
         * This is used only in error messages, but not in the bussiness logic.
191
         * @see  the local variable <code>basepath</code> in the private method
192
         * CheckLinks.scan(...).
193
         */
194
        protected String referrerLocation;
195
        /**
196
         * The URI to check
197
         */
198
        protected URI u;
199
        /**
200
         * The set of URIs known to be fully checked (including all anchored
201
         * variants etc.)
202
         */
203
        protected Set<URI> okurls;
204
        /**
205
         * The set of URIs known to be bogus.
206
         */
207
        protected Set<URI> badurls;
208
        /**
209
         * The set of (base) URIs known to have had their contents checked.
210
         */
211
        protected Set<URI> cleanurls;
212
        /**
213
         * If <code>true</code>, check external links (all protocols besides
214
         * file:)
215
         */
216
        protected boolean checkexternal;
217
        /**
218
         * If <code>true</code> then it is an error in the case of using a space
219
         * in the URI strings instead of the "%20".
220
         */
221
        protected boolean checkspaces;
222
        /**
223
         * If <code>true</code> then <code>State.filters</code> will be taken
224
         * into account.
225
         * @see State#filters
226
         */
227
        protected boolean checkforbidden;
228
        /**
229
         * one of:
230
         * <ul>
231
         *     <li>0 - just check that it can be opened;</li>
232
         *     <li>1 - check also that any links from it can be opened;</li>
233
         *     <li>2 - recurse</li>
234
         * </ul>
235
         */
236
        protected int recurse;
237
        /**
238
         * The list of Mappers to apply to get source files from HTML files.
239
         */
240
        protected List<Mapper> mappers;
241
        /**
242
         * It seems this filter list is always empty!
243
         */
244
        protected List<Filter> filters;
245
        /**
246
         * The list of strings containing error messages that may be displayed
247
         * for a user.
248
         */
249
        protected List<String> errors;
250
251
        /**
252
         * Creates a state of the CheckLinks process.
253
         *
254
         * @param task an Ant task to associate with this
255
         * @param globalClassLoader Global ClassLoader for the NetBeans module
256
         * under test.
257
         * @param classLoaderMap ClassLoader map for the NetBeans module under
258
         * test.
259
         * @param referrer the referrer file path (or full URL if not file:)
260
         * @param referrerLocation the location in the referrer, e.g. ":38:12",
261
         * or "" if unavailable
262
         * @param u the URI to check
263
         * @param okurls a set of URIs known to be fully checked (including all
264
         * anchored variants etc.)
265
         * @param badurls a set of URIs known to be bogus
266
         * @param cleanurls a set of (base) URIs known to have had their
267
         * contents checked
268
         * @param checkexternal if true, check external links (all protocols
269
         * besides file:)
270
         * @param checkspaces If <code>true</code> then it is an error in the
271
         * case of using a space in the URI strings instead of the "%20".
272
         * @param checkforbidden If <code>true</code> then
273
         * <code>State.filters</code> will be taken into account.
274
         * @param recurse one of:
275
         *                0 - just check that it can be opened;
276
         *                1 - check also that any links from it can be opened;
277
         *                2 - recurse
278
         * @param mappers a list of Mappers to apply to get source files from
279
         * HTML files
280
         * @param filters
281
         * @param errors
282
         */
283
        public State(Task task, ClassLoader globalClassLoader,
284
                     Map classLoaderMap, String referrer,
285
                     String referrerLocation, URI u,
286
                     Set<URI> okurls, Set<URI> badurls, Set<URI> cleanurls,
287
                     boolean checkexternal, boolean checkspaces,
288
                     boolean checkforbidden, int recurse,
289
                     List<Mapper> mappers, List<Filter> filters,
290
                     List<String> errors) {
291
            this.task = task;
292
            this.globalClassLoader = globalClassLoader;
293
            this.classLoaderMap = classLoaderMap;
294
            this.referrer = referrer;
295
            this.referrerLocation = referrerLocation;
296
            this.u = u;
297
            this.okurls = okurls;
298
            this.badurls = badurls;
299
            this.cleanurls = cleanurls;
300
            this.checkexternal = checkexternal;
301
            this.checkspaces = checkspaces;
302
            this.checkforbidden = checkforbidden;
303
            this.recurse = recurse;
304
            this.mappers = mappers;
305
            this.filters = filters;
306
            this.errors = errors;
307
        }
308
309
        private State(State state) {
310
            this.task = state.task;
311
            this.globalClassLoader = state.globalClassLoader;
312
            this.classLoaderMap = state.classLoaderMap;
313
            this.referrer = state.referrer;
314
            this.referrerLocation = state.referrerLocation;
315
            this.u = state.u;
316
            this.okurls = state.okurls;
317
            this.badurls = state.badurls;
318
            this.cleanurls = state.cleanurls;
319
            this.checkexternal = state.checkexternal;
320
            this.checkspaces = state.checkspaces;
321
            this.checkforbidden = state.checkforbidden;
322
            this.recurse = state.recurse;
323
            this.mappers = state.mappers;
324
            this.filters = state.filters;
325
            this.errors = state.errors;
326
        }
327
328
        @Override
329
        public State clone() {
330
            return new State(this);
331
        }
332
333
    } // State
334
335
    /**
336
     * Scan for broken links.
337
     * @param s a state of the CheckLinks process.
338
     * @throws java.io.IOException
339
     */
340
    public static void scan(CheckLinks.State s) throws IOException {
341
         //  System.out.println("CheckLinks.scan   u: " + s.u);
342
        scan(s.task, s.globalClassLoader, s.classLoaderMap, s.referrer,
343
             s.referrerLocation, s.u, s.okurls, s.badurls, s.cleanurls,
344
             s.checkexternal, s.checkspaces, s.checkforbidden, s.recurse,
345
             s.mappers, s.filters, s.errors);
346
    }
158
    
347
    
159
    /**
348
    /**
160
     * Scan for broken links.
349
     * Scan for broken links.
Lines 179-185 Link Here
179
     boolean checkexternal, boolean checkspaces, boolean checkforbidden, int recurse, 
368
     boolean checkexternal, boolean checkspaces, boolean checkforbidden, int recurse, 
180
     List<Mapper> mappers, List<String> errors) throws IOException {
369
     List<Mapper> mappers, List<String> errors) throws IOException {
181
        scan (task, globalClassLoader, classLoaderMap,
370
        scan (task, globalClassLoader, classLoaderMap,
182
        referrer, referrerLocation, u, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, recurse, mappers, Collections.<Filter>emptyList(), errors);
371
              referrer, referrerLocation, u, okurls, badurls, cleanurls,
372
              checkexternal, checkspaces, checkforbidden, recurse, mappers,
373
              Collections.<Filter>emptyList(), errors);
183
    }
374
    }
184
    
375
    
185
    private static void scan
376
    private static void scan
Lines 545-550 Link Here
545
                    } // else we are only checking that this one has right anchors
736
                    } // else we are only checking that this one has right anchors
546
                }
737
                }
547
            }
738
            }
739
            // Process HTML Object Elements
740
            CheckLinks.State state =
741
                  new State(task, globalClassLoader, classLoaderMap, basepath,
742
                  referrerLocation, u, okurls, badurls, cleanurls,
743
                  checkexternal, checkspaces, checkforbidden, recurse,
744
                  mappers, filters, errors);
745
            HTMLObjectElementsChecker coe = new HTMLObjectElementsChecker(state);
746
            coe.check(content);
548
        } else {
747
        } else {
549
            task.log("Not checking contents of " + base, Project.MSG_VERBOSE);
748
            task.log("Not checking contents of " + base, Project.MSG_VERBOSE);
550
        }
749
        }
(-)0d6086c24c93 (+867 lines)
Added Link Here
1
/*
2
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3
 *
4
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
5
 *
6
 * The contents of this file are subject to the terms of either the GNU
7
 * General Public License Version 2 only ("GPL") or the Common
8
 * Development and Distribution License("CDDL") (collectively, the
9
 * "License"). You may not use this file except in compliance with the
10
 * License. You can obtain a copy of the License at
11
 * http://www.netbeans.org/cddl-gplv2.html
12
 * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
13
 * specific language governing permissions and limitations under the
14
 * License.  When distributing the software, include this License Header
15
 * Notice in each file and include the License file at
16
 * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
17
 * particular file as subject to the "Classpath" exception as provided
18
 * by Sun in the GPL Version 2 section of the License file that
19
 * accompanied this code. If applicable, add the following below the
20
 * License Header, with the fields enclosed by brackets [] replaced by
21
 * your own identifying information:
22
 * "Portions Copyrighted [year] [name of copyright owner]"
23
 *
24
 * If you wish your version of this file to be governed by only the CDDL
25
 * or only the GPL Version 2, indicate your decision by adding
26
 * "[Contributor] elects to include this software in this distribution
27
 * under the [CDDL or GPL Version 2] license." If you do not indicate a
28
 * single choice of license, a recipient has the option to distribute
29
 * your version of this file under either the CDDL, the GPL Version 2 or
30
 * to extend the choice of license to its licensees as provided above.
31
 * However, if you add GPL Version 2 code and therefore, elected the GPL
32
 * Version 2 license, then the option applies only if the new code is
33
 * made subject to such option by the copyright holder.
34
 *
35
 * Contributor(s):
36
 *
37
 * Portions Copyrighted 2009 Sun Microsystems, Inc.
38
 */
39
40
package org.netbeans.nbbuild;
41
42
import java.io.IOException;
43
import java.net.URI;
44
import java.net.URISyntaxException;
45
import java.text.MessageFormat;
46
import java.util.ArrayList;
47
import java.util.Iterator;
48
import java.util.List;
49
import java.util.regex.Matcher;
50
import java.util.regex.Pattern;
51
import org.apache.tools.ant.Project;
52
53
/**
54
 * Checker for the &lt;object&gt; elements.
55
 * The &lt;object&gt; elements may be used by the HelpSet authors in the HTML
56
 * sources.
57
 * By default the checker is aware about the following classes of the
58
 * &lt;object&gt; elements:
59
 * <ul>
60
 *   <li><code>java:org.netbeans.modules.javahelp.BrowserDisplayer</code>
61
 *       Testable URI value is contained in a &lt;param&gt; element with the
62
 *       name <code>content</code>.
63
 *   </li>
64
 *   <li><code>java:com.sun.java.help.impl.JHSecondaryViewer</code>
65
 *       Testable URI value is contained in a &lt;param&gt; element with the
66
 *       name <code>content</code>.
67
 *   </li>
68
 * </ul>
69
 *
70
 * @version 1.0
71
 *
72
 * @see <a href="http://www.netbeans.org/issues/show_bug.cgi?id=117506">
73
 * Issue #117506</a>
74
 * @see <a href="http://wiki.netbeans.org/JavaHelp_CheckHelpSets">
75
 * JavaHelp - Help Set Checkers</a>
76
 *
77
 * @author Victor G. Vasilyev <vvg@netbeans.org>
78
 *
79
 * TODO:
80
 * - TBD It will be better to move all the help set checkers to the separate
81
 *   package org.netbeans.nbbuild.helpsets
82
 * - Getting a name list of the &lt;object&gt; properties as an input to provide
83
 *   checking of their values for correctness of URLs.
84
 *   The list should be passed via Ant property.
85
 * - Getting a &lt;object&gt; classid list as an input to provide
86
 *   exclusion of this object from checking. Take the list into account in the
87
 *   method check(String content). The list should be passed via Ant property.
88
 */
89
public class HTMLObjectElementsChecker {
90
91
    /**
92
     * The separator used in the identifiers of the &lt;param&gt; elements
93
     * to separate <code>classid</code> from a name of the &lt;param&gt;
94
     * element.
95
     *
96
     * @see #getParamID(java.lang.String, java.lang.String)
97
     */
98
    public static final String PARAM_NAME_SEPARATOR = ".";
99
100
    private static final String ERROR_PREFIX = "HTML <object> element error: ";
101
    
102
    /**
103
     * The pattern for the message:
104
     * "The element without classid attribute."
105
     * where:
106
     * <ul>
107
     *   <li>{0} - a URI string.</li>
108
     *   <li>{1} - a string value of the offset of the buggy text in the HTML
109
     *       file.</li>
110
     *   <li>{2} - a buggy text.</li>
111
     * </ul>
112
     */
113
    private static final String ERROR1 =
114
            "The element without classid attribute. \n" +
115
            "file:\n" +
116
            "{0}\n" +
117
            "offset: {1}\n" +
118
            "text: \n" +
119
            "{2}";
120
121
    /**
122
     * The pattern for the message:
123
     * "The element with unknown classid:"
124
     * where:
125
     * <ul>
126
     *   <li>{0} - a string value of the classid attribute.</li>
127
     *   <li>{1} - a URI string.</li>
128
     *   <li>{2} - a string value of the offset of the buggy text in the HTML
129
     *       file.</li>
130
     *   <li>{3} - a buggy text.</li>
131
     * </ul>
132
     */
133
    private static final String ERROR2 =
134
            "The element with unknown classid: \n" +
135
                      "{0}\n" +
136
                      "file:\n" +
137
                      "{1}\n" +
138
                      "offset: {2}\n" +
139
                      "text: \n" +
140
                      "{3}";
141
142
    /**
143
     * The pattern for the message:
144
     * "The &lt;param&gt; element with unknown name."
145
     * where:
146
     * <ul>
147
     *   <li>{0} - a text of &lt;param&gt; element.</li>
148
     *   <li>{1} - a URI string.</li>
149
     *   <li>{2} - a string value of the offset of the buggy text in the HTML
150
     *       file.</li>
151
     *   <li>{3} - a buggy text.</li>
152
     * </ul>
153
     */
154
    private static final String ERROR3 =
155
            "The <param> element with unknown name. \n" +
156
            "{0}\n" +
157
            "file:\n" +
158
            "{1}\n" +
159
            "offset: {2}\n" +
160
            "text: \n" +
161
            "{3}";
162
163
    /**
164
     * The pattern for the message:
165
     * "The &lt;param&gt; element with unknown value."
166
     * where:
167
     * <ul>
168
     *   <li>{0} - a text of &lt;param&gt; element.</li>
169
     *   <li>{1} - a URI string.</li>
170
     *   <li>{2} - a string value of the offset of the buggy text in the HTML
171
     *       file.</li>
172
     *   <li>{3} - a buggy text.</li>
173
     * </ul>
174
     */
175
    private static final String ERROR4 =
176
            "The <param> element with unknown value. \n" +
177
            "{0}\n" +
178
            "file:\n" +
179
            "{1}\n" +
180
            "offset: {2}\n" +
181
            "text: \n" +
182
            "{3}";
183
184
    /**
185
     * The pattern for the message:
186
     * "The &lt;param&gt; element has incorrect URI value."
187
     * where:
188
     * <ul>
189
     *   <li>{0} - a text of &lt;param&gt; element.</li>
190
     *   <li>{1} - a URI string.</li>
191
     *   <li>{2} - a string value of the offset of the buggy text in the HTML
192
     *       file.</li>
193
     *   <li>{3} - a buggy text.</li>
194
     *   <li>{4} - a URISyntaxException text</li>
195
     * </ul>
196
     */
197
    private static final String ERROR5 =
198
            "The <param> element has incorrect URI value. \n" +
199
            "exception: {4}\n" +
200
            "{0}\n" +
201
            "file:\n" +
202
            "{1}\n" +
203
            "offset: {2}\n" +
204
            "text: \n" +
205
            "{3}\n";
206
207
    /** The state of the CheckLinks process this checker reports into. */
    private CheckLinks.State state;
208
209
    /**
     * The <code>classid</code> values accepted by {@link #check(String)};
     * an &lt;object&gt; element with any other classid is reported as an
     * error.
     */
    private List<String> knownClassIDs = new ArrayList<String>();
210
211
    /**
     * The param ids (classid, separator, param name) whose
     * <code>value</code> attribute is checked as a URI.
     */
    private List<String> knownURIParams = new ArrayList<String>();
212
    // Instance initializer: registers the two URI params that are tested by
    // default, which also registers their classids as known.
    {
213
        addTestableURIParam(
214
                "java:org.netbeans.modules.javahelp.BrowserDisplayer.content");
215
        addTestableURIParam(
216
                "java:com.sun.java.help.impl.JHSecondaryViewer.content");
217
    }
218
219
    /**
220
     * Constructs <code>HTMLObjectElementsChecker</code> associated with the
221
     * specified <code>CheckLinks</code> process.
222
     * @param state - The state of the <code>CheckLinks</code> process.
223
     */
224
    public HTMLObjectElementsChecker(CheckLinks.State state) {
225
        this.state = state;
226
    }
227
228
    /**
229
     * Excludes class of the &lt;object&gt; elements from testing by registering
230
     * the specified <code>classid</code> as a known classid for this checker.
231
     * @param classid - a value of the <code>classid</code> attribute
232
     * used in the &lt;object&gt; elements of that class.
233
     */
234
    public void excludeObjectClass(String classid) {
235
        knownClassIDs.add(classid);
236
    }
237
238
    /**
239
     * Registers the specified <code>paramid</code> as a URI Param that will be
240
     * tested.
241
     * <br/>
242
     * <pre><code>
243
     * paramid := classid PARAM_NAME_SEPARATOR paramName
244
     * paramName is a value of the name attribute of the param element nested to
245
     * the object element with the classid attribute whose value is used in the
246
     * paramid :-)
247
     * </code></pre>
248
     * @param paramid - The id of the PARAM element associated with the classid
249
     * of the outer OBJECT element.
250
     */
251
    public void addTestableURIParam(String paramid) {
252
        knownURIParams.add(paramid);
253
        int dotPos = paramid.lastIndexOf(PARAM_NAME_SEPARATOR);
254
        String classid = paramid.substring(0, dotPos);
255
        knownClassIDs.add(classid);
256
    }
257
258
    /**
259
     * Checks the specified HTML <code>content</code>.
260
     * The HTML <code>content</code> may contain the &lt;object&gt; elements
261
     * that will be checked.
262
     *
263
     * @param content - The HTML content under test.
264
     */
265
    public void check(String content) {
266
        URI uri = state.u;
267
        antLog("Check HTML <object> elements in " + uri, Project.MSG_VERBOSE);
268
        HTML html = new HTML(content);
269
        // process OBJECT elements:
270
        Iterable<HTML.Object> oi = html.getObjects();
271
        for(HTML.Object e : oi) {
272
            HTML.Attribute a = e.getAttribute("classid");
273
            if(a == null) {
274
                error(ERROR1, uri, e.getLineColumn(), e.getText());
275
                continue;
276
            }
277
            String classid=a.getValue();
278
            if(!knownClassIDs.contains(classid)) {
279
                error(ERROR2, a.getValue(), uri, e.getLineColumn(),
280
                      e.getText());
281
                continue;
282
            }
283
            // TODO: May be check the known <object> against its XML Schema,
284
            // but when the HTML text should be normalized before.
285
286
            // process nested PARAM elements
287
            Iterable<HTML.Object.Param> pi = e.getParams();
288
            for(HTML.Object.Param p : pi) {
289
                // process attributes of the PARAM element:;
290
                HTML.Attribute paName = p.getAttribute("name");
291
                if(paName == null) {
292
                    error(ERROR3, p.getText(), uri, e.getLineColumn(),
293
                          e.getText());
294
                    continue;
295
                }
296
                String paNameValue = paName.getValue();
297
                String paramid = getParamID(classid, paNameValue);
298
                if(!knownURIParams.contains(paramid)) {
299
                    continue;
300
                }
301
                // check URI defined in the param element.
302
                antLog("Check HTML <param> element with URI type: " + paramid,
303
                       Project.MSG_VERBOSE);
304
305
                HTML.Attribute paValue = p.getAttribute("value");
306
                if(paValue == null) {
307
                    error(ERROR4, p.getText(), uri, e.getLineColumn(),
308
                          e.getText());
309
                    continue;
310
                }
311
                String testableURI = paValue.getValue();
312
                CheckLinks.State s = state.clone();
313
                s.recurse = 0;
314
                s.referrer = testableURI;
315
                try {
316
                    s.u = new URI(testableURI);
317
                } catch (URISyntaxException ex) {
318
                    error(ERROR5, p.getText(), uri, e.getLineColumn(),
319
                          e.getText(), ex.toString());
320
                }
321
                try {
322
                    CheckLinks.scan(s);
323
                } catch (IOException ex) {
324
                    ex.printStackTrace();
325
                }
326
            } // for(HTML.Object.Param p : pi)
327
        } // for(HTML.Object e : oi)
328
329
    }
330
331
    /**
     * Builds the id of a &lt;param&gt; element by joining the
     * <code>classid</code> of the enclosing &lt;object&gt; element and the
     * param name with <code>PARAM_NAME_SEPARATOR</code>.
     *
     * @param classid - The value of the <code>classid</code> attribute.
     * @param paramName - The value of the <code>name</code> attribute.
     * @return <code>classid + PARAM_NAME_SEPARATOR + paramName</code>.
     */
    private String getParamID(String classid, String paramName) {
332
        return classid + PARAM_NAME_SEPARATOR + paramName;
333
    }
334
335
    /**
336
     * Shows Ant log message by delegating to the <code>log</code> method of
     * the task held by the associated <code>CheckLinks.State</code>.
337
     * @param s - The message string.
338
     * @param level - The level of the message, e.g Project.MSG_VERBOSE
339
     *
340
     * @see org.apache.tools.ant.Project
341
     */
342
    private void antLog(String s, int level) {
343
        state.task.log(s, level);
344
    }
345
346
    /**
347
     * Adds the specified error message to the error list associated with the
348
     * <code>CheckLinks.State</code>. An associated Ant task will show these
349
     * messages at the completion. The <code>ERROR_PREFIX</code> will be added
350
     * before each message.
351
     * <p>The list is the one referenced by the state, so it is shared with
     * whoever supplied that list.</p>
     *
352
     * @param s - The error message.
353
     */
354
    private void error(String s) {
355
        state.errors.add(ERROR_PREFIX + s);
356
    }
357
358
    /**
359
     * Adds the specified error message to the error list associated with the
360
     * <code>CheckLinks.State</code>. An associated Ant task will show these
361
     * messages at the completion.
362
     * <p>The error message is specified by the given <code>pattern</code> and
363
     * uses it to format the given <code>arguments</code>.</p>
364
     * <p>The <code>ERROR_PREFIX</code> will be added before each message.</p>
365
     * <p>The pattern is interpreted by <code>MessageFormat.format</code>, so
     * its placeholders have the form <code>{0}</code>, <code>{1}</code>,
     * etc.</p>
     *
366
     * @param pattern - The pattern of the message.
367
     * @param arguments - The arguments of the message.
368
     */
369
    private void error(String pattern, Object... arguments) {
370
        error(MessageFormat.format(pattern, arguments));
371
    }
372
373
374
    /**
375
     * HTML parser that helps to find &lt;object&gt; elements, nested 
376
     * &lt;param&gt; elements and their attributes in the specified HTML
377
     * source.
378
     * <br/>
379
     * Usage:
380
     * <pre><code>
381
     * HTML html = new HTML(htmlText);
382
     * // process OBJECT elements:
383
     * Iterable<HTML.Object> oi = html.getObjects();
384
     * for(HTML.Object e : oi) {
385
     *     int start=e.getStart(); // start position of the element
386
     *     int end=e.getEnd(); // end position of the element
387
     *     String text = e.getText(); // text of the element
388
     *     // ...
389
     *     // process attributes of the OBJECT element:;
390
     *     Iterable<HTML.Attribute> ai = e.getAttributes();
391
     *     for(HTML.Attribute a : ai) {
392
     *         String name=a.getName(); // name of the attribute
393
     *         String value=a.getValue(); // value of the attribute
394
     *         // ...
395
     *     }
396
     *     // process nested PARAM elements
397
     *     Iterable<HTML.Object.Param> pi = e.getParams();
398
     *     for(HTML.Object.Param p : pi) {
399
     *         String ptext = p.getText(); // text of the element
400
     *         // process attributes of the PARAM element:;
401
     *         Iterable<HTML.Attribute> pai = p.getAttributes();
402
     *         for(HTML.Attribute pa : pai) {
403
     *             String name=pa.getName(); // name of the attribute
404
     *             String value=pa.getValue(); // value of the attribute
405
     *             // ...
406
     *         }
407
     *     }
408
     * }
409
     * </code></pre>
410
     */
411
    public static class HTML {
412
413
        private static Pattern lineBreak =
414
                Pattern.compile("^", Pattern.MULTILINE);
415
416
        /**
417
         * SGML source where <!-- ... --> tokens are valid comment delimiters.
418
         */
419
        protected String source;
420
421
        /**
422
         * Creates a parser over the given HTML text. The text is only stored
         * here; nothing is parsed until elements are requested.
423
         * @param source - The HTML text to search for elements.
424
         */
425
        public HTML(String source) {
426
            this.source = source;
427
        }
428
429
        /**
430
         * Returns an instance of <code>Iterable</code> object for the
431
         * &lt;object&gt; elements contained in the HTML source.
432
         * <p>A new <code>HTML.Object</code> is created on every call and is
         * used as the iterator. The returned <code>Iterable</code> hands out
         * that same iterator instance from every <code>iterator()</code>
         * call. NOTE(review): presumably this makes the returned
         * <code>Iterable</code> usable for a single traversal only - confirm
         * against <code>HTML.Object</code>.</p>
         * @return an <code>Iterable</code> object.
433
         */
434
        public Iterable<HTML.Object> getObjects() {
435
            @SuppressWarnings("unchecked") // the cast is correct
436
            final Iterator<HTML.Object> ei = new HTML.Object(this);
437
            return new Iterable<HTML.Object>() {
438
                public Iterator<HTML.Object> iterator() {
439
                    return ei;
440
                }
441
            };
442
        }
443
444
445
        /**
446
         * Abstract HTML construction.
447
         */
448
        public abstract class Construction implements Iterator {
449
450
            /**
451
             * Match flags, a bit mask that may include
452
             * Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL,
453
             * Pattern.UNICODE_CASE, and Pattern.CANON_EQ
454
             */
455
            private int matchFlags;
456
457
            private String regex;
458
459
            private Matcher m;
460
461
            /**
462
             * Creates an HTML construction that is searched for in the source
             * of the enclosing <code>HTML</code> instance.
             * <p>The arguments are stored, a pattern is obtained from
             * {@link #getPattern()} and a matcher over the source is created
             * from it.</p>
463
             * @param regex - The regular expression that matches the
             * construction.
464
             * @param matchFlags - The match flags passed to
             * <code>Pattern.compile</code>.
465
             */
466
            protected Construction(String regex, int matchFlags) {
467
                this.regex = regex;
468
                this.matchFlags = matchFlags;
469
                // NOTE(review): getPattern() is public and not final, so an
                // override in a subclass would run here before that
                // subclass's own constructor body.
                Pattern p = getPattern();
470
                m = p.matcher(source);
471
            }
472
473
            /**
474
             * Returns <code>Matcher</code> associated with the Construction.
             * This is the single matcher created in the constructor, so its
             * match state is shared by every caller.
475
             * @return <code>Matcher</code> for the Construction.
476
             */
477
            protected Matcher getMatcher() {
478
                return m;
479
            }
480
481
            /**
482
             * Returns <code>Pattern</code> associated with the Construction.
             * The pattern is compiled from the stored regular expression and
             * match flags on every call; it is not cached.
483
             * @return <code>Pattern</code> for the Construction.
484
             */
485
            public Pattern getPattern() {
486
                return Pattern.compile(regex, matchFlags);
487
            }
488
489
            /**
490
             * Returns text of the <code>Construction</code>.
491
             * @return text of the <code>Construction</code>.
492
             */
493
            public String getText() {
494
                return getMatcher().group();
495
            }
496
497
           /**
498
             * Checks whether this <code>HTML.Element</code> is not commented
499
             * out.
500
             * @return false if comment start is before element, but end is not.
501
             */
502
            protected boolean isCommentedOut() {
503
                int pos = getMatcher().start();
504
                int commentStart = source.lastIndexOf (Comment.START, pos);
505
                int commentEnd = source.lastIndexOf (Comment.END, pos);
506
                if(commentStart == -1) { // i.e. ... elem ...
507
                    return false;
508
                }
509
                if(commentEnd == -1) {
510
                    return true;  // i.e. <!-- ... elem ...
511
                }
512
                if(commentEnd < commentStart) {
513
                    return true;  // i.e. ... --> ... <!-- ... elem ...
514
                }
515
                return false; // i.e. ... <!-- ... --> ... elem ...
516
            }
517
518
            /**
519
             * Returns the start index of the <code>Construction</code>.
520
             *
521
             * @return The index of the first character of the
522
             * <code>Construction</code>.
523
             * @throws <code>IllegalStateException</code> - If no match has yet
524
             * been attempted, or if the previous match operation failed.
525
             */
526
            public int getStart() {
527
                return getMatcher().start();
528
            }
529
530
            /**
531
             * Returns the offset after the last character of the
532
             * <code>Construction</code>.
533
             *
534
             * @return The offset after the last character of the
535
             * <code>Construction</code>.
536
             * @throws <code>IllegalStateException</code> - If no match has yet
537
             * been attempted, or if the previous match operation failed.
538
             */
539
            public int getEnd() {
540
                return getMatcher().end();
541
            }
542
543
            /**
544
             * Return the start offset of the <code>Construction</code> as a
545
             * string in form: <code>Line:Column</code>.
546
             * @return a line:col string.
547
             */
548
            public String getLineColumn() {
549
                Matcher lbm = lineBreak.matcher(source);
550
                int line = 0;
551
                int col = 1;
552
                int pos = getStart();
553
                while (lbm.find()) {
554
                    if (lbm.start() <= pos) {
555
                        line++;
556
                        col = pos - lbm.start() + 1;
557
                    } else {
558
                        break;
559
                    }
560
                }
561
                return line + ":" + col;
562
            }
563
564
            public boolean hasNext() {
565
                while(getMatcher().find()) {
566
                    if(!isCommentedOut()) {
567
                        return true;
568
                    }
569
                }
570
                return false;
571
           }
572
573
            public Construction next() {
574
                return this;
575
            }
576
577
            public void remove() {
578
                throw new UnsupportedOperationException();
579
            }
580
581
        } // Element
582
583
        /**
         * Delimiter tokens of an HTML commented block.
         */
        public class Comment {
            /** Token that opens an HTML comment. */
            public static final String START = "<!--"; // NOI18N
            /** Token that closes an HTML comment. */
            public static final String END = "-->"; // NOI18N
        }
596
597
        /**
598
         * HTML <code>Construction</code> with attributes.
599
         */
600
        public class AttributableConstruction extends Construction {
601
602
            private int attListGroup;
603
604
            /**
605
             * Creates <code>AttributableConstruction</code>.
606
             * @param regex - The regular expression associated with the
607
             * <code>AttributableConstruction</code>.
608
             * @param matchFlags
609
             * @param attListGroup
610
             */
611
            public AttributableConstruction(String regex,
612
                                            int matchFlags, int attListGroup) {
613
                super(regex, matchFlags);
614
                this.attListGroup = attListGroup;
615
            }
616
617
            /**
618
             * Returns an <code>Iterable</code> object for the set of attributes
619
             * associated whit the HTML element.
620
             *
621
             * @return an <code>Iterable</code> object.
622
             */
623
            public Iterable<Attribute> getAttributes() {
624
                int startAttList = getMatcher().start(attListGroup);
625
                int endAttList = getMatcher().end(attListGroup);
626
                @SuppressWarnings("unchecked") // the cast is correct
627
                final Iterator<Attribute> i = new HTML.Attribute(startAttList,
628
                                                      endAttList);
629
                return new Iterable<Attribute>() {
630
                    public Iterator<Attribute> iterator() {
631
                        return i;
632
                    }
633
                };
634
            }
635
636
            /**
637
             * Returns an object attribute with the specified <code>name</code>.
638
             * @param name - The name of the object attribute.
639
             * @return The object attribute if exists, otherwise
640
             * <code>null</code>.
641
             */
642
            public HTML.Attribute getAttribute(String name) {
643
                Iterable<HTML.Attribute> ai = getAttributes();
644
                for(HTML.Attribute a : ai) {
645
                    if(name.equalsIgnoreCase(a.getName())) {
646
                        return a;
647
                    }
648
                }
649
                return null;
650
            }
651
        } // AttributableConstruction
652
653
        /**
654
         * The HTML object element.
655
         */
656
        public class Object extends AttributableConstruction {
657
658
            /**
659
             * Regular expression for the attribute list group.
660
             * @see http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2
661
             */
662
            private static final String ATTLIST = "(.*?)"; // NOI18N
663
            private static final String START = "<object" + ATTLIST+ ">"; // NOI18N
664
            private static final String BODY = "(.*?)"; // NOI18N
665
            private static final String END = "</object>"; // NOI18N
666
            private static final String REGEX = START + BODY + END;
667
            private static final int ATTLIST_GROUP = 1;
668
            private static final int PARAMS_GROUP = 2;
669
            private static final int MATCH_FLAGS = Pattern.CASE_INSENSITIVE|
670
                             Pattern.DOTALL|Pattern.MULTILINE;
671
672
            /**
673
             *
674
             * @param html
675
             */
676
            public Object(HTML html) {
677
                super(REGEX, MATCH_FLAGS, ATTLIST_GROUP);
678
            }
679
            
680
            /**
681
             *
682
             * @return
683
             */
684
            public Iterable<Param> getParams() {
685
                int startParams = getMatcher().start(PARAMS_GROUP);
686
                int endParams = getMatcher().end(PARAMS_GROUP);
687
                @SuppressWarnings("unchecked") // the cast is correct
688
                final Iterator<Param> i = new Param(startParams, endParams);
689
                return new Iterable<Param>() {
690
                    public Iterator<Param> iterator() {
691
                        return i;
692
                    }
693
                };
694
            }
695
696
            /**
697
             *
698
             */
699
            public class Param extends AttributableConstruction {
700
                private static final String REGEX = "<param(.*?)>"; // NOI18N
701
                private static final int ATTLIST_GROUP = 1;
702
                private static final int MATCH_FLAGS = Pattern.CASE_INSENSITIVE|
703
                             Pattern.DOTALL|Pattern.MULTILINE;
704
705
                private Matcher paramMatcher;
706
707
                /**
708
                 *
709
                 * @param start
710
                 * @param end
711
                 */
712
                public Param(int start, int end) {
713
                    super(REGEX, MATCH_FLAGS, ATTLIST_GROUP);
714
                    Matcher parentMatcher = super.getMatcher();
715
                    paramMatcher = parentMatcher.region(start, end);
716
                }
717
718
                /**
719
                 *
720
                 * @return
721
                 */
722
                @Override
723
                public Matcher getMatcher() {
724
                    return paramMatcher;
725
                }
726
                
727
            } // Param
728
729
        } // Object
730
731
        /**
732
         * Attribute of the element.
733
         * This implementation has the following limitations:
734
         * <ul>
735
         * <li> Only default case of the attribute syntax specified by the HTML
736
         *      4.01 Specification is supported:<br/>
737
         *      "By default, SGML requires that all attribute values be
738
         *      delimited using either double quotation marks (ASCII decimal
739
         *      34) or single quotation marks (ASCII decimal 39).<br/>
740
         *      ...<br/>
741
         *      <b>We recommend using quotation marks even when it is possible
742
         *      to eliminate them.</b>"</li>
743
         * <li>DON'T SUPPORTED: "Single quote marks can be included within the
744
         *     attribute value when the value is delimited by double quote
745
         *     marks, and vice versa. Authors may also use numeric character
746
         *     references to represent double quotes (&#34;) and single quotes
747
         *     (&#39;). For double quotes authors can also use the character
748
         *     entity reference &amp;quot;</li>
749
         * <li>DON'T SUPPORTED: In certain cases, authors may specify the value
750
         *     of an attribute without any quotation marks. The attribute value
751
         *     may only contain letters (a-z and A-Z), digits (0-9), hyphens
752
         *     (ASCII decimal 45), periods (ASCII decimal 46), underscores
753
         *     (ASCII decimal 95), and colons (ASCII decimal 58).</li>
754
         * </ul>
755
         *
756
         * @see <a href=http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2>
757
         * HTML 4.01 Specification - 3.2.2 Attributes</a>
758
         */
759
        public class Attribute extends Construction {
760
            private static final String NAME = "(\\w*?)";
761
            private static final String SD = "(\"|')"; // Start delimiter
762
            private static final String VALUE = "(.*?)";
763
            private static final String ED = "\\2"; // reffers to SD group
764
            private static final String REGEX = 
765
                    NAME + "=\\s*?" + SD + VALUE + ED; // NOI18N
766
            private static final int NAME_GROUP = 1;
767
            private static final int VALUE_GROUP = 3;
768
            private static final int MATCH_FLAGS = 
769
                    Pattern.CASE_INSENSITIVE|Pattern.DOTALL; // Not multiline!
770
771
            private Matcher attMatcher;
772
773
            /**
774
             *
775
             * @param start
776
             * @param end
777
             */
778
            public Attribute(int start, int end) {
779
                super(REGEX, MATCH_FLAGS);
780
                Matcher parentMatcher = super.getMatcher();
781
                attMatcher = parentMatcher.region(start, end);
782
            }
783
784
            /**
785
             *
786
             * @return
787
             */
788
            @Override
789
            public Matcher getMatcher() {
790
                return attMatcher;
791
            }
792
793
            /**
794
             *
795
             * @return
796
             */
797
            public String getName() {
798
                return getMatcher().group(NAME_GROUP);
799
            }
800
801
            /**
802
             *
803
             * @return
804
             */
805
            public String getValue() {
806
                return getMatcher().group(VALUE_GROUP);
807
            }
808
       } // Attribute
809
810
    } // HTML
811
812
    /**
813
     * Development time test entry point.
814
     * @param args the command line arguments
815
     */
816
    public static void main(String[] args) {
817
818
//        String testHTML = "<html> \n " +
819
//                "\t<OBJECT CLASSID=\"java:com.sun.java.help.impl.JHSecondaryViewer\" type= 'aaa'> \n" +
820
//                "\t\t<param name=\"content\" value=\"1popup_gloss.html\"> \n" +
821
//                "\t\t<param name=\"viewerActivator\" value=\"javax.help.LinkLabel\">\n" +
822
//                "\t\t<param name=\"viewerStyle\" value=\"javax.help.Popup\">\n" +
823
//                "\t\t<param name=\"viewerSize\" value=\"400,250\">\n" +
824
//                "\t\t<param name=\"text\" value=\"popup windows\">\n" +
825
//                "\t\t<param name=\"textColor\" value=\"blue\">\n" +
826
//                "\t\t<param name=\"viewerName\" value=\"1glossary\">\n" +
827
//                "\t</OBJECT>\n" +
828
//                "\n <a />"+
829
//                "\t<OBJECT CLASSID=\"java:com.sun.java.help.impl.JHSecondaryViewer\" > \n" +
830
//                "\t\t<param name=\"content\" value=\"2popup_gloss.html\" /> \n" +
831
//                "\t\t<param name=\"viewerActivator\" value=\"javax.help.LinkLabel\">\n" +
832
//                "\t\t<param name=\"viewerStyle\" value=\"javax.help.Popup\">\n" +
833
//                "\t\t<param name=\"viewerSize\" value=\"400,250\">\n" +
834
//                "\t\t<param name=\"text\" value=\"popup windows\">\n" +
835
//                "\t\t<param name=\"textColor\" value=\"blue\">\n" +
836
//                "\t\t<param name=\"viewerName\" value=\"2glossary\">\n" +
837
//                "\t</OBJECT>\n" +
838
//                "</html>";
839
//        HTML html = new HTML(testHTML);
840
//
841
//        Iterable<HTML.Object> oi = html.getObjects();
842
//        for(HTML.Object e : oi) {
843
//            System.out.println("start="+e.getStart());
844
//            System.out.println("end="+e.getEnd());
845
//            System.out.println(e.getText());
846
//            System.out.println("Attributes:");
847
//            Iterable<HTML.Attribute> ai = e.getAttributes();
848
//            for(HTML.Attribute a : ai) {
849
//                System.out.println(a.getText());
850
//                System.out.println("name=["+ a.getName() + "] value=[" + a.getValue() + "]" );
851
//            }
852
//            System.out.println("Parameters:");
853
//            Iterable<HTML.Object.Param> pi = e.getParams();
854
//            for(HTML.Object.Param p : pi) {
855
//                System.out.println(p.getText());
856
//                // Attributes of the PARAM element:;
857
//                Iterable<HTML.Attribute> pai = p.getAttributes();
858
//                for(HTML.Attribute pa : pai) {
859
//                    String name=pa.getName(); // name of the attribute
860
//                    String value=pa.getValue(); // value of the attribute
861
//                    System.out.println("name=["+ name + "] value=[" + value + "]" );
862
//                    // ...
863
//                }
864
//            }
865
//        }
866
    }
867
}

Return to bug 117506