1010import com .semmle .js .extractor .ExtractorConfig .SourceType ;
1111import com .semmle .js .parser .ParseError ;
1212import com .semmle .util .data .Option ;
13+ import com .semmle .util .data .Pair ;
1314import com .semmle .util .data .StringUtil ;
1415import com .semmle .util .io .WholeIO ;
1516import com .semmle .util .trap .TrapWriter ;
17+ import com .semmle .util .trap .TrapWriter .Label ;
1618
1719import net .htmlparser .jericho .Attribute ;
1820import net .htmlparser .jericho .Attributes ;
2325
2426/** Extractor for handling HTML and XHTML files. */
2527public class HTMLExtractor implements IExtractor {
// Lines-of-code accumulator for this extractor instance: extractSnippet adds the LoCInfo of each
// snippet it extracts directly, and extract(...) returns this object as its result.
// NOTE(review): the field is initialised once per HTMLExtractor instance and no reset is visible
// here, so counts would carry over if one instance handled several files - confirm against callers.
28+ private LoCInfo locInfo = new LoCInfo (0 , 0 );
29+
2630 private class JavaScriptHTMLElementHandler implements HtmlPopulator .ElementHandler {
2731 private final ScopeManager scopeManager ;
2832 private final TextualExtractor textualExtractor ;
29- private LoCInfo locInfo ;
30-
33+
/**
 * Creates a handler bound to the given textual extractor.
 *
 * <p>Keeps a reference to {@code textualExtractor} and builds the {@link ScopeManager} used for
 * the snippets of this file from the extractor's trap writer and {@code config.getEcmaVersion()}.
 *
 * @param textualExtractor the extractor for the HTML file whose elements this handler receives
 */
3134 public JavaScriptHTMLElementHandler (TextualExtractor textualExtractor ) {
3235 this .textualExtractor = textualExtractor ;
3336
// The two lines marked '-' below are removed by this change: the handler no longer owns a
// LoCInfo, because the accumulator is now a field of the enclosing HTMLExtractor.
34- this .locInfo = new LoCInfo (0 , 0 );
35-
3637 this .scopeManager =
3738 new ScopeManager (textualExtractor .getTrapwriter (), config .getEcmaVersion ());
3839 }
@@ -42,8 +43,7 @@ public JavaScriptHTMLElementHandler(TextualExtractor textualExtractor) {
4243 * attribute values.
4344 */
4445 @ Override
45- public void handleElement (Element elt ) {
46- LoCInfo snippetLoC = null ;
46+ public void handleElement (Element elt , HtmlPopulator .Context context ) {
4747 if (elt .getName ().equals (HTMLElementName .SCRIPT )) {
4848 SourceType sourceType = getScriptSourceType (elt , textualExtractor .getExtractedFile ());
4949 if (sourceType != null ) {
@@ -72,16 +72,17 @@ public void handleElement(Element elt) {
7272 source = source .replace ("<![CDATA[" , " " ).replace ("]]>" , " " );
7373 if (!source .trim ().isEmpty ()) {
7474 RowColumnVector contentStart = content .getRowColumnVector ();
75- snippetLoC =
76- extractSnippet (
77- TopLevelKind .inlineScript ,
78- config .withSourceType (sourceType ),
79- scopeManager ,
80- textualExtractor ,
81- source ,
82- contentStart .getRow (),
83- contentStart .getColumn (),
84- isTypeScript );
75+ extractSnippet (
76+ TopLevelKind .inlineScript ,
77+ config .withSourceType (sourceType ),
78+ scopeManager ,
79+ textualExtractor ,
80+ source ,
81+ contentStart .getRow (),
82+ contentStart .getColumn (),
83+ isTypeScript ,
84+ elt ,
85+ context );
8586 }
8687 }
8788 } else {
@@ -95,16 +96,17 @@ public void handleElement(Element elt) {
9596 String source = attr .getValue ();
9697 RowColumnVector valueStart = attr .getValueSegment ().getRowColumnVector ();
9798 if (JS_ATTRIBUTE .matcher (attr .getName ()).matches ()) {
98- snippetLoC =
99- extractSnippet (
100- TopLevelKind .eventHandler ,
101- config ,
102- scopeManager ,
103- textualExtractor ,
104- source ,
105- valueStart .getRow (),
106- valueStart .getColumn (),
107- false /* isTypeScript */ );
99+ extractSnippet (
100+ TopLevelKind .eventHandler ,
101+ config ,
102+ scopeManager ,
103+ textualExtractor ,
104+ source ,
105+ valueStart .getRow (),
106+ valueStart .getColumn (),
107+ false /* isTypeScript */ ,
108+ attr ,
109+ context );
108110 } else if (isAngularTemplateAttributeName (attr .getName ())) {
109111 // For an attribute *ngFor="let var of EXPR", start parsing at EXPR
110112 int offset = 0 ;
@@ -116,37 +118,33 @@ public void handleElement(Element elt) {
116118 source = expr ;
117119 }
118120 }
119- snippetLoC =
120- extractSnippet (
121- TopLevelKind .eventHandler ,
122- config .withSourceType (SourceType .ANGULAR_TEMPLATE ),
123- scopeManager ,
124- textualExtractor ,
125- source ,
126- valueStart .getRow (),
127- valueStart .getColumn () + offset ,
128- false /* isTypeScript */ );
121+ extractSnippet (
122+ TopLevelKind .eventHandler ,
123+ config .withSourceType (SourceType .ANGULAR_TEMPLATE ),
124+ scopeManager ,
125+ textualExtractor ,
126+ source ,
127+ valueStart .getRow (),
128+ valueStart .getColumn () + offset ,
129+ false /* isTypeScript */ ,
130+ attr ,
131+ context );
129132 } else if (source .startsWith ("javascript:" )) {
130133 source = source .substring (11 );
131- snippetLoC =
132- extractSnippet (
133- TopLevelKind .javascriptUrl ,
134- config ,
135- scopeManager ,
136- textualExtractor ,
137- source ,
138- valueStart .getRow (),
139- valueStart .getColumn () + 11 ,
140- false /* isTypeScript */ );
134+ extractSnippet (
135+ TopLevelKind .javascriptUrl ,
136+ config ,
137+ scopeManager ,
138+ textualExtractor ,
139+ source ,
140+ valueStart .getRow (),
141+ valueStart .getColumn () + 11 ,
142+ false /* isTypeScript */ ,
143+ attr ,
144+ context );
141145 }
142146 }
143147 }
144-
145- if (snippetLoC != null ) locInfo .add (snippetLoC );
146- }
147-
148- public LoCInfo getLoCInfo () {
149- return this .locInfo ;
150148 }
151149 }
152150
@@ -202,7 +200,7 @@ public LoCInfo extract(TextualExtractor textualExtractor) {
202200
203201 extractor .doit (Option .some (eltHandler ));
204202
205- return eltHandler . getLoCInfo () ;
203+ return locInfo ;
206204 }
207205
208206 /**
@@ -270,18 +268,26 @@ private String getAttributeValueLC(Element elt, String attr) {
270268 return val == null ? val : StringUtil .lc (val );
271269 }
272270
273- private LoCInfo extractSnippet (
271+ private void extractSnippet (
274272 TopLevelKind toplevelKind ,
275273 ExtractorConfig config ,
276274 ScopeManager scopeManager ,
277275 TextualExtractor textualExtractor ,
278276 String source ,
279277 int line ,
280278 int column ,
281- boolean isTypeScript ) {
279+ boolean isTypeScript ,
280+ Segment parentHtmlNode ,
281+ HtmlPopulator .Context context ) {
282+ TrapWriter trapWriter = textualExtractor .getTrapwriter ();
283+ LocationManager locationManager = textualExtractor .getLocationManager ();
284+ LocationManager scriptLocationManager =
285+ new LocationManager (
286+ locationManager .getSourceFile (), trapWriter , locationManager .getFileLabel ());
287+ scriptLocationManager .setStart (line , column );
282288 if (isTypeScript ) {
283289 if (isEmbedded ) {
284- return null ; // Do not extract files from HTML embedded in other files.
290+ return ; // Do not extract files from HTML embedded in other files.
285291 }
286292 Path file = textualExtractor .getExtractedFile ().toPath ();
287293 FileSnippet snippet =
@@ -302,28 +308,36 @@ private LoCInfo extractSnippet(
302308 }
303309 state .getSnippets ().put (virtualFile , snippet );
304310 }
305- return null ; // LoC info is accounted for later
311+ Label topLevelLabel = ASTExtractor .makeTopLevelLabel (
312+ textualExtractor .getTrapwriter (),
313+ scriptLocationManager .getFileLabel (),
314+ scriptLocationManager .getStartLine (),
315+ scriptLocationManager .getStartColumn ());
316+ emitTopLevelXmlNodeBinding (parentHtmlNode , topLevelLabel , context , trapWriter );
317+ // Note: LoC info is accounted for later, so not added here.
318+ return ;
306319 }
307- TrapWriter trapwriter = textualExtractor .getTrapwriter ();
308- LocationManager locationManager = textualExtractor .getLocationManager ();
309- LocationManager scriptLocationManager =
310- new LocationManager (
311- locationManager .getSourceFile (), trapwriter , locationManager .getFileLabel ());
312- scriptLocationManager .setStart (line , column );
313320 JSExtractor extractor = new JSExtractor (config );
314321 try {
315322 TextualExtractor tx =
316323 new TextualExtractor (
317- trapwriter ,
324+ trapWriter ,
318325 scriptLocationManager ,
319326 source ,
320327 config .getExtractLines (),
321328 textualExtractor .getMetrics (),
322329 textualExtractor .getExtractedFile ());
323- return extractor .extract (tx , source , toplevelKind , scopeManager ).snd ();
330+ Pair <Label , LoCInfo > result = extractor .extract (tx , source , toplevelKind , scopeManager );
331+ emitTopLevelXmlNodeBinding (parentHtmlNode , result .fst (), context , trapWriter );
332+ locInfo .add (result .snd ());
324333 } catch (ParseError e ) {
325334 e .setPosition (scriptLocationManager .translatePosition (e .getPosition ()));
326335 throw e .asUserError ();
327336 }
328337 }
338+
339+ private void emitTopLevelXmlNodeBinding (Segment parentHtmlNode , Label topLevelLabel , HtmlPopulator .Context context , TrapWriter writer ) {
340+ Label htmlNodeLabel = context .getNodeLabel (parentHtmlNode );
341+ writer .addTuple ("toplevel_parent_xml_node" , topLevelLabel , htmlNodeLabel );
342+ }
329343}
0 commit comments