Hubbub
in_foreign_content.c
Go to the documentation of this file.
1 /*
2  * This file is part of Hubbub.
3  * Licensed under the MIT License,
4  * http://www.opensource.org/licenses/mit-license.php
5  * Copyright 2008 Andrew Sidwell <takkaria@netsurf-browser.org>
6  */
7 
8 #include <assert.h>
9 #include <string.h>
10 
11 #include "treebuilder/modes.h"
12 #include "treebuilder/internal.h"
14 #include "utils/utils.h"
15 #include "utils/string.h"
16 
17 
18 /*** Attribute-correction stuff ***/
19 
/* Expand a string literal into the pair "literal, length-of-literal", so a
 * table row can fill both the name and its length from one token. */
#define S(s) s, SLEN(s)

/**
 * Mapping table entry for case changes.
 *
 * Field order matters: the tables in this file initialise entries
 * positionally as { S("lower"), "Proper" }.
 */
typedef struct {
	const char *attr;	/**< Lower case name to look for (attribute
				 *   name, or tag name in svg_tagnames) */
	size_t len;		/**< Length of name in bytes */
	const char *proper;	/**< Correctly cased version */
} case_changes;
31 
32 static const case_changes svg_attributes[] = {
33  { S("attributename"), "attributeName" },
34  { S("attributetype"), "attributeType" },
35  { S("basefrequency"), "baseFrequency" },
36  { S("baseprofile"), "baseProfile" },
37  { S("calcmode"), "calcMode" },
38  { S("clippathunits"), "clipPathUnits" },
39  { S("contentscripttype"), "contentScriptType" },
40  { S("contentstyletype"), "contentStyleType" },
41  { S("diffuseconstant"), "diffuseConstant" },
42  { S("edgemode"), "edgeMode" },
43  { S("externalresourcesrequired"), "externalResourcesRequired" },
44  { S("filterres"), "filterRes" },
45  { S("filterunits"), "filterUnits" },
46  { S("glyphref"), "glyphRef" },
47  { S("gradienttransform"), "gradientTransform" },
48  { S("gradientunits"), "gradientUnits" },
49  { S("kernelmatrix"), "kernelMatrix" },
50  { S("kernelunitlength"), "kernelUnitLength" },
51  { S("keypoints"), "keyPoints" },
52  { S("keysplines"), "keySplines" },
53  { S("keytimes"), "keyTimes" },
54  { S("lengthadjust"), "lengthAdjust" },
55  { S("limitingconeangle"), "limitingConeAngle" },
56  { S("markerheight"), "markerHeight" },
57  { S("markerunits"), "markerUnits" },
58  { S("markerwidth"), "markerWidth" },
59  { S("maskcontentunits"), "maskContentUnits" },
60  { S("maskunits"), "maskUnits" },
61  { S("numoctaves"), "numOctaves" },
62  { S("pathlength"), "pathLength" },
63  { S("patterncontentunits"), "patternContentUnits" },
64  { S("patterntransform"), "patternTransform" },
65  { S("patternunits"), "patternUnits" },
66  { S("pointsatx"), "pointsAtX" },
67  { S("pointsaty"), "pointsAtY" },
68  { S("pointsatz"), "pointsAtZ" },
69  { S("preservealpha"), "preserveAlpha" },
70  { S("preserveaspectratio"), "preserveAspectRatio" },
71  { S("primitiveunits"), "primitiveUnits" },
72  { S("refx"), "refX" },
73  { S("refy"), "refY" },
74  { S("repeatcount"), "repeatCount" },
75  { S("repeatdur"), "repeatDur" },
76  { S("requiredextensions"), "requiredExtensions" },
77  { S("requiredfeatures"), "requiredFeatures" },
78  { S("specularconstant"), "specularConstant" },
79  { S("specularexponent"), "specularExponent" },
80  { S("spreadmethod"), "spreadMethod" },
81  { S("startoffset"), "startOffset" },
82  { S("stddeviation"), "stdDeviation" },
83  { S("stitchtiles"), "stitchTiles" },
84  { S("surfacescale"), "surfaceScale" },
85  { S("systemlanguage"), "systemLanguage" },
86  { S("tablevalues"), "tableValues" },
87  { S("targetx"), "targetX" },
88  { S("targety"), "targetY" },
89  { S("textlength"), "textLength" },
90  { S("viewbox"), "viewBox" },
91  { S("viewtarget"), "viewTarget" },
92  { S("xchannelselector"), "xChannelSelector" },
93  { S("ychannelselector"), "yChannelSelector" },
94  { S("zoomandpan"), "zoomAndPan" },
95 };
96 
97 static const case_changes svg_tagnames[] = {
98  { S("altglyph"), "altGlyph" },
99  { S("altglyphdef"), "altGlyphDef" },
100  { S("altglyphitem"), "altGlyphItem" },
101  { S("animatecolor"), "animateColor" },
102  { S("animatemotion"), "animateMotion" },
103  { S("animatetransform"), "animateTransform" },
104  { S("clippath"), "clipPath" },
105  { S("feblend"), "feBlend" },
106  { S("fecolormatrix"), "feColorMatrix" },
107  { S("fecomponenttransfer"), "feComponentTransfer" },
108  { S("fecomposite"), "feComposite" },
109  { S("feconvolvematrix"), "feConvolveMatrix" },
110  { S("fediffuselighting"), "feDiffuseLighting" },
111  { S("fedisplacementmap"), "feDisplacementMap" },
112  { S("fedistantlight"), "feDistantLight" },
113  { S("feflood"), "feFlood" },
114  { S("fefunca"), "feFuncA" },
115  { S("fefuncb"), "feFuncB" },
116  { S("fefuncg"), "feFuncG" },
117  { S("fefuncr"), "feFuncR" },
118  { S("fegaussianblur"), "feGaussianBlur" },
119  { S("feimage"), "feImage" },
120  { S("femerge"), "feMerge" },
121  { S("femergenode"), "feMergeNode" },
122  { S("femorphology"), "feMorphology" },
123  { S("feoffset"), "feOffset" },
124  { S("fepointlight"), "fePointLight" },
125  { S("fespecularlighting"), "feSpecularLighting" },
126  { S("fespotlight"), "feSpotLight" },
127  { S("fetile"), "feTile" },
128  { S("feturbulence"), "feTurbulence" },
129  { S("foreignobject"), "foreignObject" },
130  { S("glyphref"), "glyphRef" },
131  { S("lineargradient"), "linearGradient" },
132  { S("radialgradient"), "radialGradient" },
133  { S("textpath"), "textPath" },
134 };
135 
136 #undef S
137 
145  hubbub_tag *tag)
146 {
147  size_t i;
148  UNUSED(treebuilder);
149 
150  for (i = 0; i < tag->n_attributes; i++) {
151  hubbub_attribute *attr = &tag->attributes[i];
152  const uint8_t *name = attr->name.ptr;
153  size_t len = attr->name.len;
154 
155  if (hubbub_string_match(name, len,
156  (const uint8_t *) "definitionurl",
157  SLEN("definitionurl"))) {
158  attr->name.ptr = (uint8_t *) "definitionURL";
159  }
160  }
161 }
162 
170  hubbub_tag *tag)
171 {
172  size_t i;
173  UNUSED(treebuilder);
174 
175  for (i = 0; i < tag->n_attributes; i++) {
176  hubbub_attribute *attr = &tag->attributes[i];
177 
178  const uint8_t *name = attr->name.ptr;
179  size_t len = attr->name.len;
180  size_t j;
181 
182  for (j = 0; j < N_ELEMENTS(svg_attributes); j++) {
183  if (hubbub_string_match(name, len,
184  (uint8_t *) svg_attributes[j].attr,
185  svg_attributes[j].len)) {
186  attr->name.ptr =
187  (uint8_t *) svg_attributes[j].proper;
188  }
189  }
190  }
191 }
192 
200  hubbub_tag *tag)
201 {
202  const uint8_t *name = tag->name.ptr;
203  size_t len = tag->name.len;
204  size_t i;
205 
206  UNUSED(treebuilder);
207 
208  for (i = 0; i < N_ELEMENTS(svg_tagnames); i++) {
209  if (hubbub_string_match(name, len,
210  (uint8_t *) svg_tagnames[i].attr,
211  svg_tagnames[i].len)) {
212  tag->name.ptr = (uint8_t *) svg_tagnames[i].proper;
213  }
214  }
215 }
216 
217 
218 
219 #define S(s) (uint8_t *) s, SLEN(s)
220 
228  hubbub_tag *tag)
229 {
230  size_t i;
231  UNUSED(treebuilder);
232 
233  for (i = 0; i < tag->n_attributes; i++) {
234  hubbub_attribute *attr = &tag->attributes[i];
235  const uint8_t *name = attr->name.ptr;
236 
237  /* 10 == strlen("xlink:href") */
238  if (attr->name.len >= 10 &&
239  strncmp((char *) name, "xlink:",
240  SLEN("xlink:")) == 0) {
241  size_t len = attr->name.len - 6;
242  name += 6;
243 
244  if (hubbub_string_match(name, len, S("actuate")) ||
245  hubbub_string_match(name, len,
246  S("arcrole")) ||
247  hubbub_string_match(name, len,
248  S("href")) ||
249  hubbub_string_match(name, len,
250  S("role")) ||
251  hubbub_string_match(name, len,
252  S("show")) ||
253  hubbub_string_match(name, len,
254  S("title")) ||
255  hubbub_string_match(name, len,
256  S("type"))) {
257  attr->ns = HUBBUB_NS_XLINK;
258  attr->name.ptr += 6;
259  attr->name.len -= 6;
260  }
261  /* 8 == strlen("xml:base") */
262  } else if (attr->name.len >= 8 &&
263  strncmp((char *) name, "xml:",
264  SLEN("xml:")) == 0) {
265  size_t len = attr->name.len - 4;
266  name += 4;
267 
268  if (hubbub_string_match(name, len, S("base")) ||
269  hubbub_string_match(name, len,
270  S("lang")) ||
271  hubbub_string_match(name, len,
272  S("space"))) {
273  attr->ns = HUBBUB_NS_XML;
274  attr->name.ptr += 4;
275  attr->name.len -= 4;
276  }
277  } else if (hubbub_string_match(name, attr->name.len,
278  S("xmlns"))) {
279  attr->ns = HUBBUB_NS_XMLNS;
280  } else if (hubbub_string_match(name, attr->name.len,
281  S("xmlns:xlink"))) {
282  attr->ns = HUBBUB_NS_XMLNS;
283  attr->name.ptr += 6;
284  attr->name.len -= 6;
285  }
286 
287  }
288 }
289 
290 #undef S
291 
292 
293 
294 /*** Foreign content insertion mode ***/
295 
296 
302 {
303  element_context *stack = treebuilder->context.element_stack;
304  uint32_t node;
305 
306  assert((signed) treebuilder->context.current_node >= 0);
307 
308  for (node = treebuilder->context.current_node; node > 0; node--) {
309  element_type node_type = stack[node].type;
310 
311  /* The list of element types given in the spec here are the
312  * scoping elements excluding TABLE and HTML. TABLE is handled
313  * in the previous conditional and HTML should only occur
314  * as the first node in the stack, which is never processed
315  * in this loop. */
316  if (node_type == TABLE || is_scoping_element(node_type))
317  break;
318 
319  if (stack[node].ns != HUBBUB_NS_HTML)
320  return true;
321  }
322 
323  return false;
324 }
325 
330  const hubbub_token *token)
331 {
332  hubbub_error err;
333 
334  /* Because we don't support calling insertion modes directly,
335  * instead we set the current mode to the secondary mode,
336  * call the token handler, and then reset the mode afterward
337  * as long as it's unchanged, as this has the same effect */
338 
339  treebuilder->context.mode = treebuilder->context.second_mode;
340 
341  err = hubbub_treebuilder_token_handler(token, treebuilder);
342  if (err != HUBBUB_OK) {
343  treebuilder->context.mode = IN_FOREIGN_CONTENT;
344  return err;
345  }
346 
347  if (treebuilder->context.mode == treebuilder->context.second_mode)
348  treebuilder->context.mode = IN_FOREIGN_CONTENT;
349 
350  if (treebuilder->context.mode == IN_FOREIGN_CONTENT &&
351  !element_in_scope_in_non_html_ns(treebuilder)) {
352  treebuilder->context.mode = treebuilder->context.second_mode;
353  }
354 
355  return HUBBUB_OK;
356 }
357 
361 static void foreign_break_out(hubbub_treebuilder *treebuilder)
362 {
363  element_context *stack = treebuilder->context.element_stack;
364 
367  while (stack[treebuilder->context.current_node].ns !=
368  HUBBUB_NS_HTML) {
369  hubbub_ns ns;
371  void *node;
372 
373  element_stack_pop(treebuilder, &ns, &type, &node);
374 
375  treebuilder->tree_handler->unref_node(
376  treebuilder->tree_handler->ctx,
377  node);
378  }
379 
380  treebuilder->context.mode = treebuilder->context.second_mode;
381 }
382 
391  const hubbub_token *token)
392 {
393  hubbub_error err = HUBBUB_OK;
394 
395  switch (token->type) {
397  err = append_text(treebuilder, &token->data.character);
398  break;
400  err = process_comment_append(treebuilder, token,
401  treebuilder->context.element_stack[
402  treebuilder->context.current_node].node);
403  break;
406  break;
408  {
409  hubbub_ns cur_node_ns = treebuilder->context.element_stack[
410  treebuilder->context.current_node].ns;
411 
412  element_type cur_node = current_node(treebuilder);
414  &token->data.tag.name);
415 
416  if (cur_node_ns == HUBBUB_NS_HTML ||
417  (cur_node_ns == HUBBUB_NS_MATHML &&
418  (type != MGLYPH && type != MALIGNMARK) &&
419  (cur_node == MI || cur_node == MO ||
420  cur_node == MN || cur_node == MS ||
421  cur_node == MTEXT)) ||
422  (type == SVG && (cur_node_ns == HUBBUB_NS_MATHML &&
423  cur_node == ANNOTATION_XML)) ||
424  (cur_node_ns == HUBBUB_NS_SVG &&
425  (cur_node == FOREIGNOBJECT ||
426  cur_node == DESC ||
427  cur_node == TITLE))) {
428  err = process_as_in_secondary(treebuilder, token);
429  } else if (type == B || type == BIG || type == BLOCKQUOTE ||
430  type == BODY || type == BR || type == CENTER ||
431  type == CODE || type == DD || type == DIV ||
432  type == DL || type == DT || type == EM ||
433  type == EMBED || type == H1 || type == H2 ||
434  type == H3 || type == H4 || type == H5 ||
435  type == H6 || type == HEAD || type == HR ||
436  type == I || type == IMG || type == LI ||
437  type == LISTING || type == MENU ||
438  type == META || type == NOBR || type == OL ||
439  type == P || type == PRE || type == RUBY ||
440  type == S || type == SMALL || type == SPAN ||
441  type == STRONG || type == STRIKE ||
442  type == SUB || type == SUP || type == TABLE ||
443  type == TT || type == U || type == UL ||
444  type == VAR) {
445  foreign_break_out(treebuilder);
446  err = HUBBUB_REPROCESS;
447  } else if (type == FONT) {
448  const hubbub_tag *tag = &token->data.tag;
449  size_t i;
450 
451  for (i = 0; i < tag->n_attributes; i++) {
452  hubbub_attribute *attr = &tag->attributes[i];
453  const uint8_t *name = attr->name.ptr;
454  size_t len = attr->name.len;
455 
456  if (hubbub_string_match(name, len,
457  (const uint8_t *) "color",
458  SLEN("color")) ||
459  hubbub_string_match(name, len,
460  (const uint8_t *) "face",
461  SLEN("face")) ||
462  hubbub_string_match(name, len,
463  (const uint8_t *) "size",
464  SLEN("size")))
465  break;
466  }
467 
468  if (i != tag->n_attributes) {
469  foreign_break_out(treebuilder);
470  err = HUBBUB_REPROCESS;
471  }
472  } else {
473  hubbub_tag tag = token->data.tag;
474 
475  adjust_foreign_attributes(treebuilder, &tag);
476 
477  if (cur_node_ns == HUBBUB_NS_SVG) {
478  adjust_svg_tagname(treebuilder, &tag);
479  adjust_svg_attributes(treebuilder, &tag);
480  }
481 
482  /* Set to the right namespace and insert */
483  tag.ns = cur_node_ns;
484 
485  if (token->data.tag.self_closing) {
486  err = insert_element(treebuilder, &tag, false);
488  } else {
489  err = insert_element(treebuilder, &tag, true);
490  }
491  }
492  }
493  break;
495  err = process_as_in_secondary(treebuilder, token);
496  break;
497  case HUBBUB_TOKEN_EOF:
498  foreign_break_out(treebuilder);
499  err = HUBBUB_REPROCESS;
500  break;
501  }
502 
503  return err;
504 }
#define SLEN(s)
Definition: utils.h:34
hubbub_ns ns
Element namespace.
Definition: internal.h:44
static hubbub_error process_as_in_secondary(hubbub_treebuilder *treebuilder, const hubbub_token *token)
Process a token as if in the secondary insertion mode.
element_type current_node(hubbub_treebuilder *treebuilder)
Peek at the top element of the element stack.
Definition: treebuilder.c:1258
hubbub_error process_comment_append(hubbub_treebuilder *treebuilder, const hubbub_token *token, void *parent)
Process a comment token, appending it to the given parent.
Definition: treebuilder.c:420
Definition: internal.h:18
Definition: internal.h:32
hubbub_token_type type
The token type.
Definition: types.h:120
Definition: internal.h:20
Definition: internal.h:20
void * ctx
Context pointer.
Definition: tree.h:292
hubbub_ns ns
Tag namespace.
Definition: types.h:109
Data for a tag.
Definition: types.h:108
Token data.
Definition: types.h:119
const char * attr
Lower case attribute name.
bool is_scoping_element(element_type type)
Determine if a node is a scoping element.
Definition: treebuilder.c:1026
hubbub_string name
Tag name.
Definition: types.h:110
void adjust_foreign_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag)
Adjust foreign attributes.
Mapping table for case changes.
hubbub_tree_handler * tree_handler
Callback table.
Definition: internal.h:122
const uint8_t * ptr
Pointer to data.
Definition: types.h:77
element_type
Definition: internal.h:13
Definition: internal.h:17
Definition: internal.h:27
Definition: internal.h:17
Definition: internal.h:18
Definition: internal.h:25
#define UNUSED(x)
Definition: utils.h:38
hubbub_string name
Attribute name.
Definition: types.h:86
Definition: internal.h:30
insertion_mode mode
The current insertion mode.
Definition: internal.h:75
Definition: internal.h:27
Definition: internal.h:19
size_t len
Byte length of string.
Definition: types.h:78
Definition: internal.h:22
Definition: internal.h:23
static const case_changes svg_attributes[]
bool self_closing
Whether the tag can have children.
Definition: types.h:113
Definition: internal.h:27
Definition: internal.h:19
#define N_ELEMENTS(x)
Definition: utils.h:42
Definition: internal.h:30
Definition: internal.h:20
Definition: internal.h:19
const char * name
Definition: initial.c:22
Definition: internal.h:34
hubbub_treebuilder_context context
Our context.
Definition: internal.h:120
Definition: internal.h:21
Definition: internal.h:21
Definition: internal.h:19
Definition: internal.h:18
hubbub_error insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag_name, bool push)
Create element and insert it into the DOM, potentially pushing it on the stack.
Definition: treebuilder.c:751
hubbub_attribute * attributes
Array of attribute data.
Definition: types.h:112
hubbub_error hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw)
Handle tokeniser emitting a token.
Definition: treebuilder.c:264
hubbub_error
Definition: errors.h:18
void * node
Node pointer.
Definition: internal.h:54
static const case_changes svg_tagnames[]
hubbub_error append_text(hubbub_treebuilder *treebuilder, const hubbub_string *string)
Append text to the current node, inserting into the last child of the current node, iff it's a Text node.
Definition: treebuilder.c:943
void adjust_svg_tagname(hubbub_treebuilder *treebuilder, hubbub_tag *tag)
Adjust SVG tag names.
Item on the element stack.
Definition: internal.h:42
hubbub_string character
Definition: types.h:129
size_t len
Length of name in bytes.
Definition: internal.h:32
hubbub_tree_unref_node unref_node
Unreference node.
Definition: tree.h:279
element_type type
Definition: treebuilder.c:25
No error.
Definition: errors.h:19
size_t len
Definition: initial.c:23
Definition: internal.h:27
hubbub_tag tag
Definition: types.h:125
Definition: internal.h:19
Definition: internal.h:27
Definition: internal.h:19
Tag attribute data.
Definition: types.h:84
Definition: internal.h:30
Definition: internal.h:19
Definition: internal.h:30
Definition: internal.h:19
hubbub_ns
Possible namespaces.
Definition: types.h:63
union hubbub_token::@3 data
Type-specific data.
hubbub_ns ns
Attribute namespace.
Definition: types.h:85
Definition: internal.h:27
Definition: internal.h:27
Definition: internal.h:32
insertion_mode second_mode
The secondary insertion mode.
Definition: internal.h:76
Definition: internal.h:32
const char * proper
Correctly cased version.
void adjust_mathml_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag)
Adjust MathML attributes.
void adjust_svg_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag)
Adjust SVG attributes.
#define S(s)
Definition: internal.h:27
Definition: internal.h:21
Definition: internal.h:27
element_type type
Element type.
Definition: internal.h:45
Definition: internal.h:17
static bool element_in_scope_in_non_html_ns(hubbub_treebuilder *treebuilder)
Returns true iff there is an element in scope that has a namespace other than the HTML namespace...
Definition: internal.h:30
bool hubbub_string_match(const uint8_t *a, size_t a_len, const uint8_t *b, size_t b_len)
Check that one string is exactly equal to another.
Definition: string.c:23
hubbub_error handle_in_foreign_content(hubbub_treebuilder *treebuilder, const hubbub_token *token)
Handle tokens in "in foreign content" insertion mode.
hubbub_error element_stack_pop(hubbub_treebuilder *treebuilder, hubbub_ns *ns, element_type *type, void **node)
Pop an element off the stack of open elements.
Definition: treebuilder.c:1112
element_type element_type_from_name(hubbub_treebuilder *treebuilder, const hubbub_string *tag_name)
Convert an element name into an element type.
Definition: treebuilder.c:986
static void foreign_break_out(hubbub_treebuilder *treebuilder)
Break out of foreign content as a result of certain start tags or EOF.
Treebuilder object.
Definition: internal.h:116
Definition: internal.h:35
Definition: internal.h:27
element_context * element_stack
Stack of open elements.
Definition: internal.h:79
Definition: internal.h:18
uint32_t n_attributes
Count of attributes.
Definition: types.h:111
uint32_t current_node
Index of current node in stack.
Definition: internal.h:81
Definition: internal.h:19
Definition: internal.h:32