1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.javascript.regexp;
16
17 import java.util.ArrayList;
18 import java.util.HashMap;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.regex.MatchResult;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24 import java.util.regex.PatternSyntaxException;
25
26 import org.apache.commons.lang3.StringUtils;
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.htmlunit.NotYetImplementedException;
30 import org.htmlunit.corejs.javascript.Context;
31 import org.htmlunit.corejs.javascript.RegExpProxy;
32 import org.htmlunit.corejs.javascript.ScriptRuntime;
33 import org.htmlunit.corejs.javascript.Scriptable;
34 import org.htmlunit.corejs.javascript.regexp.NativeRegExp;
35 import org.htmlunit.corejs.javascript.regexp.RegExpImpl;
36 import org.htmlunit.corejs.javascript.regexp.SubString;
37 import org.htmlunit.javascript.JavaScriptEngine;
38
39
40
41
42
43
44
45
46
47 public class HtmlUnitRegExpProxy extends RegExpImpl {
48
49 private static final Log LOG = LogFactory.getLog(HtmlUnitRegExpProxy.class);
50
51 private static final Map<String, Pattern> PATTENS = new HashMap<>();
52
53 private final RegExpProxy wrapped_;
54
55
56
57
58
/**
 * Creates a proxy that handles some regular expression operations itself and
 * hands all others over to the given proxy.
 *
 * @param wrapped the proxy to delegate to
 */
public HtmlUnitRegExpProxy(final RegExpProxy wrapped) {
    // the implicit super() call is sufficient here
    wrapped_ = wrapped;
}
63
64
65
66
67
68 @Override
69 public Object action(final Context cx, final Scriptable scope, final Scriptable thisObj,
70 final Object[] args, final int actionType) {
71 try {
72 return doAction(cx, scope, thisObj, args, actionType);
73 }
74 catch (final RegExStickyNotSupportedException e) {
75 if (LOG.isWarnEnabled()) {
76 LOG.warn(e.getMessage(), e);
77 }
78 return wrapped_.action(cx, scope, thisObj, args, actionType);
79 }
80 catch (final StackOverflowError e) {
81
82
83 if (LOG.isWarnEnabled()) {
84 LOG.warn(e.getMessage(), e);
85 }
86 return wrapped_.action(cx, scope, thisObj, args, actionType);
87 }
88 }
89
90 private Object doAction(final Context cx, final Scriptable scope, final Scriptable thisObj,
91 final Object[] args, final int actionType) {
92
93 if ((RA_REPLACE == actionType || RA_REPLACE_ALL == actionType)
94 && args.length == 2 && args[1] instanceof String) {
95 final String thisString = JavaScriptEngine.toString(thisObj);
96 final String replacement = (String) args[1];
97 final Object arg0 = args[0];
98 if (arg0 instanceof String) {
99
100 return doStringReplacement(thisString, (String) arg0, replacement, RA_REPLACE_ALL == actionType);
101 }
102
103 if (arg0 instanceof NativeRegExp) {
104 try {
105 final NativeRegExp regexp = (NativeRegExp) arg0;
106
107 if (RA_REPLACE_ALL == actionType
108 && (regexp.getFlags() & NativeRegExp.JSREG_GLOB) == 0) {
109 throw ScriptRuntime.typeError(
110 "replaceAll must be called with a global RegExp");
111 }
112
113 final RegExpData reData = new RegExpData(regexp);
114 final Matcher matcher = reData.getPattern().matcher(thisString);
115 return doReplacement(thisString, replacement, matcher,
116 reData.isGlobal() || RA_REPLACE_ALL == actionType);
117 }
118 catch (final PatternSyntaxException e) {
119 if (LOG.isWarnEnabled()) {
120 LOG.warn(e.getMessage(), e);
121 }
122 }
123 }
124 }
125 else if (RA_MATCH == actionType || RA_SEARCH == actionType) {
126 if (args.length == 0) {
127 return null;
128 }
129 final Object arg0 = args[0];
130 final String thisString = JavaScriptEngine.toString(thisObj);
131 final RegExpData reData;
132 if (arg0 instanceof NativeRegExp) {
133 reData = new RegExpData((NativeRegExp) arg0);
134 }
135 else {
136 reData = new RegExpData(JavaScriptEngine.toString(arg0));
137 }
138
139 final Matcher matcher = reData.getPattern().matcher(thisString);
140
141 final boolean found = matcher.find();
142 if (RA_SEARCH == actionType) {
143 if (found) {
144 setProperties(matcher, thisString, matcher.start(), matcher.end());
145 return matcher.start();
146 }
147 return -1;
148 }
149
150 if (!found) {
151 return null;
152 }
153 final int index = matcher.start(0);
154 final List<Object> groups = new ArrayList<>();
155 if (reData.isGlobal()) {
156 groups.add(matcher.group(0));
157 setProperties(matcher, thisString, matcher.start(0), matcher.end(0));
158
159 while (matcher.find()) {
160 groups.add(matcher.group(0));
161 setProperties(matcher, thisString, matcher.start(0), matcher.end(0));
162 }
163 }
164 else {
165 for (int i = 0; i <= matcher.groupCount(); i++) {
166 Object group = matcher.group(i);
167 if (group == null) {
168 group = JavaScriptEngine.UNDEFINED;
169 }
170 groups.add(group);
171 }
172
173 setProperties(matcher, thisString, matcher.start(), matcher.end());
174 }
175 final Scriptable response = cx.newArray(scope, groups.toArray());
176
177 response.put("index", response, Integer.valueOf(index));
178 response.put("input", response, thisString);
179 return response;
180 }
181
182 return wrappedAction(cx, scope, thisObj, args, actionType);
183 }
184
185 private String doStringReplacement(final String originalString,
186 final String searchString, final String replacement,
187 final boolean replaceAll) {
188 if (originalString == null) {
189 return "";
190 }
191
192 final StaticStringMatcher matcher = new StaticStringMatcher(originalString, searchString);
193
194 final StringBuilder sb = new StringBuilder();
195 int previousIndex = 0;
196
197 while (matcher.find()) {
198 sb.append(originalString, previousIndex, matcher.start());
199
200 String localReplacement = replacement;
201 if (replacement.contains("$")) {
202 localReplacement = computeReplacementValue(localReplacement, originalString, matcher, false);
203 }
204 sb.append(localReplacement);
205 previousIndex = matcher.end();
206
207 if (!replaceAll) {
208 break;
209 }
210 }
211 sb.append(originalString, previousIndex, originalString.length());
212 return sb.toString();
213 }
214
215 private String doReplacement(final String originalString, final String replacement, final Matcher matcher,
216 final boolean replaceAll) {
217
218 final StringBuilder sb = new StringBuilder();
219 int previousIndex = 0;
220 while (matcher.find()) {
221 sb.append(originalString, previousIndex, matcher.start());
222 String localReplacement = replacement;
223 if (replacement.contains("$")) {
224 localReplacement = computeReplacementValue(replacement, originalString, matcher, false);
225 }
226 sb.append(localReplacement);
227 previousIndex = matcher.end();
228
229 setProperties(matcher, originalString, matcher.start(), previousIndex);
230 if (!replaceAll) {
231 break;
232 }
233 }
234 sb.append(originalString, previousIndex, originalString.length());
235 return sb.toString();
236 }
237
238 String computeReplacementValue(final String replacement, final String originalString,
239 final MatchResult matcher, final boolean group0ReturnsWholeMatch) {
240
241 int lastIndex = 0;
242 final StringBuilder result = new StringBuilder();
243 int i;
244 while ((i = replacement.indexOf('$', lastIndex)) > -1) {
245 if (i > 0) {
246 result.append(replacement, lastIndex, i);
247 }
248 String ss = null;
249 if (i < replacement.length() - 1 && (i == lastIndex || replacement.charAt(i - 1) != '$')) {
250 final char next = replacement.charAt(i + 1);
251
252 if (next >= '1' && next <= '9') {
253 final int num1digit = next - '0';
254 final char next2 = i + 2 < replacement.length() ? replacement.charAt(i + 2) : 'x';
255 final int num2digits;
256
257
258 if (next2 >= '1' && next2 <= '9') {
259 num2digits = num1digit * 10 + (next2 - '0');
260 }
261 else {
262 num2digits = Integer.MAX_VALUE;
263 }
264 if (num2digits <= matcher.groupCount()) {
265 ss = matcher.group(num2digits);
266 i++;
267 }
268 else if (num1digit <= matcher.groupCount()) {
269 ss = StringUtils.defaultString(matcher.group(num1digit));
270 }
271 }
272 else {
273 switch (next) {
274 case '&':
275 ss = matcher.group();
276 break;
277 case '0':
278 if (group0ReturnsWholeMatch) {
279 ss = matcher.group();
280 }
281 break;
282 case '`':
283 ss = originalString.substring(0, matcher.start());
284 break;
285 case '\'':
286 ss = originalString.substring(matcher.end());
287 break;
288 case '$':
289 ss = "$";
290 break;
291 default:
292 }
293 }
294 }
295 if (ss == null) {
296 result.append('$');
297 lastIndex = i + 1;
298 }
299 else {
300 result.append(ss);
301 lastIndex = i + 2;
302 }
303 }
304
305 result.append(replacement, lastIndex, replacement.length());
306
307 return result.toString();
308 }
309
310
311
312
313 private Object wrappedAction(final Context cx, final Scriptable scope, final Scriptable thisObj,
314 final Object[] args, final int actionType) {
315
316
317
318 try {
319 ScriptRuntime.setRegExpProxy(cx, wrapped_);
320 return wrapped_.action(cx, scope, thisObj, args, actionType);
321 }
322 finally {
323 ScriptRuntime.setRegExpProxy(cx, this);
324 }
325 }
326
327 private void setProperties(final Matcher matcher, final String thisString, final int startPos, final int endPos) {
328
329 final String match = matcher.group();
330 if (match == null) {
331 lastMatch = new SubString();
332 }
333 else {
334 lastMatch = new SubString(match, 0, match.length());
335 }
336
337
338 final int groupCount = matcher.groupCount();
339 if (groupCount == 0) {
340 parens = null;
341 }
342 else {
343 final int count = Math.min(9, groupCount);
344 parens = new SubString[count];
345 for (int i = 0; i < count; i++) {
346 final String group = matcher.group(i + 1);
347 if (group == null) {
348 parens[i] = new SubString();
349 }
350 else {
351 parens[i] = new SubString(group, 0, group.length());
352 }
353 }
354 }
355
356
357 if (groupCount > 0) {
358 final String last = matcher.group(groupCount);
359 if (last == null) {
360 lastParen = new SubString();
361 }
362 else {
363 lastParen = new SubString(last, 0, last.length());
364 }
365 }
366
367
368 if (startPos > 0) {
369 leftContext = new SubString(thisString, 0, startPos);
370 }
371 else {
372 leftContext = new SubString();
373 }
374
375
376 final int length = thisString.length();
377 if (endPos < length) {
378 rightContext = new SubString(thisString, endPos, length - endPos);
379 }
380 else {
381 rightContext = new SubString();
382 }
383 }
384
385
386
387
388 @Override
389 public Object compileRegExp(final Context cx, final String source, final String flags) {
390 try {
391 return wrapped_.compileRegExp(cx, source, flags);
392 }
393 catch (final Exception e) {
394 if (LOG.isWarnEnabled()) {
395 LOG.warn("compileRegExp() threw for >" + source + "<, flags: >" + flags + "<. "
396 + "Replacing with a '####shouldNotFindAnything###'");
397 }
398 return wrapped_.compileRegExp(cx, "####shouldNotFindAnything###", "");
399 }
400 }
401
402
403
404
405 @Override
406 public int find_split(final Context cx, final Scriptable scope, final String target,
407 final String separator, final Scriptable re, final int[] ip, final int[] matchlen,
408 final boolean[] matched, final String[][] parensp) {
409 return wrapped_.find_split(cx, scope, target, separator, re, ip, matchlen, matched, parensp);
410 }
411
412
413
414
415 @Override
416 public boolean isRegExp(final Scriptable obj) {
417 return wrapped_.isRegExp(obj);
418 }
419
420
421
422
423 @Override
424 public Scriptable wrapRegExp(final Context cx, final Scriptable scope, final Object compiled) {
425 return wrapped_.wrapRegExp(cx, scope, compiled);
426 }
427
428 private static class RegExpData {
429 private final boolean global_;
430 private Pattern pattern_;
431
432 RegExpData(final NativeRegExp re) {
433 final String str = re.toString();
434 final String jsFlags = StringUtils.substringAfterLast(str, "/");
435
436 if (jsFlags.indexOf('y') != -1) {
437 throw new RegExStickyNotSupportedException(str);
438 }
439
440 global_ = jsFlags.indexOf('g') != -1;
441
442 pattern_ = PATTENS.get(str);
443 if (pattern_ == null) {
444 final String jsSource = StringUtils.substringBeforeLast(str.substring(1), "/");
445 pattern_ = Pattern.compile(jsRegExpToJavaRegExp(jsSource), getJavaFlags(jsFlags));
446 PATTENS.put(str, pattern_);
447 }
448 }
449
450 RegExpData(final String string) {
451 global_ = false;
452
453 pattern_ = PATTENS.get(string);
454 if (pattern_ == null) {
455 pattern_ = Pattern.compile(jsRegExpToJavaRegExp(string), 0);
456 PATTENS.put(string, pattern_);
457 }
458 }
459
460
461
462
463
464 private static int getJavaFlags(final String jsFlags) {
465 int flags = 0;
466 if (jsFlags.contains("i")) {
467 flags |= Pattern.CASE_INSENSITIVE;
468 }
469 if (jsFlags.contains("m")) {
470 flags |= Pattern.MULTILINE;
471 }
472 if (jsFlags.contains("s")) {
473 flags |= Pattern.DOTALL;
474 }
475 return flags;
476 }
477
478 boolean isGlobal() {
479 return global_;
480 }
481
482 Pattern getPattern() {
483 return pattern_;
484 }
485 }
486
487
488
489
490
491
492 static String jsRegExpToJavaRegExp(final String re) {
493 final RegExpJsToJavaConverter regExpJsToJavaFSM = new RegExpJsToJavaConverter();
494 return regExpJsToJavaFSM.convert(re);
495 }
496
497
498
499
500 private static final class StaticStringMatcher implements MatchResult {
501 private final String original_;
502 private final String search_;
503
504 private int start_;
505 private int end_;
506
507 StaticStringMatcher(final String originalString, final String searchString) {
508 original_ = originalString;
509 search_ = searchString;
510
511 start_ = -1;
512 end_ = 0;
513 }
514
515 public boolean find() {
516 if (start_ == end_) {
517 end_++;
518 }
519 if (end_ > original_.length()) {
520 return false;
521 }
522
523 final int pos = original_.indexOf(search_, end_);
524 if (pos != -1) {
525 start_ = pos;
526 end_ = pos + search_.length();
527 return true;
528 }
529 return false;
530 }
531
532 @Override
533 public String group() {
534 return search_;
535 }
536
537 @Override
538 public int start() {
539 return start_;
540 }
541
542 @Override
543 public int end() {
544 return end_;
545 }
546
547 @Override
548 public int start(final int group) {
549 throw new NotYetImplementedException("StaticStringMatcher.start(int)");
550 }
551
552 @Override
553 public int end(final int group) {
554 throw new NotYetImplementedException("StaticStringMatcher.end(int)");
555 }
556
557 @Override
558 public String group(final int group) {
559 throw new NotYetImplementedException("StaticStringMatcher.group(int)");
560 }
561
562 @Override
563 public int groupCount() {
564 return 0;
565 }
566 }
567
568
569 private static class RegExStickyNotSupportedException extends IllegalArgumentException {
570 RegExStickyNotSupportedException(final String regex) {
571 super("RegEx sticky flag is not supported (" + regex + ") by HtmlUnitRegExProxy");
572 }
573 }
574
575 }