Sunday, November 14, 2021

Split keep function. Lookbehind, match, loop

Common code:
1
2
3
/**
 * Backslash-escape every regular-expression metacharacter (and whitespace)
 * in `text` so the result can be embedded in a dynamically built pattern.
 *
 * @param {string} text - Raw text to escape.
 * @returns {string} `text` with each special character prefixed by a backslash.
 */
function escapeRegExp(text) {
    const metaChars = /[-[\]{}()*+?.,\\^$|#\s]/g;
    // "$&" in a replacement string stands for the matched substring.
    const backslashed = "\\$&";
    return text.replace(metaChars, backslashed);
}
Lookbehind:
1
2
3
4
/**
 * Split the string at every character contained in `tokens`, keeping each
 * token character as its own element of the result (lookbehind variant).
 *
 * Requires a regex engine with lookbehind support.
 *
 * @param {string} tokens - Characters to split on; each one is kept in the output.
 * @returns {string[]} Alternating runs of non-token text and single token
 *     characters, in their original order. An empty string yields `[]`.
 */
String.prototype.splitKeep = function (tokens) {
    // `"".split(regex)` would return [""], whereas the loop-based variant
    // returns []; an empty receiver has no pieces, so report none.
    if (this.length === 0) {
        return [];
    }

    const escaped = escapeRegExp(tokens);
    // Zero-width boundaries: just before a token (lookahead) or just after
    // one (lookbehind). `split` ignores the global flag, so none is passed.
    return this.split(new RegExp(`(?=[${escaped}])|(?<=[${escaped}])`));
};
Match: Safari does not support lookbehind, so use the match-based approach there:
1
2
3
4
5
6
7
8
9
/**
 * Split the string at every character contained in `tokens`, keeping each
 * token character as its own element of the result (match variant, which
 * avoids lookbehind).
 *
 * @param {string} tokens - Characters to split on; each one is kept in the output.
 * @returns {string[]} Alternating runs of non-token text and single token
 *     characters, in their original order. An empty string yields `[]`.
 */
String.prototype.splitKeep = function (tokens) {
    // With no tokens the pattern would degenerate to `|[^]+`, whose empty
    // first alternative matches at every position; nothing can split the
    // string, so return it whole.
    if (tokens.length === 0) {
        return this.length > 0 ? [String(this)] : [];
    }

    const tokenClass = escapeRegExp(tokens);
    // Either one token character, or a maximal run of non-token characters.
    const piece = new RegExp(`[${tokenClass}]|[^${tokenClass}]+`, "g");

    // A global `match` returns null (not an empty array) when nothing matches.
    return this.match(piece) || [];
};
Loop-based approach:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Split the string at every character contained in `tokens`, keeping each
 * token character as its own element of the result (loop variant, no regex).
 *
 * @param {string} tokens - Characters to split on; each one is kept in the output.
 * @returns {string[]} Alternating runs of non-token text and single token
 *     characters, in their original order.
 */
String.prototype.splitKeep = function (tokens) {
    const pieces = [];
    let run = "";

    for (const ch of this) {
        if (!tokens.includes(ch)) {
            // Ordinary character: extend the current run of non-token text.
            run += ch;
        } else {
            // Token character: emit the pending run (if any), then the token.
            if (run !== "") {
                pieces.push(run);
            }
            pieces.push(ch);
            run = "";
        }
    }

    // Emit whatever text followed the last token.
    if (run !== "") {
        pieces.push(run);
    }

    return pieces;
};
Benchmark code:
1
2
3
4
5
// Time 10000 splitKeep calls on a short sample string; the elapsed time is
// printed under the "test" label.
console.time("test");
for (let remaining = 10000; remaining > 0; --remaining) {
    "pin1yin1".splitKeep("12345 ");
}
console.timeEnd("test");
Results:
% node splitkeep-lookbehind.js
test: 19.35ms
% node splitkeep-lookbehind.js
test: 18.951ms
% node splitkeep-match.js       
test: 54.635ms
% node splitkeep-match.js
test: 51.998ms
% node splitkeep-loop.js 
test: 14.647ms
% node splitkeep-loop.js
test: 13.035ms

No comments:

Post a Comment