25.ES9 正则扩展

Posted on 2023-01-16 Edited on 2023-03-13 In ECMAScript

1.ES9 正则扩展-命名捕获分组

(1) 传统方式

// Sample HTML fragment containing a single anchor tag.
const str = '<p><a href="http://www.google.com">Google</a></p>';
// Two unnamed capture groups: group 1 is the href value (url),
// group 2 is the text between <a ...> and </a>.
const reg = /<a href="(.*)">(.*)<\/a>/;
// Run the match; exec() returns a match array (null if there is no match).
const result = reg.exec(str);

console.log(result);
// The pattern has no named groups, so the `groups` property is undefined.
// (3) ['<a href="http://www.google.com">Google</a>', 'http://www.google.com', 'Google', index: 3, input: '<p><a href="http://www.google.com">Google</a></p>', groups: undefined]
// Captures can only be read by position.
console.log(result[1]);
// http://www.google.com
console.log(result[2]);
// Google

(2) 分组命名

// Sample HTML fragment containing a single anchor tag.
const str = '<p><a href="http://www.google.com">Google</a></p>';
// Label each capture group with (?<name>...): `url` for the href value,
// `text` for the link text.
const reg = /<a href="(?<url>.*)">(?<text>.*)<\/a>/;
// Run the match.
const result = reg.exec(str);

console.log(result);
// `groups` now holds the named captures (devtools-style expanded output):
// (3) ['<a href="http://www.google.com">Google</a>', 'http://www.google.com', 'Google', index: 3, input: '<p><a href="http://www.google.com">Google</a></p>', groups: {…}]
//     0: "<a href=\"http://www.google.com\">Google</a>"
//     1: "http://www.google.com"
//     2: "Google"
//     groups: {url: 'http://www.google.com', text: 'Google'}
//     index: 3
//     input: "<p><a href=\"http://www.google.com\">Google</a></p>"
//     length: 3
//     [[Prototype]]: Array(0)

// Read the captures by name rather than by position.
const { url, text } = result.groups;
console.log(url);
// http://www.google.com
console.log(text);
// Google

2.ES9 正则扩展-反向断言

(1) 正向断言（先行断言 lookahead：根据目标内容“后面”的内容判断是否匹配）

// Input containing two runs of digits: "123456" (before "test") and "123".
const str = 'HelloWorld123456test123abc';
// Lookahead (?=test): match digits only when "test" follows immediately;
// "test" itself is not part of the match.
const reg = /\d+(?=test)/;
// For a non-global regex, String.prototype.match returns the same match array as exec().
const result = str.match(reg);
console.log(result);
// ['123456', index: 10, input: 'HelloWorld123456test123abc', groups: undefined]

(2) 反向断言（后行断言 lookbehind：根据目标内容“前面”的内容判断是否匹配）

// Input containing two runs of digits: "123456" (after "HelloWorld") and "123".
const str = 'HelloWorld123456test123abc';
// Lookbehind (?<=HelloWorld): match digits only when "HelloWorld" comes
// immediately before them; "HelloWorld" itself is not part of the match.
const reg = /(?<=HelloWorld)\d+/;
// For a non-global regex, String.prototype.match returns the same match array as exec().
const result = str.match(reg);
console.log(result);
// ['123456', index: 10, input: 'HelloWorld123456test123abc', groups: undefined]

3.ES9 正则扩展-dotAll模式

量词说明：

*: 匹配前面的子表达式零次或多次
+: 匹配前面的子表达式一次或多次
?: 匹配前面的子表达式零次或一次；紧跟在其他量词之后（如 .*?）时表示非贪婪匹配，即尽可能少地匹配

(1) 传统方式

// Without the s flag, the dot (.) metacharacter matches any single character
// except line terminators (such as \n and \r).
const str = `
    <ul>
        <li>
            <a>MySQL</a>
            <p>2020-05-01</p>
        </li>
        <li>
            <a>Redis</a>
            <p>2020-06-10</p>
        </li>
    </ul>
`;

// Since the dot cannot cross line breaks here, \s+ consumes the newline and
// indentation between tags (\s matches whitespace: newline, space, tab, form feed, ...).
// Group 1 is the <a> text, group 2 is the <p> text; .*? keeps each capture non-greedy.
const reg = /<li>\s+<a>(.*?)<\/a>\s+<p>(.*?)<\/p>/;
const result = reg.exec(str);
console.log(result);
// Only the first <li> is matched (no g flag):
// 0: "<li>\n            <a>MySQL</a>\n            <p>2020-05-01</p>"
// 1: "MySQL"
// 2: "2020-05-01"
// groups: undefined
// index: 18
// input: "\n    <ul>\n        <li>\n            <a>MySQL</a>\n            <p>2020-05-01</p>\n        </li>\n        <li>\n            <a>Redis</a>\n            <p>2020-06-10</p>\n        </li>\n    </ul>\n"
// length: 3
// [[Prototype]]: Array(0)

// Skip element 0 (the whole match) and pick out the two captures.
const [, title, time] = result;
console.log(title);
// MySQL
console.log(time);
// 2020-05-01

(2) 元字符 dot(.) 匹配任意字符

// HTML list with two entries, each spread over several lines.
const str = `
    <ul>
        <li>
            <a>MySQL</a>
            <p>2020-05-01</p>
        </li>
        <li>
            <a>Redis</a>
            <p>2020-06-10</p>
        </li>
    </ul>
`;

// The trailing s flag (dotAll mode) lets the dot (.) match line terminators
// too, so .*? can span the newlines between tags and \s+ is no longer needed.
// Group 1 is the <a> text, group 2 is the <p> text.
const reg = /<li>.*?<a>(.*?)<\/a>.*?<p>(.*?)<\/p>/s;
const result = reg.exec(str);
console.log(result);
// Only the first <li> is matched (no g flag):
// 0: "<li>\n            <a>MySQL</a>\n            <p>2020-05-01</p>"
// 1: "MySQL"
// 2: "2020-05-01"
// groups: undefined
// index: 18
// input: "\n    <ul>\n        <li>\n            <a>MySQL</a>\n            <p>2020-05-01</p>\n        </li>\n        <li>\n            <a>Redis</a>\n            <p>2020-06-10</p>\n        </li>\n    </ul>\n"
// length: 3
// [[Prototype]]: Array(0)

// Skip element 0 (the whole match) and pick out the two captures.
const [, title, time] = result;
console.log(title);
// MySQL
console.log(time);
// 2020-05-01

(3) 元字符 dot(.) 全局匹配任意字符

// HTML list with two entries, each spread over several lines.
const str = `
    <ul>
        <li>
            <a>MySQL</a>
            <p>2020-05-01</p>
        </li>
        <li>
            <a>Redis</a>
            <p>2020-06-10</p>
        </li>
    </ul>
`;

// Flags: g (global) makes successive exec() calls continue from reg.lastIndex,
// so every match is found; s (dotAll) lets the dot match line terminators.
const reg = /<li>.*?<a>(.*?)<\/a>.*?<p>(.*?)<\/p>/gs;
// Collected { title, time } records, one per matched <li>.
const data = [];
// exec() returns null once no further match exists, which ends the loop.
let result = reg.exec(str);
while (result !== null) {
    const [, title, time] = result;
    data.push({ title, time });
    result = reg.exec(str);
}
console.log(data);
// 0: {title: 'MySQL', time: '2020-05-01'}
// 1: {title: 'Redis', time: '2020-06-10'}
// length: 2
// [[Prototype]]: Array(0)