加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

动态正则匹配

发布时间:2020-12-14 00:43:44 所属栏目:百科 来源:网络整理
导读:需求: 1、写一个动态正则; 2、只要写出日志的Schma就可以获取到日志的正则。 package com.donews.utilimport java.util.regex.Patternimport scala.collection.mutable.ArrayBuffer/** * Created by yuhui on 2016 / 8 / 5 . *//***列子: www.donews.com

需求:

1、写一个动态正则;

2、只要写出日志的Schema就可以获取到日志的正则。

package com.donews.util

import java.util.regex.Pattern

import scala.collection.mutable.ArrayBuffer

/**
  * Created by yuhui on 2016/8/5.
  */

/***
例子:       www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" China 22 Beijing
第一版本    "$domain $ip - $remote_user [$timestamp] "$http_url" $status $body_bytes_sent "$http_referer" "$http_user_agent" $country $region $city"

例子 :      www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"
第二版本    "$domain $ip - $remote_user [$timestamp] "$http_url" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$e_ip" "$country" "$region" "$city""

例子 :     www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" "http://www.donews.com/media/201408/2834414.shtm" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"
第三版本    $domain $http_x_forwarded_for - $remote_user [$timestamp] "$http_url" "$url" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$e_ip" "$country" "$region" "$city"
 */


/**
  * Builds a regular expression from a space-separated log schema and uses it
  * to split log lines into their fields.
  *
  * Every schema token becomes one capturing group, so group `i` of the
  * generated regex corresponds to token `i` of the schema:
  *
  *  - `$name`              -> `([\S]+)`            one run of non-whitespace
  *  - `$a:$b`              -> `([\S]+[:][\S]+)`    variables joined by a separator
  *  - `[$name]`, `"$name"` -> `(\[.+\])`, `(".+")` variable wrapped in delimiters
  *  - `-` (punctuation)    -> `(\W+)`
  *  - `GET` (plain word)   -> `(\QGET\E)`          matched literally
  *
  * Tokens are joined with `\s` and the whole expression is anchored with `^`
  * and `$`.
  */
object DynamicRegex {

  /** Schema most recently assigned by [[main]]; [[tran]] itself only reads its parameter. */
  var cmd = ""

  /** Regex compiled by [[lineToGroup]]; assign the result of [[tran]] before calling it. */
  var regex = ""

  // Token consisting only of non-word characters, e.g. the literal "-".
  private val PunctuationOnly = Pattern.compile("^(\\W+)$")

  // Two variables joined by a separator, e.g. "$a:$b"; group 2 is the separator.
  private val JoinedVariables = Pattern.compile("(\\$\\w+)(\\W+)(\\$\\w+)(.*)")

  // A variable surrounded by delimiters, e.g. "[$timestamp]" or "\"$url\"".
  private val WrappedVariable = Pattern.compile("(\\W+)(\\$\\w+)(\\W+)")

  /**
    * Translates a log schema into an anchored regular expression.
    *
    * @param cmd schema whose tokens are separated by single spaces
    * @return a regex with one capturing group per translated token
    */
  def tran(cmd: String): String = {
    val sb = new StringBuilder("^")
    cmd.split(" ").foreach { key =>
      if (PunctuationOnly.matcher(key).find()) {
        sb.append("(\\W+)\\s")
      } else if (key.indexOf('$') < 0) {
        // No variable in this token: match it literally. Previously this case
        // failed with StringIndexOutOfBoundsException from substring(0, -1).
        sb.append("(").append(Pattern.quote(key)).append(")\\s")
      } else if (key.startsWith("$")) {
        // split drops trailing empty strings, so "$a" gives 2 parts and "$a:$b" gives 3.
        val parts = key.split("\\$").length
        if (parts > 2) {
          val m = JoinedVariables.matcher(key)
          // The separator between the first two variables is reused for all of them.
          val separator = if (m.find()) m.group(2) else ""
          sb.append("(")
          for (_ <- 0 until parts - 2) {
            sb.append("[\\S]+[" + separator + "]")
          }
          sb.append("[\\S]+")
          sb.append(")\\s")
        } else {
          sb.append("([\\S]+)\\s")
        }
      } else {
        val m = WrappedVariable.matcher(key)
        // A token with a leading delimiter but no trailing one does not match
        // and contributes nothing to the regex (unchanged from the original).
        if (m.find()) {
          sb.append("(" + escape(m.group(1)) + ".+" + escape(m.group(3)) + ")\\s")
        }
      }
    }
    // Every fragment ends with "\s"; drop the last one before anchoring the end.
    sb.toString.stripSuffix("\\s").concat("$")
  }

  /**
    * Backslash-escapes every character of `original` except the double quote,
    * which is copied unchanged.
    *
    * Intended for delimiter text made of non-word characters, where a leading
    * backslash always yields a literal match in `java.util.regex`.
    *
    * @param original delimiter text captured from a schema token
    * @return the text with a backslash inserted before each non-quote character
    */
  def escape(original: String): String =
    original.map(c => if (c == '"') c.toString else "\\" + c).mkString

  /**
    * Applies the current [[regex]] to a log line and collects the captured groups.
    *
    * @param line one raw log line
    * @return the groups of every match in order; empty when the line does not match
    */
  def lineToGroup(line: String): ArrayBuffer[String] = {
    val groups = ArrayBuffer[String]()
    val m = Pattern.compile(regex).matcher(line)
    while (m.find()) {
      for (i <- 1 to m.groupCount()) {
        groups.append(m.group(i))
      }
    }
    groups
  }

  /** Demo: prints the regex generated for the third schema version and the fields of one sample line. */
  def main(args: Array[String]): Unit = {

    cmd = "$domain $http_x_forwarded_for - $remote_user [$timestamp] \"$http_url\" \"$url\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$e_ip\" \"$country\" \"$region\" \"$city\""
    regex = tran(cmd)
    println(regex)
    val log = "www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] \"GET /media/201408/2834414.shtm HTTP/1.1\" \"http://www.donews.com/media/201408/2834414.shtm\" 200 11296 \"-\" \"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)\" \"-\" \"China\" \"22\" \"Beijing\""
    lineToGroup(log).foreach(x => println(x))

  }
}

输出结果:

^([\S]+)\s([\S]+)\s(\W+)\s([\S]+)\s(\[.+\])\s(".+")\s(".+")\s([\S]+)\s([\S]+)\s(".+")\s(".+")\s(".+")\s(".+")\s(".+")\s(".+")$ www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" "http://www.donews.com/media/201408/2834414.shtm" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    推荐文章
      热点阅读