logstash同步文本数据到es

发布时间: 2023-10-26 15:20:37  作者: slnngk

环境:
OS:Centos 7
ES:6.8.5
logstash:6.8.5

1.准备测试文件

[root@localhost myfile]# pwd
/tmp/myfile
[root@localhost myfile]# more a.log 
111@3334@3333
vvv@aaa@gggg
utri@rtkjr@trtr
4354@5454@654
rerr@78@ffg

 

2.logstash配置文件

[root@localhost config]# more sync_file_to_es.conf 
input {
    file {
        path=> [ "/tmp/myfile/*.log" ]
        start_position => beginning
        stat_interval => 1 #设置多长时间检测文件是否修改 默认是1s
        discover_interval => 15
    }
}

output {
     elasticsearch {
       hosts => ["http://192.168.1.109:19200"]
       user => "elastic"
       password => "elastic123"
       action => "index"
       index => "file_%{+YYYYMMdd}" ##按照时间格式创建index
       ##document_type => "_doc" ##可以不指定,6.8默认是doc
     }
}

 

3.启动

[root@localhost config]# /opt/logstash-6.8.5/bin/logstash -f /opt/logstash-6.8.5/config/sync_file_to_es.conf

启动日志有这么一段输出

[2023-10-26T03:08:15,132][INFO ][logstash.inputs.file     ] No sincedb_path set, generating one based on the "path" setting {:sincedb_path=>"/opt/logstash-6.8.5/data/plugins/inputs/file/.sincedb_f019a9f5e77dadb5d6981e37ca0b16f6", :path=>["/tmp/myfile/*.log"]}
[2023-10-26T03:08:15,230][INFO ][logstash.pipeline        ] Pipeline started successfully {:pipeline_id=>"main", :thread=>"#<Thread:0x40beb660 run>"}
[2023-10-26T03:08:15,517][INFO ][logstash.agent           ] Pipelines running {:count=>1, :running_pipelines=>[:main], :non_running_pipelines=>[]}
[2023-10-26T03:08:15,577][INFO ][filewatch.observingtail  ] START, creating Discoverer, Watch with file and sincedb collections
[2023-10-26T03:08:16,810][INFO ][logstash.agent           ] Successfully started Logstash API endpoint {:port=>9600}

 

 No sincedb_path set, generating one based on the "path" setting {:sincedb_path=>"/opt/logstash-6.8.5/data/plugins/inputs/file/.sincedb_f019a9f5e77dadb5d6981e37ca0b16f6", :path=>["/tmp/myfile/*.log"]}

 

若想删除之前的索引并重新同步,需要先停止logstash,再将上面启动日志中提示的sincedb文件(记录每个文件已读取位置的文件)删除掉,否则之前已经同步过的文件数据不会再次同步。

 

4.查看同步的数据

 

[root@localhost ~]# curl -u elastic:elastic123 -H "Content-Type: application/json" -XGET '192.168.1.109:19200/file_20231026/_search?pretty' -d '
> {
> "query": { "match_all": {} }
> }'
{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "file_20231026",
        "_type" : "doc",
        "_id" : "iJDQaosBHfZiQDCXAfRs",
        "_score" : 1.0,
        "_source" : {
          "@version" : "1",
          "@timestamp" : "2023-10-26T07:08:17.322Z",
          "host" : "localhost.localdomain",
          "message" : "111@3334@3333",
          "path" : "/tmp/myfile/a.log"
        }
      },
      {
        "_index" : "file_20231026",
        "_type" : "doc",
        "_id" : "i5DQaosBHfZiQDCXAfRs",
        "_score" : 1.0,
        "_source" : {
          "@version" : "1",
          "@timestamp" : "2023-10-26T07:08:17.408Z",
          "host" : "localhost.localdomain",
          "message" : "4354@5454@654",
          "path" : "/tmp/myfile/a.log"
        }
      },
      {
        "_index" : "file_20231026",
        "_type" : "doc",
        "_id" : "iZDQaosBHfZiQDCXAfRs",
        "_score" : 1.0,
        "_source" : {
          "@version" : "1",
          "@timestamp" : "2023-10-26T07:08:17.394Z",
          "host" : "localhost.localdomain",
          "message" : "vvv@aaa@gggg",
          "path" : "/tmp/myfile/a.log"
        }
      },
      {
        "_index" : "file_20231026",
        "_type" : "doc",
        "_id" : "ipDQaosBHfZiQDCXAfRs",
        "_score" : 1.0,
        "_source" : {
          "@version" : "1",
          "@timestamp" : "2023-10-26T07:08:17.407Z",
          "host" : "localhost.localdomain",
          "message" : "utri@rtkjr@trtr",
          "path" : "/tmp/myfile/a.log"
        }
      },
      {
        "_index" : "file_20231026",
        "_type" : "doc",
        "_id" : "jJDQaosBHfZiQDCXAfRs",
        "_score" : 1.0,
        "_source" : {
          "@version" : "1",
          "@timestamp" : "2023-10-26T07:08:17.409Z",
          "host" : "localhost.localdomain",
          "message" : "rerr@78@ffg",
          "path" : "/tmp/myfile/a.log"
        }
      }
    ]
  }
}

 

可以看到,在没有配置filter的情况下,logstash不会对文本内容进行分割,而是把每一行作为一个整体存储到es文档的message字段中。