Add custom field on augmented Apache log parsing with Grok in Logstash - apache

here is my problem : Let's say I have some standard Apache logs, like so :
IP1 IP2 - - [13/Jun/2016:14:45:05 +0200] "GET /page/requested.html HTTP/1.1" 200 4860 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
I can successfully parse these logs with my actual configuration of Logstash :
input {
file {
path => '/home/user/logsDir/*'
}
}
filter {
grok {
match => { "message" => "%{COMBINEDAPACHELOG}"}
}
}
output {
elasticsearch { }
stdout { codec => rubydebug }
}
But on these logs, I apply some machine learning algorithm and I give them a score. So the new log line looks like that :
IP1 IP2 - - [13/Jun/2016:14:45:05 +0200] "GET /page/requested.html HTTP/1.1" 200 4860 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0" 0.00950628507703
Note the 0.00950628507703 at the end of the line, which is the actual score
Now, I would like to parse this line so I could use the score for visualisation in Kibana (Logstash is integrated in the whole ELK stack). So it would be great if the score could be parsed as a float.
NB: I can place the score before or after the standard Apache log message and insert any kind of characters between the two (currently it is just a space).
Any idea on how to tackle this problem ?
Thanks in advance !

Eventually I found how to proceed. I add a little keyword before the score : the word pred
So my lines are now like this :
IP1 IP2 - - [13/Jun/2016:14:45:05 +0200] "GET /page/requested.html HTTP/1.1" 200 4860 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0" pred:0.00950628507703
And I use this configuration for logstash :
input {
file {
path => '/home/user/logsDir/*'
start_position => "beginning"
}
}
filter {
grok {
match => { "message" => "%{COMBINEDAPACHELOG} pred:%{NUMBER:prediction_score}"}
}
# I convert the score into a float in order to visualise it in Kibana
mutate {
convert => {"prediction_score" => "float"}
}
}
output {
elasticsearch { }
stdout { codec => rubydebug }
}
I hope this will help you if you are stuck with the same problem !
Cheers !

Related

logstash parsing old logs adds timestamp and @timestamp as date now

Hello all, I am trying to parse an old Apache log. The output has a correct timestamp field but also a @timestamp field; the @timestamp is the current date/time. How can I make sure that the timestamp becomes the @timestamp for Kibana/Elasticsearch?
example input:
172.31.21.26 - - [20/Jul/2017:22:10:52 +0200] "GET /mobile/getParent/NzE4MzU1ZmUtNmIwOC00N2JkLTk1YmYtNmNhZTUyZmVmNGYz HTTP/1.1" 200 452 "-" "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 (4301339520)"
conf file:
input {
file {
path=>"/home/ronald/Downloads/log/httpd/short.log"
start_position=>"beginning"
}
}
filter {
grok {
match => { "message" => "%{COMBINEDAPACHELOG}" }
}
}
output {
elasticsearch{
hosts=>"localhost"
index=>"roha_test"
document_type=>"demo1"
}
stdout{
codec => "rubydebug"
}
}
output:
"request" =>"/mobile/getParent/NzE4MzU1ZmUtNmIwOC00N2JkLTk1YmYtNmNhZTUyZmVmNGYz",
"agent" => "\"Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 (4301339520)\"",
"auth" => "-",
"ident" => "-",
"verb" => "GET",
"message" => "172.31.21.26 - - [20/Jul/2017:22:10:52 +0200] \"GET /mobile/getParent/NzE4MzU1ZmUtNmIwOC00N2JkLTk1YmYtNmNhZTUyZmVmNGYz HTTP/1.1\" 200 452 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 (4301339520)\"",
"path" => "/home/ronald/Downloads/log/httpd/short.log",
"referrer" => "\"-\"",
"@timestamp" => 2017-10-06T08:49:10.440Z,
"response" => "200",
"bytes" => "452",
"clientip" => "172.31.21.26",
"@version" => "1",
"host" => "ronald-XPS-13-9343",
"httpversion" => "1.1",
"timestamp" => "20/Jul/2017:22:10:52 +0200"
logstash version 5.6.1
You'll have to add a date filter which converts the timestamp field to a parsed datetime object which elasticsearch understands. Something like:
date {
match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
}

rabbitmq / logstash lost message

I have a rabbitmq which stores message successfully but my logstash reading the queue ignore most of my messages.
RabbitMQ is OK, I have a small python script to display all messages
import pika
i=0
def on_message(channel, method_frame, header_frame, body):
global i
print i
print("Message body", body)
channel.basic_ack(delivery_tag=method_frame.delivery_tag)
i+=1
credentials = pika.PlainCredentials('***', '***')
parameters = pika.ConnectionParameters('***',5672,'logstash', credentials=credentials)
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
channel.exchange_declare(exchange="logstash", exchange_type="topic", passive=False, durable=True, auto_delete=False)
channel.queue_declare(queue="hbbtv", auto_delete=False, durable=True)
channel.queue_bind(queue="hbbtv", exchange="logstash", routing_key="hbbtv")
channel.basic_qos(prefetch_count=1)
channel.basic_consume(on_message, 'hbbtv')
try:
channel.start_consuming()
except KeyboardInterrupt:
channel.stop_consuming()
connection.close()
I can see all my messages
12 ('Message body', '{"message":"212.95.70.118 - -
[25/Feb/2016:11:19:53 +0100] \"GET
/services/web/index.php/OPA/categories/ARTEPLUS7/fr HTTP/1.1\" 200
348
\"http://www.arte.tv/hbbtvv2/notv/cehtml/index.cehtml?lang=de_DE&page=PLUS7&tv=false\"
\"Opera/9.80 (Linux armv7l; HbbTV/1.1.1 (; Philips; ; ; PhilipsTV; )
CE-HTML/1.0 NETTV/4.3.1 PhilipsTV/2.1.1 Firmware/003.015.000.001
(PhilipsTV, 2.1.1,) en) Presto/2.12.362 Version/12.11 \"
hbbtvdyn.arte.tv","#version":"1","#timestamp":"2016-02-25T10:19:53.000Z","path":"/data/logs/access","host":"arte-hbbtvdyn-web1.sdv.fr","type":"apache-access","application":"hbbtv","clientip":"212.95.70.118","ident":"-","auth":"-","timestamp":"25/Feb/2016:11:19:53 +0100","verb":"GET","request":"/services/web/index.php/OPA/categories/ARTEPLUS7/fr","httpversion":"1.1","response":"200","bytes":"348","referrer":"\"http://www.arte.tv/hbbtvv2/notv/cehtml/index.cehtml?lang=de_DE&page=PLUS7&tv=false\"","agent":"\"Opera/9.80
(Linux armv7l; HbbTV/1.1.1 (; Philips; ; ; PhilipsTV; ) CE-HTML/1.0
NETTV/4.3.1 PhilipsTV/2.1.1 Firmware/003.015.000.001 (PhilipsTV,
2.1.1,) en) Presto/2.12.362 Version/12.11 \"","targethost":"hbbtvdyn.arte.tv","geoip":{"ip":"212.95.70.118","country_code2":"FR","country_code3":"FRA","country_name":"France","continent_code":"EU","region_name":"C1","city_name":"Strasbourg","latitude":48.60040000000001,"longitude":7.787399999999991,"timezone":"Europe/Paris","real_region_name":"Alsace","location":[7.787399999999991,48.60040000000001]}}')
13 ('Message body', '{"message":"212.95.70.118 - -
[25/Feb/2016:11:19:53 +0100] \"GET
/services/web/index.php/OPA/videos/highlights/6/ARTEPLUS7/de/GE
HTTP/1.1\" 500 4519
\"http://www.arte.tv/hbbtvv2/notv/cehtml/index.cehtml?lang=de_DE&page=PLUS7&tv=false\"
\"Opera/9.80 (Linux armv7l; HbbTV/1.1.1 (; Philips; ; ; PhilipsTV; )
CE-HTML/1.0 NETTV/4.3.1 PhilipsTV/2.1.1 Firmware/003.015.000.001
(PhilipsTV, 2.1.1,) en) Presto/2.12.362 Version/12.11 \"
hbbtvdyn.arte.tv","#version":"1","#timestamp":"2016-02-25T10:19:53.000Z","path":"/data/logs/access","host":"arte-hbbtvdyn-web1.sdv.fr","type":"apache-access","application":"hbbtv","clientip":"212.95.70.118","ident":"-","auth":"-","timestamp":"25/Feb/2016:11:19:53 +0100","verb":"GET","request":"/services/web/index.php/OPA/videos/highlights/6/ARTEPLUS7/de/GE","httpversion":"1.1","response":"500","bytes":"4519","referrer":"\"http://www.arte.tv/hbbtvv2/notv/cehtml/index.cehtml?lang=de_DE&page=PLUS7&tv=false\"","agent":"\"Opera/9.80
(Linux armv7l; HbbTV/1.1.1 (; Philips; ; ; PhilipsTV; ) CE-HTML/1.0
NETTV/4.3.1 PhilipsTV/2.1.1 Firmware/003.015.000.001 (PhilipsTV,
2.1.1,) en) Presto/2.12.362 Version/12.11 \"","targethost":"hbbtvdyn.arte.tv","geoip":{"ip":"212.95.70.118","country_code2":"FR","country_code3":"FRA","country_name":"France","continent_code":"EU","region_name":"C1","city_name":"Strasbourg","latitude":48.60040000000001,"longitude":7.787399999999991,"timezone":"Europe/Paris","real_region_name":"Alsace","location":[7.787399999999991,48.60040000000001]}}')
14 ('Message body', '{"message":"212.95.70.119 - -
[25/Feb/2016:11:19:53 +0100] \"GET
/OPA/getOPAData.php?url=videoStreams%3Flanguage%3Dfr%26protocol%3DHTTP%26mediaType%3Dmp4%26quality%3DEQ%2CSQ%2CHQ%26profileAmm%3D%24nin%3AAMM-YTFR-HAB%2CAMM-YTFR%2CAMM-DT%26kind%3DSHOW%26availableScreens%3Dtv%26fields%3DprogramId%2Curl%2Cquality%2CaudioSlot%2CaudioCode%2CaudioLabel%2CaudioShortLabel%2Cchannel%26programId%3D048353-033-A%26platform%3DARTEPLUS7&filename=PLUS7_stream_048353-033-A_fr_FR.json
HTTP/1.1\" 200 5508 \"-\" \"Mozilla/5.0 (Linux; Tizen 2.3;
SmartHub; SMART-TV; SmartTV; U; Maple2012) AppleWebKit/538.1+ (KHTML,
like Gecko) TV Safari/538.1+ \"
hbbtvdyn.arte.tv","#version":"1","#timestamp":"2016-02-25T10:19:53.000Z","path":"/data/logs/access","host":"arte-hbbtvdyn-web1.sdv.fr","type":"apache-access","application":"hbbtv","clientip":"212.95.70.119","ident":"-","auth":"-","timestamp":"25/Feb/2016:11:19:53 +0100","verb":"GET","request":"/OPA/getOPAData.php?url=videoStreams%3Flanguage%3Dfr%26protocol%3DHTTP%26mediaType%3Dmp4%26quality%3DEQ%2CSQ%2CHQ%26profileAmm%3D%24nin%3AAMM-YTFR-HAB%2CAMM-YTFR%2CAMM-DT%26kind%3DSHOW%26availableScreens%3Dtv%26fields%3DprogramId%2Curl%2Cquality%2CaudioSlot%2CaudioCode%2CaudioLabel%2CaudioShortLabel%2Cchannel%26programId%3D048353-033-A%26platform%3DARTEPLUS7&filename=PLUS7_stream_048353-033-A_fr_FR.json","httpversion":"1.1","response":"200","bytes":"5508","referrer":"\"-\"","agent":"\"Mozilla/5.0
(Linux; Tizen 2.3; SmartHub; SMART-TV; SmartTV; U; Maple2012)
AppleWebKit/538.1+ (KHTML, like Gecko) TV Safari/538.1+
\"","targethost":"hbbtvdyn.arte.tv","geoip":{"ip":"212.95.70.119","country_code2":"FR","country_code3":"FRA","country_name":"France","continent_code":"EU","region_name":"C1","city_name":"Strasbourg","latitude":48.60040000000001,"longitude":7.787399999999991,"timezone":"Europe/Paris","real_region_name":"Alsace","location":[7.787399999999991,48.60040000000001]}}')
with the good rate message (several per seconds) and I have absolutely not grok parse failure.
So the issue happens while Logstash reads the messages. The problems are:
a lot of messages are missing
all messages have _grokparsefailure even if
they are complete
The input part of logstash is
rabbitmq {
host=>"arte-elasticlog.sdv.fr"
user=>"***"
password=>"***"
queue=>"hbbtv"
vhost=>"logstash"
port=>5672
auto_delete=>false
durable=>true
type => "rabbit_hbbtv"
}
_grokparsefailure indicates that it is unable to parse the message, meaning the message was successfully read from the queue, but your grok expression has a problem or could not be applied to your message content.
One more thing: the default codec of the rabbitmq input is "json"; if your rabbitmq message content is not json, you should set the codec of your input, for instance:
codec => plain {}
The problem was due to my logstash filter,
I had two Apache access logs but with different patterns, so when Logstash tried to parse a message, sometimes it had the right pattern --> message in ES, and sometimes not --> no message.
Now, for all my different logs, I add (add_field)
application-->"my application name"
in my input and all my grok filter depends on the application.
All is good now, thanks for your help.

How to properly parse JSONP callback function in Meteor?

Does someone know how to parse JSONP callback in Meteor server methods?
I do
let response = HTTP.call('GET', AVIASALES_API_ENDPOINTS.getLocationFromIP, {
params: {
locale: 'en',
callback: 'useriata',
ip: clientIP
}
});
in response.content I’ve got
useriata({"iata":"MSQ","name":"Minsk","country_name":"Belarus"})
How to properly parse it?
It could help to know what you really try to accomplish? But here is a working example meteor actually doesn't do anything unusual with the requests.
Meteor.startup(function () {
var result = HTTP.call("GET", "https://api.github.com/legacy/repos/search/meteor", {
params: {},
headers: {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
}
});
console.log(result.data); // it's js object you can do result.data.repositories[0].name
console.log(JSON.stringify(result.data)); // json string
console.log(JSON.parse(JSON.stringify(result.data))) // if for some reason you need to parse it this way will work, but seems unnecessary
});
Update: The string you got back from the response wasn't valid JSON so you couldn't parse it used some regex to remove the invalid strings here is working example: http://meteorpad.com/pad/JCy5WkFsrtciG9PR5/Copy%20of%20Leaderboard

nginx reverse proxy gives 404s on everything

This project is generally served with apache but I want to introduce nginx as a front controller to proxy requests through to memcached or fall back to apache if the URI is not found as a key in memcached.
What is happening when I make the request through nginx is I get 404s on every asset. I can paste a single asset URL from a request right in the URL bar and retrieve it, but with a 404 status. The 404s cause most of the page not to render but it seems the assets are being downloaded.
I can make the same request straight through apache and it works perfectly.
Here is my nginx config:
upstream memcached-upstream {
server 127.0.0.1:11211;
}
upstream apache-upstream {
server 127.0.0.1:5678;
}
server {
listen 4567;
root /vagrant;
server_name sc;
index index.php;
access_log /var/log/nginx/www.sc.com.access.log;
error_log /var/log/nginx/www.sc.com.error.log error;
location / {
# Only use this method for GET requests.
if ($request_method != GET ) {
proxy_pass http://apache-upstream;
break;
}
# Attempt to fetch from memcache. Instead of 404ing, use the #fallback internal location
set $memcached_key $request_uri;
memcached_pass memcached-upstream; # Use an upstream { } block for memcached resiliency
default_type application/json; # Our services only speak JSON
error_page 404 = @fallback;
}
location @fallback {
proxy_pass http://apache-upstream;
}
}
here is a sample from my nginx access log:
10.0.2.2 - - [18/Dec/2013:23:50:08 +0000] "GET /templates/assets/js/csrf.js HTTP/1.1" 404 545 "http://localhost:4567/templates/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36"
And the same request from the apache log:
www.sc.com:80 127.0.0.1 - - [18/Dec/2013:23:50:08 +0000] "GET /templates/assets/js/csrf.js HTTP/1.0" 200 857 "http://localhost:4567/templates/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36"
Any help would be much appreciated.
try replacing the error_page with this
error_page 404 =200 @fallback;

In winston for Node.js is there a way to suppress log level from message?

I'm using winston to stream log messages from Express based on various comments elsewhere, my setup is essentially:
var express = require("express"),
winston = require("winston");
// enable web server logging; pipe those log messages through winston
var requestLogger = new (winston.Logger)(
{
transports: [
new (winston.transports.File)(
{
filename: "logs/request.log",
json: false,
timestamp: false
}
)
]
}
),
winstonStream = {
write: function(message, encoding) {
requestLogger.info(message.replace(/(\r?\n)$/, ''));
}
};
this.use(express.logger({stream: winstonStream}));
But I'd like a way to suppress the output of the log level because I know for this particular logger it will always be "info". So rather than:
info: 127.0.0.1 - - [Fri, 20 Sep 2013 13:48:02 GMT] "POST /v1/submission HTTP/1.1" 200 261 "http://localhost:8887/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.65 Safari/537.36"
I would get:
127.0.0.1 - - [Fri, 20 Sep 2013 13:48:02 GMT] "POST /v1/submission HTTP/1.1" 200 261 "http://localhost:8887/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.65 Safari/537.36"