1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import os
import sys
import urllib2

# Python 2-only workaround: reload(sys) makes sys.setdefaultencoding reachable
# again (the site module removes it during interpreter start-up), and the call
# below then switches the process-wide default string encoding to UTF-8.
# The order of these two statements matters.
reload(sys)
sys.setdefaultencoding('utf-8')

class exportEsData():
    """Export the search hits of one site to a text file via the scroll API.

    The exporter POSTs a search to ``<url>/_search``, follows the returned
    scroll ID page by page until a page comes back with no hits, and then
    writes everything it collected to ``AttackData_<siteid>.txt`` in the
    current working directory.

    Attributes:
        url: Search endpoint, i.e. the base URL with ``/_search`` appended.
        siteid: Value matched (as a phrase) against field ``b``.
        startdate: First day of the range on field ``c``; `` 00:00:00`` is
            appended when the query is built.
        enddate: Last day of the range on field ``c``; `` 23:59:59`` is
            appended when the query is built.
        scroll: Scroll keep-alive string sent with every request (e.g. "5m").
        result: Text collected so far, one ``str()`` of each hit's
            ``_source`` per line.
    """

    # Header sent with every request; kept verbatim from the original script.
    _HEADERS = {'User-Agent':'Mozilla /5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6' }

    def __init__(self, url, siteid, startdate, enddate, scroll):
        """Store the export parameters and start with an empty result buffer."""
        self.url = '%s/_search' % (url)
        self.siteid = siteid
        self.startdate = startdate
        self.enddate = enddate
        self.scroll = scroll
        self.result = ""

    def exportData(self, scrollID):
        """Fetch pages until one is empty, then write the collected result.

        Args:
            scrollID: ``""`` to start a fresh search, or a scroll ID returned
                by a previous page to continue an existing scroll.

        The pages are walked in a loop rather than through mutual recursion
        with ``processData``, so the number of pages is not bounded by the
        interpreter's recursion limit.
        """
        while True:
            scrollID, hitCount = self._collectPage(self._fetchPage(scrollID))
            if hitCount == 0:
                break
        self.writeFile(self.result)

    def processData(self, data):
        """Consume one raw response body and carry the export on from there.

        Args:
            data: JSON text of a search/scroll response.

        The hits of ``data`` are appended to ``self.result``. If the page had
        hits, the export continues from its scroll ID; otherwise the
        collected result is written to disk.

        Raises:
            KeyError: if the response lacks ``_scroll_id`` or ``hits.hits``.
        """
        scrollID, hitCount = self._collectPage(data)
        if hitCount > 0:
            self.exportData(scrollID)
        else:
            self.writeFile(self.result)

    def writeFile(self, data):
        """Write ``data`` to ``AttackData_<siteid>.txt`` and report success.

        The file is managed by ``with`` so it is closed on every path, and a
        failing ``open()`` surfaces its own error instead of a NameError from
        cleanup code touching an unbound file variable.
        """
        filename = 'AttackData_%s.txt' % (self.siteid)
        with open(filename, "w") as f:
            f.write(data)
        print("site%s successfully exported" % self.siteid)

    def _fetchPage(self, scrollID):
        """POST the initial search or a scroll continuation; return the body."""
        if scrollID == "":
            print("Exporting site%s..." % self.siteid)
            # NOTE(review): the "filtered" query is legacy Elasticsearch
            # syntax -- confirm it is supported by the target server version.
            queryJson = {
                "size": 1000,
                "query": {"filtered": {"filter": {"bool": {"must": {"bool": {"must": [
                    {"query": {"match": {"b": {"query": self.siteid, "type": "phrase"}}}},
                    {"range": {"c": {"gte": self.startdate + " 00:00:00",
                                     "lte": self.enddate + " 23:59:59"}}},
                ]}}}}}},
            }
            url2 = '%s?scroll=%s' % (self.url, self.scroll)
        else:
            queryJson = {"scroll": self.scroll, "scroll_id": scrollID}
            url2 = self.url + "/scroll"
        req = urllib2.Request(url2, data=json.dumps(queryJson), headers=self._HEADERS)
        response = urllib2.build_opener().open(req)
        try:
            return response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

    def _collectPage(self, data):
        """Append one page's hits to self.result; return (scroll ID, hit count)."""
        msg = json.loads(data)
        scrollID = msg["_scroll_id"]
        hits = msg['hits']['hits']
        if hits:
            # One append per page instead of one full-string copy per hit.
            self.result += ''.join('%s\n' % (hit['_source'],) for hit in hits)
        return scrollID, len(hits)

if __name__ == '__main__':
    # Settings for this export run.
    ES_URL = "http://127.0.0.1:9201"
    START_DATE = "2017-07-03"
    END_DATE = "2017-12-01"
    SCROLL_KEEPALIVE = "5m"
    SITE_IDS = [1912, 1918]

    for siteid in SITE_IDS:
        # An empty scroll ID starts a fresh search for this site.
        exportEsData(ES_URL, siteid, START_DATE, END_DATE, SCROLL_KEEPALIVE).exportData("")

    # "pause" is a cmd.exe builtin that keeps the console window open; on
    # other platforms it would only spawn a shell command that fails.
    if os.name == 'nt':
        os.system("pause")