Installing Thug on Kali
http://buffer.github.io/thug/doc/index.html
Thug manual
http://www.makethenmakeinstall.com/2013/03/install-thug-on-kali-linux/
This site has an automated install script for setting up Thug on Kali.
I ran it as-is.
# Install the dependencies that are available in aptitude
apt-get -y install subversion libboost-dev libboost-python-dev libboost-thread-dev libboost-system-dev python-pip python-dev libbz2-dev libboost-all-dev python-magic autoconf automake dh-autoreconf
# Install libemu. Used for shellcode emulation
cd
git clone git://git.carnivore.it/libemu.git
cd libemu
autoreconf -v -i
./configure --enable-python-bindings --prefix=/opt/libemu
make -j4
make install
ldconfig -n /opt/libemu/lib
# Install pylibemu - used for libemu to talk with python
cd
git clone https://github.com/buffer/pylibemu.git
sh -c "echo /opt/libemu/lib > /etc/ld.so.conf.d/pylibemu.conf"
cd pylibemu
python setup.py build
python setup.py install
# Install some remaining python libraries that are needed
pip install beautifulsoup4 zope.interface pymongo cssutils httplib2 pefile chardet html5lib
# pydot requires pyparsing, but the last version of pyparsing that supports python 2.x is 1.5.7
easy_install pyparsing==1.5.7
pip install pydot
# Change to a working directory and get thug
cd /usr/local/src
mkdir thug
cd thug
git clone https://github.com/buffer/thug.git
# Download, configure and install Google V8
svn checkout http://v8.googlecode.com/svn/trunk/ v8
svn checkout http://pyv8.googlecode.com/svn/trunk/ pyv8
# Patch from thug
cp thug/patches/V8-patch* ./
patch -p0 < V8-patch1.diff
# setup V8 and PyV8
export V8_HOME=/usr/local/src/thug/v8/
cd pyv8/
python setup.py build
python setup.py install
# Make a copy of thug in /opt for use
cd ..
cp -ar ./thug/ /opt/thug
# Make sure python knows where libemu is in this session and permanently
export LD_LIBRARY_PATH=/opt/libemu/lib
echo 'export LD_LIBRARY_PATH=/opt/libemu/lib' >> ~/.bashrc
# Profit!
python /opt/thug/src/thug.py -h
It doesn't work...
root@kali:~/thug/run# python /opt/thug/src/thug.py <url>
Installing Thug turned out to be really painful.... (if anyone got it working, please let me know)
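If anyone else hits the same wall, one thing worth checking before giving up is which dependency actually fails to import. A quick sketch of my own (the module names are assumptions based on what the install script above builds: PyV8 against the checked-out V8, pylibemu against /opt/libemu, plus the pip packages):

#!/usr/bin/python
# check_thug_deps.py - which of Thug's Python dependencies actually import?
# (debugging sketch; module names are assumptions, not from the Thug docs)
import importlib

for name in ['PyV8', 'pylibemu', 'bs4', 'pymongo', 'cssutils', 'html5lib', 'pefile', 'magic']:
    try:
        importlib.import_module(name)
        print '[OK]   %s' % name
    except ImportError as e:
        print '[FAIL] %s (%s)' % (name, e)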
So instead, I'll introduce a malware crawler.
Two Python scripts:
https://github.com/seifreed/mwcrawler/blob/master/mwcrawler.py
#!/usr/bin/python
# Copyright (C) 2012 Ricardo Dias
#
# Malware Crawler Module v0.4
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Requirements:
# - BeautifulSoup 3.0.8
# Original script by Ricardo Dias; changes in this version:
# 1) Added more sources
# 2) Fixed some errors (logging etc.)
from BeautifulSoup import BeautifulSoup as bs
import sys
import hashlib
import re
import urllib2
import magic
import os
import socket
import datetime
import argparse
import logging
import tempfile
# By default Thug analysis is disabled
isthug = False
# variable for date value manipulation
now = datetime.datetime.now()
str(now)
# maximum wait time of http gets
timeout = 15
socket.setdefaulttimeout(timeout)
# load thug function, also checks if thug is installed
def loadthug():
    # bind the flag and the module at module scope so decisor() can see them
    global isthug, thug
    try:
        sys.path.append('/opt/thug/src')
        import thug
        isthug = True
        logging.info("Thug module loaded for html analysis")
    except ImportError:
        logging.warning("No Thug module found, html code inspection won't be available")
# determine file type for correct archival
def gettype(file):
ms = magic.open(magic.MAGIC_NONE)
ms.load()
return ms.buffer(file)
# beautifulsoup parser
def parse(url):
request = urllib2.Request(url)
request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1)')
try:
http = bs(urllib2.urlopen(request, timeout=60))
except:
logging.error('Error parsing %s',url)
return
return http
def decisor(url):
    if not re.match('http',url):
        url = 'http://'+url
    try:
        url_dl = urllib2.urlopen(url).read()
    except Exception, e:
        logging.error('Could not fetch %s (%s)', url, e)
        return
    filetype = gettype(url_dl).split(' ')[0]
    md5 = hashlib.md5(url_dl).hexdigest()
    if (filetype == 'HTML'):
        if isthug:
            logging.debug('Thug candidate: HTML code in %s', url)
            try:
                thug.Thug([url])()
            except Exception, e:
                logging.error('Thug error: %s', e)
                return
    else:
        dest = dumpdir+'/unsorted/'+filetype
        fpath = dest+'/'+str(md5)
        if not os.path.exists(dest):
            os.makedirs(dest)
        if not os.path.exists(fpath):
            file = open(fpath, 'wb')
            file.write(url_dl)
            file.close()
            logging.info("Saved file type %s with md5 %s from URL %s", filetype, md5, url)
        else:
            logging.debug("Found duplicate of file with md5 %s on URL %s", md5, url)
def malwaredl(soup):
    logging.info("Fetching from Malware Domain List")
    mdl=[]
    for row in soup('description'):
        mdl.append(row)
    del mdl[0]
    mdl_sites=[]
    for row in mdl:
        # undo the &amp; escaping in the RSS feed before using the URL
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        if site == '-':
            mdl_sites.append(re.sub('&amp;','&',str(row).split()[4]).replace(',',''))
        else:
            mdl_sites.append(site)
    logging.info('Found %s urls', len(mdl))
    for row in mdl_sites:
        decisor(row)
def vxvault(soup):
logging.info("Fetching from VXVault")
vxv=[]
for row in soup('pre'):
vxv = row.string.split('\r\n')
del vxv[:4]
del vxv[-1]
logging.info('Found %s urls', len(vxv))
for row in vxv:
decisor(row)
def malc0de(soup):
    logging.info("Fetching from Malc0de")
    mlc=[]
    for row in soup('description'):
        mlc.append(row)
    del mlc[0]
    mlc_sites=[]
    for row in mlc:
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        mlc_sites.append(site)
    logging.info('Found %s urls', len(mlc_sites))
    for row in mlc_sites:
        decisor(row)
def malwarebl(soup):
logging.info("Fetching from Malware Black List")
mbl=[]
for row in soup('description'):
site = str(row).split()[1].replace(',','')
mbl.append(site)
logging.info('Found %s urls', len(mbl))
for row in mbl:
decisor(row)
def minotaur(soup):
logging.info("Fetching from NovCon Minotaur")
minsites=[]
for row in soup('td'):
try:
if re.match('http',row.string):
minsites.append(row.string)
except:
pass
logging.info('Found %s urls', len(minsites))
for row in minsites:
decisor(row)
def sacour(soup):
logging.info("Fetching from Sacour.cn")
for url in soup('a'):
sacsites=[]
if re.match('list/',url['href']):
suburl = parse('http://www.sacour.cn/'+url['href'])
for text in suburl('body'):
for urls in text.contents:
if re.match('http://',str(urls)):
sacsites.append(str(urls))
if len(sacsites) > 0:
logging.info('Found %s urls in %s', len(sacsites),url['href'])
for row in sacsites:
decisor(row)
#----------------------------------------------------------------------
# Extra
def onlyThug(url):
if not re.match('http',url):
url = 'http://'+url
if isthug:
logging.debug('Thug candidate: HTML code in %s', url)
try:
thug.Thug([url])()
except Exception, e:
logging.error('Thug error: %s', e)
return
def cleanmxparserow(soup, attrClass):
cols=soup.findAll('td', {'class':attrClass})
if len(cols)==0:
return
lastcol=cols[len(cols)-1]
ases=lastcol.findAll('a', href=True)
if len(ases)==0:
return
lasta=ases[len(ases)-1]
return lasta['href']
def cleanMx(soup):
logging.info("Fetching from Clean MX")
table=soup.find('table', {'class':'liste'})
rows=table.findAll('tr')
urls=[]
for row in rows:
url=cleanmxparserow(row, 'zellen01')
if url:
urls.append(url)
url=cleanmxparserow(row, 'zellennormal')
if url:
urls.append(url)
for url in urls:
decisor(url)
def spyEyeTracker(soup):
logging.info("Fetching from SpyEye Tracker")
table=soup.find('table', {'class':'table'})
rows=table.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
if columns[1].find('a'):
urls.append(columns[1].a.string)
for url in urls:
decisor(url)
def zeusTracker(soup):
logging.info("Fetching from Zeus Tracker")
table=soup.find('table', {'class':'table'})
rows=table.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
if columns[1].find('a'):
urls.append(columns[1].find('a').string)
for url in urls:
decisor(url)
def mwisRu(soup):
logging.info("Fetching from mwis.ru")
rows=soup.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
if len(columns)>=3:
if columns[2].find('a'):
urls.append(columns[2].find('a').string)
for url in urls:
# decisor(url)
onlyThug(url)
def threatLog(soup):
logging.info("Fetching from Threat Log")
table=soup.find('table', {'class':'table table-striped table-bordered'})
rows=table.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
url=''
if len(columns)>=4:
if columns[2].find('b'):
url=columns[2].find('b').string
# if columns[3].string!='-':
# url=url+columns[3].string # the listed paths are truncated ('...')
urls.append(url)
for url in urls:
# decisor(url)
onlyThug(url)
#----------------------------------------------------------------------
if __name__ == "__main__":
print "Malware Crawler v0.4"
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--thug", help="Enable thug analysis", action="store_true")
parser.add_argument("-p", "--proxy", help="Define HTTP proxy as address:port")
parser.add_argument("-d", "--dumpdir", help="Define dump directory for retrieved files")
parser.add_argument("-l", "--logfile", help="Define file for logging progress")
args = parser.parse_args()
try:
if args.thug:
loadthug()
except:
logging.warning("Thug analysis not enabled (use -t to enable thug)")
# proxy support
if args.proxy:
proxy = urllib2.ProxyHandler({'http': args.proxy})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
logging.info('Using proxy %s', args.proxy)
my_ip = urllib2.urlopen('http://whatthehellismyip.com/?ipraw').read()
logging.info('External sites see %s',my_ip)
# dump directory
# http://stackoverflow.com/questions/14574889/verify-directory-write-privileges
    if args.dumpdir:
        try:
            d = tempfile.mkdtemp(dir=args.dumpdir)
            dumpdir = args.dumpdir
        except Exception as e:
            logging.error('Could not open %s for writing (%s), using default', args.dumpdir, e)
            dumpdir = '/tmp/malware/unsorted'
        else:
            os.rmdir(d)
    else:
        dumpdir = '/tmp/malware/unsorted'
    if args.logfile:
        logging.basicConfig(filename=args.logfile, level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
    else:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
#source list
try:
minotaur(parse('http://minotauranalysis.com/malwarelist-urls.aspx'))
except:
logging.error('Couldn\'t load Minotaur')
pass
try:
malwaredl(parse('http://www.malwaredomainlist.com/hostslist/mdl.xml'))
except:
logging.error('Couldn\'t load Malware Domain List')
pass
try:
vxvault(parse('http://vxvault.siri-urz.net/URL_List.php'))
except:
logging.error('Couldn\'t load VxVault')
pass
try:
malc0de(parse('http://malc0de.com/rss'))
except:
logging.error('Couldn\'t load Malc0de')
pass
try:
malwarebl(parse('http://www.malwareblacklist.com/mbl.xml'))
except:
logging.error('Couldn\'t load Malware Black List')
pass
try:
sacour(parse('http://www.sacour.cn/showmal.asp?month=%d&year=%d' % (now.month, now.year)))
except:
logging.error('Couldn\'t load Sacour')
pass
try:
cleanMx(parse('http://support.clean-mx.de/clean-mx/viruses'))
except:
logging.error('Couldn\'t load Clean MX')
pass
try:
spyEyeTracker(parse('https://spyeyetracker.abuse.ch/monitor.php?browse=binaries'))
except:
logging.error('Couldn\'t load SpyEyeTracker')
pass
try:
zeusTracker(parse('https://zeustracker.abuse.ch/monitor.php?browse=binaries'))
except:
logging.error('Couldn\'t load ZeusTracker')
pass
# Thug-only sources
try:
mwisRu(parse('http://www.mwis.ru/'))
except:
logging.error('Couldn\'t load mwis.ru')
pass
try:
threatLog(parse('http://www.threatlog.com/'))
except:
logging.error('Couldn\'t load Threat Log')
pass
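Going by the argparse options in this extended version, a typical run with Thug analysis enabled, a custom dump directory, and a log file would look something like:
python mwcrawler.py -t -d /tmp/malware -l mwcrawler.log
Without -d it falls back to /tmp/malware/unsorted (and the -d directory has to exist already, otherwise it also falls back to the default). The second script is the original, simpler version the one above was based on: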
https://github.com/0day1day/mwcrawler/blob/master/mwcrawler.py
#!/usr/bin/python
# Copyright (C) 2012 Ricardo Dias
#
# Malware Crawler Module v0.4
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Requirements:
# - BeautifulSoup 3.0.8
from BeautifulSoup import BeautifulSoup as bs
import sys
import hashlib
import re
import urllib2
import magic
import os
import socket
import datetime
# By default Thug analysis is disabled
isthug = False
# variable for date value manipulation
now = datetime.datetime.now()
str(now)
# maximum wait time of http gets
timeout = 15
socket.setdefaulttimeout(timeout)
# load thug function, also checks if thug is installed
def loadthug():
    # bind the flag and the module at module scope so decisor() can see them
    global isthug, thug
    try:
        sys.path.append('/opt/thug/src')
        import thug
        isthug = True
        print "- Thug module loaded for html analysis"
    except ImportError:
        print "- No Thug module found, html code inspection won't be available"
# determine file type for correct archival
def gettype(file):
ms = magic.open(magic.MAGIC_NONE)
ms.load()
return ms.buffer(file)
# beautifulsoup parser
def parse(url):
request = urllib2.Request(url)
request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1)')
try:
http = bs(urllib2.urlopen(request))
except:
print "- Error parsing %s" % (url)
return
return http
def decisor(url):
    if not re.match('http',url):
        url = 'http://'+url
    try:
        url_dl = urllib2.urlopen(url).read()
    except Exception, e:
        #print "-- Error: %s" % e
        return
    filetype = gettype(url_dl).split(' ')[0]
    md5 = hashlib.md5(url_dl).hexdigest()
    if (filetype == 'HTML'):
        if isthug:
            print "-- Thug candidate: HTML code in %s" % url
            try:
                thug.Thug([url])()
            except Exception, e:
                print "- Thug error: %s" % e
                return
    else:
        dest = '/opt/malware/unsorted/'+filetype
        fpath = dest+'/'+str(md5)
        if not os.path.exists(dest):
            os.makedirs(dest)
        if not os.path.exists(fpath):
            file = open(fpath, 'wb')
            file.write(url_dl)
            file.close()
            print "-- Saved file type %s with md5: %s" % (filetype,md5)
def malwaredl(soup):
    print "- Fetching from Malware Domain List"
    mdl=[]
    for row in soup('description'):
        mdl.append(row)
    del mdl[0]
    mdl_sites=[]
    for row in mdl:
        # undo the &amp; escaping in the RSS feed before using the URL
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        if site == '-':
            mdl_sites.append(re.sub('&amp;','&',str(row).split()[4]).replace(',',''))
        else:
            mdl_sites.append(site)
    print "-- Found %s urls" % len(mdl)
    for row in mdl_sites:
        decisor(row)
def vxvault(soup):
print "- Fetching from VXVault"
vxv=[]
for row in soup('pre'):
vxv = row.string.split('\r\n')
del vxv[:4]
del vxv[-1]
print "-- Found %s urls" % len(vxv)
for row in vxv:
decisor(row)
def malc0de(soup):
    print "- Fetching from Malc0de"
    mlc=[]
    for row in soup('description'):
        mlc.append(row)
    del mlc[0]
    mlc_sites=[]
    for row in mlc:
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        mlc_sites.append(site)
    print "-- Found %s urls" % len(mlc_sites)
    for row in mlc_sites:
        decisor(row)
def malwarebl(soup):
print "- Fetching from Malware Black List"
mbl=[]
for row in soup('description'):
site = str(row).split()[1].replace(',','')
mbl.append(site)
print "-- Found %s urls" % len(mbl)
for row in mbl:
decisor(row)
def minotaur(soup):
print "- Fetching from NovCon Minotaur"
min=[]
for row in soup('td'):
try:
if re.match('http',row.string):
min.append(row.string)
except:
pass
print "-- Found %s urls" % len(min)
for row in min:
decisor(row)
def sacour(soup):
print "- Fetching from Sacour.cn"
for url in soup('a'):
min=[]
if re.match('list/',url['href']):
suburl = parse('http://www.sacour.cn/'+url['href'])
for text in suburl('body'):
for urls in text.contents:
if re.match('http://',str(urls)):
min.append(str(urls))
if len(min) > 0:
print "-- Found %s urls in %s" % (len(min),url['href'])
for row in min:
decisor(row)
if __name__ == "__main__":
print "Malware Parser v0.4"
try:
if sys.argv[1] == '-t':
loadthug()
except:
print "- Thug analysis not enabled (use -t to enable thug)"
#source list
minotaur(parse('http://minotauranalysis.com/malwarelist-urls.aspx'))
malwaredl(parse('http://www.malwaredomainlist.com/hostslist/mdl.xml'))
vxvault(parse('http://vxvault.siri-urz.net/URL_List.php'))
malc0de(parse('http://malc0de.com/rss'))
malwarebl(parse('http://www.malwareblacklist.com/mbl.xml'))
sacour(parse('http://www.sacour.cn/showmal.asp?month=%d&year=%d' % (now.month, now.year)))
I got hold of two Python scripts that collect malware samples, and both work fine.
The first script saves its downloads under /tmp/malware,
and the second one saves them under /opt/malware.
Reading through the code, both work the same way: they scrape malware-tracking sites and download the samples listed there.
If one of the source sites is down for maintenance, errors can pop up, but a small code change takes care of that.
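The extended script already wraps every source in try/except, so a dead feed is just logged and skipped; the plain second script calls them one after another, so a single unreachable site aborts the whole run. A minimal sketch (my own edit, reusing that script's existing functions and URLs) of how its __main__ calls could be wrapped:

# wrap each source so an unreachable or redesigned site is skipped
# instead of killing the whole crawl (add the remaining sources the same way)
sources = [
    ('Minotaur',            'http://minotauranalysis.com/malwarelist-urls.aspx',  minotaur),
    ('Malware Domain List', 'http://www.malwaredomainlist.com/hostslist/mdl.xml', malwaredl),
    ('VXVault',             'http://vxvault.siri-urz.net/URL_List.php',           vxvault),
    ('Malc0de',             'http://malc0de.com/rss',                             malc0de),
]
for name, url, handler in sources:
    try:
        handler(parse(url))
    except Exception, e:
        print "- Couldn't load %s (%s), skipping" % (name, e)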
I added a hard drive, mounted it at /temp, pointed the scripts there (see the sketch below), and let them run.
(My SSD was feeling the space pressure.)
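For the extended script that is just the -d option (e.g. -d /temp/malware); the plain script hardcodes its destination inside decisor(), so the one-line change looks like this (sketch, assuming the new disk is mounted at /temp):

# in the plain script's decisor(): archive to the extra disk instead of the SSD
dest = '/temp/malware/unsorted/'+filetype    # was '/opt/malware/unsorted/'+filetype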
After a few days of collecting I had around 28,000 samples.
Far too many to analyze one by one.
(It's past 31,000 now, so I've stopped collecting.)
Each file is saved with its MD5 hash as the filename.
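Since everything ends up as unsorted/<file type>/<md5>, a small inventory script (my own sketch; adjust base to wherever your crawler dumps files) can count the haul per file type and confirm the filenames really are the MD5 of the contents:

#!/usr/bin/python
# count collected samples per file type and verify filename == md5(contents)
import hashlib
import os

base = '/temp/malware/unsorted'   # assumed dump directory, change as needed
total = 0
for ftype in sorted(os.listdir(base)):
    d = os.path.join(base, ftype)
    if not os.path.isdir(d):
        continue
    names = os.listdir(d)
    total += len(names)
    mismatches = 0
    for n in names:
        data = open(os.path.join(d, n), 'rb').read()
        if hashlib.md5(data).hexdigest() != n:
            mismatches += 1
    print "%-10s %6d samples, %d name/hash mismatches" % (ftype, len(names), mismatches)
print "total: %d samples" % total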
That said, I wouldn't call everything in there malware.
Cracks, keygens, and packed files also show up flagged as malicious!!
(They're not exactly benign either, to be fair.)
If you're into malware analysis, these are handy sources to pull samples from.
Just don't download as much as I did... your drive will burst.