Installing Thug on Kali
http://buffer.github.io/thug/doc/index.html
Thug manual
http://www.makethenmakeinstall.com/2013/03/install-thug-on-kali-linux/
This site has an automated install script for setting up Thug on Kali.
I ran it as-is.
# Install the dependencies that are available in aptitude
apt-get -y install subversion libboost-dev libboost-python-dev libboost-thread-dev libboost-system-dev python-pip python-dev libbz2-dev libboost-all-dev python-magic autoconf automake dh-autoreconf
# Install libemu. Used for shellcode emulation
cd
git clone git://git.carnivore.it/libemu.git
cd libemu
autoreconf -v -i
./configure --enable-python-bindings --prefix=/opt/libemu
make -j4
make install
ldconfig -n /opt/libemu/lib
# Install pylibemu - used for libemu to talk with python
cd
git clone https://github.com/buffer/pylibemu.git
sh -c "echo /opt/libemu/lib > /etc/ld.so.conf.d/pylibemu.conf"
cd pylibemu
python setup.py build
python setup.py install
# Install some remaining python libraries that are needed
pip install beautifulsoup4 zope.interface pymongo cssutils httplib2 pefile chardet html5lib
# pydot requires pyparsing, but the last version of pyparsing that supports python 2.x is 1.5.7
easy_install pyparsing==1.5.7
pip install pydot
# Change to a working directory and get thug
cd /usr/local/src
mkdir thug
cd thug
git clone https://github.com/buffer/thug.git
# Download, configure and install Google V8
svn checkout http://v8.googlecode.com/svn/trunk/ v8
svn checkout http://pyv8.googlecode.com/svn/trunk/ pyv8
# Patch from thug
cp thug/patches/V8-patch* ./
patch -p0 < V8-patch1.diff
# setup V8 and PyV8
export V8_HOME=/usr/local/src/thug/v8/
cd pyv8/
python setup.py build
python setup.py install
# Make a copy of thug in /opt for use
cd ..
cp -ar ./thug/ /opt/thug
# Make sure python knows where libemu is in this session and permanently
export LD_LIBRARY_PATH=/opt/libemu/lib
echo 'export LD_LIBRARY_PATH=/opt/libemu/lib' >> ~/.bashrc
# Profit!
python /opt/thug/src/thug.py -h
It doesn't work...
root@kali:~/thug/run# python /opt/thug/src/thug.py <url>
Installing Thug turned out to be really painful.... (if anyone got it working, please let me know)
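If anyone else hits the same wall, one thing worth checking before giving up is which dependency actually fails to import. A quick sketch of my own (the module names are assumptions based on what the install script above builds: PyV8 against the checked-out V8, pylibemu against /opt/libemu, plus the pip packages):

#!/usr/bin/python
# check_thug_deps.py - which of Thug's Python dependencies actually import?
# (debugging sketch; module names are assumptions, not from the Thug docs)
import importlib

for name in ['PyV8', 'pylibemu', 'bs4', 'pymongo', 'cssutils', 'html5lib', 'pefile', 'magic']:
    try:
        importlib.import_module(name)
        print '[OK]   %s' % name
    except ImportError as e:
        print '[FAIL] %s (%s)' % (name, e)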
So instead, I'll introduce a malware crawler.
Two Python scripts:
https://github.com/seifreed/mwcrawler/blob/master/mwcrawler.py
#!/usr/bin/python
# Copyright (C) 2012 Ricardo Dias
#
# Malware Crawler Module v0.4
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Requirements:
# - BeautifulSoup 3.0.8
# Original script by Ricardo Dias; changes in this version:
# 1) Added more sources
# 2) Fixed some errors (logging etc.)
from BeautifulSoup import BeautifulSoup as bs
import sys
import hashlib
import re
import urllib2
import magic
import os
import socket
import datetime
import argparse
import logging
import tempfile
# By default Thug analysis is disabled
isthug = False
# variable for date value manipulation
now = datetime.datetime.now()
str(now)
# maximum wait time of http gets
timeout = 15
socket.setdefaulttimeout(timeout)
# load thug function, also checks if thug is installed
def loadthug():
    # bind the flag and the module at module scope so decisor() can see them
    global isthug, thug
    try:
        sys.path.append('/opt/thug/src')
        import thug
        isthug = True
        logging.info("Thug module loaded for html analysis")
    except ImportError:
        logging.warning("No Thug module found, html code inspection won't be available")
# determine file type for correct archival
def gettype(file):
ms = magic.open(magic.MAGIC_NONE)
ms.load()
return ms.buffer(file)
# beautifulsoup parser
def parse(url):
request = urllib2.Request(url)
request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1)')
try:
http = bs(urllib2.urlopen(request, timeout=60))
except:
logging.error('Error parsing %s',url)
return
return http
def decisor(url):
    if not re.match('http',url):
        url = 'http://'+url
    try:
        url_dl = urllib2.urlopen(url).read()
    except Exception, e:
        logging.error('Could not fetch %s (%s)', url, e)
        return
    filetype = gettype(url_dl).split(' ')[0]
    md5 = hashlib.md5(url_dl).hexdigest()
    if (filetype == 'HTML'):
        if isthug:
            logging.debug('Thug candidate: HTML code in %s', url)
            try:
                thug.Thug([url])()
            except Exception, e:
                logging.error('Thug error: %s', e)
                return
    else:
        dest = dumpdir+'/unsorted/'+filetype
        fpath = dest+'/'+str(md5)
        if not os.path.exists(dest):
            os.makedirs(dest)
        if not os.path.exists(fpath):
            file = open(fpath, 'wb')
            file.write(url_dl)
            file.close()
            logging.info("Saved file type %s with md5 %s from URL %s", filetype, md5, url)
        else:
            logging.debug("Found duplicate of file with md5 %s on URL %s", md5, url)
def malwaredl(soup):
    logging.info("Fetching from Malware Domain List")
    mdl=[]
    for row in soup('description'):
        mdl.append(row)
    del mdl[0]
    mdl_sites=[]
    for row in mdl:
        # undo the &amp; escaping in the RSS feed before using the URL
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        if site == '-':
            mdl_sites.append(re.sub('&amp;','&',str(row).split()[4]).replace(',',''))
        else:
            mdl_sites.append(site)
    logging.info('Found %s urls', len(mdl))
    for row in mdl_sites:
        decisor(row)
def vxvault(soup):
logging.info("Fetching from VXVault")
vxv=[]
for row in soup('pre'):
vxv = row.string.split('\r\n')
del vxv[:4]
del vxv[-1]
logging.info('Found %s urls', len(vxv))
for row in vxv:
decisor(row)
def malc0de(soup):
    logging.info("Fetching from Malc0de")
    mlc=[]
    for row in soup('description'):
        mlc.append(row)
    del mlc[0]
    mlc_sites=[]
    for row in mlc:
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        mlc_sites.append(site)
    logging.info('Found %s urls', len(mlc_sites))
    for row in mlc_sites:
        decisor(row)
def malwarebl(soup):
logging.info("Fetching from Malware Black List")
mbl=[]
for row in soup('description'):
site = str(row).split()[1].replace(',','')
mbl.append(site)
logging.info('Found %s urls', len(mbl))
for row in mbl:
decisor(row)
def minotaur(soup):
logging.info("Fetching from NovCon Minotaur")
minsites=[]
for row in soup('td'):
try:
if re.match('http',row.string):
minsites.append(row.string)
except:
pass
logging.info('Found %s urls', len(minsites))
for row in minsites:
decisor(row)
def sacour(soup):
logging.info("Fetching from Sacour.cn")
for url in soup('a'):
sacsites=[]
if re.match('list/',url['href']):
suburl = parse('http://www.sacour.cn/'+url['href'])
for text in suburl('body'):
for urls in text.contents:
if re.match('http://',str(urls)):
sacsites.append(str(urls))
if len(sacsites) > 0:
logging.info('Found %s urls in %s', len(sacsites),url['href'])
for row in sacsites:
decisor(row)
#----------------------------------------------------------------------
# Extra
def onlyThug(url):
if not re.match('http',url):
url = 'http://'+url
if isthug:
logging.debug('Thug candidate: HTML code in %s', url)
try:
thug.Thug([url])()
except Exception, e:
logging.error('Thug error: %s', e)
return
def cleanmxparserow(soup, attrClass):
cols=soup.findAll('td', {'class':attrClass})
if len(cols)==0:
return
lastcol=cols[len(cols)-1]
ases=lastcol.findAll('a', href=True)
if len(ases)==0:
return
lasta=ases[len(ases)-1]
return lasta['href']
def cleanMx(soup):
logging.info("Fetching from Clean MX")
table=soup.find('table', {'class':'liste'})
rows=table.findAll('tr')
urls=[]
for row in rows:
url=cleanmxparserow(row, 'zellen01')
if url:
urls.append(url)
url=cleanmxparserow(row, 'zellennormal')
if url:
urls.append(url)
for url in urls:
decisor(url)
def spyEyeTracker(soup):
logging.info("Fetching from SpyEye Tracker")
table=soup.find('table', {'class':'table'})
rows=table.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
if columns[1].find('a'):
urls.append(columns[1].a.string)
for url in urls:
decisor(url)
def zeusTracker(soup):
logging.info("Fetching from Zeus Tracker")
table=soup.find('table', {'class':'table'})
rows=table.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
if columns[1].find('a'):
urls.append(columns[1].find('a').string)
for url in urls:
decisor(url)
def mwisRu(soup):
logging.info("Fetching from mwis.ru")
rows=soup.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
if len(columns)>=3:
if columns[2].find('a'):
urls.append(columns[2].find('a').string)
for url in urls:
# decisor(url)
onlyThug(url)
def threatLog(soup):
logging.info("Fetching from Threat Log")
table=soup.find('table', {'class':'table table-striped table-bordered'})
rows=table.findAll('tr')
urls=[]
for row in rows:
columns=row.findAll('td')
url=''
if len(columns)>=4:
if columns[2].find('b'):
url=columns[2].find('b').string
# if columns[3].string!='-':
# url=url+columns[3].string # the listed paths are truncated ('...')
urls.append(url)
for url in urls:
# decisor(url)
onlyThug(url)
#----------------------------------------------------------------------
if __name__ == "__main__":
print "Malware Crawler v0.4"
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--thug", help="Enable thug analysis", action="store_true")
parser.add_argument("-p", "--proxy", help="Define HTTP proxy as address:port")
parser.add_argument("-d", "--dumpdir", help="Define dump directory for retrieved files")
parser.add_argument("-l", "--logfile", help="Define file for logging progress")
args = parser.parse_args()
try:
if args.thug:
loadthug()
except:
logging.warning("Thug analysis not enabled (use -t to enable thug)")
# proxy support
if args.proxy:
proxy = urllib2.ProxyHandler({'http': args.proxy})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
logging.info('Using proxy %s', args.proxy)
my_ip = urllib2.urlopen('http://whatthehellismyip.com/?ipraw').read()
logging.info('External sites see %s',my_ip)
# dump directory
# http://stackoverflow.com/questions/14574889/verify-directory-write-privileges
    if args.dumpdir:
        try:
            d = tempfile.mkdtemp(dir=args.dumpdir)
            dumpdir = args.dumpdir
        except Exception as e:
            logging.error('Could not open %s for writing (%s), using default', args.dumpdir, e)
            dumpdir = '/tmp/malware/unsorted'
        else:
            os.rmdir(d)
    else:
        dumpdir = '/tmp/malware/unsorted'
    if args.logfile:
        logging.basicConfig(filename=args.logfile, level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
    else:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
#source list
try:
minotaur(parse('http://minotauranalysis.com/malwarelist-urls.aspx'))
except:
logging.error('Couldn\'t load Minotaur')
pass
try:
malwaredl(parse('http://www.malwaredomainlist.com/hostslist/mdl.xml'))
except:
logging.error('Couldn\'t load Malware Domain List')
pass
try:
vxvault(parse('http://vxvault.siri-urz.net/URL_List.php'))
except:
logging.error('Couldn\'t load VxVault')
pass
try:
malc0de(parse('http://malc0de.com/rss'))
except:
logging.error('Couldn\'t load Malc0de')
pass
try:
malwarebl(parse('http://www.malwareblacklist.com/mbl.xml'))
except:
logging.error('Couldn\'t load Malware Black List')
pass
try:
sacour(parse('http://www.sacour.cn/showmal.asp?month=%d&year=%d' % (now.month, now.year)))
except:
logging.error('Couldn\'t load Sacour')
pass
try:
cleanMx(parse('http://support.clean-mx.de/clean-mx/viruses'))
except:
logging.error('Couldn\'t load Clean MX')
pass
try:
spyEyeTracker(parse('https://spyeyetracker.abuse.ch/monitor.php?browse=binaries'))
except:
logging.error('Couldn\'t load SpyEyeTracker')
pass
try:
zeusTracker(parse('https://zeustracker.abuse.ch/monitor.php?browse=binaries'))
except:
logging.error('Couldn\'t load ZeusTracker')
pass
# Thug-only sources
try:
mwisRu(parse('http://www.mwis.ru/'))
except:
logging.error('Couldn\'t load mwis.ru')
pass
try:
threatLog(parse('http://www.threatlog.com/'))
except:
logging.error('Couldn\'t load Threat Log')
pass
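Going by the argparse options in this extended version, a typical run with Thug analysis enabled, a custom dump directory, and a log file would look something like:
python mwcrawler.py -t -d /tmp/malware -l mwcrawler.log
Without -d it falls back to /tmp/malware/unsorted (and the -d directory has to exist already, otherwise it also falls back to the default). The second script is the original, simpler version the one above was based on: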
https://github.com/0day1day/mwcrawler/blob/master/mwcrawler.py
#!/usr/bin/python
# Copyright (C) 2012 Ricardo Dias
#
# Malware Crawler Module v0.4
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Requirements:
# - BeautifulSoup 3.0.8
from BeautifulSoup import BeautifulSoup as bs
import sys
import hashlib
import re
import urllib2
import magic
import os
import socket
import datetime
# By default Thug analysis is disabled
isthug = False
# variable for date value manipulation
now = datetime.datetime.now()
str(now)
# maximum wait time of http gets
timeout = 15
socket.setdefaulttimeout(timeout)
# load thug function, also checks if thug is installed
def loadthug():
    # bind the flag and the module at module scope so decisor() can see them
    global isthug, thug
    try:
        sys.path.append('/opt/thug/src')
        import thug
        isthug = True
        print "- Thug module loaded for html analysis"
    except ImportError:
        print "- No Thug module found, html code inspection won't be available"
# determine file type for correct archival
def gettype(file):
ms = magic.open(magic.MAGIC_NONE)
ms.load()
return ms.buffer(file)
# beautifulsoup parser
def parse(url):
request = urllib2.Request(url)
request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1)')
try:
http = bs(urllib2.urlopen(request))
except:
print "- Error parsing %s" % (url)
return
return http
def decisor(url):
    if not re.match('http',url):
        url = 'http://'+url
    try:
        url_dl = urllib2.urlopen(url).read()
    except Exception, e:
        #print "-- Error: %s" % e
        return
    filetype = gettype(url_dl).split(' ')[0]
    md5 = hashlib.md5(url_dl).hexdigest()
    if (filetype == 'HTML'):
        if isthug:
            print "-- Thug candidate: HTML code in %s" % url
            try:
                thug.Thug([url])()
            except Exception, e:
                print "- Thug error: %s" % e
                return
    else:
        dest = '/opt/malware/unsorted/'+filetype
        fpath = dest+'/'+str(md5)
        if not os.path.exists(dest):
            os.makedirs(dest)
        if not os.path.exists(fpath):
            file = open(fpath, 'wb')
            file.write(url_dl)
            file.close()
            print "-- Saved file type %s with md5: %s" % (filetype,md5)
def malwaredl(soup):
    print "- Fetching from Malware Domain List"
    mdl=[]
    for row in soup('description'):
        mdl.append(row)
    del mdl[0]
    mdl_sites=[]
    for row in mdl:
        # undo the &amp; escaping in the RSS feed before using the URL
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        if site == '-':
            mdl_sites.append(re.sub('&amp;','&',str(row).split()[4]).replace(',',''))
        else:
            mdl_sites.append(site)
    print "-- Found %s urls" % len(mdl)
    for row in mdl_sites:
        decisor(row)
def vxvault(soup):
print "- Fetching from VXVault"
vxv=[]
for row in soup('pre'):
vxv = row.string.split('\r\n')
del vxv[:4]
del vxv[-1]
print "-- Found %s urls" % len(vxv)
for row in vxv:
decisor(row)
def malc0de(soup):
    print "- Fetching from Malc0de"
    mlc=[]
    for row in soup('description'):
        mlc.append(row)
    del mlc[0]
    mlc_sites=[]
    for row in mlc:
        site = re.sub('&amp;','&',str(row).split()[1]).replace(',','')
        mlc_sites.append(site)
    print "-- Found %s urls" % len(mlc_sites)
    for row in mlc_sites:
        decisor(row)
def malwarebl(soup):
print "- Fetching from Malware Black List"
mbl=[]
for row in soup('description'):
site = str(row).split()[1].replace(',','')
mbl.append(site)
print "-- Found %s urls" % len(mbl)
for row in mbl:
decisor(row)
def minotaur(soup):
print "- Fetching from NovCon Minotaur"
min=[]
for row in soup('td'):
try:
if re.match('http',row.string):
min.append(row.string)
except:
pass
print "-- Found %s urls" % len(min)
for row in min:
decisor(row)
def sacour(soup):
print "- Fetching from Sacour.cn"
for url in soup('a'):
min=[]
if re.match('list/',url['href']):
suburl = parse('http://www.sacour.cn/'+url['href'])
for text in suburl('body'):
for urls in text.contents:
if re.match('http://',str(urls)):
min.append(str(urls))
if len(min) > 0:
print "-- Found %s urls in %s" % (len(min),url['href'])
for row in min:
decisor(row)
if __name__ == "__main__":
print "Malware Parser v0.4"
try:
if sys.argv[1] == '-t':
loadthug()
except:
print "- Thug analysis not enabled (use -t to enable thug)"
#source list
minotaur(parse('http://minotauranalysis.com/malwarelist-urls.aspx'))
malwaredl(parse('http://www.malwaredomainlist.com/hostslist/mdl.xml'))
vxvault(parse('http://vxvault.siri-urz.net/URL_List.php'))
malc0de(parse('http://malc0de.com/rss'))
malwarebl(parse('http://www.malwareblacklist.com/mbl.xml'))
sacour(parse('http://www.sacour.cn/showmal.asp?month=%d&year=%d' % (now.month, now.year)))
I got hold of two Python scripts that collect malware samples, and both work fine.
The first script saves its downloads under /tmp/malware,
and the second one saves them under /opt/malware.
Reading through the code, both work the same way: they scrape malware-tracking sites and download the samples listed there.
If one of the source sites is down for maintenance, errors can pop up, but a small code change takes care of that.
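The extended script already wraps every source in try/except, so a dead feed is just logged and skipped; the plain second script calls them one after another, so a single unreachable site aborts the whole run. A minimal sketch (my own edit, reusing that script's existing functions and URLs) of how its __main__ calls could be wrapped:

# wrap each source so an unreachable or redesigned site is skipped
# instead of killing the whole crawl (add the remaining sources the same way)
sources = [
    ('Minotaur',            'http://minotauranalysis.com/malwarelist-urls.aspx',  minotaur),
    ('Malware Domain List', 'http://www.malwaredomainlist.com/hostslist/mdl.xml', malwaredl),
    ('VXVault',             'http://vxvault.siri-urz.net/URL_List.php',           vxvault),
    ('Malc0de',             'http://malc0de.com/rss',                             malc0de),
]
for name, url, handler in sources:
    try:
        handler(parse(url))
    except Exception, e:
        print "- Couldn't load %s (%s), skipping" % (name, e)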
I added a hard drive, mounted it at /temp, pointed the scripts there (see the sketch below), and let them run.
(My SSD was feeling the space pressure.)
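For the extended script that is just the -d option (e.g. -d /temp/malware); the plain script hardcodes its destination inside decisor(), so the one-line change looks like this (sketch, assuming the new disk is mounted at /temp):

# in the plain script's decisor(): archive to the extra disk instead of the SSD
dest = '/temp/malware/unsorted/'+filetype    # was '/opt/malware/unsorted/'+filetype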
After a few days of collecting I had around 28,000 samples.
Far too many to analyze one by one.
(It's past 31,000 now, so I've stopped collecting.)
Each file is saved with its MD5 hash as the filename.
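Since everything ends up as unsorted/<file type>/<md5>, a small inventory script (my own sketch; adjust base to wherever your crawler dumps files) can count the haul per file type and confirm the filenames really are the MD5 of the contents:

#!/usr/bin/python
# count collected samples per file type and verify filename == md5(contents)
import hashlib
import os

base = '/temp/malware/unsorted'   # assumed dump directory, change as needed
total = 0
for ftype in sorted(os.listdir(base)):
    d = os.path.join(base, ftype)
    if not os.path.isdir(d):
        continue
    names = os.listdir(d)
    total += len(names)
    mismatches = 0
    for n in names:
        data = open(os.path.join(d, n), 'rb').read()
        if hashlib.md5(data).hexdigest() != n:
            mismatches += 1
    print "%-10s %6d samples, %d name/hash mismatches" % (ftype, len(names), mismatches)
print "total: %d samples" % total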
That said, I wouldn't call everything in there malware.
Cracks, keygens, and packed files also show up flagged as malicious!!
(They're not exactly benign either, to be fair.)
If you're into malware analysis, these are handy sources to pull samples from.
Just don't download as much as I did... your drive will burst.