Poogle

by Vaporice on July 4th, 2009
No notes
Syntax: Python
Show lines - Hide lines - Show in textbox - Download
from socket import *
import sys
import os
 
# Values for the individual HTTP request headers.
USERAGENT = "Poogle (X11; U; Linux i686; en; rv:1.9.0.7) Poogle"
ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
ACCEPT_LANGUAGE = "en-us,en;q=0.5"
# The next two are defined here but are not part of default_header below.
ACCEPT_ENCODING = "gzip,deflate"
ACCEPT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7"

# Header fields sent with every request. This dict is shared module state:
# Google.recv_response() rewrites "Host" and adds "Cookie" in place.
default_header = {
	"Host": "images.google.com",
	"User-Agent": USERAGENT,
	"Accept": ACCEPT,
	"Accept-Language": ACCEPT_LANGUAGE,
	"Keep-Alive": "300",
	"Connection": "keep-alive",
}
 
 
def MakeHeader(objects,start="GET / HTTP/1.1"):
	"""Build the text of an HTTP request head.

	objects -- dict mapping header field names to their (string) values
	start   -- the request line, written first

	Returns the request line followed by one "Name: value" line per entry,
	every line terminated by CRLF, and a final empty line.
	"""
	pieces = [start]
	for name, value in objects.items():
		pieces.append(name + ": " + value)
	# Joining supplies the CRLF between lines; the tail closes the last
	# line and adds the blank line that ends the header section.
	return "\r\n".join(pieces) + "\r\n\r\n"
 
def StripHeader(message):
	"""Parse the header section of an HTTP message into a dict.

	message -- raw message text; everything from the first blank line
	           (CRLF CRLF) onwards is ignored. If there is no blank line
	           the whole text is treated as the header section.

	Returns a dict mapping field names to values. The first line (status or
	request line) is skipped, lines without a colon are ignored, and when a
	field name occurs more than once the last occurrence wins. Surrounding
	whitespace is removed from the values. An empty message gives {}.
	"""
	header_end = message.find("\r\n\r\n")
	# find() returns -1 when the terminator is absent; slicing with that
	# would silently chop the last character off the message.
	if header_end >= 0:
		message = message[:header_end]
	lines = message.replace("\r", "").splitlines()
	obj = {}
	for line in lines[1:]:
		name, colon, value = line.partition(":")
		if colon:
			# Strip instead of assuming exactly one space after the colon.
			obj[name] = value.strip()
	return obj
def StripBody(message):
	"""Split off the part of an HTTP message that follows the header section.

	message -- raw message text

	Returns a tuple (body, header_end). header_end is the index of the
	CRLF CRLF that ends the header section, or -1 if there is none. body
	is everything after that terminator; it is "" when nothing follows or
	when no terminator was found.
	"""
	header_end = message.find("\r\n\r\n")
	if header_end < 0:
		# No complete header section yet, so nothing can be called a body.
		return "", header_end
	# The terminator is 4 characters long; whatever follows, even a single
	# character, belongs to the body.
	return message[header_end+4:], header_end
 
class Google:
	"""Small HTTP client that talks to Google Images over a raw TCP socket.

	Requests are built with MakeHeader() from the module-level
	default_header dict; recv_response() also updates that dict (Cookie,
	Host) from the responses it reads.
	"""
	def __init__(self):
		"""Connect to images.google.com:80 and send an initial "GET /" request.

		The response to that request is not read here.
		"""
		self.google = socket(AF_INET,SOCK_STREAM)
		self.google.connect(("images.google.com",80))
		self.google.send(MakeHeader(default_header))
		self.debug = 0
		self.mode = "images"
	def send(self,message):
		"""Send message on the underlying socket unchanged."""
		self.google.send(message)
	def GetPage(self,page):
		"""Send a GET request for "/<page>" using the current default_header."""
		self.google.send(MakeHeader(default_header, "GET /%s HTTP/1.1"%page))
	def Search(self,keyword,page):
		"""Request one page of image results and return recv_response()'s result.

		keyword -- search term; it is placed into the URL as-is
		page    -- zero-based page number; the request uses start=page*18
		           and ndsp=18

		The current safe-search mode (see SetSafeSearch) is sent as the
		"safe" parameter.
		"""
		self.GetPage("images?gbv=2&hl=en&q=%s&sa=N&start=%s&ndsp=18&safe=%s"%(keyword,str(page*18),self.mode))
		return self.recv_response()
	def Debug(self,message):
		"""Print message, but only while the debug flag is 1."""
		if self.debug == 1:
			# Single-argument call form prints the same on Python 2 and 3.
			print(message)
	def SetDebug(self,mode):
		"""Set the debug flag; Debug() prints only when it is 1."""
		self.debug = mode
	def SetSafeSearch(self,mode):
		"""Set the value sent as the "safe" query parameter by Search()."""
		self.mode = mode
	@staticmethod
	def _chunk_size(size_line):
		"""Return the integer value of a chunked-encoding size line.

		The size is hexadecimal; anything after a ";" (a chunk extension)
		is ignored. Raises ValueError if the line is not a hex number.
		"""
		return int(size_line.split(";")[0].strip(), 16)
	@staticmethod
	def _line_from_end(text,n):
		"""Return the n-th line of text counted from the end (1 = last).

		Returns None when text has fewer than n lines.
		"""
		lines = text.splitlines()
		if len(lines) < n:
			return None
		return lines[-n]
	def recv_response(self,buffer=2048):
		"""Read one response from the socket and return its body text.

		buffer -- maximum number of bytes requested per recv() call

		Side effects on the module-level default_header:
		  * a Set-Cookie header is stored as "Cookie" (with ";FF=4" appended
		    when the safe-search mode is "off" and it is not already there);
		  * a Location header containing "www" replaces "Host" with the
		    text from "www" up to, but not including, the last character.

		Return value:
		  * without a Transfer-Encoding header: whatever body text arrived
		    in the first recv();
		  * with Transfer-Encoding, if the data read so far already has "0"
		    as its second-to-last line: that data, unmodified;
		  * with "Transfer-Encoding: chunked" otherwise: the text assembled
		    by the chunk loop below;
		  * with any other Transfer-Encoding value: "".

		Raises ValueError if a chunk-size line is not hexadecimal.
		"""
		response = self.google.recv(buffer)
		header = StripHeader(response)
		body = StripBody(response)[0]
		if "Set-Cookie" in header:
			cookie = header["Set-Cookie"]
			if self.mode == "off" and cookie.find("FF=4") < 0:
				cookie += ";FF=4"
			default_header["Cookie"] = cookie
		if "Location" in header:
			location = header["Location"]
			www_at = location.find("www")
			# Without this check a Location lacking "www" would set Host to
			# an empty string.
			# NOTE(review): the slice drops the last character, presumably
			# a trailing "/"; a Location with a path would leave that path
			# in Host -- confirm against real redirects.
			if www_at >= 0:
				default_header["Host"] = location[www_at:len(location)-1]
		if "Transfer-Encoding" not in header:
			return body
		while len(body) == 0:
			body = self.google.recv(buffer)
			if not body:
				# recv() returning nothing means the peer closed the
				# connection; looping again would never terminate.
				return body
		if self._line_from_end(body, 2) == "0":
			return body
		rbody = body
		body = ""
		if header["Transfer-Encoding"] != "chunked":
			return body
		while 1:
			if not rbody:
				# Connection closed before the terminating zero-size chunk.
				break
			# A chunk is "<hex size>CRLF<data>CRLF". Split at the first CRLF
			# rather than assuming the size line has a fixed width.
			size_line, crlf, bodychunk = rbody.partition("\r\n")
			# int(..., 16) replaces exec() on text received from the
			# network, which would run whatever the server sent.
			length = self._chunk_size(size_line)
			if length == 0:
				break
			if bodychunk[len(bodychunk)-2:len(bodychunk)] == "\r\n":
				body += bodychunk[0:len(bodychunk)-2]
			else:
				body += bodychunk
			# Bytes of this chunk (data plus its trailing CRLF) still unread.
			missing = length-len(bodychunk)+2
			while missing > 0:
				rrbody = self.google.recv(missing)
				if not rrbody:
					break
				missing -= len(rrbody)
				body += rrbody
			if self._line_from_end(body, 2) == "0":
				if body[len(body)-2:len(body)] == "\r\n":
					body = body[0:len(body)-2]
				break
			elif self._line_from_end(body, 1) == "0":
				break
			rbody = self.google.recv(buffer)
		return body
 
def GetUrlList(google,keyword,safesearch):
	"""Collect the image URLs found on the result pages for keyword.

	google     -- object providing SetSafeSearch(mode) and
	              Search(keyword, page) -> page text
	keyword    -- search term handed to google.Search() unchanged
	safesearch -- mode handed to google.SetSafeSearch() before searching

	Up to 60 pages are requested, starting with page 0. On each page every
	piece of text between "imgurl=" and the following "&imgrefurl" is taken
	as a URL. Paging stops early when a page is empty (or None), is
	identical to the previous page, or contains no such URL.

	Returns the URLs in the order first seen, without duplicates.
	"""
	start_marker = "imgurl="
	end_marker = "&imgrefurl"
	google.SetSafeSearch(safesearch)
	lastpage = None
	urllist = []
	seen = set()
	for pagenum in range(60):
		# Use the keyword parameter, not a module-level variable.
		page = google.Search(keyword,pagenum)
		if not page or page == lastpage:
			break
		# Remember the page so a repeated result page ends the loop.
		lastpage = page
		found = 0
		pos = 0
		while 1:
			begin = page.find(start_marker,pos)
			if begin < 0:
				break
			# Look for the end marker only after the start marker, so a
			# stray earlier "&imgrefurl" cannot produce an empty URL.
			end = page.find(end_marker,begin)
			if end < 0:
				break
			url = page[begin+len(start_marker):end]
			if url not in seen:
				seen.add(url)
				urllist.append(url)
			pos = end+len(end_marker)
			found += 1
		if found == 0:
			break
	return urllist
 
def _html_attr(text):
	"""Escape text for use inside a double-quoted HTML attribute value."""
	# "&" must be replaced first so the entities added below stay intact.
	text = text.replace("&","&amp;")
	text = text.replace('"',"&quot;")
	text = text.replace("<","&lt;")
	return text.replace(">","&gt;")

def Urllist2File(urllist,start=0,end=1000):
	"""Write an HTML gallery of the given image URLs to site.html.

	urllist    -- sequence of image URL strings
	start, end -- slice bounds; only urllist[start:end] is written

	Each URL becomes a linked 240x160 thumbnail. A "<br>" is written after
	every tenth image so the gallery is laid out in rows of ten. The file
	is created in the current working directory, replacing any existing
	site.html.
	"""
	# Nothing written here contains a newline, so text mode produces the
	# same bytes as binary mode would.
	with open('site.html',"w") as out:
		for count, url in enumerate(urllist[start:end],1):
			# The URLs come from scraped pages, so escape them before
			# placing them in attribute values.
			safe = _html_attr(url)
			out.write('<a href="%s"><img src="%s" width="240" height="160"></a>'%(safe,safe))
			if count % 10 == 0:
				out.write("<br>")
 
def wgeturllist(urllist,start=0,end=1000):
	"""Download the given image URLs into a "pictures" directory using wget.

	urllist    -- sequence of image URL strings
	start, end -- slice bounds; only urllist[start:end] is downloaded

	The URLs are written one per line to pictures/images.txt, then wget is
	run inside that directory with the file as its input list, a 2 second
	timeout and a single try per URL. wget's exit status is not checked.

	Raises OSError if the pictures directory cannot be created.
	"""
	try:
		os.makedirs("pictures")
	except OSError:
		# An already existing directory is fine; anything else is an error.
		if not os.path.isdir("pictures"):
			raise
	# Binary mode keeps the "\n" separators exactly as written.
	with open("pictures/images.txt","wb") as listing:
		for url in urllist[start:end]:
			listing.write(url+"\n")
	# The command line is a fixed string; the URLs reach wget only through
	# the list file, never through the shell.
	os.system("cd pictures && wget -i images.txt --timeout=2 --tries=1")
 
if __name__ == "__main__":
	# Interactive entry point: ask for a search term, collect the image URLs
	# that show up with safe search "off" but not with "images", then either
	# write them to an HTML file or download them.
	google = Google()
	# The constructor has already sent a request for "/"; read its response
	# before sending searches.
	google.recv_response()
	google.SetDebug(1)
	searcht = raw_input("Search: ")
	# Encode spaces for the URL. (A backslash in front of the percent sign
	# would end up in the query literally.)
	search_term = searcht.replace(" ","%20")
	urllistimages = GetUrlList(google,search_term,"images")
	urllistoff = GetUrlList(google,search_term,"off")
	# Keep only the URLs that the "images" mode did not return.
	filtered = set(urllistimages)
	urllist = [x for x in urllistoff if x not in filtered]
	print("1 - Output search to file (don't download pictures)")
	print("2 - Download all Pictures to a folder called pictures")
	# Read the answers as text and convert explicitly; input() would
	# evaluate whatever the user types as Python code.
	choice = raw_input("> ").strip()
	start = 0
	end = 1000
	# Named so that the builtin range() is not overwritten.
	wants_range = raw_input("Do you want to specify a range?(y/n)")
	if wants_range == "y":
		start = int(raw_input("Start: "))
		end = int(raw_input("End: "))
	if choice == "1":
		Urllist2File(urllist,start,end)
	elif choice == "2":
		wgeturllist(urllist,start,end)

Leave a Reply

Note: XHTML is allowed. Your email address will never be published.

Subscribe to this comment feed via RSS