# Poogle
# No notes
# Syntax: Python
"""Poogle: collect image URLs from Google Images result pages.

The script talks HTTP/1.1 over a plain socket to images.google.com, fetches
result pages for a search term, extracts the ``imgurl=`` values from them and
then either writes an HTML gallery (``site.html``) or downloads the images
with ``wget`` into a ``pictures`` directory.

Response text is handled as ``str`` throughout; bytes are converted only at
the socket and file boundaries so the module works on Python 2 and Python 3.
"""
from socket import *
import sys
import os

try:  # Python 3
    from urllib.parse import quote
except ImportError:  # Python 2
    from urllib import quote

try:  # Python 2: raw_input returns the typed line without evaluating it
    _read_line = raw_input
except NameError:  # Python 3: input already behaves that way
    _read_line = input

USERAGENT = "Poogle (X11; U; Linux i686; en; rv:1.9.0.7) Poogle"
ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
ACCEPT_LANGUAGE = "en-us,en;q=0.5"
ACCEPT_ENCODING = "gzip,deflate"
ACCEPT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7"

# Request headers sent with every request.  recv_response() mutates this dict
# (adds "Cookie", may replace "Host") based on what the server answers.
default_header = {
    "Host": "images.google.com",
    "User-Agent": USERAGENT,
    "Accept": ACCEPT,
    "Accept-Language": ACCEPT_LANGUAGE,
    "Keep-Alive": "300",
    "Connection": "keep-alive",
}


def _to_bytes(text):
    """Return *text* as bytes (latin-1), leaving byte strings untouched."""
    if isinstance(text, bytes):
        return text
    return text.encode("latin-1")


def _to_text(data):
    """Return *data* as ``str``.

    latin-1 maps every byte to exactly one character, so lengths taken from
    HTTP framing (chunk sizes, Content-Length) stay valid on the decoded text.
    """
    if isinstance(data, str):
        return data
    return data.decode("latin-1")


def _find_header(fields, name):
    """Look up header *name* in *fields* ignoring case; None if absent."""
    if name in fields:
        return fields[name]
    wanted = name.lower()
    for key, value in fields.items():
        if key.lower() == wanted:
            return value
    return None


def _host_from_location(location):
    """Return the host part of an absolute URL, or "" for a relative one."""
    scheme, separator, rest = location.partition("://")
    if not separator:
        return ""
    return rest.split("/", 1)[0]


def _escape_attr(text):
    """Escape the characters that could break out of a quoted HTML attribute.

    "&" is deliberately left alone: the scraped URLs may already contain
    entities, and escaping them again would change the link target.
    """
    return text.replace('"', "&quot;").replace("<", "&lt;").replace(">", "&gt;")


def MakeHeader(objects, start="GET / HTTP/1.1"):
    """Build an HTTP request head.

    objects -- mapping of header name to header value
    start   -- the request line

    Returns the request line, one "Name: value" line per header and the
    terminating blank line, all separated by CRLF.
    """
    lines = [start]
    lines.extend("%s: %s" % (name, value) for name, value in objects.items())
    return "\r\n".join(lines) + "\r\n\r\n"


def StripHeader(message):
    """Parse the header fields of an HTTP message into a dict.

    Everything after the first blank line (CRLF CRLF) is ignored; if there is
    no blank line the whole message is treated as header text.  The first
    line (status/request line) is skipped.  Lines without a colon are
    ignored, and a later field with the same name replaces an earlier one.
    """
    header_end = message.find("\r\n\r\n")
    if header_end >= 0:
        message = message[:header_end]
    lines = message.replace("\r", "").splitlines()
    fields = {}
    for line in lines[1:]:
        name, colon, value = line.partition(":")
        if colon:
            # Whitespace around the value is optional in HTTP, so strip it
            # instead of assuming exactly one space after the colon.
            fields[name] = value.strip()
    return fields


def StripBody(message):
    """Split off the body of an HTTP message.

    Returns a tuple (body, header_end) where header_end is the index of the
    CRLF CRLF separator, or -1 (with an empty body) when there is none.
    """
    header_end = message.find("\r\n\r\n")
    if header_end < 0:
        return "", header_end
    return message[header_end + 4:], header_end


class Google:
    """Keep-alive HTTP connection to images.google.com.

    Attributes:
        google -- the connected socket
        debug  -- Debug() prints only when this equals 1
        mode   -- value sent as the "safe" query parameter by Search()
    """

    def __init__(self):
        """Connect to images.google.com:80 and request "/"."""
        self.debug = 0
        self.mode = "images"
        self.google = socket(AF_INET, SOCK_STREAM)
        self.google.connect(("images.google.com", 80))
        self.send(MakeHeader(default_header))

    def send(self, message):
        """Send *message* completely; return the number of bytes sent."""
        data = _to_bytes(message)
        # sendall: a plain send() may transmit only part of the request.
        self.google.sendall(data)
        return len(data)

    def GetPage(self, page):
        """Send a GET request for "/<page>" using default_header."""
        self.send(MakeHeader(default_header, "GET /%s HTTP/1.1" % page))

    def Search(self, keyword, page):
        """Request result page number *page* for *keyword*; return its body.

        *keyword* is inserted into the query string verbatim, so it must
        already be URL-encoded.
        """
        self.GetPage("images?gbv=2&hl=en&q=%s&sa=N&start=%s&ndsp=18&safe=%s"
                     % (keyword, str(page * 18), self.mode))
        return self.recv_response()

    def Debug(self, message):
        """Print *message* when debugging is switched on (debug == 1)."""
        if self.debug == 1:
            print(message)

    def SetDebug(self, mode):
        """Set the debug flag."""
        self.debug = mode

    def SetSafeSearch(self, mode):
        """Set the value used for the "safe" query parameter."""
        self.mode = mode

    def _recv_text(self, size):
        """Receive up to *size* bytes as text; "" means the peer closed."""
        return _to_text(self.google.recv(size))

    def _fill_until_line(self, pending, buffer):
        """Receive until *pending* holds a CRLF or the connection closes."""
        while "\r\n" not in pending:
            more = self._recv_text(buffer)
            if not more:
                break
            pending += more
        return pending

    def _read_exactly(self, pending, count, buffer):
        """Receive until *pending* holds *count* chars or the peer closes."""
        while len(pending) < count:
            more = self._recv_text(buffer)
            if not more:
                break
            pending += more
        return pending

    def _skip_trailers(self, pending, buffer):
        """Consume trailer lines up to the blank line ending a chunked body.

        Leaving them unread would prepend them to the next response on this
        keep-alive connection.
        """
        while True:
            pending = self._fill_until_line(pending, buffer)
            line, separator, pending = pending.partition("\r\n")
            if not separator or not line:
                return

    def _read_chunked(self, pending, buffer):
        """Decode a chunked body; *pending* is body text already received."""
        decoded = []
        while True:
            pending = self._fill_until_line(pending, buffer)
            if "\r\n" not in pending:
                break  # connection closed in the middle of the body
            size_line, separator, pending = pending.partition("\r\n")
            try:
                # The size is hexadecimal and may be followed by ";extension".
                # Parsed with int(); never evaluated as code.
                length = int(size_line.split(";", 1)[0].strip(), 16)
            except ValueError:
                break  # malformed framing: keep what was decoded so far
            if length <= 0:
                self._skip_trailers(pending, buffer)
                break
            # Every chunk is followed by a CRLF that is not part of the data.
            pending = self._read_exactly(pending, length + 2, buffer)
            decoded.append(pending[:length])
            pending = pending[length + 2:]
        return "".join(decoded)

    def _update_cookie(self, header):
        """Store a received cookie; add "FF=4" while mode is "off"."""
        cookie = _find_header(header, "Set-Cookie")
        if cookie is not None:
            default_header["Cookie"] = cookie
        if self.mode == "off" and "Cookie" in default_header:
            if "FF=4" not in default_header["Cookie"]:
                default_header["Cookie"] += ";FF=4"

    def recv_response(self, buffer=2048):
        """Read one HTTP response and return its body as text.

        buffer -- number of bytes requested per recv() call

        Side effects on default_header: a "Set-Cookie" value is stored as
        "Cookie", and the host of an absolute "Location" URL replaces "Host".
        Chunked bodies are decoded, bodies with a Content-Length are read in
        full; otherwise whatever arrived together with the header is returned.
        """
        response = self._recv_text(buffer)
        # The header block may be larger than one recv() call.
        while response and "\r\n\r\n" not in response:
            more = self._recv_text(buffer)
            if not more:
                break
            response += more
        header = StripHeader(response)
        body = StripBody(response)[0]

        self._update_cookie(header)
        location = _find_header(header, "Location")
        if location is not None:
            host = _host_from_location(location)
            if host:
                default_header["Host"] = host

        encoding = _find_header(header, "Transfer-Encoding")
        if encoding is not None and encoding.strip().lower() == "chunked":
            return self._read_chunked(body, buffer)
        declared = _find_header(header, "Content-Length")
        if declared is not None:
            try:
                expected = int(declared)
            except ValueError:
                expected = 0
            body = self._read_exactly(body, expected, buffer)
        return body


def GetUrlList(google, keyword, safesearch):
    """Collect the distinct image URLs found for *keyword*.

    google     -- object providing SetSafeSearch(mode) and Search(keyword, page)
    keyword    -- URL-encoded search term
    safesearch -- mode passed to google.SetSafeSearch()

    At most 60 result pages are requested.  Paging stops early when a page
    is identical to the previous one or contains no image URL.  URLs are
    returned in order of first appearance.
    """
    google.SetSafeSearch(safesearch)
    lastpage = "noneyet"
    urllist = []
    seen = set()
    for page_number in range(60):
        page = google.Search(keyword, page_number)
        if page == lastpage:
            break
        lastpage = page
        matches = 0
        position = 0
        while True:
            begin = page.find("imgurl=", position)
            if begin < 0:
                break
            # Search from `begin` so an earlier "&imgrefurl" cannot be picked.
            end = page.find("&imgrefurl", begin)
            if end < 0:
                break
            url = page[begin + 7:end]  # 7 == len("imgurl=")
            if url not in seen:
                seen.add(url)
                urllist.append(url)
            position = end + 11  # 11 == len("&imgrefurl=")
            matches += 1
        if matches == 0:
            break
    return urllist


def Urllist2File(urllist, start=0, end=1000):
    """Write urllist[start:end] as linked thumbnails to site.html.

    A "<br>" is written after every tenth image.
    """
    with open('site.html', "wb") as gallery:
        for count, url in enumerate(urllist[start:end], 1):
            safe_url = _escape_attr(url)
            gallery.write(_to_bytes(
                '<a href="%s"><img src="%s" width="240" height="160"></a>'
                % (safe_url, safe_url)))
            if count % 10 == 0:
                gallery.write(_to_bytes("<br>"))


def wgeturllist(urllist, start=0, end=1000):
    """Download urllist[start:end] into ./pictures using wget.

    The URLs are written one per line to pictures/images.txt, which is then
    handed to "wget -i".  The shell command is a fixed string; the URLs only
    ever appear inside that file.
    """
    if not os.path.isdir("pictures"):
        os.makedirs("pictures")
    with open("pictures/images.txt", "wb") as listing:
        for url in urllist[start:end]:
            listing.write(_to_bytes(url + "\n"))
    os.system("cd pictures && wget -i images.txt --timeout=2 --tries=1")


def main():
    """Ask for a search term and save or download the matching images.

    Only URLs that appear in the "off" results but not in the "images"
    results are kept.
    """
    google = Google()
    google.recv_response()  # consume the answer to the request sent on connect
    google.SetDebug(1)

    searcht = _read_line("Search: ")
    search_term = quote(searcht)

    urllistimages = GetUrlList(google, search_term, "images")
    urllistoff = GetUrlList(google, search_term, "off")
    already_listed = set(urllistimages)
    urllist = [url for url in urllistoff if url not in already_listed]

    print("1 - Output search to file (don't download pictures)")
    print("2 - Download all Pictures to a folder called pictures")
    choice = _read_line("> ").strip()
    start = 0
    end = 1000
    wants_range = _read_line("Do you want to specify a range?(y/n)")
    if wants_range == "y":
        start = int(_read_line("Start: "))
        end = int(_read_line("End: "))
    if choice == "1":
        Urllist2File(urllist, start, end)
    elif choice == "2":
        wgeturllist(urllist, start, end)


if __name__ == "__main__":
    main()