# Recovered from upstream commit a8701a6 (PR #250, prairie-guy, 2020-11-29):
# DuckDuckGo counterpart of `search_images_bing` that needs no API key.
def search_images_ddg(key, max_n=200):
    """Search DuckDuckGo Images for `key` and return up to `max_n` unique image URLs.

    (Adapted from https://github.com/deepanprabhu/duckduckgo-images-api)

    Args:
        key: The search term.
        max_n: Maximum number of *unique* image URLs to return.

    Returns:
        An `L` of image URLs in the order DuckDuckGo returned them, without
        duplicates. Fewer than `max_n` URLs are returned when the search runs
        out of results or the endpoint keeps failing. Returns `None` (after
        printing a message) when the search token cannot be parsed from the
        DuckDuckGo landing page.

    Raises:
        requests.RequestException: If the initial token request fails.
    """
    import time  # local import: only needed for the pause between retries

    if max_n < 1:
        return L([])

    url = 'https://duckduckgo.com/'
    timeout = 10      # seconds allowed for each HTTP request
    max_retries = 5   # consecutive failed page fetches tolerated before giving up

    # The landing page embeds a `vqd` token that the image endpoint requires.
    res = requests.post(url, data={'q': key}, timeout=timeout)
    token = re.search(r'vqd=([\d-]+)\&', res.text)
    if not token:
        print('Token Parsing Failed !')
        return

    request_url = url + 'i.js'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'}
    params = (('l', 'us-en'), ('o', 'json'), ('q', key), ('vqd', token.group(1)),
              ('f', ',,,'), ('p', '1'), ('v7exp', 'a'))

    urls = {}      # dict used as an insertion-ordered set: dedupes, keeps search order
    failures = 0
    while True:
        try:
            res = requests.get(request_url, headers=headers, params=params, timeout=timeout)
            data = json.loads(res.text)
            results = data['results']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Transient failures (network error, non-JSON reply, unexpected
            # payload) are retried a bounded number of times instead of forever.
            failures += 1
            if failures >= max_retries:
                break
            time.sleep(1)
            continue

        failures = 0
        for obj in results:
            image = obj.get('image')
            if image:
                urls[image] = None
            # Count unique URLs so that duplicates do not shrink the result.
            if len(urls) >= max_n:
                return L(list(urls))

        if 'next' not in data:
            break
        request_url = url + data['next']

    return L(list(urls))