How to open a new webpage with selenium (and close the older) in Python? -
I want to scrape a website with Selenium in Python 2.7, wait a little bit, and then close the browser and geckodriver.exe (because I don't want to leave millions of browser windows and .exe processes open).
Is there a way to do this?
My code, with comments:
from bs4 import beautifulsoup selenium import webdriver import time import urllib2 import unicodecsv csv import os import sys import io import time import datetime import pandas pd bs4 import beautifulsoup import mysqldb import re import contextlib import selenium.webdriver.support.ui ui #i create new csv file filename=r'output.csv' resultcsv=open(filename,"wb") output=csv.writer(resultcsv, delimiter=';',quotechar = '"', quoting=csv.quote_nonnumeric, encoding='latin-1') #i opening website selenium (js website) profile=webdriver.firefoxprofile() profile.set_preference("intl.accept_languages","en-us") driver = webdriver.firefox(firefox_profile=profile) driver.get("https://www.flightradar24.com/data/airports/bud/arrivals") time.sleep(10) html_source=driver.page_source soup=beautifulsoup(html_source,"html.parser") print soup #here webscraping informations needed , #after writing csv file. output.writerows(datatable) resultcsv.close() #and question start here. want close sesseion, #wait little, example 10 sec, because needed time web- #scraping dataes after close geckodriver + firefox, , after #repeat code new website. possible? updated code - nutmeg64
I get this error message:
File "c:/python27/air17.py", line 43, in <module> scrape(urls) File "c:/python27/air17.py", line 28, in scrape table = soup.find('table', { "class" : "table table-condensed table-hover data-table m-n-t-15" }) NameError: global name 'soup' is not defined
from bs4 import beautifulsoup selenium import webdriver import time import urllib2 import unicodecsv csv import os import sys import io import time import datetime import pandas pd bs4 import beautifulsoup import mysqldb import re import contextlib import selenium.webdriver.support.ui ui filename=r'output.csv' resultcsv=open(filename,"wb") output=csv.writer(resultcsv, delimiter=';',quotechar = '"', quoting=csv.quote_nonnumeric, encoding='latin-1') def scrape(urls): browser = webdriver.firefox() url in urls: browser.get(url) html = browser.page_source soup=beautifulsoup(html,"html.parser") table = soup.find('table', { "class" : "table table-condensed table-hover data-table m-n-t-15" }) datatable=[] record in table.find_all('tr', class_="hidden-xs hidden-sm ng-scope"): temp_data = [] data in record.find_all("td"): temp_data.append(data.text.encode('latin-1')) datatable.append(temp_data) output.writerows(datatable) resultcsv.close() time.sleep(10) browser.quit() urls = ["https://www.flightradar24.com/data/airports/bud/arrivals", "https://www.flightradar24.com/data/airports/fco/arrivals"] scrape(urls)
Put the Selenium part in a function and call it with a different URL each time, sleeping 10 seconds between iterations.
By the way, that is not the ideal solution. You only need to open Selenium once, read the page source, and then call browser.get(new_url) for the next page. After all the scraping is done, call browser.quit() to release it.
For example (very simplified):
def scrape(urls): browser = webdriver.firefox() url in urls: browser.get(url) html = browser.page_source # scrape html # create csv file specific url # write results csv , close time.sleep(10) # <-- not necessary. scraping , writing csv long enough break browser.quit() urls = ["http://example.com", "http://notarealwebsite.co.uk", "http://lastwebpagetoscrape.com" ] scrape(urls)
Comments
Post a Comment