I have the following code which scrapes a website for divs with the class "odd" or "even". I'd like to make "odd" and "even" an argument my function takes in which would allow me to add other divs as well. Here is my code:
#
# Imports
#
import urllib2
from bs4 import BeautifulSoup
import re
import os
from pprint import pprint
#
# library
#
def get_soup(url):
page = urllib2.urlopen(url)
contents = page.read()
soup = BeautifulSoup(contents, "html.parser")
body = soup.findAll("tr", ["even", "odd"])
string_list = str([i for i in body])
return string_list
def save_to_file(path, soup):
with open(path, 'w') as fhandle:
fhandle.write(soup)
#
# script
#
def main():
url = r'URL GOES HERE'
path = os.path.join('PATH GOES HERE)
the_soup = get_soup(url)
save_to_file(path, the_soup)
if __name__ == '__main__':
main()
I'd like to incorporate *args into the code. So the get_soup function would look like this:
def get_soup(url, *args):
page = urllib2.urlopen(url)
contents = page.read()
soup = BeautifulSoup(contents, "html.parser")
body = soup.findAll("tr", [args])
string_list = str([i for i in body])
return string_list
def main():
url = r'URL GOES HERE'
path = os.path.join('PATH GOES HERE)
the_soup = get_soup(url, "odd", "even")
save_to_file(path, the_soup)
Unfortunately, this isnt working. Ideas?
Aucun commentaire:
Enregistrer un commentaire