Web Scraping with Python

Web scraping is the process of extracting data from websites. Python offers libraries such as requests (for fetching pages) and BeautifulSoup, from the beautifulsoup4 package (for parsing HTML), that make this straightforward. Scraping is useful for gathering web data for analysis, automation, or building datasets.

Why Use Web Scraping?

Websites often present useful data only as HTML pages. Scraping lets you collect that data programmatically, whether you are building a dataset, automating a repetitive task, or feeding an analysis pipeline.

10 Practical Web Scraping Examples

The snippets below use https://example.com as a placeholder; replace it with the site you actually want to scrape, since the placeholder page will not contain most of the elements (tables, ids, specific classes) that the later examples look for.

import requests
from bs4 import BeautifulSoup

# Example 1: Simple GET request
response = requests.get("https://example.com")
print(response.status_code)

# Example 2: Parse HTML content
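# 'html.parser' is Python's built-in parser; 'lxml' or 'html5lib' can be used instead if installed.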
soup = BeautifulSoup(response.text, 'html.parser')
print(soup.title.string)

# Example 3: Find all links
links = soup.find_all('a')
for link in links:
    print(link.get('href'))

# Example 4: Find element by id
header = soup.find(id="header")
print(header.text if header else "No header found")

# Example 5: Find elements by class
items = soup.find_all(class_="item")
print([item.text for item in items])

# Example 6: Scrape multiple pages (pagination example)
for page in range(1, 4):
    url = f"https://example.com/page/{page}"
    r = requests.get(url)
    sp = BeautifulSoup(r.text, 'html.parser')
    print(f"Page {page} title:", sp.title.string)

# Example 7: Extract table data
table = soup.find('table')
rows = table.find_all('tr') if table else []
for row in rows:
    cols = [ele.text.strip() for ele in row.find_all('td')]
    print(cols)

# Example 8: Use CSS selectors
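# select() takes CSS selector syntax; this matches <li> items inside a <ul> that is a direct child of <div class="content">.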
items = soup.select('div.content > ul > li')
print([item.text for item in items])

# Example 9: Handle headers and user-agent
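# requests sends a default 'python-requests' User-Agent, which some sites block; a browser-like value often avoids that.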
headers = {'User-Agent': 'Mozilla/5.0'}
resp = requests.get("https://example.com", headers=headers)
print(resp.status_code)

# Example 10: Save scraped data to file
with open("output.html", "w", encoding="utf-8") as f:
    f.write(soup.prettify())
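
Putting It Together

The pieces above can be combined into a small helper that fetches a page and returns parsed HTML. The sketch below is illustrative rather than part of the original examples: the fetch_soup name, the 10-second timeout, and the raise_for_status() call are assumptions layered on the same requests and BeautifulSoup calls used throughout.

import requests
from bs4 import BeautifulSoup

def fetch_soup(url, headers=None, timeout=10):
    """Fetch a URL and return a BeautifulSoup object, raising on HTTP errors."""
    # Illustrative helper: the name, timeout, and error handling are assumptions, not part of the original examples.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # raises an HTTPError for 4xx/5xx responses
    return BeautifulSoup(response.text, 'html.parser')

# Example usage, reusing the browser-like User-Agent from Example 9
soup = fetch_soup("https://example.com", headers={'User-Agent': 'Mozilla/5.0'})
print(soup.title.string if soup.title else "No title found")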