
TL;DR

Google Play Books hosts millions of titles - a valuable resource for research, data analysis, and content aggregation. In this article, you’ll learn how to build two Python-based scrapers: one to fetch book listings from search results, and another to extract detailed product information such as author, description, price, and ratings.
Follow these steps to scrape book data from Google Play Books and save it to a neatly structured CSV file:
Make sure you have Python installed. Then install the required libraries:
pip install requests beautifulsoup4 lxml
Import the necessary libraries:
import requests
from bs4 import BeautifulSoup
import csv
import urllib.parse
Next, create a Python file (e.g., scrape_google_play_books.py) and add the scraping logic. Here's what the code looks like:
def scrape_play_books(search_terms):
    """Scrape Google Play Books search results for given terms and save to CSV"""
    # Webshare proxy credentials (replace username:password with your own)
    proxies = {
        "http": "http://username:password@p.webshare.io:80",
        "https": "http://username:password@p.webshare.io:80"
    }
    session = requests.Session()
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    })
    all_books = []
    for term in search_terms:
        # Build the Play Store search URL for the books category
        url = f"https://play.google.com/store/search?q={urllib.parse.quote(term)}&c=books"
        try:
            response = session.get(url, proxies=proxies, timeout=10)
            soup = BeautifulSoup(response.content, 'lxml')
            # Each search result card is rendered in one of these containers
            books = soup.find_all('div', class_=['VfPpkd-WsjYwc', 'ULeU3b'])
            for book in books:
                title_elem = book.find('div', class_='Epkrse')
                link_elem = book.find('a', href=True)
                img_elem = book.find('img', src=True)
                price_elem = book.find('span', class_='VfPpfd ZdBevf')
                if title_elem:
                    book_path = link_elem['href'] if link_elem else ''
                    all_books.append({
                        'title': title_elem.get_text(strip=True),
                        'book_url': f"https://play.google.com{book_path}" if book_path.startswith('/') else book_path,
                        'cover_url': img_elem['src'] if img_elem else 'N/A',
                        'price': price_elem.get_text(strip=True) if price_elem else 'Free'
                    })
        except Exception as e:
            print(f"Error with '{term}': {e}")
    # Write data to CSV
    with open('google_play_books.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=['title', 'book_url', 'cover_url', 'price'])
        writer.writeheader()
        writer.writerows(all_books)
    print(f"CSV file created successfully with {len(all_books)} books.")

# Example call
scrape_play_books(["Productivity books", "Alex Hormozi books", "Business strategy books"])
Run the script using Python:
python scrape_google_play_books.py
Open google_play_books.csv in Excel, Google Sheets, or any spreadsheet software. You will see the columns title, book_url, cover_url, and price.
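If you'd rather verify the output without leaving Python, a minimal check with the standard csv module (assuming the file sits in the directory you ran the script from) could look like this:
import csv

with open('google_play_books.csv', newline='', encoding='utf-8') as f:
    # Print one line per scraped book to confirm the CSV was written correctly
    for row in csv.DictReader(f):
        print(row['title'], '|', row['price'])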

Let’s cover how to enrich your previously scraped book data with additional product details from individual book pages.
Here’s the complete code:
import requests, csv, json, time
from bs4 import BeautifulSoup
from itertools import islice

# Webshare proxy credentials (replace username:password with your own)
proxies = {
    "http": "http://username:password@p.webshare.io:80",
    "https": "http://username:password@p.webshare.io:80"
}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

input_file = 'google_play_books.csv'
output_file = 'enriched_google_play_books_sample.csv'
enriched = []

with open(input_file, 'r', encoding='utf-8') as infile:
    reader = csv.DictReader(infile)
    if reader.fieldnames:
        reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames]
    # Enrich the first 10 rows as a sample
    for raw_row in islice(reader, 10):
        row = {k.strip().lower(): (v or '').strip() for k, v in raw_row.items()}
        url = row.get('book_url', '')
        if not url:
            continue
        try:
            resp = requests.get(url, headers=headers, proxies=proxies, timeout=10)
            soup = BeautifulSoup(resp.content, 'html.parser')
            book_data = {}
            # Look for the schema.org Book object embedded as JSON-LD
            for script in soup.find_all('script', type='application/ld+json'):
                txt = (script.string or '').strip()
                if not txt:
                    continue
                try:
                    data = json.loads(txt)
                except Exception:
                    continue
                if isinstance(data, list):
                    book = next((d for d in data if d.get('@type') == 'Book'), None)
                    if book:
                        book_data = book
                        break
                    data = data[0] if data else {}
                if isinstance(data, dict) and (data.get('@type') == 'Book' or data.get('author') or data.get('name')):
                    book_data = data
                    break

            def parse_author(d):
                # The author field can be a dict, a list, or a plain string
                a = d.get('author') if isinstance(d, dict) else None
                if not a:
                    return 'N/A'
                if isinstance(a, list) and a:
                    a0 = a[0]
                    return a0.get('name') if isinstance(a0, dict) else str(a0)
                if isinstance(a, dict):
                    return a.get('name', 'N/A')
                return str(a)

            title = book_data.get('name') or row.get('title') or 'N/A'
            author = parse_author(book_data) if book_data else 'N/A'
            desc = book_data.get('description') if book_data else ''
            if not desc:
                # Fall back to the page's meta description
                meta = soup.find('meta', {'name': 'description'})
                desc = meta.get('content', '') if meta else ''
            description = (desc.strip()[:200] if desc else 'N/A')
            agg = book_data.get('aggregateRating', {}) if isinstance(book_data.get('aggregateRating', {}), dict) else {}
            reviews_avg = agg.get('ratingValue', 'N/A')
            reviews_count = agg.get('ratingCount', 'N/A')
            pages = book_data.get('numberOfPages') or book_data.get('pageCount') or 'N/A'
            enriched.append({
                'title': title,
                'book_url': url,
                'cover_url': row.get('cover_url', ''),
                'price': row.get('price', ''),
                'author': author,
                'reviews_avg': reviews_avg,
                'reviews_count': reviews_count,
                'description': description,
                'pages': pages
            })
        except Exception:
            # Keep the row with placeholder values if the request or parsing fails
            enriched.append({
                'title': row.get('title', 'N/A'),
                'book_url': url,
                'cover_url': row.get('cover_url', ''),
                'price': row.get('price', ''),
                'author': 'N/A',
                'reviews_avg': 'N/A',
                'reviews_count': 'N/A',
                'description': 'N/A',
                'pages': 'N/A'
            })
        time.sleep(1)  # polite delay between requests

with open(output_file, 'w', newline='', encoding='utf-8') as out:
    fieldnames = ['title', 'book_url', 'cover_url', 'price', 'author', 'reviews_avg', 'reviews_count', 'description', 'pages']
    writer = csv.DictWriter(out, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(enriched)
print("Enrichment complete.")
Run the scraper.
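Assuming you saved the second script as enrich_google_play_books.py (the filename is your choice; it isn't fixed by the code), run it the same way as the first one:
python enrich_google_play_books.py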

Your enriched CSV, enriched_google_play_books_sample.csv, will contain one row per book with the columns title, book_url, cover_url, price, author, reviews_avg, reviews_count, description, and pages.

In this guide, we demonstrated a two-step approach to scraping Google Play Books: first collecting search results for specific queries, then enriching that data with details pulled from individual book pages, such as author, page count, reviews, and descriptions, all routed through Webshare proxies. Using proxies and rotating request headers helps mimic real users and avoid detection or temporary bans. When scraping dynamically rendered pages, it's also important to add polite delays and consider other anti-detection techniques to keep data extraction reliable.
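As a closing illustration, here is a minimal sketch of header rotation combined with a randomized polite delay. The User-Agent pool, helper name, and delay range are example values, not settings used by the scripts above:
import random
import time
import requests

# Example pool of User-Agent strings to rotate through
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36',
]

proxies = {
    "http": "http://username:password@p.webshare.io:80",
    "https": "http://username:password@p.webshare.io:80"
}

def polite_get(url):
    # Pick a random User-Agent for each request and pause briefly between calls
    headers = {'User-Agent': random.choice(USER_AGENTS)}
    response = requests.get(url, headers=headers, proxies=proxies, timeout=10)
    time.sleep(random.uniform(1, 3))
    return response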