Тема: Парсинг OLX
Не можу спарсити данні з першої сторінки. Програма починає збирати інфу з другої сторінки
import requests
from bs4 import BeautifulSoup
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36"
}
def collect_products(url="https://www.olx.ua/d/uk/elektronika/noutbuki-i-aksesuary/noutbuki/?currency=UAH"):
response = requests.get(url = url, headers = headers)
data_list = []
soup = BeautifulSoup(response.text, 'lxml')
page_cout = int(soup.find('section', class_ = 'css-j8u5qq').find_all('a', class_ = 'css-1mi714g')[-1].text.strip())
print(f'[INFO] Total pages: { page_cout }')
for page in range(1, page_cout + 1):
data = {}
print(f'[INFO] Processing {page} page')
url = f"https://www.olx.ua/d/uk/elektronika/noutbuki-i-aksesuary/noutbuki/?currency=UAH"+f"&page={ page }"
response = requests.get(url = url, headers = headers)
soup = BeautifulSoup(response.text, 'lxml')
items = soup.find_all("div", {"data-cy" : "l-card"})
for item in items:
olx = 'https://www.olx.ua'
try:
link = olx + item.find('a', class_ = 'css-rc5s2u').get('href').strip()
except:
link = 'err'
try:
title = item.find('h6', class_ = 'css-1pvd0aj-Text eu5v0x0').text.strip()
except:
title = 'err'
try:
fettle = item.find('div', class_ = 'css-puf171').text.strip()
except:
fettle = 'err'
try:
price = item.find('p', class_ = 'css-1q7gvpp-Text eu5v0x0').text.strip()
except:
price = 'err'
try:
url = f"{link}"
response = requests.get(url = url, headers = headers)
soup = BeautifulSoup(response.text, 'lxml')
description = soup.find('div' , class_ = 'css-g5mtbi-Text').text.strip()
except:
description = 'err'
print(title)
print(fettle)
print(price)
print(link)
print(description)
return data_list
if __name__ == '__main__':
collect_products()