Source code for domiporta.category

#!/usr/bin/python
# -*- coding: utf-8 -*-

import logging

from bs4 import BeautifulSoup

import domiporta
import domiporta.utils

# Module-level logger. It is named after this file's path (``__file__``).
# NOTE(review): ``__name__`` is the conventional logger name - confirm whether
# the path-based name is relied upon before changing it.
log = logging.getLogger(__file__)
# Runs at import time: sets up root-logger handling at DEBUG level (basicConfig
# does nothing if the root logger already has handlers configured).
logging.basicConfig(level=logging.DEBUG)


def get_category(url=None, category='nieruchomosci', transaction_type='wszystkie', voivodeship=None, city=None,
                 street=None, filters=None):
    """
    Parses available offer urls from given category search page

    Every result page is requested by appending a ``PageNumber=<n>`` query
    parameter to the search url. The number of pages is read from the markup
    of the first page.

    :param url: Url to search web page. When None, it is built from the remaining arguments
    :param category: Type of property of interest (Mieszkanie/Dom/Garaż/Działka)
    :param transaction_type: Type of transaction
    :param voivodeship: Voivodeship
    :param city: City
    :param street: Street
    :param filters: Dictionary with additional filters
    :type url: str, None
    :type category: str, None
    :type transaction_type: str, None
    :type voivodeship: str, None
    :type city: str, None
    :type street: str, None
    :type filters: dict, None
    :return: List of urls of all offers for given parameters
    :rtype: list
    """
    if url is None:
        url = domiporta.utils.get_url(category, transaction_type, voivodeship, city, street, filters)

    # The base url is the same for every page, so the separator is chosen once.
    join_param = '&' if '?' in url else '?'

    page, max_number_page, offers = 1, None, []
    while max_number_page is None or page <= max_number_page:
        page_url = "{0}{1}PageNumber={2}".format(url, join_param, page)
        offers_urls, markup = get_offers_from_category(page_url)
        offers += offers_urls
        if page == 1:
            max_number_page = domiporta.utils.get_max_number_page(markup)
            if max_number_page is None:
                # None doubles as the "not known yet" marker in the loop
                # condition; without this guard a missing page count would
                # keep the loop requesting pages forever.
                break
        page += 1
    return offers


def get_offers_from_category(url):
    """
    Parses available offer urls from given category from given page

    Offers are the ``div`` elements with class ``detail-card``; the url of an
    offer is the ``href`` of the first anchor inside its card. Cards without
    an anchor, or whose anchor has no ``href``, are skipped.

    :param url: Defined url for Domiporta page with offers
    :type url: str
    :return: Tuple of (list of offer urls from given page, parsed markup of that page)
    :rtype: tuple
    """
    markup = BeautifulSoup(domiporta.utils.get_content_from_source(url), 'html.parser')
    offers_urls = []
    for offer in markup.find_all('div', class_='detail-card'):
        link = offer.find('a')
        # find() returns None when the card has no anchor; calling .get() on
        # it would raise AttributeError and abort the whole page.
        href = link.get('href') if link is not None else None
        if href:
            offers_urls.append(href)
    return offers_urls, markup