抓取赶集app数据

努力尽今夕,少年犹可夸。这篇文章主要讲述抓取赶集app数据相关的知识,希望能为你提供帮助。

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Query the Ganji app endpoint for a post list and each post's detail record.

The list request searches post titles for the URL-encoded keyword
"%E5%95%86%E9%93%BA%E5%87%BA%E5%94%AE" (UTF-8 for "商铺出售"); every post in
the response is then looked up individually by its ``puid``.
"""
import json
import time

import requests

url = "https://app.ganji.com/datashare/"

# Headers sent with every request. "interface" selects the remote operation;
# the list call uses the value below and the detail call overrides it.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "userid": "C1ED10776D9B6108D8FEFEE4EA53058A",
    "model": "Generic/iphone",
    "customerid": "705",
    "clientagent": "iPhone 6S Plus#414*736#11.0.3",
    "versionid": "8.3.0",
    "os": "ios",
    "net": "wifi",
    "dv": "iPhone 6S Plus",
    "interface": "SearchPostsByJson3",
    "accept-language": "zh-cn",
}


def req(url, headers, data):
    """POST ``data`` to ``url`` and return the decoded JSON response.

    Args:
        url: Endpoint to post to.
        headers: Mapping of HTTP headers for this request.
        data: Request body, passed to ``requests.post`` unchanged.

    Returns:
        The parsed JSON body, or ``None`` when the request fails or the
        response is not valid JSON (the error is printed, not raised).
    """
    content = None
    try:
        r = requests.post(url, headers=headers, data=data, timeout=5)
        content = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that cannot be decoded as JSON.
        print("requests error: ", e, "requests url: ", url)
    return content


def get_ganji_list_data():
    """Fetch the list data.

    Returns:
        The parsed JSON response of the list request, or ``None`` on failure.
    """
    # Sent verbatim as the form body: the keyword is already percent-encoded,
    # so it must not be passed through another URL-encoding step.
    # NOTE(review): the doubled "&&" after ``t`` is kept from the original
    # payload - confirm whether the server needs it.
    data = (
        't=-576747455&&showType=0&showtype=0&jsonArgs='
        '{"pageSize":20,"cityScriptIndex":2300,"majorCategoryScriptIndex":7,'
        '"queryFilters":[],"categoryId":7,'
        '"andKeywords":[{"name":"title",'
        '"value":"%E5%95%86%E9%93%BA%E5%87%BA%E5%94%AE"}],'
        '"customerId":"705",'
        '"sortKeywords":[{"field":"post_at","sort":"desc"}],'
        '"pageIndex":1}'
    )
    return req(url, headers, data)


def _extract_post_fields(data):
    """Pick the fields of interest out of one detail record."""
    # assumes ``data["price"]`` is a mapping with "v" (value) and "u" (unit)
    # keys, as the original code indexed it - TODO confirm against the API.
    return {
        "price": data["price"]["v"],
        "price_unit": data["price"]["u"],
        "title": data["title"],
        "city": data["city"],
        "description": data["description"],
        "district_name": data["district_name"],
        "street_name": data["street_name"],
        "latlng": data["latlng"],
        "id": data["id"],
    }


def get_article_data():
    """Fetch the post list, then the detail record of every listed post.

    Prints the total count and each post's title and ``d_sign`` as it goes,
    and sleeps two seconds between detail requests.

    Returns:
        A list with one dict of extracted fields per post whose detail
        request succeeded with ``status == 0``; empty if the list request
        failed.
    """
    results = []
    ganji_data = get_ganji_list_data()
    if ganji_data is None:
        return results

    data_list = ganji_data["posts"]
    print("count: ", ganji_data["total"])

    for data_ in data_list:
        title, d_sign, puid = data_["title"], data_["d_sign"], data_["puid"]
        print(title, d_sign)
        data_article = (
            "d_sign={0}&cityId=176&post_type_for_maidian=5"
            "&categoryId=7&spfy=0".format(d_sign)
        )

        # Fetch the detail record by puid; the puid must be sent in the
        # headers. A copy is used so the shared ``headers`` keep the list
        # interface for any later list request.
        detail_headers = dict(headers, interface="GetPostByPuid", puid=puid)
        content_data = req(url, detail_headers, data_article)

        # ``req`` returns None on failure, so guard before indexing.
        if content_data is not None and content_data["status"] == 0:
            results.append(_extract_post_fields(content_data["data"]))

        time.sleep(2)

    return results

header 里的字段真多,最终测试发现只需要这几个,累死宝宝了。
【抓取赶集app数据】抓取赶集app数据

    推荐阅读