87 lines
2.5 KiB
Python
87 lines
2.5 KiB
Python
import requests
|
|
import re
|
|
import time
|
|
|
|
# from bs4 import BeautifulSoup
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
def get_api_id():
    """Scrape the release id that the Joongna web app embeds in its JS bundle.

    The function downloads the site's landing page, locates the ``_app``
    chunk referenced in the HTML, downloads that script and slices the id
    out of the ``SENTRY_RELEASE`` assignment. ``get_url`` uses the returned
    value as the path segment that follows ``_next/data/``.

    Returns:
        The 20-character string that follows the release marker.

    Raises:
        IndexError: the landing page does not reference an ``_app`` chunk
            (same exception type as before, now with a message).
        ValueError: the bundle does not contain the release marker.
            Previously this case silently returned unrelated characters.
    """
    base_url = "https://web.joongna.com/"
    bundle_pattern = r"_next/static/chunks/pages/_app.*?\.js"
    release_marker = 'iO.SENTRY_RELEASE={id:"'

    # A timeout keeps a stalled connection from hanging the scraper forever.
    landing_html = requests.get(base_url, timeout=10).text
    bundle_paths = re.findall(bundle_pattern, landing_html)
    if not bundle_paths:
        raise IndexError("no _app chunk found in the Joongna landing page")

    bundle_source = requests.get(base_url + bundle_paths[0], timeout=10).text

    marker_position = bundle_source.find(release_marker)
    if marker_position == -1:
        # str.find() signals "not found" with -1; slicing from there would
        # return garbage instead of an id.
        raise ValueError("SENTRY_RELEASE marker not found in the _app bundle")

    # NOTE(review): the offset (24) and length (20) are kept exactly as in the
    # original code. The marker itself is 23 characters long, so this starts
    # one character past the opening quote - confirm against the live bundle.
    start = marker_position + 24
    return bundle_source[start : start + 20]
|
|
|
|
|
|
def get_url(api_id, keyword, page=1):
    """Build the search-results data URL for a keyword and page.

    The keyword appears twice in the URL: as the last path segment and as
    the ``keyword`` query parameter. It is percent-encoded in both places so
    that reserved characters (``&``, ``#``, ``?``, ``/``, spaces, ...) cannot
    change the structure of the URL.

    Args:
        api_id: Id placed after ``_next/data/`` (see ``get_api_id``).
        keyword: Search keyword; converted with ``str()`` before encoding.
        page: Page number to request. Defaults to 1.

    Returns:
        The URL as a string.
    """
    # Function-scope import: keeps this block self-contained without
    # touching the module's import section.
    from urllib.parse import quote

    # safe="" also encodes "/", which would otherwise add a path segment.
    encoded_keyword = quote(str(keyword), safe="")
    base = f"https://web.joongna.com/_next/data/{api_id}/search"
    return (
        f"{base}/{encoded_keyword}.json"
        f"?page={page}&sort=RECENT_SORT&keyword={encoded_keyword}"
    )
|
|
|
|
|
|
def search_joongna(api_id, keyword, page):
    """Fetch one page of search results and flatten it into simple records.

    Args:
        api_id: Id used to build the data URL (see ``get_url``).
        keyword: Search keyword.
        page: Page number to request.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``year``,
        ``month`` and ``day`` (the date parts are strings cut out of the
        item's ``sortDate``). The list is empty when the response has no
        queries, has no items, or when the last item on the page is more
        than 30 days older than the first day of the current month; callers
        treat an empty list as "stop paging". If an item cannot be parsed,
        diagnostics are printed and the records collected so far are
        returned.

    Note:
        The original code wrote ``return False`` for the stop conditions but
        also had ``return result`` inside ``finally``, which always won. The
        function therefore always returned a list; that effective contract is
        kept here, without the ``finally`` return that also hid errors raised
        while printing diagnostics.
    """
    url = get_url(api_id, keyword, page)
    # A timeout keeps a stalled connection from hanging the scraper forever.
    response = requests.get(url, timeout=10)
    data = response.json()
    result = []
    try:
        queries = data["pageProps"]["dehydratedState"]["queries"]
        if not queries:
            return result

        items = queries[0]["state"]["data"]["data"]["items"]
        if not items:
            return result

        # Stop once the page reaches items well before the current month.
        # NOTE(review): both datetimes are naive; this presumably assumes
        # sortDate is in the machine's local time zone - confirm.
        now = datetime.now()
        month_start = datetime(now.year, now.month, 1)
        last_item_date = datetime.strptime(
            items[-1]["sortDate"], "%Y-%m-%d %H:%M:%S"
        )
        if (month_start - last_item_date).days > 30:
            return result

        # Append one by one (not a comprehension) so that a malformed item
        # still leaves the earlier records in ``result``.
        for item in items:
            date_parts = item["sortDate"].split("-")
            result.append(
                {
                    "title": item["title"],
                    "price": item["price"],
                    "year": date_parts[0],
                    "month": date_parts[1],
                    "day": date_parts[2].split(" ")[0],
                }
            )
    except Exception as error:
        # Deliberately broad: this is a best-effort scraper and an unexpected
        # payload shape should end paging, not crash the caller.
        print("--------------------------------------------")
        print(url)
        print(repr(error))
        try:
            print(data["pageProps"]["dehydratedState"]["queries"])
        except (KeyError, IndexError, TypeError):
            # The payload does not even have the expected outer shape.
            print(data)
        print("--------------------------------------------")
    return result
|
|
|
|
|
|
def get_joongna(keyword):
    """Collect search results for *keyword* across consecutive pages.

    Pages are requested starting at 1 until ``search_joongna`` returns a
    falsy value. The page number is printed before each request, and there
    is a 0.1 second pause after every page that yielded results.

    Args:
        keyword: Search keyword passed through to ``search_joongna``.

    Returns:
        A single list with the records of every fetched page, in page order.
    """
    api_id = get_api_id()
    collected = []
    current_page = 1
    more_pages = True

    while more_pages:
        print(f"page: {current_page}")
        batch = search_joongna(api_id, keyword, current_page)
        if batch:
            collected += batch
            current_page += 1
            time.sleep(0.1)
        else:
            more_pages = False

    # Disabled debugging aid kept from the original:
    # with open("joongna.json", "w", encoding="utf-8") as file:
    #     json.dump(result, file, ensure_ascii=False, indent=2)
    return collected
|