feat: Add crawl app
parent 3cd7228c05
commit f7f9486237
0 crawl/__init__.py Normal file
3 crawl/admin.py Normal file
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 crawl/apps.py Normal file
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class CrawlConfig(AppConfig):
    default_auto_field = "django.db.models.BigAutoField"
    name = "crawl"
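For Django to load the new app, it also has to appear in the project's settings module. That file is not part of this commit, so the snippet below is only a sketch of the expected one-line change, not something this diff contains.

# settings.py (elsewhere in the project; assumed, not shown in this diff)
INSTALLED_APPS = [
    # ... existing Django apps ...
    "crawl",
]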
69 crawl/bunjang.py Normal file
@@ -0,0 +1,69 @@
import requests
import time

# import re
import json  # used only by the commented-out dump in get_bunjang
from datetime import datetime


def get_time():
    # Return the current time as a compact timestamp string, e.g.
    # 2020-11-01 13:05:23 -> "20201101130523".
    now = datetime.now()
    return now.strftime("%Y%m%d%H%M%S")


def search_bunjang(keyword, page=1):
    request_id = get_time()  # compact timestamp doubles as the request id
    base_url = "https://api.bunjang.co.kr/api/1/find_v2.json"
    url = (
        f"{base_url}?q={keyword}&order=date&page={page - 1}"
        f"&request_id={request_id}&stat_device=w&n=100"
        f"&stat_category_required=1&req_ref=search&version=5"
    )
    response = requests.get(url)
    data = response.json()
    result = []
    try:
        items = data["list"]
        item_length = len(items)
        if item_length == 0:
            return False
        # Stop once the newest-first feed falls more than a month behind the
        # first day of the current month.
        now = datetime.now()
        target = datetime(now.year, now.month, 1)
        last_item_date = datetime.utcfromtimestamp(
            int(items[item_length - 1]["update_time"])
        )
        if (target - last_item_date).days > 30:
            return False
        for item in items:
            update_time = datetime.utcfromtimestamp(int(item["update_time"]))
            result.append(
                {
                    "title": item["name"],
                    "price": item["price"],
                    "year": update_time.year,
                    "month": update_time.month,
                    "day": update_time.day,
                }
            )
    except Exception:
        print("--------------------------------------------")
        print(url)
        print(data)
        print("--------------------------------------------")
    # On failure, return whatever was collected before the error.
    return result


def get_bunjang(keyword):
    result = []
    page = 1
    while True:
        print(f"page: {page}")
        page_result = search_bunjang(keyword, page)
        if not page_result:
            break
        result += page_result
        page += 1
        time.sleep(0.1)

    # with open("bunjang.json", "w", encoding="utf-8") as file:
    #     json.dump(result, file, ensure_ascii=False, indent=2)
    return result
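Taken together, search_bunjang fetches one 100-item page of newest-first results and get_bunjang pages through them until a page comes back empty or falls more than a month behind. A minimal driver sketch, assuming prices arrive as numeric strings; the monthly aggregation below is illustrative and not part of this commit.

# Hypothetical driver (not in this diff): crawl a keyword and report the
# average listed price per (year, month) bucket.
from collections import defaultdict

from crawl.bunjang import get_bunjang

rows = get_bunjang("iphone")
buckets = defaultdict(lambda: [0, 0])  # (year, month) -> [price_sum, count]
for row in rows:
    key = (row["year"], row["month"])
    buckets[key][0] += int(row["price"])
    buckets[key][1] += 1
for (year, month), (price_sum, count) in sorted(buckets.items()):
    print(f"{year}-{month:02d}: {count} items, avg {price_sum // count}")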
86 crawl/joongna.py Normal file
@@ -0,0 +1,86 @@
import requests
import re
import time

# from bs4 import BeautifulSoup

from datetime import datetime


def get_api_id():
    # The search endpoint is a Next.js data route whose URL embeds the build
    # id, which the site exposes as its Sentry release id inside the _app
    # bundle.
    base_url = "https://web.joongna.com/"
    response = requests.get(base_url)
    text = response.text
    pattern = r"_next/static/chunks/pages/_app.*?\.js"
    js_url = base_url + re.findall(pattern, text)[0]
    response = requests.get(js_url)
    text = response.text
    marker = 'iO.SENTRY_RELEASE={id:"'
    index = text.find(marker) + len(marker)
    # The 20 characters after the marker are the release/build id.
    return text[index : index + 20]


def get_url(api_id, keyword, page=1):
    base = f"https://web.joongna.com/_next/data/{api_id}/search"
    return f"{base}/{keyword}.json?page={page}&sort=RECENT_SORT&keyword={keyword}"


def search_joongna(api_id, keyword, page):
    url = get_url(api_id, keyword, page)
    response = requests.get(url)
    data = response.json()
    result = []
    try:
        queries = data["pageProps"]["dehydratedState"]["queries"]
        if len(queries) == 0:
            return False
        items = queries[0]["state"]["data"]["data"]["items"]
        item_length = len(items)
        if item_length == 0:
            return False
        # Stop once the newest-first feed falls more than a month behind the
        # first day of the current month.
        now = datetime.now()
        target = datetime(now.year, now.month, 1)
        last_item_date = datetime.strptime(
            items[item_length - 1]["sortDate"], "%Y-%m-%d %H:%M:%S"
        )
        if (target - last_item_date).days > 30:
            return False
        for item in items:
            # sortDate looks like "YYYY-MM-DD HH:MM:SS"
            year, month, day_time = item["sortDate"].split("-")
            result.append(
                {
                    "title": item["title"],
                    "price": item["price"],
                    "year": year,
                    "month": month,
                    "day": day_time.split(" ")[0],
                }
            )
    except Exception:
        print("--------------------------------------------")
        print(url)
        # Print the whole payload; the queries path itself may be missing.
        print(data)
        print("--------------------------------------------")
    # On failure, return whatever was collected before the error.
    return result


def get_joongna(keyword):
    api_id = get_api_id()
    result = []
    page = 1
    while True:
        print(f"page: {page}")
        page_result = search_joongna(api_id, keyword, page)
        if not page_result:
            break
        result += page_result
        page += 1
        time.sleep(0.1)

    # with open("joongna.json", "w", encoding="utf-8") as file:
    #     json.dump(result, file, ensure_ascii=False, indent=2)
    return result
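The joongna module mirrors the bunjang one and returns rows of the same shape (with string date parts), so the two crawls can be merged directly. A sketch, with the keyword and sort key chosen purely for illustration:

# Hypothetical merge (not in this diff): combine both marketplaces into one
# chronologically sorted list.
from crawl.bunjang import get_bunjang
from crawl.joongna import get_joongna

keyword = "iphone"
rows = get_bunjang(keyword) + get_joongna(keyword)
rows.sort(key=lambda r: (int(r["year"]), int(r["month"]), int(r["day"])))
print(f"{len(rows)} listings found for {keyword!r}")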
0 crawl/migrations/__init__.py Normal file
3 crawl/models.py Normal file
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
3 crawl/tests.py Normal file
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.