feat: Add crawl app
parent 3cd7228c05
commit f7f9486237
crawl/__init__.py (new file, empty)
crawl/admin.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
crawl/apps.py (new file, 6 lines)
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class CrawlConfig(AppConfig):
    default_auto_field = "django.db.models.BigAutoField"
    name = "crawl"
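For this config to be picked up, the project settings (not part of this diff) would also need to list the app. A minimal sketch, assuming a default settings.py layout with the standard contrib apps:

# settings.py (assumed project settings module; not included in this commit)
INSTALLED_APPS = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "crawl",  # Django resolves this to the CrawlConfig defined above
]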
crawl/bunjang.py (new file, 69 lines)
@@ -0,0 +1,69 @@
import requests
import time

# import re
import json
from datetime import datetime


def get_time():
    # return current time
    # if current date is 2020 Nov 1st, and current time is 13:05:23 return 20201101130523
    now = datetime.now()
    return now.strftime("%Y%m%d%H%M%S")


def search_bunjang(keyword, page=1):
    time = get_time()
    base_url = "https://api.bunjang.co.kr/api/1/find_v2.json"
    url = f"{base_url}?q={keyword}&order=date&page={page-1}&request_id={time}&stat_device=w&n=100&stat_category_required=1&req_ref=search&version=5"
    response = requests.get(url)
    data = response.json()
    result = []
    try:
        items = data["list"]
        item_length = len(items)
        if item_length == 0:
            return False
        now = datetime.now()
        target = datetime(now.year, now.month, 1)
        last_item_date = datetime.utcfromtimestamp(
            int(items[item_length - 1]["update_time"])
        )
        if (target - last_item_date).days > 30:
            return False
        for item in items:
            update_time = datetime.utcfromtimestamp(int(item["update_time"]))
            result.append(
                {
                    "title": item["name"],
                    "price": item["price"],
                    "year": update_time.year,
                    "month": update_time.month,
                    "day": update_time.day,
                }
            )
    except Exception:
        print("--------------------------------------------")
        print(url)
        print(data)
        print("--------------------------------------------")
    finally:
        return result


def get_bunjang(keyword):
    result = []
    page = 1
    while True:
        print(f"page: {page}")
        page_result = search_bunjang(keyword, page)
        if not page_result:
            break
        result += page_result
        page += 1
        time.sleep(0.1)

    # with open("bunjang.json", "w", encoding="utf-8") as file:
    #     json.dump(result, file, ensure_ascii=False, indent=2)
    return result
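A usage sketch for the module above, not part of this commit; the keyword is only an example, and the unofficial Bunjang endpoint's list, name, price, and update_time fields are assumed to stay stable. Note that because search_bunjang returns from its finally block, the return False statements in the try body are overridden and the (possibly empty) result list comes back instead; pagination in get_bunjang still stops, since an empty list is falsy.

# Hypothetical example, assuming the module is importable as crawl.bunjang.
from crawl.bunjang import get_bunjang

if __name__ == "__main__":
    listings = get_bunjang("iphone")  # example keyword; crawls pages until items fall outside roughly the last month
    print(f"collected {len(listings)} listings")
    for listing in listings[:3]:
        # each entry is a dict with title, price, year, month, day
        print(listing["title"], listing["price"], listing["year"], listing["month"], listing["day"])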
crawl/joongna.py (new file, 86 lines)
@@ -0,0 +1,86 @@
import requests
import re
import time

# from bs4 import BeautifulSoup

from datetime import datetime


def get_api_id():
    base_url = "https://web.joongna.com/"
    response = requests.get(base_url)
    text = response.text
    pattern = r"_next/static/chunks/pages/_app.*?\.js"
    js_url = base_url + re.findall(pattern, text)[0]
    response = requests.get(js_url)
    text = response.text
    index = text.find('iO.SENTRY_RELEASE={id:"') + 24
    id = text[index : index + 20]
    return id


def get_url(api_id, keyword, page=1):
    base = f"https://web.joongna.com/_next/data/{api_id}/search"
    return (
        f"{base}/{keyword}.json?page={page}&sort=RECENT_SORT&keyword={keyword}"
    )


def search_joongna(api_id, keyword, page):
    url = get_url(api_id, keyword, page)
    response = requests.get(url)
    data = response.json()
    result = []
    try:
        queries = data["pageProps"]["dehydratedState"]["queries"]
        if len(queries) == 0:
            return False
        items = data["pageProps"]["dehydratedState"]["queries"][0]["state"][
            "data"
        ]["data"]["items"]
        item_length = len(items)
        if item_length == 0:
            return False
        now = datetime.now()
        target = datetime(now.year, now.month, 1)
        last_item_date = datetime.strptime(
            items[item_length - 1]["sortDate"], "%Y-%m-%d %H:%M:%S"
        )
        if (target - last_item_date).days > 30:
            return False
        for item in items:
            result.append(
                {
                    "title": item["title"],
                    "price": item["price"],
                    "year": item["sortDate"].split("-")[0],
                    "month": item["sortDate"].split("-")[1],
                    "day": item["sortDate"].split("-")[2].split(" ")[0],
                }
            )
    except Exception:
        print("--------------------------------------------")
        print(url)
        print(data["pageProps"]["dehydratedState"]["queries"])
        print("--------------------------------------------")
    finally:
        return result


def get_joongna(keyword):
    api_id = get_api_id()
    result = []
    page = 1
    while True:
        print(f"page: {page}")
        page_result = search_joongna(api_id, keyword, page)
        if not page_result:
            break
        result += page_result
        page += 1
        time.sleep(0.1)

    # with open("joongna.json", "w", encoding="utf-8") as file:
    #     json.dump(result, file, ensure_ascii=False, indent=2)
    return result
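A similar sketch for Joongna, again not part of this commit. get_api_id scrapes the Next.js build id from the bundled _app chunk by locating the SENTRY_RELEASE marker and slicing out a 20-character id, so the crawler assumes that marker and id length stay stable; search_joongna then reads listings from the dehydrated query state embedded in the search page data.

# Hypothetical example, assuming the module is importable as crawl.joongna.
from crawl.joongna import get_api_id, get_joongna

if __name__ == "__main__":
    print("Next.js build id:", get_api_id())  # also resolved once inside get_joongna
    listings = get_joongna("macbook")  # example keyword
    print(f"collected {len(listings)} listings")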
crawl/migrations/__init__.py (new file, empty)
crawl/models.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
crawl/tests.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.