feat: Add crawl method, image upload

This commit is contained in:
jhynsoo 2023-11-13 19:52:35 +09:00
parent 89c56cc1b6
commit 48393c8f3c
10 changed files with 159 additions and 35 deletions

View File

@ -42,6 +42,7 @@ THIRD_PARTY_APPS = [
LOCAL_APPS = [ LOCAL_APPS = [
"core.apps.CoreConfig", "core.apps.CoreConfig",
"crawl.apps.CrawlConfig",
"graph.apps.GraphConfig", "graph.apps.GraphConfig",
"market.apps.MarketConfig", "market.apps.MarketConfig",
"user.apps.UserConfig", "user.apps.UserConfig",

View File

@ -1,3 +1 @@
from django.contrib import admin from django.contrib import admin
# Register your models here.

View File

@ -37,17 +37,14 @@ def search_bunjang(keyword, page=1):
result.append( result.append(
{ {
"title": item["name"], "title": item["name"],
"price": item["price"], "price": int(item["price"]),
"year": update_time.year, "year": update_time.year,
"month": update_time.month, "month": update_time.month,
"day": update_time.day, "day": update_time.day,
} }
) )
except Exception: except Exception:
print("--------------------------------------------") pass
print(url)
print(data)
print("--------------------------------------------")
finally: finally:
return result return result
@ -56,14 +53,32 @@ def get_bunjang(keyword):
result = [] result = []
page = 1 page = 1
while True: while True:
print(f"page: {page}") print(f"b {keyword} p{page}")
page_result = search_bunjang(keyword, page) page_result = search_bunjang(keyword, page)
if not page_result: if not page_result:
break break
result += page_result filtered_result = []
for item in page_result:
price = int(item["price"])
if not (
"매입" in item["title"]
or "삽니다" in item["title"]
or "사요" in item["title"]
or "케이스" in item["title"]
or price % 10 != 0
or price < 100000
or price > 2000000
):
filtered_result.append(item)
result += filtered_result
page += 1 page += 1
time.sleep(0.1) time.sleep(0.1)
# with open("bunjang.json", "w", encoding="utf-8") as file: sum = 0
# json.dump(result, file, ensure_ascii=False, indent=2) for item in result:
return result sum += item["price"]
if len(result) == 0:
return 0
avg = round(sum // len(result), -3)
return avg

View File

@ -1,6 +1,7 @@
import requests import requests
import re import re
import time import time
import json
# from bs4 import BeautifulSoup # from bs4 import BeautifulSoup
@ -15,16 +16,15 @@ def get_api_id():
js_url = base_url + re.findall(pattern, text)[0] js_url = base_url + re.findall(pattern, text)[0]
response = requests.get(js_url) response = requests.get(js_url)
text = response.text text = response.text
index = text.find('iO.SENTRY_RELEASE={id:"') + 24 index = text.find('iO.SENTRY_RELEASE={id:"') + 23
id = text[index : index + 20] index_length = text[index:].find('"')
id = text[index : index + index_length]
return id return id
def get_url(api_id, keyword, page=1):
    """Build the Joongna search-data URL for one result page.

    Args:
        api_id: Build identifier placed in the ``_next/data`` path.
        keyword: Search term; it appears both as a path segment and as
            the ``keyword`` query parameter.
        page: 1-based result page number.

    Returns:
        The fully assembled URL as a string.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the keyword so reserved characters cannot change the
    # URL structure: a raw "+" would be read as a space, "&" would start a
    # new parameter, and "/" would add a path segment.
    encoded = quote(str(keyword), safe="")
    base = f"https://web.joongna.com/_next/data/{api_id}/search"
    return f"{base}/{encoded}.json?page={page}&sort=RECENT_SORT&keyword={encoded}"
def search_joongna(api_id, keyword, page): def search_joongna(api_id, keyword, page):
@ -36,9 +36,9 @@ def search_joongna(api_id, keyword, page):
queries = data["pageProps"]["dehydratedState"]["queries"] queries = data["pageProps"]["dehydratedState"]["queries"]
if len(queries) == 0: if len(queries) == 0:
return False return False
items = data["pageProps"]["dehydratedState"]["queries"][0]["state"][ items = data["pageProps"]["dehydratedState"]["queries"][0]["state"]["data"][
"data" "data"
]["data"]["items"] ]["items"]
item_length = len(items) item_length = len(items)
if item_length == 0: if item_length == 0:
return False return False
@ -73,14 +73,31 @@ def get_joongna(keyword):
result = [] result = []
page = 1 page = 1
while True: while True:
print(f"page: {page}") print(f"j {keyword} p{page}")
page_result = search_joongna(api_id, keyword, page) page_result = search_joongna(api_id, keyword, page)
if not page_result: if not page_result:
break break
result += page_result filtered_result = []
for item in page_result:
if not (
"매입" in item["title"]
or "삽니다" in item["title"]
or "사요" in item["title"]
or "케이스" in item["title"]
or item["price"] % 10 != 0
or item["price"] < 100000
or item["price"] > 2000000
):
filtered_result.append(item)
result += filtered_result
page += 1 page += 1
time.sleep(0.1) time.sleep(0.1)
# with open("joongna.json", "w", encoding="utf-8") as file: sum = 0
# json.dump(result, file, ensure_ascii=False, indent=2) for item in result:
return result sum += item["price"]
if len(result) == 0:
return 0
avg = round(sum // len(result), -3)
return avg

30
crawl/methods.py Normal file
View File

@ -0,0 +1,30 @@
from django.utils import timezone
from crawl.bunjang import get_bunjang
from crawl.joongna import get_joongna
from market.models import Product
from .models import MonthlyTransaction
def crawl(self, request, queryset):
    """Store one average second-hand price per product for the previous month.

    The signature matches the one Django uses for admin actions, but
    ``request`` and ``queryset`` are not read: every ``Product`` in the
    database is processed.

    For each product the Bunjang and Joongna crawlers are queried by
    product name, and a ``MonthlyTransaction`` row is created for the
    month preceding the current one.
    """
    today = timezone.now()
    # The recorded period is the month before "now"; January wraps back to
    # December of the previous year.
    if today.month == 1:
        year, month = today.year - 1, 12
    else:
        year, month = today.year, today.month - 1

    for product in Product.objects.all():
        name = product.name
        print(name)
        bunjang_result = get_bunjang(name)
        joongna_result = get_joongna(name)
        print("----------------")
        print(bunjang_result, joongna_result)
        print("----------------")

        # A crawler reports 0 when no listing survived its filters. Such a
        # source has no data, so leave it out of the mean rather than let
        # it halve the other market's price.
        prices = [price for price in (bunjang_result, joongna_result) if price]
        avg = sum(prices) / len(prices) if prices else 0

        MonthlyTransaction.objects.create(
            product=product,
            year=year,
            month=month,
            price=avg,
        )

View File

@ -3,4 +3,10 @@ from django.contrib import admin
from .models import Transaction, MonthlyTransaction from .models import Transaction, MonthlyTransaction
admin.site.register(Transaction) admin.site.register(Transaction)
admin.site.register(MonthlyTransaction)
@admin.register(MonthlyTransaction)
class MonthlyTransactionAdmin(admin.ModelAdmin):
    """Admin changelist configuration for monthly price records."""

    list_display = ("product", "year", "month", "price")
    list_filter = ("year", "month")
    # ``product`` is a relation to Product, so the search has to follow it
    # to a text column. Naming the relation itself makes the changelist
    # search fail with an invalid-lookup error.
    search_fields = ("product__name",)

View File

@ -4,9 +4,33 @@ from .models import Brand, Product, ItemIssues, Post, Image
admin.site.register(Brand) admin.site.register(Brand)
admin.site.register(Product)
admin.site.register(Image)
@admin.register(ItemIssues)
class ItemIssuesAdmin(admin.ModelAdmin):
    """Admin changelist for item-issue records, one column per issue field."""

    # Columns shown in the changelist, in display order.
    list_display = (
        "display", "frame", "button", "biometric",
        "camera", "speaker", "others",
    )
@admin.register(Image)
class ImageAdmin(admin.ModelAdmin):
    """Admin changelist for uploaded images."""

    # Edit the related post through a raw-id input instead of a select box.
    raw_id_fields = ("post",)
    list_display = ("post", "image")
@admin.register(Product)
class ProductAdmin(admin.ModelAdmin):
    """Admin changelist for products: searchable by name, filterable by brand and date."""

    search_fields = ("name",)
    list_filter = ("brand", "release_date")
    list_display = ("name", "brand", "release_date")
    # Adds date drill-down navigation over the release date.
    date_hierarchy = "release_date"
@admin.register(Post) @admin.register(Post)
@ -14,8 +38,6 @@ class PostAdmin(admin.ModelAdmin):
list_display = ( list_display = (
"product", "product",
"price", "price",
"text",
"author",
"item_issues", "item_issues",
"done", "done",
"written_at", "written_at",
@ -23,4 +45,8 @@ class PostAdmin(admin.ModelAdmin):
list_filter = ("done", "written_at") list_filter = ("done", "written_at")
search_fields = ("product", "text") search_fields = ("product", "text")
date_hierarchy = "written_at" date_hierarchy = "written_at"
raw_id_fields = ("author",) raw_id_fields = (
"author",
"product",
"item_issues",
)

View File

@ -12,7 +12,11 @@ class Brand(models.Model):
class Product(models.Model): class Product(models.Model):
name = models.CharField(max_length=50) name = models.CharField(max_length=50)
brand = models.ForeignKey(Brand, on_delete=models.CASCADE, related_name="products") brand = models.ForeignKey(
Brand,
on_delete=models.CASCADE,
related_name="products",
)
release_date = models.DateField(blank=True, null=True) release_date = models.DateField(blank=True, null=True)
class Meta: class Meta:

View File

@ -43,6 +43,12 @@ class ImageSerializer(ModelSerializer):
fields = ("image",) fields = ("image",)
class ImageCreateSerializer(ModelSerializer):
    """Serializer exposing an image together with the post it belongs to."""

    class Meta:
        # Serializes the Image model's ``post`` and ``image`` fields.
        model = Image
        fields = ("post", "image")
class ItemIssuesSerializer(ModelSerializer): class ItemIssuesSerializer(ModelSerializer):
class Meta: class Meta:
model = ItemIssues model = ItemIssues
@ -87,6 +93,22 @@ class PostListSerializer(ModelSerializer):
class PostCreateSerializer(ModelSerializer): class PostCreateSerializer(ModelSerializer):
item_issues = ItemIssuesSerializer(write_only=True) item_issues = ItemIssuesSerializer(write_only=True)
# item_issues = serializers.JSONField(write_only=True)
photos = serializers.ListField(
child=serializers.ImageField(),
write_only=True,
)
def create(self, validated_data):
    """Create a post along with its nested issue record and uploaded photos.

    The nested ``item_issues`` payload is saved as its own ``ItemIssues``
    row and attached to the post; each entry in ``photos`` becomes an
    ``Image`` row pointing at the new post.

    Returns:
        The newly created post instance.
    """
    # Swap the nested dict for a saved ItemIssues instance.
    issue_fields = validated_data.pop("item_issues")
    validated_data["item_issues"] = ItemIssues.objects.create(**issue_fields)

    # ``photos`` is removed before delegating to the parent ``create``;
    # the uploads are stored as Image rows below instead.
    uploads = validated_data.pop("photos", [])
    post = super().create(validated_data)

    for upload in uploads:
        Image.objects.create(post=post, image=upload)
    return post
class Meta: class Meta:
model = Post model = Post
@ -95,6 +117,7 @@ class PostCreateSerializer(ModelSerializer):
"price", "price",
"text", "text",
"item_issues", "item_issues",
"photos",
) )

View File

@ -5,7 +5,7 @@ from rest_framework.viewsets import ModelViewSet
from core.mixins import ActionBasedMixin from core.mixins import ActionBasedMixin
from core.permissions import IsAuthorOrReadOnly, IsAdminUserOrReadOnly from core.permissions import IsAuthorOrReadOnly, IsAdminUserOrReadOnly
from market.models import Brand, Product, Post, ItemIssues from market.models import Brand, Product, Post, ItemIssues, Image
from market.serializers import ( from market.serializers import (
BrandSerializer, BrandSerializer,
ProductSerializer, ProductSerializer,
@ -85,8 +85,12 @@ class PostViewset(ActionBasedMixin, ModelViewSet):
return Response(serializer.data) return Response(serializer.data)
def perform_create(self, serializer):
    """Save the new object with the requesting user recorded as its author."""
    author = self.request.user
    serializer.save(author=author)
def partial_update(self, request, *args, **kwargs): def partial_update(self, request, *args, **kwargs):
object = self.get_object() object = self.get_object()