From 48393c8f3c696e447f0367dfea715efc3c99bf12 Mon Sep 17 00:00:00 2001 From: jhynsoo Date: Mon, 13 Nov 2023 19:52:35 +0900 Subject: [PATCH] feat: Add crawl method, image upload --- config/settings/base.py | 1 + crawl/admin.py | 2 -- crawl/bunjang.py | 35 +++++++++++++++++++++++++---------- crawl/joongna.py | 41 +++++++++++++++++++++++++++++------------ crawl/methods.py | 30 ++++++++++++++++++++++++++++++ graph/admin.py | 8 +++++++- market/admin.py | 38 ++++++++++++++++++++++++++++++++------ market/models.py | 6 +++++- market/serializers.py | 23 +++++++++++++++++++++++ market/viewsets.py | 10 +++++++--- 10 files changed, 159 insertions(+), 35 deletions(-) create mode 100644 crawl/methods.py diff --git a/config/settings/base.py b/config/settings/base.py index 288c82d..ce88eaa 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -42,6 +42,7 @@ THIRD_PARTY_APPS = [ LOCAL_APPS = [ "core.apps.CoreConfig", + "crawl.apps.CrawlConfig", "graph.apps.GraphConfig", "market.apps.MarketConfig", "user.apps.UserConfig", diff --git a/crawl/admin.py b/crawl/admin.py index 8c38f3f..694323f 100644 --- a/crawl/admin.py +++ b/crawl/admin.py @@ -1,3 +1 @@ from django.contrib import admin - -# Register your models here. diff --git a/crawl/bunjang.py b/crawl/bunjang.py index 46d8e68..ac76cb7 100644 --- a/crawl/bunjang.py +++ b/crawl/bunjang.py @@ -37,17 +37,14 @@ def search_bunjang(keyword, page=1): result.append( { "title": item["name"], - "price": item["price"], + "price": int(item["price"]), "year": update_time.year, "month": update_time.month, "day": update_time.day, } ) except Exception: - print("--------------------------------------------") - print(url) - print(data) - print("--------------------------------------------") + pass finally: return result @@ -56,14 +53,32 @@ def get_bunjang(keyword): result = [] page = 1 while True: - print(f"page: {page}") + print(f"b {keyword} p{page}") page_result = search_bunjang(keyword, page) if not page_result: break - result += page_result + filtered_result = [] + for item in page_result: + price = int(item["price"]) + if not ( + "매입" in item["title"] + or "삽니다" in item["title"] + or "사요" in item["title"] + or "케이스" in item["title"] + or price % 10 != 0 + or price < 100000 + or price > 2000000 + ): + filtered_result.append(item) + result += filtered_result page += 1 time.sleep(0.1) - # with open("bunjang.json", "w", encoding="utf-8") as file: - # json.dump(result, file, ensure_ascii=False, indent=2) - return result + sum = 0 + for item in result: + sum += item["price"] + + if len(result) == 0: + return 0 + avg = round(sum // len(result), -3) + return avg diff --git a/crawl/joongna.py b/crawl/joongna.py index 9b9d3e2..40c22f2 100644 --- a/crawl/joongna.py +++ b/crawl/joongna.py @@ -1,6 +1,7 @@ import requests import re import time +import json # from bs4 import BeautifulSoup @@ -15,16 +16,15 @@ def get_api_id(): js_url = base_url + re.findall(pattern, text)[0] response = requests.get(js_url) text = response.text - index = text.find('iO.SENTRY_RELEASE={id:"') + 24 - id = text[index : index + 20] + index = text.find('iO.SENTRY_RELEASE={id:"') + 23 + index_length = text[index:].find('"') + id = text[index : index + index_length] return id def get_url(api_id, keyword, page=1): base = f"https://web.joongna.com/_next/data/{api_id}/search" - return ( - f"{base}/{keyword}.json?page={page}&sort=RECENT_SORT&keyword={keyword}" - ) + return f"{base}/{keyword}.json?page={page}&sort=RECENT_SORT&keyword={keyword}" def search_joongna(api_id, keyword, page): @@ -36,9 +36,9 @@ def search_joongna(api_id, keyword, page): queries = data["pageProps"]["dehydratedState"]["queries"] if len(queries) == 0: return False - items = data["pageProps"]["dehydratedState"]["queries"][0]["state"][ + items = data["pageProps"]["dehydratedState"]["queries"][0]["state"]["data"][ "data" - ]["data"]["items"] + ]["items"] item_length = len(items) if item_length == 0: return False @@ -73,14 +73,31 @@ def get_joongna(keyword): result = [] page = 1 while True: - print(f"page: {page}") + print(f"j {keyword} p{page}") page_result = search_joongna(api_id, keyword, page) if not page_result: break - result += page_result + filtered_result = [] + for item in page_result: + if not ( + "매입" in item["title"] + or "삽니다" in item["title"] + or "사요" in item["title"] + or "케이스" in item["title"] + or item["price"] % 10 != 0 + or item["price"] < 100000 + or item["price"] > 2000000 + ): + filtered_result.append(item) + result += filtered_result page += 1 time.sleep(0.1) - # with open("joongna.json", "w", encoding="utf-8") as file: - # json.dump(result, file, ensure_ascii=False, indent=2) - return result + sum = 0 + for item in result: + sum += item["price"] + + if len(result) == 0: + return 0 + avg = round(sum // len(result), -3) + return avg diff --git a/crawl/methods.py b/crawl/methods.py new file mode 100644 index 0000000..ee9ee8c --- /dev/null +++ b/crawl/methods.py @@ -0,0 +1,30 @@ +from django.utils import timezone + +from crawl.bunjang import get_bunjang +from crawl.joongna import get_joongna +from market.models import Product +from .models import MonthlyTransaction + + +def crawl(self, request, queryset): + today = timezone.now() + month = today.month - 1 + year = today.year + if month == 0: + month = 12 + year -= 1 + for product in Product.objects.all(): + name = product.name + print(name) + bunjang_result = get_bunjang(name) + joongna_result = get_joongna(name) + print("----------------") + print(bunjang_result, joongna_result) + print("----------------") + avg = (bunjang_result + joongna_result) / 2 + MonthlyTransaction.objects.create( + product=product, + year=year, + month=month, + price=avg, + ) diff --git a/graph/admin.py b/graph/admin.py index aef6443..34662a0 100644 --- a/graph/admin.py +++ b/graph/admin.py @@ -3,4 +3,10 @@ from django.contrib import admin from .models import Transaction, MonthlyTransaction admin.site.register(Transaction) -admin.site.register(MonthlyTransaction) + + +@admin.register(MonthlyTransaction) +class MonthlyTransactionAdmin(admin.ModelAdmin): + list_display = ("product", "year", "month", "price") + list_filter = ("year", "month") + search_fields = ("product",) diff --git a/market/admin.py b/market/admin.py index 6dcff6d..d8d53eb 100644 --- a/market/admin.py +++ b/market/admin.py @@ -4,9 +4,33 @@ from .models import Brand, Product, ItemIssues, Post, Image admin.site.register(Brand) -admin.site.register(Product) -admin.site.register(Image) -admin.site.register(ItemIssues) + + +@admin.register(ItemIssues) +class ItemIssuesAdmin(admin.ModelAdmin): + list_display = ( + "display", + "frame", + "button", + "biometric", + "camera", + "speaker", + "others", + ) + + +@admin.register(Image) +class ImageAdmin(admin.ModelAdmin): + list_display = ("post", "image") + raw_id_fields = ("post",) + + +@admin.register(Product) +class ProductAdmin(admin.ModelAdmin): + list_display = ("name", "brand", "release_date") + list_filter = ("brand", "release_date") + search_fields = ("name",) + date_hierarchy = "release_date" @admin.register(Post) @@ -14,8 +38,6 @@ class PostAdmin(admin.ModelAdmin): list_display = ( "product", "price", - "text", - "author", "item_issues", "done", "written_at", @@ -23,4 +45,8 @@ class PostAdmin(admin.ModelAdmin): list_filter = ("done", "written_at") search_fields = ("product", "text") date_hierarchy = "written_at" - raw_id_fields = ("author",) + raw_id_fields = ( + "author", + "product", + "item_issues", + ) diff --git a/market/models.py b/market/models.py index 8697acb..a3fd084 100644 --- a/market/models.py +++ b/market/models.py @@ -12,7 +12,11 @@ class Brand(models.Model): class Product(models.Model): name = models.CharField(max_length=50) - brand = models.ForeignKey(Brand, on_delete=models.CASCADE, related_name="products") + brand = models.ForeignKey( + Brand, + on_delete=models.CASCADE, + related_name="products", + ) release_date = models.DateField(blank=True, null=True) class Meta: diff --git a/market/serializers.py b/market/serializers.py index 1e02f5b..a6290c1 100644 --- a/market/serializers.py +++ b/market/serializers.py @@ -43,6 +43,12 @@ class ImageSerializer(ModelSerializer): fields = ("image",) +class ImageCreateSerializer(ModelSerializer): + class Meta: + model = Image + fields = ("post", "image") + + class ItemIssuesSerializer(ModelSerializer): class Meta: model = ItemIssues @@ -87,6 +93,22 @@ class PostListSerializer(ModelSerializer): class PostCreateSerializer(ModelSerializer): item_issues = ItemIssuesSerializer(write_only=True) + # item_issues = serializers.JSONField(write_only=True) + + photos = serializers.ListField( + child=serializers.ImageField(), + write_only=True, + ) + + def create(self, validated_data): + item_issues_data = validated_data.pop("item_issues") + item_issues = ItemIssues.objects.create(**item_issues_data) + validated_data["item_issues"] = item_issues + photos = validated_data.pop("photos", []) + post = super().create(validated_data) + for photo in photos: + Image.objects.create(post=post, image=photo) + return post class Meta: model = Post @@ -95,6 +117,7 @@ class PostCreateSerializer(ModelSerializer): "price", "text", "item_issues", + "photos", ) diff --git a/market/viewsets.py b/market/viewsets.py index 4845e82..d25cd09 100644 --- a/market/viewsets.py +++ b/market/viewsets.py @@ -5,7 +5,7 @@ from rest_framework.viewsets import ModelViewSet from core.mixins import ActionBasedMixin from core.permissions import IsAuthorOrReadOnly, IsAdminUserOrReadOnly -from market.models import Brand, Product, Post, ItemIssues +from market.models import Brand, Product, Post, ItemIssues, Image from market.serializers import ( BrandSerializer, ProductSerializer, @@ -85,8 +85,12 @@ class PostViewset(ActionBasedMixin, ModelViewSet): return Response(serializer.data) def perform_create(self, serializer): - item_issues = ItemIssues.objects.create() - serializer.save(author=self.request.user, item_issues=item_issues) + # item_issues = ItemIssues.objects.create() + # serializer.save(author=self.request.user, item_issues=item_issues) + serializer.save(author=self.request.user) + # images = self.request.data.getlist("images") + # for image in images: + # Image.objects.create(post=serializer.instance, image=image) def partial_update(self, request, *args, **kwargs): object = self.get_object()