author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-11-15 12:49:14 +0100
---|---|---
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-11-15 12:49:14 +0100
commit | 919540e0a3d30ae4377f0058b7060233a2b65f73 (patch) |
tree | 937864af725c94b2880593138173cc7107f26acc |
parent | 50902cf4232b7f61772ecb21cde01bdc0e0e64d7 (diff) |
download | Comm-on-net-919540e0a3d30ae4377f0058b7060233a2b65f73.tar.bz2, Comm-on-net-919540e0a3d30ae4377f0058b7060233a2b65f73.zip |
Admin: full export of results from a crawl
-rw-r--r-- | commcrawler/admin.py | 124
1 file changed, 115 insertions(+), 9 deletions(-)
```diff
diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index b8f6206..aab1d6a 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -1,20 +1,35 @@
 import csv
 import json
+from io import BytesIO
+import os
+import tempfile
+import shutil
+import zipfile

 from pygments import highlight
 from pygments.lexers.data import JsonLexer
 from pygments.formatters.html import HtmlFormatter

 from ajax_select import make_ajax_form
-from django.contrib import admin
+from django.contrib import admin, messages
 from django.http import HttpResponse
 from django.utils.translation import ugettext_lazy as _
 from django.utils.safestring import mark_safe
+from django.shortcuts import HttpResponseRedirect, reverse

 from commonnet.admin_site import admin_site
 from commcrawler import models


+RESULT_FIELDNAMES = (
+    "target_id", "target__name", "target__url", "nb_external_link",
+    "nb_internal_link", "nb_images", "nb_facebook", "nb_twitter",
+    "nb_instagram", "nb_youtube", "nb_dailymotion", "nb_vimeo",
+    "nb_video", "nb_audio", "nb_internal_pdf", "nb_external_pdf",
+    "nb_internal_office", "nb_external_office", "is_online",
+    "redirection", "error")
+
+
 def export_as_csv_action(field_names,
                          description=_("Export selected as CSV file"),
                          header=True):
@@ -48,6 +63,103 @@ def export_as_csv_action(field_names,
     return export_as_csv


+def export_result_as_zip(description=_("Export all results as ZIP file"),
+                         header=True):
+
+    def write_result_csv(crawl, result_filename):
+        error_pos = None
+        field_names = list(RESULT_FIELDNAMES)
+        if "error" in field_names:
+            error_pos = field_names.index("error")
+            field_names.pop(error_pos)
+        with open(result_filename, "w") as result_file:
+            writer = csv.writer(result_file)
+            if header:
+                writer.writerow(list(RESULT_FIELDNAMES))
+
+            for target in models.Target.objects.filter(crawl=crawl).all():
+                q = models.CrawlResult.objects.filter(crawl=crawl,
+                                                      target=target)
+                result = []
+                if not q.count():
+                    for col in field_names:
+                        if not col.startswith("target_"):
+                            result.append("")
+                        else:
+                            if col.startswith("target__"):
+                                key = "target__"
+                            else:
+                                key = "target_"
+                            result.append(str(getattr(target, col[len(key):])))
+                    if error_pos:
+                        result.insert(error_pos, "unreachable")
+                else:
+                    result = list(q.values_list(*field_names).all()[0])
+                    if error_pos:
+                        result.insert(error_pos, "")
+
+                result = ["" if r is None else str(r) for r in result]
+                writer.writerow(result)
+
+    def write_relations_csv(crawl, filename):
+        queryset = models.CrawlRelation.objects.filter(crawl=crawl)
+        field_names = ("source_id", "destination_id", "number")
+        with open(filename, "w") as fle:
+            writer = csv.writer(fle)
+            if header:
+                writer.writerow(list(field_names))
+            for obj in queryset.values(*field_names).order_by('pk'):
+                row = []
+                for field in field_names:
+                    value = obj[field]
+                    if value is None:
+                        value = ""
+                    else:
+                        value = str(value)
+                    row.append(value)
+
+                writer.writerow(row)
+
+    def export_as_zip(modeladmin, request, queryset):
+        if queryset.count() != 1:
+            url = reverse(
+                'admin:%s_%s_changelist' % (
+                    modeladmin.model._meta.app_label,
+                    modeladmin.model._meta.model_name)
+            )
+            messages.add_message(
+                request, messages.ERROR,
+                str(_("Select only one item.")),
+            )
+            return HttpResponseRedirect(url)
+        crawl = queryset.all()[0]
+
+        tmpdir = tempfile.mkdtemp(prefix="comm-on-net")
+        name = crawl.name
+        date = crawl.started.strftime('%Y-%m-%d')
+        result_filename = os.path.join(
+            tmpdir, "{}-results-{}.csv".format(name, date))
+        write_result_csv(crawl, result_filename)
+        relation_filename = os.path.join(
+            tmpdir, "{}-relations-{}.csv".format(name, date))
+        write_relations_csv(crawl, relation_filename)
+
+        s = BytesIO()
+        with zipfile.ZipFile(s, "w") as zf:
+            for filepath in (result_filename, relation_filename):
+                __, filename = os.path.split(filepath)
+                zf.write(filepath, filename)
+        shutil.rmtree(tmpdir)
+
+        response = HttpResponse(s.getvalue(), content_type='application/zip')
+        response['Content-Disposition'] = \
+            'attachment; filename={}-{}.zip'.format(name, date)
+        return response
+
+    export_as_zip.short_description = description
+    return export_as_zip
+
+
 class CrawlAdmin(admin.ModelAdmin):
     model = models.Crawl
     list_display = ("name", "status", "get_target_nb", "time_out", "created",
@@ -57,6 +169,7 @@ class CrawlAdmin(admin.ModelAdmin):
                     "crawl_ended", "ended")
     readonly_fields = ()
     form = make_ajax_form(model, {'targets': 'target'})
+    actions = [export_result_as_zip()]

     def get_target_nb(self, obj):
         return obj.target_nb
@@ -89,14 +202,7 @@ class CrawlResultAdmin(admin.ModelAdmin):
     exclude = ("crawl_result",)
     form = make_ajax_form(model, {'target': 'target'})
     actions = [
-        export_as_csv_action(
-            ("target_id", "target__name", "target__url", "nb_external_link",
-             "nb_internal_link", "nb_images", "nb_facebook", "nb_twitter",
-             "nb_instagram", "nb_youtube", "nb_dailymotion", "nb_vimeo",
-             "nb_video", "nb_audio", "nb_internal_pdf", "nb_external_pdf",
-             "nb_internal_office", "nb_external_office", "is_online",
-             "redirection")
-        )
+        export_as_csv_action(RESULT_FIELDNAMES)
     ]

     def open_link(self, obj):
```
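For illustration only (not part of the commit): since `export_result_as_zip()` is now registered in `CrawlAdmin.actions`, it can also be exercised outside the admin UI, e.g. from a Django shell or a test. This is a minimal sketch assuming the models and admin classes shown in the diff above; the helper name `export_crawl_zip` and the crawl primary key are hypothetical, and the selected `Crawl` must have its `started` field set because the action calls `strftime` on it.

```python
from django.contrib.admin.sites import AdminSite
from django.test import RequestFactory

from commcrawler import admin as crawl_admin
from commcrawler import models


def export_crawl_zip(crawl_pk):
    """Return the ZIP HttpResponse the admin action builds for one Crawl."""
    # The action insists on exactly one selected item.
    queryset = models.Crawl.objects.filter(pk=crawl_pk)
    modeladmin = crawl_admin.CrawlAdmin(models.Crawl, AdminSite())
    # A bare request suffices on the happy path; the error branch would
    # additionally need the messages framework configured.
    request = RequestFactory().get("/")
    action = crawl_admin.export_result_as_zip()
    return action(modeladmin, request, queryset)


response = export_crawl_zip(1)  # hypothetical pk; assumes this crawl exists
with open("crawl-export.zip", "wb") as out:
    out.write(response.content)
```

Design-wise, the action writes the two CSVs into a `tempfile.mkdtemp` directory and zips them into an in-memory `BytesIO`, so the whole archive is held in RAM before the response is sent. That keeps the code simple; for very large crawls a streaming response would scale better.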