author     Étienne Loks <etienne.loks@iggdrasil.net>  2019-11-15 12:49:14 +0100
committer  Étienne Loks <etienne.loks@iggdrasil.net>  2019-11-15 12:49:14 +0100
commit     919540e0a3d30ae4377f0058b7060233a2b65f73 (patch)
tree       937864af725c94b2880593138173cc7107f26acc
parent     50902cf4232b7f61772ecb21cde01bdc0e0e64d7 (diff)
download   Comm-on-net-919540e0a3d30ae4377f0058b7060233a2b65f73.tar.bz2
           Comm-on-net-919540e0a3d30ae4377f0058b7060233a2b65f73.zip
Admin: full export of results from a crawl
-rw-r--r--  commcrawler/admin.py  124
1 file changed, 115 insertions(+), 9 deletions(-)
diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index b8f6206..aab1d6a 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -1,20 +1,35 @@
 import csv
 import json
+from io import BytesIO
+import os
+import tempfile
+import shutil
+import zipfile
 from pygments import highlight
 from pygments.lexers.data import JsonLexer
 from pygments.formatters.html import HtmlFormatter
 from ajax_select import make_ajax_form
-from django.contrib import admin
+from django.contrib import admin, messages
 from django.http import HttpResponse
 from django.utils.translation import ugettext_lazy as _
 from django.utils.safestring import mark_safe
+from django.shortcuts import HttpResponseRedirect, reverse
 from commonnet.admin_site import admin_site
 from commcrawler import models
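+# Column order shared by the per-row CSV admin action and the full
+# ZIP export below.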
+RESULT_FIELDNAMES = (
+    "target_id", "target__name", "target__url", "nb_external_link",
+    "nb_internal_link", "nb_images", "nb_facebook", "nb_twitter",
+    "nb_instagram", "nb_youtube", "nb_dailymotion", "nb_vimeo",
+    "nb_video", "nb_audio", "nb_internal_pdf", "nb_external_pdf",
+    "nb_internal_office", "nb_external_office", "is_online",
+    "redirection", "error")
+
+
 def export_as_csv_action(field_names,
                          description=_("Export selected as CSV file"),
                          header=True):
@@ -48,6 +63,103 @@ def export_as_csv_action(field_names,
     return export_as_csv
+def export_result_as_zip(description=_("Export all results as ZIP file"),
+                         header=True):
+
+    def write_result_csv(crawl, result_filename):
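+        # "error" is not fetched from the database: it is dropped from
+        # the lookup fields and re-inserted at the same position, so
+        # unreachable targets can be flagged in that column.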
+        error_pos = None
+        field_names = list(RESULT_FIELDNAMES)
+        if "error" in field_names:
+            error_pos = field_names.index("error")
+            field_names.pop(error_pos)
+        with open(result_filename, "w") as result_file:
+            writer = csv.writer(result_file)
+            if header:
+                writer.writerow(list(RESULT_FIELDNAMES))
+
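+            # One row per target: targets without a stored result get
+            # empty counters and are flagged "unreachable".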
+            for target in models.Target.objects.filter(crawl=crawl).all():
+                q = models.CrawlResult.objects.filter(crawl=crawl,
+                                                      target=target)
+                result = []
+                if not q.count():
+                    for col in field_names:
+                        if not col.startswith("target_"):
+                            result.append("")
+                        else:
+                            if col.startswith("target__"):
+                                key = "target__"
+                            else:
+                                key = "target_"
+                            result.append(str(getattr(target, col[len(key):])))
+                    if error_pos is not None:
+                        result.insert(error_pos, "unreachable")
+                else:
+                    result = list(q.values_list(*field_names).all()[0])
+                    if error_pos is not None:
+                        result.insert(error_pos, "")
+
+                result = ["" if r is None else str(r) for r in result]
+                writer.writerow(result)
+
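+    # One (source_id, destination_id, number) row per crawl relation.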
+    def write_relations_csv(crawl, filename):
+        queryset = models.CrawlRelation.objects.filter(crawl=crawl)
+        field_names = ("source_id", "destination_id", "number")
+        with open(filename, "w") as fle:
+            writer = csv.writer(fle)
+            if header:
+                writer.writerow(list(field_names))
+            for obj in queryset.values(*field_names).order_by('pk'):
+                row = []
+                for field in field_names:
+                    value = obj[field]
+                    if value is None:
+                        value = ""
+                    else:
+                        value = str(value)
+                    row.append(value)
+
+                writer.writerow(row)
+
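+    # The admin action itself: refuses to run on more than one crawl.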
+    def export_as_zip(modeladmin, request, queryset):
+        if queryset.count() != 1:
+            url = reverse(
+                'admin:%s_%s_changelist' % (
+                    modeladmin.model._meta.app_label,
+                    modeladmin.model._meta.model_name)
+            )
+            messages.add_message(
+                request, messages.ERROR,
+                str(_("Select only one item.")),
+            )
+            return HttpResponseRedirect(url)
+        crawl = queryset.all()[0]
+
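+        # Both CSV files, named after the crawl and its start date, are
+        # written to a throwaway directory.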
+        tmpdir = tempfile.mkdtemp(prefix="comm-on-net")
+        name = crawl.name
+        date = crawl.started.strftime('%Y-%m-%d')
+        result_filename = os.path.join(
+            tmpdir, "{}-results-{}.csv".format(name, date))
+        write_result_csv(crawl, result_filename)
+        relation_filename = os.path.join(
+            tmpdir, "{}-relations-{}.csv".format(name, date))
+        write_relations_csv(crawl, relation_filename)
+
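+        # Build the ZIP in memory so the temporary directory can be
+        # removed before the response is sent.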
+        s = BytesIO()
+        with zipfile.ZipFile(s, "w") as zf:
+            for filepath in (result_filename, relation_filename):
+                __, filename = os.path.split(filepath)
+                zf.write(filepath, filename)
+        shutil.rmtree(tmpdir)
+
+        response = HttpResponse(s.getvalue(), content_type='application/zip')
+        response['Content-Disposition'] = \
+            'attachment; filename={}-{}.zip'.format(name, date)
+        return response
+
+    export_as_zip.short_description = description
+    return export_as_zip
+
+
 class CrawlAdmin(admin.ModelAdmin):
     model = models.Crawl
     list_display = ("name", "status", "get_target_nb", "time_out", "created",
@@ -57,6 +169,7 @@ class CrawlAdmin(admin.ModelAdmin):
                     "crawl_ended", "ended")
     readonly_fields = ()
     form = make_ajax_form(model, {'targets': 'target'})
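+    # Expose the full ZIP export in the crawl changelist action menu.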
+    actions = [export_result_as_zip()]
 
     def get_target_nb(self, obj):
         return obj.target_nb
@@ -89,14 +202,7 @@ class CrawlResultAdmin(admin.ModelAdmin):
     exclude = ("crawl_result",)
     form = make_ajax_form(model, {'target': 'target'})
     actions = [
-        export_as_csv_action(
-            ("target_id", "target__name", "target__url", "nb_external_link",
-             "nb_internal_link", "nb_images", "nb_facebook", "nb_twitter",
-             "nb_instagram", "nb_youtube", "nb_dailymotion", "nb_vimeo",
-             "nb_video", "nb_audio", "nb_internal_pdf", "nb_external_pdf",
-             "nb_internal_office", "nb_external_office", "is_online",
-             "redirection")
-        )
+        export_as_csv_action(RESULT_FIELDNAMES)
     ]
 
     def open_link(self, obj):
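
A minimal smoke test for the new action (a sketch only: the pytest-django
"admin_client" fixture and the "crawl" fixture are assumptions, not part
of this change):

    # Hypothetical test; assumes a Crawl instance whose "started" date is
    # set, and a logged-in superuser client (pytest-django's admin_client).
    from django.urls import reverse

    def test_export_result_as_zip(admin_client, crawl):
        url = reverse('admin:commcrawler_crawl_changelist')
        response = admin_client.post(url, {
            'action': 'export_as_zip',       # function name of the action
            '_selected_action': [crawl.pk],  # admin selection checkbox field
        })
        assert response['Content-Type'] == 'application/zip'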