diff options
Diffstat (limited to 'chimere/utils.py')
| -rw-r--r-- | chimere/utils.py | 45 | 
1 files changed, 36 insertions, 9 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
index 2d74095..73e38ba 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -73,7 +73,7 @@ class ImportManager(object):
         raise NotImplementedError
 
     def create_or_update_item(self, cls, values, import_key, version=None,
-                              key='', pk=None):
+                              key='', pk=None, category=None):
         updated, created, item = False, False, None
         import_key = unicode(import_key).replace(':', '^')
         if not values.get('name'):
@@ -85,6 +85,7 @@ class ImportManager(object):
             dct_import = {
                 'import_key__icontains':'%s:%s;' % (key, import_key),
                 'import_source':self.importer_instance.source}
+            ref_item = cls.objects.filter(**dct_import)
             try:
                 item = None
                 if pk:
@@ -137,8 +138,11 @@ class ImportManager(object):
         if import_key:
             item.set_key(key, import_key)
         item.categories.clear()
-        for cat in self.importer_instance.categories.all():
-            item.categories.add(cat)
+        if category:
+            item.categories.add(category)
+        else:
+            for cat in self.importer_instance.categories.all():
+                item.categories.add(cat)
         return item, updated, created
 
     @classmethod
@@ -878,8 +882,11 @@ DATE_PARSINGS = {'fr_FR':[
                          ]
                 }
 
+def clean_field(value):
+    return value.strip()
 
 class HtmlXsltManager(ImportManager):
+    PARSER = 'HTMLParser'
     def get(self):
         u"""
         Get data from the source
@@ -904,7 +911,7 @@ class HtmlXsltManager(ImportManager):
         # convert it to valid XHTML
         #doc, errors = tidy_document(main_page)
         doc = main_page
-        dom = etree.HTML(doc, etree.HTMLParser())
+        dom = etree.HTML(doc, getattr(etree, self.PARSER)())
         try:
             xslt = etree.parse(self.importer_instance.source_file)
             self.importer_instance.source_file.seek(0)
@@ -926,8 +933,8 @@ class HtmlXsltManager(ImportManager):
         base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
         base_url += u"/"
         for item in newdom.getroot():
-            c_item = {child.tag:child.text for child in item.getchildren()
-                                                             if child.text}
+            c_item = {child.tag:clean_field(child.text)
+                        for child in item.getchildren() if child.text}
             # try to have more information on the linked page
             if transform_child and 'link' in c_item:
                 # not an absolute address
@@ -960,19 +967,30 @@ class HtmlXsltManager(ImportManager):
                     val = re.sub(r, replaced % {'base_url':base_url}, val)
                 item[k] = html_unescape(val)
         updated_item, new_item = 0, 0
+        key_categories = self.importer_instance.get_key_category_dict()
+        missing_cats = set()
         for item in items:
             if not self.importer_instance.default_localisation and \
-               not "point" in item:
+               not "point" in item and not ("lat" in item and item['lat']):
                 continue
             cls = None
             dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
                                         self.importer_instance.origin),
                    'license':self.importer_instance.license,
                    'name':item['name']}
+            category = None
+            if 'category' in item and item['category']:
+                if item['category'] in key_categories:
+                    category = key_categories[item['category']]
+                else:
+                    missing_cats.add(item['category'])
             cls = Marker
             if 'point' in item:
                 x, y = item['point'].split(",")
                 dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+            elif 'lat' in item and item['lat']:
+                dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+                                                           item['lat'])
             else:
                 dct['point'] = self.importer_instance.default_localisation
             dct['description'] = item['description']
@@ -1001,9 +1019,18 @@ class HtmlXsltManager(ImportManager):
                               int(values['day2']))
                         break
             key = item['key']
-            it, updated, created = self.create_or_update_item(cls, dct, key)
+            it, updated, created = self.create_or_update_item(cls, dct, key,
+                                                              category=category)
             if updated:
                 updated_item += 1
             if created:
                 new_item += 1
-        return (new_item, updated_item, '')
+        msg = ''
+        if missing_cats:
+            msg = _(u"Names \"%s\" doesn't match existing categories. "
+                u"Modify the import to match theses names with categories.") % (
+                    u'", "'.join(missing_cats))
+        return (new_item, updated_item, msg)
+
+class XMLXsltManager(HtmlXsltManager):
+    PARSER = 'XMLParser'
