master trunk of gnowsys-studio

author: gnowgi <nagarjun@gnowledge.org> 2012-03-15 16:19:20 +0530
committer: gnowgi <nagarjun@gnowledge.org> 2012-03-15 16:19:20 +0530
commit: 7a4f561e851fdc7246d804c3abb6748b8a4199a6 (patch)
tree: d2afc3463fd49625a9be482012f5c3bfcf7c42b9 /gstudio/management/commands/wp2gstudio.py
download: gnowsys-7a4f561e851fdc7246d804c3abb6748b8a4199a6.tar.gz
1 files changed, 406 insertions, 0 deletions
diff --git a/gstudio/management/commands/wp2gstudio.py b/gstudio/management/commands/wp2gstudio.py
new file mode 100644
index 0000000..5da9eb2
--- /dev/null
+++ b/gstudio/management/commands/wp2gstudio.py
@@ -0,0 +1,406 @@
+# Copyright (c) 2011,  2012 Free Software Foundation
+
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU Affero General Public License as
+#     published by the Free Software Foundation, either version 3 of the
+#     License, or (at your option) any later version.
+
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU Affero General Public License for more details.
+
+#     You should have received a copy of the GNU Affero General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+# This project incorporates work covered by the following copyright and permission notice:  
+
+#    Copyright (c) 2009, Julien Fache
+#    All rights reserved.
+
+#    Redistribution and use in source and binary forms, with or without
+#    modification, are permitted provided that the following conditions
+#    are met:
+
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of the author nor the names of other
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+
+#    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+#    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+#    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+#    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+#    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+#    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+#    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+#    OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Copyright (c) 2011,  2012 Free Software Foundation
+
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU Affero General Public License as
+#     published by the Free Software Foundation, either version 3 of the
+#     License, or (at your option) any later version.
+
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU Affero General Public License for more details.
+
+#     You should have received a copy of the GNU Affero General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+"""WordPress to Gstudio command module"""
+import sys
+from datetime import datetime
+from optparse import make_option
+from xml.etree import ElementTree as ET
+
+from django.utils.html import strip_tags
+from django.db.utils import IntegrityError
+from django.utils.encoding import smart_str
+from django.contrib.auth.models import User
+from django.contrib.sites.models import Site
+from django.utils.text import truncate_words
+from django.template.defaultfilters import slugify
+from django.contrib import comments
+from django.core.management.base import CommandError
+from django.core.management.base import LabelCommand
+
+from tagging.models import Tag
+
+from gstudio import __version__
+from gstudio.models import Nodetype
+from gstudio.models import Metatype
+from gstudio.signals import disconnect_gstudio_signals
+from gstudio.managers import DRAFT, HIDDEN, PUBLISHED
+
+WP_NS = 'http://wordpress.org/export/%s/'
+
+
+class Command(LabelCommand):
+    """Command object for importing a WordPress blog
+    into Gstudio via a WordPress eXtended RSS (WXR) file."""
+    help = 'Import a Wordpress blog into Gstudio.'
+    label = 'WXR file'
+    args = 'wordpress.xml'
+
+    option_list = LabelCommand.option_list + (
+        make_option('--noautoexcerpt', action='store_false',
+                    dest='auto_excerpt', default=True,
+                    help='Do NOT generate an excerpt if not present.'),
+        make_option('--author', dest='author', default='',
+                    help='All imported nodetypes belong to specified author'),
+        make_option('--wxr_version', dest='wxr_version', default='1.0',
+                    help='Wordpress XML export version'),
+        )
+
+    SITE = Site.objects.get_current()
+    REVERSE_STATUS = {'pending': DRAFT,
+                      'draft': DRAFT,
+                      'auto-draft': DRAFT,
+                      'inherit': DRAFT,
+                      'publish': PUBLISHED,
+                      'future': PUBLISHED,
+                      'trash': HIDDEN,
+                      'private': PUBLISHED}
+
+    def __init__(self):
+        """Init the Command and add custom styles"""
+        super(Command, self).__init__()
+        self.style.TITLE = self.style.SQL_FIELD
+        self.style.STEP = self.style.SQL_COLTYPE
+        self.style.ITEM = self.style.HTTP_INFO
+        disconnect_gstudio_signals()
+
+    def write_out(self, message, verbosity_level=1):
+        """Convenient method for outputing"""
+        if self.verbosity and self.verbosity >= verbosity_level:
+            sys.stdout.write(smart_str(message))
+            sys.stdout.flush()
+
+    def handle_label(self, wxr_file, **options):
+        global WP_NS
+        self.verbosity = int(options.get('verbosity', 1))
+        self.auto_excerpt = options.get('auto_excerpt', True)
+        WP_NS = WP_NS % options.get('wxr_version')
+        self.default_author = options.get('author')
+        if self.default_author:
+            try:
+                self.default_author = User.objects.get(
+                    username=self.default_author)
+            except User.DoesNotExist:
+                raise CommandError('Invalid username for default author')
+
+        self.write_out(self.style.TITLE(
+            'Starting migration from Wordpress to Gstudio %s:\n' % __version__))
+
+        tree = ET.parse(wxr_file)
+
+        self.authors = self.import_authors(tree)
+
+        self.metatypes = self.import_metatypes(
+            tree.findall('channel/{%s}metatype' % WP_NS))
+
+        self.import_tags(tree.findall('channel/{%s}tag' % WP_NS))
+
+        self.import_nodetypes(tree.findall('channel/item'))
+
+    def import_authors(self, tree):
+        """Retrieve all the authors used in posts
+        and convert it to new or existing user, and
+        return the convertion"""
+        self.write_out(self.style.STEP('- Importing authors\n'))
+
+        post_authors = set()
+        for item in tree.findall('channel/item'):
+            post_type = item.find('{%s}post_type' % WP_NS).text
+            if post_type == 'post':
+                post_authors.add(item.find(
+                    '{http://purl.org/dc/elements/1.1/}creator').text)
+
+        self.write_out('%i authors found.\n' % len(post_authors))
+
+        authors = {}
+        for post_author in post_authors:
+            if self.default_author:
+                authors[post_author] = self.default_author
+            else:
+                authors[post_author] = self.migrate_author(post_author)
+        return authors
+
+    def migrate_author(self, author_name):
+        """Handle actions for migrating the users"""
+        action_text = "The author '%s' needs to be migrated to an User:\n"\
+                      "1. Use an existing user ?\n"\
+                      "2. Create a new user ?\n"\
+                      "Please select a choice: " % author_name
+        while 42:
+            selection = raw_input(smart_str(action_text))
+            if selection in '12':
+                break
+        if selection == '1':
+            users = User.objects.all()
+            usernames = [user.username for user in users]
+            while 42:
+                user_text = "1. Select your user, by typing " \
+                            "one of theses usernames:\n"\
+                            "[%s]\n"\
+                            "Please select a choice: " % ', '.join(usernames)
+                user_selected = raw_input(user_text)
+                if user_selected in usernames:
+                    break
+            return users.get(username=user_selected)
+        else:
+            create_text = "2. Please type the email of the '%s' user: " % \
+                          author_name
+            author_mail = raw_input(create_text)
+            try:
+                return User.objects.create_user(author_name, author_mail)
+            except IntegrityError:
+                return User.objects.get(username=author_name)
+
+    def import_metatypes(self, metatype_nodes):
+        """Import all the metatypes from 'wp:metatype' nodes,
+        because metatypes in 'item' nodes are not necessarily
+        all the metatypes and returning it in a dict for
+        database optimizations."""
+        self.write_out(self.style.STEP('- Importing metatypes\n'))
+
+        metatypes = {}
+        for metatype_node in metatype_nodes:
+            title = metatype_node.find('{%s}cat_name' % WP_NS).text[:255]
+            slug = metatype_node.find(
+                '{%s}metatype_nicename' % WP_NS).text[:255]
+            try:
+                parent = metatype_node.find(
+                    '{%s}metatype_parent' % WP_NS).text[:255]
+            except TypeError:
+                parent = None
+            self.write_out('> %s... ' % title)
+            metatype, created = Metatype.objects.get_or_create(
+                title=title, slug=slug, parent=metatypes.get(parent))
+            metatypes[title] = metatype
+            self.write_out(self.style.ITEM('OK\n'))
+        return metatypes
+
+    def import_tags(self, tag_nodes):
+        """Import all the tags form 'wp:tag' nodes,
+        because tags in 'item' nodes are not necessarily
+        all the tags, then use only the nicename, because it's like
+        a slug and the true tag name may be not valid for url usage."""
+        self.write_out(self.style.STEP('- Importing tags\n'))
+        for tag_node in tag_nodes:
+            tag_name = tag_node.find(
+                '{%s}tag_slug' % WP_NS).text[:50]
+            self.write_out('> %s... ' % tag_name)
+            Tag.objects.get_or_create(name=tag_name)
+            self.write_out(self.style.ITEM('OK\n'))
+
+    def get_nodetype_tags(self, metatypes):
+        """Return a list of nodetype's tags,
+        by using the nicename for url compatibility"""
+        tags = []
+        for metatype in metatypes:
+            domain = metatype.attrib.get('domain', 'metatype')
+            if domain == 'tag' and metatype.attrib.get('nicename'):
+                tags.append(metatype.attrib.get('nicename'))
+        return tags
+
+    def get_nodetype_metatypes(self, metatype_nodes):
+        """Return a list of nodetype's metatypes
+        based of imported metatypes"""
+        metatypes = []
+        for metatype_node in metatype_nodes:
+            domain = metatype_node.attrib.get('domain')
+            if domain == 'metatype':
+                metatypes.append(self.metatypes[metatype_node.text])
+        return metatypes
+
+    def import_nodetype(self, title, content, item_node):
+        """Importing a nodetype but some data are missing like
+        the image, related nodetypes, start_publication and end_publication.
+        start_publication and creation_date will use the same value,
+        wich is always in Wordpress $post->post_date"""
+        creation_date = datetime.strptime(
+            item_node.find('{%s}post_date' % WP_NS).text, '%Y-%m-%d %H:%M:%S')
+
+        excerpt = item_node.find('{%sexcerpt/}encoded' % WP_NS).text
+        if not excerpt:
+            if self.auto_excerpt:
+                excerpt = truncate_words(strip_tags(content), 50)
+            else:
+                excerpt = ''
+
+        nodetype_dict = {
+            'content': content,
+            'excerpt': excerpt,
+            # Prefer use this function than
+            # item_node.find('{%s}post_name' % WP_NS).text
+            # Because slug can be not well formated
+            'slug': slugify(title)[:255] or 'post-%s' % item_node.find(
+                '{%s}post_id' % WP_NS).text,
+            'tags': ', '.join(self.get_nodetype_tags(item_node.findall(
+                'metatype'))),
+            'status': self.REVERSE_STATUS[item_node.find(
+                '{%s}status' % WP_NS).text],
+            'comment_enabled': item_node.find(
+                '{%s}comment_status' % WP_NS).text == 'open',
+            'pingback_enabled': item_node.find(
+                '{%s}ping_status' % WP_NS).text == 'open',
+            'featured': item_node.find('{%s}is_sticky' % WP_NS).text == '1',
+            'password': item_node.find('{%s}post_password' % WP_NS).text or '',
+            'login_required': item_node.find(
+                '{%s}status' % WP_NS).text == 'private',
+            'creation_date': creation_date,
+            'last_update': datetime.now(),
+            'start_publication': creation_date}
+
+        nodetype, created = Nodetype.objects.get_or_create(title=title,
+                                                     defaults=nodetype_dict)
+
+        nodetype.metatypes.add(*self.get_nodetype_metatypes(
+            item_node.findall('metatype')))
+        nodetype.authors.add(self.authors[item_node.find(
+            '{http://purl.org/dc/elements/1.1/}creator').text])
+        nodetype.sites.add(self.SITE)
+
+        #current_id = item_node.find('{%s}post_id' % WP_NS).text
+        #parent_id = item_node.find('%s}post_parent' % WP_NS).text
+
+        return nodetype
+
+    def import_nodetypes(self, items):
+        """Loops over items and find nodetype to import,
+        a nodetype need to have 'post_type' set to 'post' and
+        have content."""
+        self.write_out(self.style.STEP('- Importing nodetypes\n'))
+
+        for item_node in items:
+            title = (item_node.find('title').text or '')[:255]
+            post_type = item_node.find('{%s}post_type' % WP_NS).text
+            content = item_node.find(
+                '{http://purl.org/rss/1.0/modules/content/}encoded').text
+
+            if post_type == 'post' and content and title:
+                self.write_out('> %s... ' % title)
+                nodetype = self.import_nodetype(title, content, item_node)
+                self.write_out(self.style.ITEM('OK\n'))
+                self.import_comments(nodetype, item_node.findall(
+                    '{%s}comment/' % WP_NS))
+            else:
+                self.write_out('> %s... ' % title, 2)
+                self.write_out(self.style.NOTICE('SKIPPED (not a post)\n'), 2)
+
+    def import_comments(self, nodetype, comment_nodes):
+        """Loops over comments nodes and import then
+        in django.contrib.comments"""
+        for comment_node in comment_nodes:
+            is_pingback = comment_node.find(
+                '{%s}comment_type' % WP_NS).text == 'pingback'
+            is_trackback = comment_node.find(
+                '{%s}comment_type' % WP_NS).text == 'trackback'
+
+            title = 'Comment #%s' % (comment_node.find(
+                '{%s}comment_id/' % WP_NS).text)
+            self.write_out(' > %s... ' % title)
+
+            content = comment_node.find(
+                '{%s}comment_content/' % WP_NS).text
+            if not content:
+                self.write_out(self.style.NOTICE('SKIPPED (unfilled)\n'))
+                return
+
+            submit_date = datetime.strptime(
+                comment_node.find('{%s}comment_date' % WP_NS).text,
+                '%Y-%m-%d %H:%M:%S')
+
+            approvation = comment_node.find(
+                '{%s}comment_approved' % WP_NS).text
+            is_public = True
+            is_removed = False
+            if approvation != '1':
+                is_removed = True
+            if approvation == 'spam':
+                is_public = False
+
+            comment_dict = {
+                'content_object': nodetype,
+                'site': self.SITE,
+                'user_name': comment_node.find(
+                    '{%s}comment_author/' % WP_NS).text[:50],
+                'user_email': comment_node.find(
+                    '{%s}comment_author_email/' % WP_NS).text or '',
+                'user_url': comment_node.find(
+                    '{%s}comment_author_url/' % WP_NS).text or '',
+                'comment': content,
+                'submit_date': submit_date,
+                'ip_address': comment_node.find(
+                    '{%s}comment_author_IP/' % WP_NS).text or '',
+                'is_public': is_public,
+                'is_removed': is_removed, }
+            comment = comments.get_model()(**comment_dict)
+            comment.save()
+            if approvation == 'spam':
+                comment.flags.create(
+                    user=nodetype.authors.all()[0], flag='spam')
+            if is_pingback:
+                comment.flags.create(
+                    user=nodetype.authors.all()[0], flag='pingback')
+            if is_trackback:
+                comment.flags.create(
+                    user=nodetype.authors.all()[0], flag='trackback')
+
+            self.write_out(self.style.ITEM('OK\n'))
author	gnowgi <nagarjun@gnowledge.org>	2012-03-15 16:19:20 +0530
committer	gnowgi <nagarjun@gnowledge.org>	2012-03-15 16:19:20 +0530
commit	7a4f561e851fdc7246d804c3abb6748b8a4199a6 (patch)
tree	d2afc3463fd49625a9be482012f5c3bfcf7c42b9 /gstudio/management/commands/wp2gstudio.py
download	gnowsys-7a4f561e851fdc7246d804c3abb6748b8a4199a6.tar.gz