#! /usr/bin/env python
# -*- coding: utf-8 -*-
#######################  L Y X B L O G G E R    ########################
#   This program allows you to post to your WordPress blog right from  #
#   LyX. The input to this script is the LyXHTML output from LyX 2.0.  #
#   This script will connect using xml-rpc.                            #
#                                                                      #
#################     D O C U M E N T A T I O N       ##################
#                                                                      #
#   Please see README.html for LyxBlogger documentation.               #
#   Alternatively, see the wiki page at                                #
#   http://wiki.lyx.org/Tools/LyxBlogger                               #
#   Please submit any issues or suggestions to the author.             #
#                                                                      #
#####################       A U T H O R       ##########################
#                                                                      #
#   Copyright (C) 2010 Jack Desert                                     #
#   jackdesert556@gmail.com                                            #
#   http://www.LetsEATalready.com                                      #
#                                                                      #
######################      L I C E N S E     ##########################
#                                                                      #
#   This program is free software; you can redistribute it and/or      #
#   modify it under the terms of the GNU General Public License        #
#   as published by the Free Software Foundation; either version 2     #
#   of the License, or any later version.                              #
#                                                                      #
#   This program is distributed in the hope that it will be useful,    #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of     #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
#   GNU General Public License for more details.                       #
#                                                                      #
#   You should have received a copy of the GNU General Public License  #
#   along with this program; if not, write to                          #
#                                                                      #
#   the Free Software Foundation, Inc.,                                #
#   59 Temple Place - Suite 330,                                       #
#   Boston, MA   02111-1307, USA.                                      #
#                                                                      #
########################################################################


############   U S E R    D E F I N E D    V A R I A B L E S   #########

AUTO_URL = 'http://blogtest.letseatalready.com/xmlrpc.php'
AUTO_USER = 'test'
AUTO_PASSWORD = 'test'
AUTO_LOGIN = True


###########  T H E    R E S T    O F    T H E    C O D E ###############
import sys, os, re
import wordpresslib
from getpass import getpass

def pr3(input):
    """Write *input* followed by a newline to stdout and flush at once.

    sys.stdout is used directly (rather than print) so the output can be
    consumed line by line by an automated test harness; the reader is
    expected to rstrip the trailing newline.
    """
    line = input + '\n'
    stream = sys.stdout
    stream.write(line)
    # Flush after every line so the other side can read it immediately.
    stream.flush()


# Command line contract:
#   argv[1]  the exported (x)html file to publish
#   argv[2]  optional marker; when equal to CALLED_FROM_XTERM the script is
#            already running inside the xterm window it spawned for itself
THIS_FILE = sys.argv[0]
error_msg = ''
if len(sys.argv) < 2:
    # Without a file name there is nothing to publish. Report usage instead
    # of dying with an IndexError traceback.
    sys.stderr.write('Usage: %s FILE.xhtml|FILE.html\n' % THIS_FILE)
    sys.exit(1)
input_file = sys.argv[1]    # Incoming file name
IMAGE_DIR = ''              # Empty until defined otherwise
CALLED_FROM_XTERM = 'string_of_text_to_pass_when_called_from_xterm'

# If already called from xterm, run the program as normal.
# Otherwise, call the program from xterm so it's visible
if not (len(sys.argv) >= 3 and sys.argv[2] == CALLED_FROM_XTERM):
    import subprocess

    # Spawn a new xterm window to run this program in
    # -hold means leave window open after process completes
    # -fg is foreground color
    # -bg is background color
    # -fn is font (size)
    # -e means call a program
    # An argument list (no shell) keeps file names containing spaces or
    # shell metacharacters intact and prevents command injection.
    xterm_command = [
        'xterm', '-hold', '-fg', 'gold', '-bg', 'black', '-fn', '10x20',
        '-e', THIS_FILE, input_file, CALLED_FROM_XTERM,
    ]
    try:
        subprocess.call(xterm_command)
    except OSError:
        # Raised when the xterm executable cannot be started at all.
        sys.stderr.write('Error: could not start xterm\n')
        sys.exit(1)
    sys.exit(0)     # Exit so program is not repeated.

# Greet the user with the program banner, one line per pr3 call.
for banner_line in (
        'LYXBLOGGER',
        'Welcome to LyxBlogger',
        'Author: Jack Desert',
        'Website: LetsEATalready.com\n'):
    pr3(banner_line)

# Images referenced by the document live next to it, so remember the folder
# part of input_file (everything up to and including the last '/').
# Plain slicing is used instead of a regex: the former pattern needed at
# least two characters before the slash and crashed on paths such as
# '/file.xhtml' or 'a/file.xhtml'.
if '/' in input_file:
    IMAGE_DIR = input_file[:input_file.rfind('/') + 1]

# Define Which Format to Use
# The file extension decides which exporter produced the document; exactly
# one of the two ENGINE_* flags is set, otherwise the script aborts.
pr3("FORMAT")
ENGINE_ELYXER, ENGINE_INTERNAL = False, False
if input_file.endswith('.xhtml'):
    pr3("Input file ends in xhtml. Assuming this came from LyXHTML")
    ENGINE_INTERNAL = True
elif input_file.endswith('.html'):
    pr3("Input file ends in html. Assuming this came from eLyXer")
    ENGINE_ELYXER = True
else:
    # Real newlines here; the message previously ended in a literal '/n/n'.
    error_msg += 'Error: Input file must be of type .xhtml or .html\n\n'
    raise Exception(error_msg)

# Read data from file
# The with-statement closes the file even if reading raises.
with open(input_file, 'r') as f:
    html = f.read()

# RECORD TITLE FROM HEADER TO USE AS POST
# The title text is captured as group 1. DOTALL lets a title wrapped over
# several lines match, and a single-character title is accepted too.
tit_exp = re.compile(r'''
    <title>         # Start of the <title> tag
    (.+?)           # The title text (non-greedy, one or more characters)
    </title>        # Closing </title> tag
    ''', re.VERBOSE | re.DOTALL)    # VERBOSE allows the commented layout
tit_obj = tit_exp.search(html)
# eLyXer uses 'Converted document' as the default title if there is none.
# This code prevents 'Converted document' from being posted in your blog.
# TITLE_EXPECTED records that the header carried a <title> tag at all;
# TITLE_PROMPT stays True until a usable title has been found.
TITLE_EXPECTED, TITLE_PROMPT = False, True
pr3("\nTITLE")
if tit_obj:
    TITLE_EXPECTED = True
    # Collapse line breaks and runs of whitespace into single spaces.
    blog_title = ' '.join(tit_obj.group(1).split())
    if blog_title and blog_title != 'Converted document':
        TITLE_PROMPT = False
if TITLE_PROMPT:
    pr3('No title found in document.')
    pr3('Please enter a title now')
    blog_title = sys.stdin.readline().replace('\n', '')
pr3('Using title: ' + blog_title)


# REMOVING TITLE FROM BODY
# The title is sent separately as the post title, so the <h1 class="title">
# element is removed from the body to keep it from showing up twice.
# Typical body title using ENGINE_INTERNAL:
#   <h1 class="title"><a id='magicparlabel-309' />
#   Example Article Title</h1>
# Typical body title using ENGINE_ELYXER using optional sizing:
#   <h1 class="title">
#   <span class="footnotesize">Hi Brian</span>
#
#   </h1>
# A raw string is used and the literal space is written as [ ] so the
# pattern contains no invalid string escape sequences.
exp = re.compile(r'''
    <h1[ ]                 # Beginning of tag with a literal space
    class="title">         # The rest of the tag
    ..{1,}?                # Anything (non-greedy)
    </h1>                  # Closing tag
    ''', re.VERBOSE | re.DOTALL)                 # . can include linebreaks
bt_obj = exp.search(html)
if bt_obj:
    entire_bt_tag = bt_obj.group()
    html = html.replace(entire_bt_tag, '')
elif TITLE_EXPECTED:
    # A <title> tag was found in the header, but no title element was
    # found in the body.
    pr3 ('\nWARNING! The title of your entry may appear twice. Please notify the author at jackdesert556@gmail.com to have this bug squashed.\n\n Press Enter to continue uploading.')
    sys.stdin.readline()

# Keep only what lies between <body> and </body>.
START_TAG = '<body>'
END_TAG = '</body>'
_head, _start_found, _tail = html.partition(START_TAG)
if _start_found:
    # partition() returns a non-empty separator only when the tag is present
    html = _tail
html = html.partition(END_TAG)[0]

# Textual clean-ups, applied in order:
#   * turn escaped &lt;code&gt; / &lt;/code&gt; back into real <code> tags
#   * drop the arrows from footnote and margin note markers
for _old, _new in (
        ('&lt;code&gt;', '<code>'),
        ('&lt;/code&gt;', '</code>'),
        ('[→', '['),
        ('→]', ']')):
    html = html.replace(_old, _new)


# Strip off cut material using the flag '#! CUT MATERIAL'
# Everything from the paragraph that starts with CUT_FLAG to the end of the
# document is dropped before uploading.
CUT_FLAG = '#! CUT MATERIAL'
pr3 ("\nCUT_FLAG")
pr3 ("Anything placed after the CUT_FLAG in your document will not be uploaded.")
pr3 ("This is helpful for keeping notes that you might put back in a later draft.")
# Escape the flag so it is always matched literally, even if it is later
# changed to contain regex metacharacters.
cut_flag_pattern = re.escape(CUT_FLAG)
# Raw strings keep \D, \d and \n as regex escapes rather than (invalid)
# string escapes.
if ENGINE_INTERNAL:
    # INTERNAL uses a magicparlabel-num
    exp = re.compile(
        r'''<div class="\D{1,}?"><a id='magicparlabel-\d{1,}' />\n'''
        + cut_flag_pattern)
elif ENGINE_ELYXER:
    # ELYXER may put a <span> tag in if you change the size
    exp = re.compile(
        r'<div class="\D{1,}?">\n(<span class="\D{1,}?">){0,1}?'
        + cut_flag_pattern)
srch_obj = exp.search(html)
if srch_obj:
    # Keep only the text in front of the paragraph holding the flag.
    start_index = srch_obj.start()
    html = html[:start_index]
    pr3 ('The Following String was found in your document and was ')
    pr3 ('successfully used as a CUT_FLAG: ')
    pr3 (CUT_FLAG + '\n')
else:
    pr3 ("Place the contents of the following line at the beginning of")
    pr3 (" a paragraph to use it as a CUT_FLAG: ")
    pr3 (CUT_FLAG + '\n')




# Offer the preconfigured account first; any answer other than Y/y falls
# through to the manual prompts below.
if AUTO_LOGIN == True:
    # Show the site without its trailing '/xmlrpc.php' (11 characters).
    pr3("Publish this document to " + AUTO_URL[:-11] + "?   Y (N)")
    answer = sys.stdin.readline()
    if answer in ('Y\n', 'y\n'):
        wordpress_url, user, password = AUTO_URL, AUTO_USER, AUTO_PASSWORD
    else:
        AUTO_LOGIN = False

# Manual login: ask for site, user name and password.
if AUTO_LOGIN == False:
    pr3("URL")
    pr3("Please enter your WordPress URL")
    pr3("Example: cool_site.wordpress.com")
    typed_url = sys.stdin.readline()
    # Reduce the input to a bare host name, then build the XML-RPC endpoint.
    for fragment in ('http://', 'www.', '\n'):
        typed_url = typed_url.replace(fragment, '')
    wordpress_url = 'http://' + typed_url + '/xmlrpc.php'
    pr3("The page we'll be talking is " + wordpress_url)
    pr3("\nUSERNAME")
    pr3("Please enter your WordPress username")
    user = sys.stdin.readline().replace('\n', '')
    pr3("Username is " + user + '.')
    pr3("\nPASSWORD")
    pr3("Please enter your WordPress password")
    password = getpass()    # getpass avoids echoing the password
    pr3("Thank you.")



# prepare client object
wp = wordpresslib.WordPressClient(wordpress_url, user, password)

# select blog id
wp.selectBlog(0)


# Show the categories as a 1-based numbered menu and keep asking until the
# user enters one of the listed numbers. The result is stored in cat_id.
pr3 ('\nCATEGORY')
pr3 ('Retrieving Categories From Server')
cat_list = wp.getCategoryList()
for cat_counter, cat in enumerate(cat_list, 1):
    pr3 (str(cat_counter) + '.  ' + cat.name)
cat_id = None
while True:
    pr3 ('Please enter the NUMBER next to the category for this post')
    cat_response = sys.stdin.readline()
    if not cat_response:
        # readline() returns '' only at end of input; asking again would
        # loop forever.
        raise EOFError('Input ended before a category was selected')
    try:
        cat = int(cat_response.strip())
    except ValueError:
        pr3 ("Category Response Not Understood.\n")
        continue
    # Reject 0 and negative numbers explicitly: as list indexes they would
    # silently select a category counted from the end of the list.
    # NOTE(review): assumes cat_list supports len(); it is already indexed
    # like a sequence below.
    if not 1 <= cat <= len(cat_list):
        pr3 ("Category Response Not Understood.\n")
        continue
    cat_id = cat_list[cat - 1].id
    pr3 ('Category Selected: ' + cat_list[cat - 1].name + '\n')
    break



# Find local location of a single image within the (x)html file
# img_exp matches one <img> tag whose src is still a local file. Tags whose
# src already starts with http:// or https:// have been uploaded and are
# skipped. Raw strings are used and literal spaces are written as [ ] so the
# patterns contain no invalid string escape sequences.

if ENGINE_INTERNAL:
    # INTERNAL img tags look something like this:
    # <img src='0_home_jd_Escritorio_rv-8_tiny.jpg' alt='image: 0_home_jd_Escritorio_rv-8_tiny.jpg' />
    img_exp = re.compile(r'''
        <img[ ]src='    # The beginning of an <img> tag with a literal space
        (?!https?://)   # Negative lookahead: skip sources that are web references
        ..*?            # Non-greedy (short as possible match) of stuff in middle
        />              # The closing of the <img> tag
        ''', re.VERBOSE)
elif ENGINE_ELYXER:
    # eLyXer img tags looks something like this:
    # <img class="embedded" src="rv-8_tiny.jpg" alt="figure rv-8_tiny.jpg" style="max-width: 2048px; max-height: 1536px; "/>
    # Notice ELYXER uses double quotes instead of single quotes within the tag.
    img_exp = re.compile(r'''
        <img[ ]class="embedded"[ ]      # The beginning of an <img> tag with two literal spaces
        src="           # Note use of double quotes instead of single
        (?!https?://)   # Negative lookahead: skip sources that are web references
        ..*?            # Non-greedy (short as possible match) of stuff in middle
        />              # The closing of the <img> tag
        ''', re.VERBOSE)





# Upload every locally referenced image and point the document at the
# uploaded copy. Each pass handles the first <img> tag that img_exp still
# matches, so the loop ends once no local image reference remains.

# Pattern extracting the src value (group 1) from an <img> tag. The only
# difference between the two engines is single vs double quotes. It is
# compiled once here rather than on every pass through the loop.
if ENGINE_INTERNAL:
    add_exp = re.compile(r"src='(..*?)'")
elif ENGINE_ELYXER:
    add_exp = re.compile(r'src="(..*?)"')

img_obj = img_exp.search(html)
imageSrc = None
if img_obj:
    pr3 ('IMAGES\nFirst We\'ll Upload Your Images')
while img_obj:
    img_tag = img_obj.group()
    # Find local address of image
    add_obj = add_exp.search(img_tag)
    if add_obj is None:
        pr3 ("Error parsing img tag: " + img_tag)
        msg = "LyxBlogger failed to find src attribute in <img> tag"
        raise Exception(msg)
    short_address = add_obj.group(1)    # The bare src value, quotes stripped
    local_path = IMAGE_DIR + short_address

    # Floor division keeps the size a whole number of kB on any Python.
    filesize = str(os.path.getsize(local_path) // 1024) + ' kB'
    pr3("Uploading image: " + short_address + '.  Size: ' + filesize )
    # upload image for post
    imageSrc = wp.newMediaObject(local_path)
    # Replaces every occurrence of the local name in the document, not
    # only the one inside this tag.
    html = html.replace(short_address, imageSrc)
    img_obj = img_exp.search(html)          # Look for the next local image


# create post object
post = wordpresslib.WordPressPost()
post.title = blog_title
post.description = html

# The categories attribute is given a one-element tuple holding the id
# chosen by the user.
post.categories = (cat_id,)
# publish post
pr3 ('\nWORDS\nNow We\'ll Upload Your Thoughts')
# Size shown is that of the input file on disk. Floor division keeps it a
# whole number of kB on any Python.
filesize = str(os.path.getsize(input_file) // 1024) + ' kB'
pr3("Uploading xhtml: " + input_file + '.  Size: ' + filesize )
idNewPost = wp.newPost(post, True)


pr3 ('\nSUCCESS!')
# Strip the leading 'http://' (7 chars) and trailing '/xmlrpc.php' (11 chars)
pr3 ('You just published your document at ' + wordpress_url[7:-11])
pr3 ('Thank you for using LyxBlogger.\n\n')
pr3("SHIFT + PAGE UP scrolls screen        ALT + F4 closes this window")



