#! /bin/sh
# Convert Twitter Archive Parser markdown into reasonable per-month HTML
#
# Designed to process the output of:
#
# https://github.com/timhutton/twitter-archive-parser
#
# over a 2022 Twitter Archive download
#
# TODO: convert 2/3/4 sets of images into something formatted more like
# Twitter selector boxes
#
# Written by Ewen McNeill <ewen#naos.co.nz>, 2023-09-25
# Updated by Ewen McNeill <ewen#naos.co.nz>, 2023-09-26
# ---------------------------------------------------------------------------

# The single positional argument is the markdown file to convert.
FILENAME="${1}"

# Guard: without a file name there is nothing to do, so print the usage
# line on stderr and stop with a failure status.
[ -n "${FILENAME}" ] || {
    echo "Usage: $0 FILENAME.md" >&2
    exit 1
}

# Derive the page title fields from the name of an archive file.
#
# Arguments: $1 - path to a markdown file whose base name starts with
#                 YEAR-MONTHNUMBER- (for example 2022-03-...).
# Globals written:
#   BASEFILENAME - $1 with any leading directories removed
#   YEAR         - first "-" separated field of BASEFILENAME
#   MONTH        - English month name for the second "-" separated field,
#                  or empty (with a warning on stderr) when that field is
#                  not a two digit month number from 01 to 12
#
# Every expansion is quoted so that paths containing spaces or glob
# characters are passed through intact.
set_archive_date() {
    BASEFILENAME="$(basename -- "${1}")"
    YEAR="$(printf '%s\n' "${BASEFILENAME}" | cut -f 1 -d -)"

    case "$(printf '%s\n' "${BASEFILENAME}" | cut -f 2 -d -)" in
      01) MONTH=January   ;;
      02) MONTH=February  ;;
      03) MONTH=March     ;;
      04) MONTH=April     ;;
      05) MONTH=May       ;;
      06) MONTH=June      ;;
      07) MONTH=July      ;;
      08) MONTH=August    ;;
      09) MONTH=September ;;
      10) MONTH=October   ;;
      11) MONTH=November  ;;
      12) MONTH=December  ;;
      *)
        # Assign explicitly so a MONTH inherited from the environment can
        # never leak into the page title, and tell the user why the month
        # is missing.
        echo "$0: warning: no month number found in '${BASEFILENAME}'" >&2
        MONTH=""
        ;;
    esac
}

set_archive_date "${FILENAME}"

cat "${FILENAME}" |
    # Special case hash tag at start of a Tweet / Tweet line, as Markdown
    # will incorrectly turn those into a heading; we insert a zero width
    # space, and then remove it again after conversion to HTML.
    #
    sed 's/> #/> \&zwsp;#/;'   |
    markdown_py -x fenced_code |
    sed 's/\&zwsp;#/#/g;'      |
    (
      # HTML Header
      cat <<EOF;
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="pico.min.css">
    <style>
      body main    { --typography-spacing-vertical: 1.0rem; 
                     padding-top: 1.0rem;                   }
      body main > p { margin-bottom: 0; }
      body main h1 { --typography-spacing-vertical: 1.0rem; }
      body main hr { --muted-border-color: black;           }
      blockquote   { --typography-spacing-vertical: 1.0rem; margin: 0 0;  
                     border-left: none; }
      p.quotedimg  { margin-bottom: 0;                      }
      p:last-child { margin-bottom: 0;                      }
    </style>
    <title>Twitter: @ewenmcneill -- ${MONTH} ${YEAR}</title>
  </head>
  <body>
    <main class="container">
      <h1>Twitter: @ewenmcneill -- ${MONTH} ${YEAR}</h1>
EOF
      # Generated content from above, with the blockquote (ie, Tweet)
      # moved *below* the next paragraph (ie, the tweet date).  If
      # the Tweet is a reply, then there's a stand alone "Replying to..."
      # paragraph, which we also want to move the tweet date before that.
      #
      (cat; echo "") | perl -ne '
          if (/<blockquote>/ or /<p>Replying to/) {
            while (! /<\/blockquote>/) {
              $_ .= <>;
            }
            $blockquote = $_;
            $_ = <>;
            print $_;
            print $blockquote;
          } else {
            print;
          }' |

      # Rewrite the Twitter Icon / Link HTML to inject an anchor tag
      # for the tweet number, and a link to that anchor tag
      #
         sed 's/^<p>\(<img [^>]*media\/tweet.ico[^>]*>\)\(.*status\/\)\([0-9][0-9]*\)"\(.*\)<\/p>/<p><a name="\3">\1<\/a>\2\3"\4 (<a href="#\3">#<\/a>)<\/p>/;' | 

      # Rewrite Tweeted images to have a class we can select on
      #
         sed 's/^<p>\(<img.*jpg.*\)$/<p class="quotedimg">\1/;';

      # HTML Footer
      cat <<EOF
    </main>
  </body>
</html>
EOF
    )
