#!/bin/bash
# Scratch files: "php" receives the raw page downloaded by postDict and "txt"
# the rendered text that is formatted and printed. They are created with
# mktemp instead of fixed names, because predictable names in the
# world-writable /tmp can be pre-created or symlinked by another user. They
# stay under /tmp and are removed again when the script exits.
txt=$(mktemp /tmp/soz.XXXXXX) || exit 1
php=$(mktemp /tmp/soz.XXXXXX) || exit 1
trap 'rm -f -- "$txt" "$php"' EXIT
# Directory holding the StarDict dictionaries; sdcvDict passes it to sdcv as
# --data-dir. While this line stays commented out, "dir" is unset.
#dir=~/.stardict/dic/

# Prints the usage text: description, features, usage, examples and options.
# The program name in the first line is wrapped in the terminal's bold /
# reset sequences as reported by tput. The text starts with an empty line.
help() {
	local bold normal
	bold=$(tput bold)
	normal=$(tput sgr0)
	cat <<EOF

${bold}soz${normal} — a bash script for handling three types of dictionaries in a uniform manner.

These three types are POST (web form), query string (modifying url), and sdcv
(offline StarDict dictionary).

You can add new dictionaries by modifying the "Dictionaries" section of the script.

"soz" means "word" in Turkish and other Turkic languages:
|| tr: söz, az: söz, tk: söz, uz: soʻz, ug: söz, kr: söz, ||
|| kz: söz, tt: süz, ba: hüź, tyv: sös, sah: ös           ||

==== FEATURES ====
 * Easily adding or removing dictionaries as desired
 * Accessing different types of dictionaries using the same command format
 * Dictionary names are based on easy to remember two letter language codes
 * Support for phrases as well as words
 * Option to format web dictionaries' outputs
 * Option to specify substitution for space when using query strings
 * Option to enter words interactively or non-interactively
 * Prettified sdcv output

==== USAGE ====
usage:    soz DICTIONARY [WORD]

==== EXAMPLES ====
example:  soz entr pusillanimous
result:   searches the word pusillanimous in the English-Turkish dictionary.

example:  soz etymen
result:   displays a prompt which will search the entered word in the English
          etymology dictionary.

==== OPTIONS ====
--list | -l:  lists all the dictionaries (not yet implemented)
--help | -h:  prints this
--info | -i:  prints info (version, author, license)
EOF
}

# Prints the version banner: release, author and license, one per line.
info() {
	printf '%s\n' \
		'soz (söz) 1.0 — July 2018' \
		'Written by Sunur Efe Vural <efe@efe.kim>' \
		'License: CC0 1.0 Universal <https://creativecommons.org/publicdomain/zero/1.0/legalcode>'
}


###--- Formatting Functions ---###

# These functions are optional and only apply to web dictionaries. I use
# them to remove the non-dictionary parts of the web page; you can use any
# program you wish. A function must be named after its dictionary with
# "Format" appended, so for the dictionary "enfr" the function has to be
# called "enfrFormat". Each function must read its input from, and write
# its output back to, the file named by the $txt variable.

# Formatter for the "az" dictionary. Edits the file named by $txt in place:
# drops everything up to and including the first line containing "[Search]"
# and everything from the "Popular Searches" line to the end.
# Needs GNU sed (in-place "-i" and the "0,/re/" address form).
azFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '0,/\[Search\]/d;/Popular\ Searches/,$d' "$txt"
}

# Formatter for the "azen" dictionary. Edits the file named by $txt in place:
# keeps only the lines from the result header ("... sözünün tərcüməsi
# (N nəticə)") through the "Çox Axtarılanlar" line, then removes the
# "Çox Axtarılanlar" line itself. Needs GNU sed for the in-place "-i".
azenFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '/sözünün tərcüməsi ([0-9]* nəticə)/,/Çox Axtarılanlar/!d;/Çox Axtarılanlar/d' "$txt"
}

# Formatter for the "tk" dictionary. Edits the file named by $txt in place:
# drops everything up to and including the first "Söz bölegi" line and
# everything from the "━━━━━━━━━━━" rule to the end.
# Needs GNU sed (in-place "-i" and the "0,/re/" address form).
tkFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '0,/Söz bölegi/d;/━━━━━━━━━━━/,$d' "$txt"
}

# Formatter for the "uz" dictionary. Edits the file named by $txt in place:
#  1. keeps only the lines from "Maʼnosi[tahrirlash]" through the next line
#     that contains "[tahrirlash]" after at least one other character,
#  2. on lines containing "I[tahrirlash]" removes the "[tahrirlash]" marker
#     so that those lines survive step 3,
#  3. deletes every remaining line that still contains "[tahrirlash]".
# Needs GNU sed for the in-place "-i".
uzFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i \
		-e '/Maʼnosi\[tahrirlash\]/,/.\[tahrirlash\]/!d' \
		-e '/I\[tahrirlash\]/ s/\[tahrirlash\]//' \
		-e '/\[tahrirlash\]/d' \
		"$txt"
}

# Formatter for the "tr" dictionary. Edits the file named by $txt in place:
# drops everything up to and including the first "Türkçe[düzenle]" line and
# everything from the first line mentioning "tr.wiktionary.org" to the end.
# Needs GNU sed (in-place "-i" and the "0,/re/" address form).
trFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '0,/Türkçe\[düzenle\]/d;/tr\.wiktionary\.org/,$d' "$txt"
}

# Formatter for the "tren" dictionary. Edits the file named by $txt in place:
# drops everything up to and including the first "Türkçe-İngilizce Çeviri"
# line and everything from the "Türkçe-İngilizce çeviriler: Zargan Ltd."
# credit line to the end.
# Needs GNU sed (in-place "-i" and the "0,/re/" address form).
trenFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '0,/Türkçe-İngilizce Çeviri/d;/Türkçe-İngilizce çeviriler: Zargan Ltd\./,$d' "$txt"
}

# Formatter for the "etymtr" dictionary. Edits the file named by $txt in
# place: drops everything up to and including the first "İzahlı görünüm"
# line and everything from the "« Önceki sayfa" line to the end.
# Needs GNU sed (in-place "-i" and the "0,/re/" address form).
etymtrFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '0,/İzahlı görünüm/d;/« Önceki sayfa/,$d' "$txt"
}

# Formatter for the "ot" dictionary. Edits the file named by $txt in place:
# drops everything up to and including the first "aramasına en yakın
# sonuçlar" line and everything from the "Çağdaş Türk Lehçeleri ve
# Edebiyatları" line to the end.
# Needs GNU sed (in-place "-i" and the "0,/re/" address form).
otFormat() {
	# Quoted so that a path containing whitespace is not split into two files.
	sed -i '0,/aramasına en yakın sonuçlar/d;/Çağdaş Türk Lehçeleri ve Edebiyatları/,$d' "$txt"
}

###--- Main Dictionary Functions ---###

# Sets the global "word" to the term that should be looked up.
# Arguments: $1    - dictionary name (discarded)
#            $2... - the word or phrase; several arguments are joined with
#                    spaces (first character of IFS, a space by default)
# When no word was given (first word argument missing or empty) the user is
# prompted for one instead.
# Exits the script with status 0 when the prompt reaches end-of-input without
# reading anything (Ctrl-D, closed or exhausted stdin). Without this, an empty
# word would be looked up and the prompt re-issued over and over.
defineWord() {
	# Nothing to drop when called without any argument at all.
	if [ "$#" -gt 0 ]; then
		shift
	fi
	# "$*" joins into one string; "$@" is a list and does not belong in a
	# scalar assignment.
	word="$*"
	if [ -z "${1-}" ]; then
		# A final line without a trailing newline makes read fail but still
		# fills "word", so only an empty result counts as end-of-input.
		if ! read -rp 'Enter word or phrase: ' word && [ -z "$word" ]; then
			exit 0
		fi
	fi
}

# Runs the script again with the same arguments when it was started without
# a word, so that the prompt comes back after a result has been shown.
# Arguments: the script's positional parameters ($1 dictionary, $2... word).
# Does nothing when a word was supplied ($2 non-empty).
interactiveMode() {
	if [ -z "$2" ]; then
		# Quoted so that a script path containing spaces is run as one word.
		"$0" "$@"
	fi
}

# Runs the optional formatter of a dictionary.
# Arguments: $1 - dictionary name; the formatter is the shell function
#                 named "<name>Format"
# Returns:   the formatter's status, or 0 when no such function exists.
format() {
	local formatter="${1}Format"
	# No formatter defined for this dictionary: nothing to do.
	[ "$(type -t "$formatter")" = 'function' ] || return 0
	"$formatter"
}

# Looks a word up in a web dictionary that takes the word inside its URL.
# Globals read:    site  - URL template; its first "%s" is replaced by the word
#                  blank - what spaces in the word become ("%20" when empty)
#                  txt   - file that receives the rendered page
# Globals written: word  - the word with its spaces replaced
#                  site  - the final URL
# Arguments:       $1 - dictionary name, $2... - optional word or phrase
# Outputs:         the rendered page, trimmed by the dictionary's formatter
#                  when one exists
queryDict() {
	defineWord "$@"
	# Plain parameter expansion instead of sed: the word is user input, and
	# characters such as "/" or "&" would be read as part of a sed program.
	local space=${blank:-%20}
	local marker='%s'
	word=${word// /"$space"}
	if [[ $site == *"$marker"* ]]; then
		# Text before the first marker + word + text after the first marker.
		site=${site%%"$marker"*}${word}${site#*"$marker"}
	fi
	w3m -o display_link_number=0 "$site" > "$txt"
	format "$1"
	cat "$txt"
	interactiveMode "$@"
}

# Prints $1 percent-encoded for use as a value in an
# application/x-www-form-urlencoded request body.
_urlEncode() {
	local LC_ALL=C  # walk the string byte by byte, not character by character
	local text=$1 encoded='' byte code i
	for ((i = 0; i < ${#text}; i++)); do
		byte=${text:i:1}
		case $byte in
			[a-zA-Z0-9._~-])
				encoded+=$byte
				;;
			*)
				printf -v code '%d' "'$byte"
				# Keep only the low eight bits in case the C library reports a
				# high byte as a wider value.
				printf -v byte '%%%02X' "$((code & 0xFF))"
				encoded+=$byte
				;;
		esac
	done
	printf '%s' "$encoded"
}

# Looks a word up in a web dictionary by submitting its search form.
# Globals read: site       - address the form is submitted to
#               searchname - name of the form's text input
#               submitname - name of the form's submit input
#               php        - file that receives the downloaded page
#               txt        - file that receives the rendered page
#               word       - set by defineWord
# Arguments:    $1 - dictionary name, $2... - optional word or phrase
# Outputs:      the rendered page, trimmed by the dictionary's formatter when
#               one exists
postDict() {
	defineWord "$@"
	# wget sends the body as given, so the word has to be encoded here;
	# otherwise "&", "=", "+" or "%" in it would alter the form fields.
	local payload
	payload="$searchname=$(_urlEncode "$word")&$submitname=OK"
	wget -q --post-data "$payload" -O "$php" "$site"
	w3m -o display_link_number=0 -T text/html "$php" > "$txt"
	format "$1"
	cat "$txt"
	interactiveMode "$@"
}

# Looks a word up in an offline StarDict dictionary through sdcv.
# Globals read: bookname - dictionary name handed to sdcv with -u
#               dir      - dictionary directory handed to sdcv as --data-dir
#                          (an empty --data-dir= is passed while it is unset)
#               word     - set by defineWord
# Arguments:    $1 - dictionary name, $2... - optional word or phrase
#
# If you are using a newer version of sdcv, uncomment the two commented-out
# sdcv lines in the function and comment out the active one.
sdcvDict() {
	defineWord "$@"
#	sdcv --data-dir="$dir" -x -n -u "$bookname" "$word" | grep -x -v -e "-->$bookname" | sed '/^-->/!d' | sed -e ':a;N;$!ba;s/\n-->/,\ /g' -e 's/-->/Maybe\ you\ mean\ one\ of\ these:\ /'
#	sdcv --data-dir="$dir" -x -n -e -u "$bookname" "$word"
	# "$dir" is quoted so that a directory containing spaces stays one argument.
	sdcv --data-dir="$dir" -n -u "$bookname" "$word"
	interactiveMode "$@"
}


###--- Dictionaries ---###
# Register dictionaries here. Every entry sets the variables its dictionary
# type needs and then hands over to the matching function: postDict for
# words and phrases submitted through a form, queryDict for pages reached
# by putting the word into the URL, and sdcvDict for offline StarDict
# dictionaries. Always call the function with "$@" so that it receives the
# positional parameters.

case "$1" in
	### Post data (filling forms):
	# "site" is the address named in the form; it is not necessarily the
	# address of the page on which the form is located.
	# "searchname" is the name of the form's search or text input (where
	# the text is typed).
	# "submitname" is the name of the form's submit input.
	uz)
		site="https://uz.wiktionary.org/wiki/Bosh_sahifa/w/index.php"
		searchname="search"
		submitname="go"
		postDict "$@"
		;;
	tr)
		site="https://tr.wiktionary.org/wiki/Anasayfa/w/index.php"
		searchname="search"
		submitname="go"
		postDict "$@"
		;;
	ot)
		site="http://ctle.pau.edu.tr/osmtr/index.php"
		searchname="searchterm"
		submitname="search"
		postDict "$@"
		;;

	### Query strings:
	# "site" is the URL that carries the query; the place of the word is
	# marked with %s.
	# "blank" is what the site turns spaces into. It is optional, so
	# 'blank=""' may be left out.
#	tk) site="https://enedilim.com/sozluk/soz/%s" ; blank="" ; queryDict "$@";;
#	az) site="https://azerdict.com/izahli-luget/%s" ; blank="-" ; queryDict "$@" ;;
	azen)
		site="https://azerdict.com/az/english/%s"
		blank="-"
		queryDict "$@"
		;;
	tren)
		site="http://www.zargan.com/tr/q/%s-ceviri-nedir"
		blank=""
		queryDict "$@"
		;;
	etymtr)
		site="http://nisanyansozluk.com/?k=%s"
		blank="+"
		queryDict "$@"
		;;

	### Stardict / Sdcv:
	# "bookname" is the dictionary name exactly as given in the
	# dictionary's StarDict .ifo file.
	entr)
		bookname="English - Turkish"
		sdcvDict "$@"
		;;
	tk)
		bookname="Türkmen Diliniň Düşündirişli Sözlügi"
		sdcvDict "$@"
		;;
	trta)
		bookname="Türkiye Türkçesi - Tatarca Sözlük"
		sdcvDict "$@"
		;;
	ugtr)
		bookname="Yeni Uygur Türkçesi Sözlüğü"
		sdcvDict "$@"
		;;
	uzen)
		bookname="Özbekçe - İngilizce Sözlük"
		sdcvDict "$@"
		;;
	aztr)
		bookname="Azerbaycan Türkçesinin Anlamdaşlar Sözlüğü"
		sdcvDict "$@"
		;;
	az)
		bookname="Azerbaycan Dilinin İzahlı Lugatı"
		sdcvDict "$@"
		;;
	etymen)
		bookname="English Etymology"
		sdcvDict "$@"
		;;
	en)
		bookname="WordNet® 3.0 (En-En)"
		sdcvDict "$@"
		;;
	krtr)
		bookname="Yudahinin Kırgızca Sözlüğü"
		sdcvDict "$@"
		;;
	fren)
		bookname="French - English"
		sdcvDict "$@"
		;;
	ruen)
		bookname="Russian-English full dictionary"
		sdcvDict "$@"
		;;

	### Options:
	--help|-h)
		help
		;;
	--info|-i|--version|-v)
		info
		;;
	--list|-l)
		echo -e "I don't know how to implement this yet, sorry :::::(((((((((((( \nYou should just check the \"Dictionaries\" section of the script."
		;;
	*)
		echo "No dictionary as '$1' found. Type \"soz --help\" for usage."
		;;
esac
