言語処理100本ノック 2015をやってみた(第1章)
スポンサーリンク
言語処理100本ノック2015の第1章をやってみたので記録。大学の時にこういう授業を受けてみたかったなぁ。(参考: www.cl.ecei.tohoku.ac.jp)
#一部ちゃんと前提を読んでおらず、pythonではなく、unixのコマンドでやってしまいました。
00. 文字列の逆順
【実行&結果】
$ echo stressed | rev
desserts
01. 「パタトクカシーー」
【プログラム】
#!/bin/sh
# 01. Print the 1st, 3rd, 5th and 7th characters of the string given as $1.
# Usage: ./01.sh "パタトクカシーー"
#
# NOTE(review): `cut -c` only selects whole characters where cut is
# multibyte-aware; some implementations treat -c like -b and would split
# UTF-8 text in the middle of a character -- confirm on the target system.
string=$1

# "1,3,5,7" is a single position list for cut, so no loop is needed.
# Quoting keeps whitespace and glob characters in the input intact, which
# matters because the selection is position based.
printf '%s\n' "$string" | cut -c 1,3,5,7
【実行&結果】
$ ./01.sh "パタトクカシーー"
パトカー
02. 「パトカー」+「タクシー」=「パタトクカシーー」
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""02. Interleave the characters of "パトカー" and "タクシー"."""
import sys
import random


def interleave(first, second):
    """Return the characters of both strings taken alternately.

    Characters are emitted as first[0], second[0], first[1], second[1], ...
    When one string is longer, its remaining characters are appended
    unchanged once the shorter string is exhausted.
    """
    shared = min(len(first), len(second))
    paired = "".join(a + b for a, b in zip(first, second))
    # At most one of the two tails below is non-empty.
    return paired + first[shared:] + second[shared:]


if __name__ == "__main__":
    # print() with a single argument behaves the same on Python 2 and 3.
    print(interleave(u"パトカー", u"タクシー"))
【実行&結果】
$ sudo python 02.py
パタトクカシーー
03. 円周率
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""03. Turn a sentence into the digits formed by its word lengths.

Usage: python 03.py "SENTENCE"
"""
import sys
import random


def word_lengths(sentence):
    """Return the concatenated lengths of the words in ``sentence``.

    Commas and periods are removed before counting. Words are split on any
    run of whitespace, so repeated, leading or trailing spaces do not
    produce empty words (which would otherwise show up as "0").
    """
    cleaned = sentence.replace(",", "").replace(".", "")
    return "".join(str(len(word)) for word in cleaned.split())


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: python 03.py SENTENCE")
    print(word_lengths(sys.argv[1]))
【実行&結果】
$ sudo python 03.py "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
314159265358979
04. 元素記号
【プログラム】
#!/usr/bin/env bash
# 04. Build element symbols from the words of the sentence given as $1.
# Usage: ./04.sh "Hi He Lied Because Boron ..."
#
# Every word is cut down to its first two characters, except the words at
# the 1-based positions listed below, which keep only their first character.
# The script relies on arrays and substring expansion, hence bash.

sentence=$1

# Drop commas and periods so they never end up inside a symbol.
sentence=${sentence//[,.]/}

# Split on whitespace. Globbing is switched off during the split so that a
# word such as "*" is not expanded into file names.
set -f
words=($sentence)
set +f

# Positions whose symbol is a single letter; the surrounding spaces let the
# match below distinguish e.g. position 5 from position 15.
single_letter_positions=" 1 5 6 7 8 9 15 16 19 "

symbols=()
for i in "${!words[@]}"; do
    position=$((i + 1))
    if [[ $single_letter_positions == *" $position "* ]]; then
        symbols+=("${words[i]:0:1}")
    else
        symbols+=("${words[i]:0:2}")
    fi
done

echo "${symbols[@]}"
【実行&結果】
$ ./04.sh "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
H He Li Be B C N O F Ne Na Mi Al Si P S Cl Ar K Ca
05. n-gram
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""05. Print the character or word n-grams of a text.

Usage: python 05.py N "TEXT" (char|word)
"""
import sys


def _n_grams(sequence, n):
    """Return every run of ``n`` consecutive items of ``sequence``, in order.

    ``sequence`` may be a string (character n-grams) or a list of words
    (word n-grams). A sequence shorter than ``n`` yields an empty list.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    # The last valid start index is len - n; stopping at len - 2 (as a
    # bi-gram-only bound would) silently drops the final unigram.
    return [sequence[start:start + n]
            for start in range(len(sequence) - n + 1)]


def n_gram(ngram_char, num):
    """Print each ``num``-gram of ``ngram_char`` on its own line."""
    for gram in _n_grams(ngram_char, num):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(gram)


def main(argv):
    """Parse ``argv`` (N, TEXT, mode) and print the requested n-grams."""
    if len(argv) < 4:
        sys.exit("usage: python 05.py N TEXT (char|word)")
    num = int(argv[1])
    text = argv[2].replace(",", "").replace(".", "")
    if argv[3] == "char":
        sequence = text.replace(" ", "")
    else:
        # Any mode other than "char" means word n-grams. split() without an
        # argument avoids empty words caused by repeated spaces.
        sequence = text.split()
    n_gram(sequence, num)


if __name__ == "__main__":
    main(sys.argv)
【実行&結果】
単語bi-gram
$ sudo python 05.py 2 "I am an NLPer" word
['I', 'am']
['am', 'an']
['an', 'NLPer']
文字bi-gram
$ sudo python 05.py 2 "I am an NLPer" char
Ia
am
ma
an
nN
NL
LP
Pe
er
06. 集合
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""06. Compare the character n-grams of two strings as sets.

Usage: python 06.py N "TEXT_A" "TEXT_B"
"""
import sys


def _strip_separators(text):
    """Return ``text`` without commas, periods and spaces."""
    return text.replace(",", "").replace(".", "").replace(" ", "")


def n_gram(ngram_char, num):
    """Return the list of ``num``-grams of ``ngram_char``, in order.

    Duplicates are kept; a sequence shorter than ``num`` yields an empty
    list.

    Raises:
        ValueError: if ``num`` is smaller than 1.
    """
    if num < 1:
        raise ValueError("num must be a positive integer")
    # The last valid start index is len - num; a fixed bound of len - 2
    # would drop the final item when num is 1.
    return [ngram_char[start:start + num]
            for start in range(len(ngram_char) - num + 1)]


def main(argv):
    """Print the n-grams of both texts and their union, difference and intersection."""
    if len(argv) < 4:
        sys.exit("usage: python 06.py N TEXT_A TEXT_B")
    num = int(argv[1])
    rs_data = [n_gram(_strip_separators(text), num) for text in argv[2:4]]

    # print() with a single argument behaves the same on Python 2 and 3.
    print("\nA")
    print(rs_data[0])
    print("\nB:")
    print(rs_data[1])
    print("")

    s = set(rs_data[0])
    print("\n和集合")
    print(s.union(rs_data[1]))
    # Difference is A minus B: n-grams that occur only in the first text.
    print("\n差集合")
    print(s.difference(rs_data[1]))
    print("\n積集合")
    print(s.intersection(rs_data[1]))


if __name__ == "__main__":
    main(sys.argv)
【実行&結果】
$ sudo python 06.py 2 "paraparaparadise" "paragraph"

A
['pa', 'ar', 'ra', 'ap', 'pa', 'ar', 'ra', 'ap', 'pa', 'ar', 'ra', 'ad', 'di', 'is', 'se']

B:
['pa', 'ar', 'ra', 'ag', 'gr', 'ra', 'ap', 'ph']


和集合
set(['ad', 'ag', 'di', 'is', 'ap', 'pa', 'ra', 'ph', 'ar', 'se', 'gr'])

差集合
set(['ad', 'di', 'is', 'se'])

積集合
set(['ap', 'pa', 'ar', 'ra'])
07. テンプレートによる文生成
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""07. Generate a sentence from a template.

Usage: python 07.py X Y Z
"""
import sys


def template(x, y, z):
    """Return the sentence "<x>時の<y>は<z>".

    Each argument is converted with ``str()``, so numbers can be passed
    directly.
    """
    return str(x) + "時の" + str(y) + "は" + str(z)


def main(argv):
    """Print the generated sentence followed by the raw x, y, z values."""
    if len(argv) < 4:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: python 07.py X Y Z")
    x, y, z = (str(arg) for arg in argv[1:4])
    # print() with a single argument behaves the same on Python 2 and 3.
    print(template(x, y, z))
    print("x=" + x + ",y=" + y + ",z=" + z)


if __name__ == "__main__":
    main(sys.argv)
【実行&結果】
$ sudo python 07.py 12 気温 22.4
12時の気温は22.4
x=12,y=気温,z=22.4
08. 暗号文
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""08. Mirror cipher for lowercase ASCII letters.

Usage: python 08.py TEXT
"""
import re
import sys


def _flip(ch):
    """Return the mirrored letter for a-z (a<->z, b<->y, ...), else ``ch``."""
    if "a" <= ch <= "z":
        # ord("a") + ord("z") == 219, so 219 - code mirrors the alphabet.
        return chr(219 - ord(ch))
    return ch


def cipher(arr_str):
    """Return a list with every lowercase ASCII letter of ``arr_str`` mirrored.

    ``arr_str`` is an iterable of single characters; all other characters
    are copied unchanged.
    """
    return [_flip(ch) for ch in arr_str]


def decrypt(arr_encrypt):
    """Return the decoded character list for ``arr_encrypt``.

    The mirror mapping is its own inverse, so decoding simply applies
    ``cipher`` again.
    """
    return cipher(arr_encrypt)


def arr_to_str(arr_str):
    """Join a list of characters into a single string."""
    return "".join(arr_str)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: python 08.py TEXT")
    arr_str = list(sys.argv[1])
    # print() with a single argument behaves the same on Python 2 and 3.
    print("encrpt : " + arr_to_str(cipher(arr_str)))
    print("decrypt : " + arr_to_str(decrypt(cipher(arr_str))))
【実行&結果】
$ sudo python 08.py A1abcdE1F3
encrpt : A1zyxwE1F3
decrypt : A1abcdE1F3
09. Typoglycemia
【プログラム】
#!/usr/bin/env python
# coding:utf-8
"""09. Randomly swap the long words of a sentence.

Usage: python 09.py "SENTENCE"

The first and last words and every word of four characters or fewer stay
where they are; the remaining words are swapped among themselves at random.

NOTE(review): the Typoglycemia exercise is usually stated as shuffling the
inner letters of each long word; this script swaps whole words instead --
confirm which behaviour is intended.
"""
import sys
import random


def _split_words(sentence):
    """Return the words of ``sentence`` with commas and periods removed.

    Splitting on runs of whitespace never yields empty words, so no cleanup
    step is needed (a blind ``list.remove("")`` raises ValueError when the
    list holds no empty string).
    """
    return sentence.replace(",", "").replace(".", "").split()


def switch_val(arr_str, i):
    """Swap ``arr_str[i]`` with a randomly chosen long interior word.

    Candidates are the words longer than four characters located strictly
    between the first and the last word; each is equally likely. The list
    is modified in place and also returned. When there is no candidate the
    list is returned unchanged.
    """
    candidates = [pos for pos in range(1, len(arr_str) - 1)
                  if len(arr_str[pos]) > 4]
    if candidates:
        pick = random.choice(candidates)
        arr_str[i], arr_str[pick] = arr_str[pick], arr_str[i]
    return arr_str


def _shuffle_long_words(words):
    """Apply ``switch_val`` to every long interior word of ``words``."""
    for i in range(1, len(words) - 1):
        if len(words[i]) > 4:
            switch_val(words, i)
    return words


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: python 09.py SENTENCE")
    for temp in _shuffle_long_words(_split_words(sys.argv[1])):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(temp)
【実行&結果】
$ sudo python 09.py "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
I
power
actually
that
I
understand
reading
phenomenal
what
I
was
believe
:
the
could
couldn't
of
the
human
mind