Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 69d6a90

Browse files
committed
week 1 & week 2
0 parents  commit 69d6a90

12 files changed

Lines changed: 4674 additions & 0 deletions

Week 1/lesson1.ipynb

Lines changed: 1587 additions & 0 deletions
Large diffs are not rendered by default.

Week 2/dataset.tsv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Name	Birth	City	Position
Иванов А.А.	22.03.1980	Москва	
Сорокин И.В.	07.08.1965	Волгоград	инженер
Белов М.М.	13.02.1980	Ростов	менеджер
Мельникова Д.С.	15.04.1985	Ростов	
Рыбина Е.П.	19.11.1985	Москва	инженер
Костров С.О.	31.05.1985	Москва	стажер

Week 2/hw1.ipynb

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 61,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import re\n",
10+
"import numpy as np"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 62,
16+
"metadata": {},
17+
"outputs": [
18+
{
19+
"data": {
20+
"text/plain": [
21+
"254"
22+
]
23+
},
24+
"execution_count": 62,
25+
"metadata": {},
26+
"output_type": "execute_result"
27+
}
28+
],
29+
"source": [
30+
"d = {}\n",
31+
"lines = []\n",
32+
"with open('sentences.txt') as f:\n",
33+
" for count, line in enumerate(f.readlines()):\n",
34+
" line = line.lower()\n",
35+
" line = re.split('[^a-z]', line)\n",
36+
" \n",
37+
" lines.append(line)\n",
38+
"\n",
39+
" lines[count] = [tok for tok in lines[count] if tok != '']\n",
40+
"\n",
41+
" line = lines[count]\n",
42+
"\n",
43+
" for key, token in enumerate(line):\n",
44+
" if (len(token)):\n",
45+
" if (token not in d):\n",
46+
" d[token] = 1\n",
47+
" else:\n",
48+
" d[token] += 1\n",
49+
" \n",
50+
"len(d)"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 63,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"pred = []\n",
60+
"lines = list(lines)\n",
61+
"\n",
62+
"words = list(d)\n",
63+
"\n",
64+
"for i in range(count):\n",
65+
" temp = []\n",
66+
" for j in range(len(words)):\n",
67+
" temp.append(lines[i].count(words[j]))\n",
68+
" pred.append(np.array(temp))"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": 64,
74+
"metadata": {},
75+
"outputs": [
76+
{
77+
"name": "stdout",
78+
"output_type": "stream",
79+
"text": [
80+
"[4. 6.]\n"
81+
]
82+
}
83+
],
84+
"source": [
85+
"from scipy import spatial\n",
86+
"res = np.zeros(shape=(20,2))\n",
87+
"\n",
88+
"for i in range(len(lines) - 2):\n",
89+
" res[i] = (i+1, spatial.distance.cosine(pred[0], pred[i+1]))\n",
90+
"\n",
91+
"\n",
92+
"res = res[res[:, 1].argsort()]\n",
93+
"\n",
94+
"answ = res[0:2, 0]\n",
95+
"answ.sort()\n",
96+
"print(answ)\n",
97+
"\n",
98+
"f = open(\"submission-1.txt\", \"w\")\n",
99+
"f.write(\"%i %i\" % (answ[0], answ[1]))\n",
100+
"f.close()"
101+
]
102+
}
103+
],
104+
"metadata": {
105+
"interpreter": {
106+
"hash": "02769d244be1e9c182a881d7d25bc03ab28e10cd9282ec6a7530b69182abd7c2"
107+
},
108+
"kernelspec": {
109+
"display_name": "Python 3.10.2 64-bit",
110+
"language": "python",
111+
"name": "python3"
112+
},
113+
"language_info": {
114+
"codemirror_mode": {
115+
"name": "ipython",
116+
"version": 3
117+
},
118+
"file_extension": ".py",
119+
"mimetype": "text/x-python",
120+
"name": "python",
121+
"nbconvert_exporter": "python",
122+
"pygments_lexer": "ipython3",
123+
"version": "3.10.2"
124+
},
125+
"orig_nbformat": 4
126+
},
127+
"nbformat": 4,
128+
"nbformat_minor": 2
129+
}

Week 2/hw2.ipynb

Lines changed: 118 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)