1
1
from fabric import api
2
2
3
+ import csv
3
4
import glob
4
5
import os
5
6
import re
@@ -23,6 +24,10 @@ def prepare(output_directory, source_topic, destination_topic):
23
24
The CSV files are then downloaded to the local directory specified.
24
25
25
26
"""
27
+ do_prepare (output_directory , source_topic , destination_topic )
28
+
29
+
30
+ def do_prepare (output_directory , source_topic , destination_topic ):
26
31
if not os .path .isdir (output_directory ):
27
32
os .makedirs (output_directory )
28
33
prepare_publisher_csv (output_directory , source_topic , destination_topic )
@@ -41,6 +46,10 @@ def process(file_pattern):
41
46
to process.
42
47
43
48
"""
49
+ do_process (file_pattern )
50
+
51
+
52
+ def do_process (file_pattern ):
44
53
paths_by_app = {}
45
54
for path in glob .glob (file_pattern ):
46
55
app = os .path .basename (path ).split ("_" )[0 ]
@@ -53,6 +62,80 @@ def process(file_pattern):
53
62
', ' .join (paths_by_app .keys ()))
54
63
55
64
65
@api.task
def move_in_panopticon(source_topic, destination_topic):
    """Move content tagged in panopticon from one topic to another.

    Fabric task entry point; all of the work is delegated to
    do_move_in_panopticon with the same two arguments.

    """
    do_move_in_panopticon(
        source_topic=source_topic,
        destination_topic=destination_topic,
    )
71
+
72
def do_move_in_panopticon(source_topic, destination_topic):
    """Run the panopticon rake task that moves content between topics.

    Picks a host via util.use_random_host('class-backend'), announces which
    host is being used, then invokes the 'move_content_to_new_topic' rake
    task with ``source`` and ``dest`` set to the two topic arguments.

    """
    # Host selection has to happen before api.env.host_string is reported.
    util.use_random_host('class-backend')
    banner = "\n Retagging content on %s" % (api.env.host_string,)
    print(banner)

    rake_options = {'source': source_topic, 'dest': destination_topic}
    util.rake('panopticon', 'move_content_to_new_topic', **rake_options)
77
+
78
+
79
@api.task
def delete_topic(old_topic):
    """Delete a topic, causing it to return a 410 gone status.

    Fabric task entry point; all of the work is delegated to
    do_delete_topic.

    """
    do_delete_topic(old_topic=old_topic)
85
+
86
def do_delete_topic(old_topic):
    """Run the panopticon cleanup rake task for ``old_topic``.

    Picks a host via util.use_random_host('class-backend'), prints the slug
    being removed, then invokes the 'specialist_sector_cleanup' rake task
    with ``SLUG`` set to ``old_topic``.

    """
    util.use_random_host('class-backend')
    notice = "\n Deleting old topic %s" % (old_topic,)
    print(notice)

    rake_options = {'SLUG': old_topic}
    util.rake('panopticon', 'specialist_sector_cleanup', **rake_options)
90
+
91
+
92
@api.task
def move_topics(csv_filename, output_directory, action="all"):
    """Apply a set of topic changes from a CSV.

    This takes a CSV which maps from old topic slug to new topic slug, and
    coordinates running all the changes.

    ``csv_filename`` is read with read_topic_slug_changes.
    ``output_directory`` is where the "prepare" stage writes its files and
    where the "process" stage looks for ``*.csv`` files.
    ``action`` (case-insensitive) selects the stage to run: "prepare",
    "process", "panopticon" or "delete"; "all" (the default) runs every
    stage in that order.  Any other value aborts before anything is done.

    The "prepare" stage refuses to run (exit status 1) if
    ``output_directory`` already exists.

    """
    action = action.lower()
    known_actions = ("all", "prepare", "process", "panopticon", "delete")
    if action not in known_actions:
        # A mistyped action used to fall through every stage silently.
        api.abort("Unknown action %r; expected one of: %s" % (
            action, ", ".join(known_actions)))

    topic_slug_changes = read_topic_slug_changes(csv_filename)

    if action in ("all", "prepare"):
        if os.path.exists(output_directory):
            print("Output directory already exists")
            print("Aborting, to avoid risk of pulling in unexpected changes.")
            # Same effect as exit(1), without relying on the interactive
            # helper that the site module installs.
            raise SystemExit(1)
        os.makedirs(output_directory)
        for old_slug, new_slug in topic_slug_changes.items():
            do_prepare(output_directory, old_slug, new_slug)

    if action in ("all", "process"):
        do_process(os.path.join(output_directory, "*.csv"))

    # Call the plain do_* helpers rather than the @api.task wrappers, in
    # line with the do_prepare / do_process calls above.
    if action in ("all", "panopticon"):
        for old_slug, new_slug in topic_slug_changes.items():
            do_move_in_panopticon(old_slug, new_slug)

    if action in ("all", "delete"):
        for old_slug in topic_slug_changes:
            do_delete_topic(old_slug)
122
+
123
+
124
def read_topic_slug_changes(csv_filename):
    """Read the old-slug to new-slug mapping from a CSV file.

    The first row is treated as headings.  Each heading is lower-cased,
    stripped, and has spaces replaced by underscores, so "Old Topic Slug"
    matches ``old_topic_slug``.  The columns ``old_topic_slug`` and
    ``new_topic_slug`` are required; any other columns are ignored.  Rows
    whose cells are all blank are skipped.

    Returns a dict (an OrderedDict, in CSV row order) mapping each old slug
    to its new slug.  If an old slug appears on several rows, the value from
    the last of them is kept.

    Raises ValueError if either required heading is missing, which includes
    the case of a completely empty file.

    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    from collections import OrderedDict

    # Ordered so that changes are applied in the order the CSV lists them,
    # regardless of Python version.
    changes = OrderedDict()
    with open(csv_filename) as fobj:
        reader = csv.reader(fobj)
        headings = [
            heading.lower().strip().replace(' ', '_')
            # next(reader, []) works on Python 2 and 3, and turns an empty
            # file into the same ValueError as a missing heading.
            for heading in next(reader, [])
        ]
        old_index = headings.index('old_topic_slug')
        new_index = headings.index('new_topic_slug')
        for row in reader:
            # csv.reader yields [] for blank lines; indexing one would
            # raise IndexError, and all-blank rows carry no change.
            if not any(cell.strip() for cell in row):
                continue
            changes[row[old_index]] = row[new_index]
    return changes
137
+
138
+
56
139
def slugify(topic):
    """Return ``topic`` lower-cased, with every character outside a-z and
    0-9 replaced by an underscore."""
    lowered = topic.lower()
    unsafe_chars = re.compile('[^a-z0-9]')
    return unsafe_chars.sub('_', lowered)
58
141
@@ -68,7 +151,7 @@ def build_filename(app, source_topic, destination_topic):
68
151
69
152
def prepare_publisher_csv (output_directory , source_topic , destination_topic ):
70
153
util .use_random_host ('class-backend' )
71
- print ("Fetching publisher change CSV from %s" % (api .env .host_string ,))
154
+ print ("\n Fetching publisher change CSV from %s" % (api .env .host_string ,))
72
155
73
156
filename = build_filename ('publisher' , source_topic , destination_topic )
74
157
remote_path = '/var/apps/publisher/tmp/%s' % (filename ,)
@@ -82,7 +165,7 @@ def prepare_publisher_csv(output_directory, source_topic, destination_topic):
82
165
83
166
def prepare_whitehall_csv (output_directory , source_topic , destination_topic ):
84
167
util .use_random_host ('class-whitehall_backend' )
85
- print ("Fetching whitehall change CSV from %s" % (api .env .host_string ,))
168
+ print ("\n Fetching whitehall change CSV from %s" % (api .env .host_string ,))
86
169
87
170
filename = build_filename ('whitehall' , source_topic , destination_topic )
88
171
remote_path = '/var/apps/whitehall/tmp/%s' % (filename ,)
@@ -103,17 +186,25 @@ def process_publisher_csvs(paths):
103
186
104
187
def process_publisher_csv(path):
    """Upload one publisher change CSV and apply it with rake.

    Prints which file is being applied on which host.  If the local file
    has exactly one line (presumably just the heading row), nothing is
    uploaded.  Otherwise the file is put into /var/apps/publisher/tmp/ on
    the remote host with sudo, the publisher 'topic_changes:process' rake
    task is run against it, and the remote copy is removed.

    """
    filename = os.path.basename(path)
    banner = "\n Applying publisher change CSV %s on %s" % (
        filename, api.env.host_string)
    print(banner)

    line_count = file_length(path)
    if line_count == 1:
        print("No changes in file - skipping")
        return

    remote_path = '/var/apps/publisher/tmp/%s' % (filename,)
    api.put(path, remote_path, use_sudo=True)
    util.rake('publisher', 'topic_changes:process', remote_path)

    cleanup_command = "rm '%s'" % (remote_path,)
    api.sudo(cleanup_command)
115
201
116
202
203
def file_length(path):
    """Return the number of lines in the file at ``path``.

    A final line without a trailing newline still counts as a line; an
    empty file has length 0.

    """
    with open(path) as fobj:
        # Count while iterating instead of loading every line into a list.
        return sum(1 for _ in fobj)
206
+
207
+
117
208
def process_whitehall_csvs (paths ):
118
209
util .use_random_host ('class-whitehall_backend' )
119
210
for path in paths :
@@ -122,10 +213,13 @@ def process_whitehall_csvs(paths):
122
213
123
214
def process_whitehall_csv (path ):
124
215
filename = os .path .basename (path )
125
- print ("Applying whitehall change CSV %s on %s" % (
216
+ print ("\n Applying whitehall change CSV %s on %s" % (
126
217
filename ,
127
218
api .env .host_string ,
128
219
))
220
+ if file_length (path ) == 1 :
221
+ print ("No changes in file - skipping" )
222
+ return
129
223
130
224
remote_path = '/var/apps/whitehall/tmp/%s' % (filename , )
131
225
api .put (path , remote_path , use_sudo = True )
0 commit comments