1212 ExtractorError ,
1313 float_or_none ,
1414 sanitized_Request ,
15- unescapeHTML ,
16- update_url_query ,
15+ str_or_none ,
16+ traverse_obj ,
1717 urlencode_postdata ,
1818 USER_AGENTS ,
1919)
2020
2121
2222class CeskaTelevizeIE (InfoExtractor ):
23- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
23+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?: ivysilani|porady|zive) /(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
2424 _TESTS = [{
25- 'url' : 'http://www.ceskatelevize.cz/ivysilani/ivysilani/ 10441294653-hyde-park-civilizace/214411058091220 ' ,
25+ 'url' : 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en ' ,
2626 'info_dict' : {
27- 'id' : '61924494877246241 ' ,
27+ 'id' : '61924494877028507 ' ,
2828 'ext' : 'mp4' ,
29- 'title' : 'Hyde Park Civilizace: Život v Grónsku ' ,
30- 'description' : 'md5:3fec8f6bb497be5cdb0c9e8781076626 ' ,
29+ 'title' : 'Bonus 01 - En - Hyde Park Civilizace ' ,
30+ 'description' : 'English Subtittles ' ,
3131 'thumbnail' : r're:^https?://.*\.jpg' ,
32- 'duration' : 3350 ,
32+ 'duration' : 81.3 ,
3333 },
3434 'params' : {
3535 # m3u8 download
3636 'skip_download' : True ,
3737 },
3838 }, {
39- 'url' : 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en' ,
39+ # live stream
40+ 'url' : 'http://www.ceskatelevize.cz/zive/ct1/' ,
4041 'info_dict' : {
41- 'id' : '61924494877028507 ' ,
42+ 'id' : '102 ' ,
4243 'ext' : 'mp4' ,
43- 'title' : 'Hyde Park Civilizace: Bonus 01 - En' ,
44- 'description' : 'English Subtittles' ,
45- 'thumbnail' : r're:^https?://.*\.jpg' ,
46- 'duration' : 81.3 ,
44+ 'title' : r'ČT1 - živé vysílání online' ,
45+ 'description' : 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.' ,
46+ 'is_live' : True ,
4747 },
4848 'params' : {
4949 # m3u8 download
5050 'skip_download' : True ,
5151 },
5252 }, {
53- # live stream
53+ # another live stream (channel page under /ivysilani/zive/)
5454 'url' : 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/' ,
55+ 'only_matching' : True ,
5556 'info_dict' : {
5657 'id' : 402 ,
5758 'ext' : 'mp4' ,
5859 'title' : r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$' ,
5960 'is_live' : True ,
6061 },
62+ # 'skip': 'Georestricted to Czech Republic',
63+ }, {
64+ 'url' : 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25' ,
65+ 'only_matching' : True ,
66+ }, {
67+ # video with 18+ caution trailer
68+ 'url' : 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/' ,
69+ 'info_dict' : {
70+ 'id' : '215562210900007-bogotart' ,
71+ 'title' : 'Bogotart - Queer' ,
72+ 'description' : 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti' ,
73+ },
74+ 'playlist' : [{
75+ 'info_dict' : {
76+ 'id' : '61924494877311053' ,
77+ 'ext' : 'mp4' ,
78+ 'title' : 'Bogotart - Queer (Varování 18+)' ,
79+ 'duration' : 11.9 ,
80+ },
81+ }, {
82+ 'info_dict' : {
83+ 'id' : '61924494877068022' ,
84+ 'ext' : 'mp4' ,
85+ 'title' : 'Bogotart - Queer (Queer)' ,
86+ 'thumbnail' : r're:^https?://.*\.jpg' ,
87+ 'duration' : 1558.3 ,
88+ },
89+ }],
6190 'params' : {
6291 # m3u8 download
6392 'skip_download' : True ,
6493 },
65- 'skip' : 'Georestricted to Czech Republic' ,
6694 }, {
67- 'url' : 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25' ,
95+ # iframe embed
96+ 'url' : 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/' ,
6897 'only_matching' : True ,
6998 }]
7099
100+ def _search_nextjs_data (self , webpage , video_id , ** kw ):
101+ return self ._parse_json (
102+ self ._search_regex (
103+ r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>' ,
104+ webpage , 'next.js data' , ** kw ),
105+ video_id , ** kw )
106+
71107 def _real_extract (self , url ):
72108 playlist_id = self ._match_id (url )
73-
74- webpage = self ._download_webpage (url , playlist_id )
109+ webpage , urlh = self ._download_webpage_handle (url , playlist_id )
110+ parsed_url = compat_urllib_parse_urlparse (urlh .geturl ())
111+ site_name = self ._og_search_property ('site_name' , webpage , fatal = False , default = 'Česká televize' )
112+ playlist_title = self ._og_search_title (webpage , default = None )
113+ if site_name and playlist_title :
114+ playlist_title = re .split (r'\s*[—|]\s*%s' % (site_name , ), playlist_title , 1 )[0 ]
115+ playlist_description = self ._og_search_description (webpage , default = None )
116+ if playlist_description :
117+ playlist_description = playlist_description .replace ('\xa0 ' , ' ' )
118+
119+ type_ = 'IDEC'
120+ if re .search (r'(^/porady|/zive)/' , parsed_url .path ):
121+ next_data = self ._search_nextjs_data (webpage , playlist_id )
122+ if '/zive/' in parsed_url .path :
123+ idec = traverse_obj (next_data , ('props' , 'pageProps' , 'data' , 'liveBroadcast' , 'current' , 'idec' ), get_all = False )
124+ else :
125+ idec = traverse_obj (next_data , ('props' , 'pageProps' , 'data' , ('show' , 'mediaMeta' ), 'idec' ), get_all = False )
126+ if not idec :
127+ idec = traverse_obj (next_data , ('props' , 'pageProps' , 'data' , 'videobonusDetail' , 'bonusId' ), get_all = False )
128+ if idec :
129+ type_ = 'bonus'
130+ if not idec :
131+ raise ExtractorError ('Failed to find IDEC id' )
132+ iframe_hash = self ._download_webpage (
133+ 'https://www.ceskatelevize.cz/v-api/iframe-hash/' ,
134+ playlist_id , note = 'Getting IFRAME hash' )
135+ query = {'hash' : iframe_hash , 'origin' : 'iVysilani' , 'autoStart' : 'true' , type_ : idec , }
136+ webpage = self ._download_webpage (
137+ 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php' ,
138+ playlist_id , note = 'Downloading player' , query = query )
75139
76140 NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
77141 if '%s</p>' % NOT_AVAILABLE_STRING in webpage :
78- raise ExtractorError (NOT_AVAILABLE_STRING , expected = True )
142+ self .raise_geo_restricted (NOT_AVAILABLE_STRING )
143+ if any (not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač' , 'IDEC nebyl nalezen' , )):
144+ raise ExtractorError ('no video with IDEC available' , video_id = idec , expected = True )
79145
80146 type_ = None
81147 episode_id = None
@@ -100,15 +166,15 @@ def _real_extract(self, url):
100166 data = {
101167 'playlist[0][type]' : type_ ,
102168 'playlist[0][id]' : episode_id ,
103- 'requestUrl' : compat_urllib_parse_urlparse ( url ) .path ,
169+ 'requestUrl' : parsed_url .path ,
104170 'requestSource' : 'iVysilani' ,
105171 }
106172
107173 entries = []
108174
109175 for user_agent in (None , USER_AGENTS ['Safari' ]):
110176 req = sanitized_Request (
111- 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist' ,
177+ 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/ ' ,
112178 data = urlencode_postdata (data ))
113179
114180 req .add_header ('Content-type' , 'application/x-www-form-urlencoded' )
@@ -130,9 +196,6 @@ def _real_extract(self, url):
130196 req = sanitized_Request (compat_urllib_parse_unquote (playlist_url ))
131197 req .add_header ('Referer' , url )
132198
133- playlist_title = self ._og_search_title (webpage , default = None )
134- playlist_description = self ._og_search_description (webpage , default = None )
135-
136199 playlist = self ._download_json (req , playlist_id , fatal = False )
137200 if not playlist :
138201 continue
@@ -167,7 +230,7 @@ def _real_extract(self, url):
167230 entries [num ]['formats' ].extend (formats )
168231 continue
169232
170- item_id = item .get ('id' ) or item ['assetId' ]
233+ item_id = str_or_none ( item .get ('id' ) or item ['assetId' ])
171234 title = item ['title' ]
172235
173236 duration = float_or_none (item .get ('duration' ))
@@ -181,8 +244,6 @@ def _real_extract(self, url):
181244
182245 if playlist_len == 1 :
183246 final_title = playlist_title or title
184- if is_live :
185- final_title = self ._live_title (final_title )
186247 else :
187248 final_title = '%s (%s)' % (playlist_title , title )
188249
@@ -200,6 +261,8 @@ def _real_extract(self, url):
200261 for e in entries :
201262 self ._sort_formats (e ['formats' ])
202263
264+ if len (entries ) == 1 :
265+ return entries [0 ]
203266 return self .playlist_result (entries , playlist_id , playlist_title , playlist_description )
204267
205268 def _get_subtitles (self , episode_id , subs ):
@@ -236,54 +299,3 @@ def _fix_subtitle(subtitle):
236299 yield line
237300
238301 return '\r \n ' .join (_fix_subtitle (subtitles ))
239-
240-
class CeskaTelevizePoradyIE(InfoExtractor):
    # Handles ceskatelevize.cz "/porady/" pages: the page is only scraped for
    # the address of its embedded player, and the result is handed over to
    # CeskaTelevizeIE via url_result().
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player address is taken from either a <span data-url=...>
        # attribute or an iFramePlayer.php <iframe src=...>; patterns are
        # tried in this order.
        player_url_patterns = (
            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1',
        )
        raw_player_url = self._search_regex(
            player_url_patterns, webpage, 'iframe player url', group='url')

        # Undo HTML escaping of the attribute value, then add autoStart=true.
        player_url = update_url_query(
            unescapeHTML(raw_player_url), query={'autoStart': 'true'})

        return self.url_result(player_url, ie=CeskaTelevizeIE.ie_key())
0 commit comments