Skip to content

Instantly share code, notes, and snippets.

@aino-prashant
Last active March 7, 2019 12:20
Show Gist options
  • Save aino-prashant/7861353bc408303d95ec2ea234fc97f1 to your computer and use it in GitHub Desktop.
Save aino-prashant/7861353bc408303d95ec2ea234fc97f1 to your computer and use it in GitHub Desktop.
Scraping Scripts
<!--
  Corriere Adriatico newsstand, edition "Ancona".
  Steps: click the login entry, enter the credentials, submit, click the daily
  issue image, then work through the PDF download dialog.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="https://shop.corriereadriatico.it/edicola"
               edition="Ancona">
  <execution-element type="flow">
    <!-- Click the element with id "navLogin" (presumably opens the login form). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;navLogin&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Enter the account name in the element with id "username_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id=&quot;username_box&quot;]"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "password_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id=&quot;password_box&quot;]"/>
      <attribute-value>extrapola1</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!--
      Submit the login by clicking the element with id "buttonDiv".
      Appending ['loading-example-btn'] to this expression would not narrow the
      match: a non-empty string literal used as an XPath 1.0 predicate is
      always true, so the selected element is the same with or without it.
      NOTE(review): if the intended target is the element whose id is
      "loading-example-btn", it needs its own [@id='...'] test; confirm
      against the live page before changing the target.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='buttonDiv']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the element with id "dailyImg" (presumably the current issue). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dailyImg&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the element with id "dowloadFlipPDF".
      NOTE(review): the id is spelled "dowload" here; presumably that matches
      the page's own spelling, so do not correct it without checking the page.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dowloadFlipPDF&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the last option inside the element with id "pdfA". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfA&quot;]/option[last()]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the second button of the third nested div inside "pdfModal"
      (presumably the confirm/download button; TODO confirm).
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfModal&quot;]/div/div/div[3]/button[2]"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Netweek digital editions, title "merate".
  Steps: click the fourth menu entry, enter the credentials, submit with the
  ENTER key, then open the issue PDF URL directly.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="http://edizionidigitali.netweek.it/dmedia/newsstand/title/merate">
  <execution-element type="flow">
    <!-- Click the link in the fourth list item of "bs-menu-collapse" (presumably the login entry). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='bs-menu-collapse']/ul/li[4]/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Enter the account name in the element with id "input_username". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id='input_username']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "input_password". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id='input_password']"/>
      <attribute-value>12681870155</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Send the ENTER key to the password field to submit the form. -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='input_password']"/>
      <action-type>KEY_PRESS</action-type>
      <key>ENTER</key>
    </execution-element>
    <!--
      Open the issue PDF. {{currentYear}} and {{currentdate}} are placeholders,
      presumably filled in by the engine at run time; the date format is not
      visible in this script.
      NOTE(review): the two placeholder names use different casing; verify that
      both match the names the engine expects.
    -->
    <execution-element type="url"
                       value="http://edizionidigitali.netweek.it/dmedia/books/merate/{{currentYear}}/{{currentdate}}merate/pdf/issue.pdf"/>
  </execution-element>
</target-source>
<!--
  Il Centro digital reader, book "latinaoggi": article scraper.
  Steps: log in inside the "iframe_login" frame, optionally click the
  "activate" link, then iterate over the page thumbnails and, for each one,
  over the entries of "articles_list", saving title, subtitle and content of
  each article through the "saveArticle" operation of service "eXtrapola".
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/">
  <execution-element type="flow">
    <!-- Switch into the frame named "iframe_login". -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value="iframe_login"/>
      <action-type>SWITCHFRAME</action-type>
      <key/>
    </execution-element>
    <!-- Enter the account name in the input with id "input_username". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@id='input_username']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the input with id "input_password". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@id='input_password']"/>
      <attribute-value>12681870155</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the sixth paragraph of "login_form" (presumably the submit control). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;login_form&quot;]/p[6]"/>
      <action-type>CLICK</action-type>
      <key/>
    </execution-element>
    <!-- If a link exists under the element with id "activate", click it. -->
    <execution-element type="If">
      <condition expression-type="XPATH"
                 expression="boolean(//*[@id=&quot;activate&quot;]/a)"/>
      <then-execution type="flow">
        <execution-element type="act">
          <locator type="XPATH" value="//*[@id=&quot;activate&quot;]/a"/>
          <action-type>CLICK</action-type>
          <key/>
        </execution-element>
      </then-execution>
      <else-execution/>
    </execution-element>
    <!--
      If the class "vc-tooltip" condition holds, click the element with id
      "pages" (presumably switches the reader to the page thumbnails).
    -->
    <execution-element type="If">
      <condition expression-type="CLASSNAME" expression="vc-tooltip"/>
      <then-execution type="flow">
        <execution-element type="act">
          <locator type="ID" value="pages"/>
          <action-type>CLICK</action-type>
          <key/>
        </execution-element>
      </then-execution>
      <else-execution/>
    </execution-element>
    <!--
      Outer iteration. childrenOf(...) is not standard XPath; it is presumably
      an engine helper that yields the img descendants of the list under
      "thumbcont". How the engine selects or opens each iterated element is
      not visible in this script.
    -->
    <execution-element type="for-each">
      <iterator-locator type="XPATH"
                        value="childrenOf('//*[@id=&quot;thumbcont&quot;]/ul',img)"/>
      <repeat-execution type="flow">
        <!-- Click the element with id "articles" (presumably opens the article list). -->
        <execution-element type="act">
          <locator type="ID" value="articles"/>
          <action-type>CLICK</action-type>
          <key/>
        </execution-element>
        <!-- Inner iteration over the children of the element with id "articles_list". -->
        <execution-element type="for-each">
          <iterator-locator type="ID"
                            value="childrenOf('articles_list')"/>
          <repeat-execution type="flow">
            <!-- Switch into the frame named "shadowbox_content" that holds the article. -->
            <execution-element type="act">
              <locator type="IFRAME_NAME" value="shadowbox_content"/>
              <action-type>SWITCHFRAME</action-type>
              <key/>
            </execution-element>
            <!-- Build an "Article" entity from three text fetches and hand it to saveArticle. -->
            <execution-element type="entity" entity-name="Article">
              <operation name="saveArticle" service="eXtrapola"/>
              <entity-property name="title">
                <execution-element type="data-fetch">
                  <locator type="CSS_SELECTOR" value="h1.titolo_articolo.titolo"/>
                  <property>TEXT</property>
                </execution-element>
              </entity-property>
              <entity-property name="subtitle">
                <execution-element type="data-fetch">
                  <locator type="CSS_SELECTOR" value="h2.sottotitolo_articolo.sottotitolo"/>
                  <property>TEXT</property>
                </execution-element>
              </entity-property>
              <entity-property name="content">
                <execution-element type="data-fetch">
                  <locator type="CLASSNAME" value="testo_articolo"/>
                  <property>TEXT</property>
                </execution-element>
              </entity-property>
            </execution-element>
            <!--
              Leave the article frame. The action name suggests a switch back to
              the default content; presumably the locator value is ignored for
              this action (TODO confirm).
            -->
            <execution-element type="act">
              <locator type="IFRAME_NAME" value="iframe_login"/>
              <action-type>SWITCHFRAMEDEFAULT</action-type>
              <key/>
            </execution-element>
            <!-- Click the element with id "shadowbox_nav_close" to close the article overlay. -->
            <execution-element type="act">
              <locator type="XPATH" value="//*[@id=&quot;shadowbox_nav_close&quot;]"/>
              <action-type>CLICK</action-type>
              <key/>
            </execution-element>
            <!-- When the id "textual_articles" condition does not hold, click "articles" again. -->
            <execution-element type="If">
              <condition expression-type="ID" expression="textual_articles"/>
              <then-execution/>
              <else-execution type="flow">
                <execution-element type="act">
                  <locator type="ID" value="articles"/>
                  <action-type>CLICK</action-type>
                  <key/>
                </execution-element>
              </else-execution>
            </execution-element>
          </repeat-execution>
        </execution-element>
        <!--
          After the articles of one page: if the class "vc-tooltip" condition
          holds, click "pages" again.
          NOTE(review): this step locates "pages" by CLASSNAME, while the
          matching step before the outer iteration locates it by ID. One of the
          two is probably wrong; confirm against the page before changing it.
        -->
        <execution-element type="If">
          <condition expression-type="CLASSNAME" expression="vc-tooltip"/>
          <then-execution type="flow">
            <execution-element type="act">
              <locator type="CLASSNAME" value="pages"/>
              <action-type>CLICK</action-type>
              <key/>
            </execution-element>
          </then-execution>
          <else-execution/>
        </execution-element>
      </repeat-execution>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Il Centro digital reader: log in, open the xsearch.jsp page for an edition,
  then keep clicking the "avanti" control.
  NOTE(review): the start URL points at the "latinaoggi" book while the
  edition parameter below ends in "pescara"; confirm that this combination is
  intended.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/">
  <execution-element type="flow">
    <!-- Switch into the frame named "iframe_login". -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value="iframe_login"/>
      <action-type>SWITCHFRAME</action-type>
    </execution-element>
    <!-- Enter the account name in the input with id "input_username". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@id='input_username']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the input with id "input_password". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@id=&quot;input_password&quot;]"/>
      <attribute-value>12681870155</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the sixth paragraph of "login_form" (presumably the submit control). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;login_form&quot;]/p[6]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- If a link exists under the element with id "activate", click it. -->
    <execution-element type="If">
      <condition expression-type="XPATH"
                 expression="boolean(//*[@id=&quot;activate&quot;]/a)"/>
      <then-execution type="flow">
        <execution-element type="act">
          <locator type="XPATH" value="//*[@id=&quot;activate&quot;]/a"/>
          <action-type>CLICK</action-type>
        </execution-element>
      </then-execution>
      <else-execution/>
    </execution-element>
    <!--
      Open xsearch.jsp with two parameters, "edition" and "pag". The base URL
      already ends in "?", so the engine presumably appends the parameters as
      a query string. {{currentdate}} is a placeholder whose format is not
      visible in this script.
    -->
    <execution-element type="url"
                       value="http://digital.ilcentro.it/ilcentro/reader/xsearch.jsp?">
      <parameter name="edition" expression="{{currentdate}}pescara"/>
      <parameter name="pag" expression="1"/>
    </execution-element>
    <!--
      Repeat clicking the element with class "avanti", presumably for as long
      as such an element is present (loop semantics are defined by the engine).
    -->
    <execution-element type="loop">
      <condition expression-type="CLASSNAME" expression="avanti"/>
      <repeat-execution type="flow">
        <execution-element type="act">
          <locator type="CLASSNAME" value="avanti"/>
          <action-type>CLICK</action-type>
        </execution-element>
      </repeat-execution>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Il Mattino newsstand, edition "Avellino".
  Steps: click the login entry, enter the credentials, submit with the ENTER
  key, pick an edition from "selectEdizione", click the daily issue image,
  then work through the PDF download dialog.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="https://shop.ilmattino.it/edicola"
               edition="Avellino">
  <execution-element type="flow">
    <!-- Click the element with id "navLogin" (presumably opens the login form). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;navLogin&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Enter the account name in the element with id "username_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id='username_box']"/>
      <attribute-value>ainosoftpl</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "password_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id='password_box']"/>
      <attribute-value>aino2018</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Send the ENTER key to the password field to submit the form. -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='password_box']"/>
      <action-type>KEY_PRESS</action-type>
      <key>ENTER</key>
    </execution-element>
    <!-- Click the element with id "selectEdizione" to open the edition list. -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;selectEdizione&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the third option of "selectEdizione". The edition is chosen purely
      by position; presumably index 3 corresponds to the "Avellino" edition
      named on the target-source (TODO confirm against the page).
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;selectEdizione&quot;]/option[3]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the element with id "dailyImg" (presumably the current issue). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dailyImg&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the element with id "dowloadFlipPDF".
      NOTE(review): the id is spelled "dowload" here; presumably that matches
      the page's own spelling, so do not correct it without checking the page.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dowloadFlipPDF&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the last option inside the element with id "pdfA". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfA&quot;]/option[last()]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the second button of the third nested div inside "pdfModal"
      (presumably the confirm/download button; TODO confirm).
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfModal&quot;]/div/div/div[3]/button[2]"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Il Messaggero newsstand (no edition attribute is set).
  Steps: click the login entry, enter the credentials, submit, click the daily
  issue image, then work through the PDF download dialog.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="https://shop.ilmessaggero.it/edicola">
  <execution-element type="flow">
    <!-- Click the element with id "navLogin" (presumably opens the login form). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;navLogin&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Enter the account name in the element with id "username_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id=&quot;username_box&quot;]"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "password_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id=&quot;password_box&quot;]"/>
      <attribute-value>extrapola1</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!--
      Submit the login by clicking the element with id "buttonDiv".
      Appending ['loading-example-btn'] to this expression would not narrow the
      match: a non-empty string literal used as an XPath 1.0 predicate is
      always true, so the selected element is the same with or without it.
      NOTE(review): if the intended target is the element whose id is
      "loading-example-btn", it needs its own [@id='...'] test; confirm
      against the live page before changing the target.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='buttonDiv']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the element with id "dailyImg" (presumably the current issue). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dailyImg&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the element with id "dowloadFlipPDF".
      NOTE(review): the id is spelled "dowload" here; presumably that matches
      the page's own spelling, so do not correct it without checking the page.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dowloadFlipPDF&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the last option inside the element with id "pdfA". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfA&quot;]/option[last()]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the second button of the third nested div inside "pdfModal"
      (presumably the confirm/download button; TODO confirm).
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfModal&quot;]/div/div/div[3]/button[2]"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Il Secolo XIX digital newsstand, edition "Levante".
  Steps: switch into the login frame, click "mega", enter the credentials,
  click the "Entra" button, optionally click the "activate" link, then click
  the PDF download link. A log step follows each stage.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="http://edicoladigitale.ilsecoloxix.it/secoloxix/books/levante/"
               edition="Levante">
  <execution-element type="flow">
    <!-- Switch into the frame named "iframe_login". -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value="iframe_login"/>
      <action-type>SWITCHFRAME</action-type>
    </execution-element>
    <execution-element type="log" message="Login frame selected"/>
    <!-- Click the element with id "mega". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='mega']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <execution-element type="log" message="Login button clicked"/>
    <!-- Enter the account name in the input named "username". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@name='username']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the input named "password". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@name='password']"/>
      <attribute-value>12681870155</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the input whose value is "Entra" to submit the form. -->
    <execution-element type="act">
      <locator type="XPATH" value="//input[@value='Entra']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      This log step runs unconditionally after the click; nothing in this
      script checks that the login actually succeeded.
    -->
    <execution-element type="log" message="Login succesful"/>
    <!--
      If a link exists under the element with id "activate", click it.
      NOTE(review): this If has no else-execution child, unlike the other If
      steps in this collection of scripts; confirm the engine treats a missing
      else branch the same as an empty one.
    -->
    <execution-element type="If">
      <condition expression-type="XPATH" expression="boolean(//*[@id='activate']/a)"/>
      <then-execution type="flow">
        <execution-element type="act">
          <locator type="XPATH" value="//*[@id=&quot;activate&quot;]/a"/>
          <action-type>CLICK</action-type>
        </execution-element>
        <execution-element type="log" message="User activated"/>
      </then-execution>
    </execution-element>
    <!-- Click the first link inside the paragraph with class "label downloadpdf_btn". -->
    <execution-element type="act">
      <locator type="XPATH" value="//p[@class='label downloadpdf_btn']/a[1]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <execution-element type="log"
                       message="PDF located and downloading"/>
  </execution-element>
</target-source>
<!--
  La Repubblica via the Kataweb login page, edition "Roma".
  Steps: enter the credentials, click "sempreconnesso", submit, open the
  newsstand page, click "Edicola" until "extra-content" exists, choose today's
  paper and the "Roma" entry, then open the logout URL.
  The pdf-password attribute is presumably used by the engine to open the
  downloaded PDF; its use is not visible in this script.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="https://login.kataweb.it/registrazione/repubblica.it/login.jsp"
               edition="Roma"
               pdf-password="[email protected]">
  <execution-element type="flow">
    <!-- Enter the account name in the input named "userid". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@name=&quot;userid&quot;]"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the input named "userpw". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@name=&quot;userpw&quot;]"/>
      <attribute-value>12681870155</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the element with id "sempreconnesso" (presumably a "stay signed in" control). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='sempreconnesso']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the input named "submit". -->
    <execution-element type="act">
      <locator type="XPATH" value="//input[@name=&quot;submit&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      This log step runs unconditionally after the click; nothing in this
      script checks that the login actually succeeded.
    -->
    <execution-element type="log" message="Login succesful"/>
    <!-- Open the newsstand page. -->
    <execution-element type="url"
                       value="https://quotidiano.repubblica.it/edicola/edicola.jsp"/>
    <!--
      Loop whose condition is "no element with id extra-content exists"; each
      pass clicks an element whose string value is exactly "Edicola".
      NOTE(review): nothing in this script bounds the number of passes; if
      "extra-content" never appears, termination depends on the engine.
    -->
    <execution-element type="loop">
      <condition expression-type="XPATH" expression="not(boolean(//*[@id='extra-content']))"/>
      <repeat-execution type="flow">
        <execution-element type="act">
          <locator type="XPATH" value="//*[.='Edicola']"/>
          <action-type>CLICK</action-type>
        </execution-element>
      </repeat-execution>
    </execution-element>
    <execution-element type="log" message="Edicola link clicked until needed"/>
    <!-- Click the element with id "extra-content". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='extra-content']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the option of "select1" whose trimmed text is "Il giornale di oggi". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='select1']/option[normalize-space(text())='Il giornale di oggi']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <execution-element type="log" message="Today edition selected"/>
    <!-- Click an element whose trimmed text is "Roma", matching the edition attribute above. -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[normalize-space(text())='Roma']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <execution-element type="log" message="PDF for Roma edition located and donwloading"/>
    <!-- Open the logout service URL, which carries a backurl to www.repubblica.it. -->
    <execution-element type="url"
                       value="https://quotidiano.repubblica.it/edicola/manager?service=logout&amp;backurl=http%3A%2F%2Fwww.repubblica.it"/>
  </execution-element>
</target-source>
<!--
  La Voce on newsmemory.
  Steps: switch into two nested frames, log in, switch back to the parent
  frame, click through "thumbsAction" and the first section header, then
  switch into "formPhodir" and click a link located by an absolute path.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="http://lavoce.ita.newsmemory.com/">
  <execution-element type="flow">
    <!-- Switch into the frame named "mainframe". -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value="mainframe"/>
      <action-type>SWITCHFRAME</action-type>
    </execution-element>
    <!-- Then switch into the frame named "lightbox-iframe". -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value="lightbox-iframe"/>
      <action-type>SWITCHFRAME</action-type>
    </execution-element>
    <!-- Enter the account name in the input with id "username". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@id='username']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the input named "password". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//input[@name='password']"/>
      <attribute-value>4DD6C3AE</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the element with id "loginBtn". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='loginBtn']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Switch to the parent frame. The locator value is empty; presumably it is
      not used by this action (TODO confirm). Only one parent switch follows
      the two frame switches above, so the later steps presumably run inside
      "mainframe".
    -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value=""/>
      <action-type>SWITCHFRAMEPARENT</action-type>
    </execution-element>
    <!-- Click the element with id "thumbsAction". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='thumbsAction']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the link inside the element with id "secHead_1". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='secHead_1']/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Switch into the frame named "formPhodir". -->
    <execution-element type="act">
      <locator type="IFRAME_NAME" value="formPhodir"/>
      <action-type>SWITCHFRAME</action-type>
    </execution-element>
    <!--
      Click a link addressed by an absolute path from the document root.
      NOTE(review): this locator depends on the exact page layout and will
      stop matching if the markup changes.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="/html/body/div/table/tbody/tr[2]/td/font/b[2]/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Leggo page-flip viewer, edition "Milano". No login step is present.
  Steps: open the viewer URL for the Milano edition and the current date,
  click the cookie banner link if the banner exists, then work through the
  PDF download dialog.
-->
<target-source url="http://carta.leggo.it/Sfogliatore/"
               edition="Milano">
  <execution-element type="flow">
    <!--
      Open the viewer. The base URL already carries testata and edizione and
      ends in an ampersand, so the engine presumably appends the "data"
      parameter to the query string. {{currentdate}} is a placeholder whose
      format is not visible in this script.
    -->
    <execution-element type="url"
                       value="http://carta.leggo.it/Sfogliatore/?testata=LG&amp;edizione=MILANO&amp;">
      <parameter name="data" expression="{{currentdate}}"/>
    </execution-element>
    <!-- If an element with id "cookieLawBanner" exists, click the link inside it. -->
    <execution-element type="If">
      <condition expression-type="XPATH"
                 expression="boolean(//*[@id=&quot;cookieLawBanner&quot;])"/>
      <then-execution type="flow">
        <execution-element type="act">
          <locator type="XPATH" value="//*[@id=&quot;cookieLawBanner&quot;]/a"/>
          <action-type>CLICK</action-type>
        </execution-element>
      </then-execution>
      <else-execution/>
    </execution-element>
    <!--
      Click the element with id "dowloadFlipPDF".
      NOTE(review): the id is spelled "dowload" here; presumably that matches
      the page's own spelling, so do not correct it without checking the page.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;dowloadFlipPDF&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the last option inside the element with id "pdfA". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfA&quot;]/option[last()]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the second button of the third nested div inside "pdfModal". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfModal&quot;]/div/div/div[3]/button[2]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Then click the first button of the same div (presumably closes the
      dialog; TODO confirm).
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;pdfModal&quot;]/div/div/div[3]/button[1]"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Quotidiano di Bari profile page.
  Steps: enter the credentials, submit, then click four further elements:
  a menu item, a deeply nested link under "Content", another menu link and a
  link inside "error-page".
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="https://quotidianodibari.it/profilo/">
  <execution-element type="flow">
    <!-- Enter the account name in the element with id "user_login". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id=&quot;user_login&quot;]"/>
      <attribute-value>Previewsrl</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "user_pass". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id=&quot;user_pass&quot;]"/>
      <attribute-value>n5RmpNkd</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the element with id "wp-submit" to submit the form. -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;wp-submit&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the element with id "menu-item-2424". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;menu-item-2424&quot;]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the link in the first list item of a list nested deep under
      "Content" (presumably the most recent issue; TODO confirm).
      NOTE(review): this positional path depends on the exact page layout and
      will stop matching if the markup changes.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;Content&quot;]/div/div[1]/div/div[1]/div/div/div/div[2]/div/div/div/div/ul/li[1]/div[2]/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the link inside the element with id "menu-item-2433". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;menu-item-2433&quot;]/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the link in the second paragraph of the element with id "error-page". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id=&quot;error-page&quot;]/p[2]/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Quotidiano di Puglia newsstand, edition "Lecce".
  Steps: click the login entry, enter the credentials, submit, click the daily
  issue image, then work through the PDF download dialog.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
-->
<target-source url="http://shop.quotidianodipuglia.it/#" edition="Lecce">
  <execution-element type="flow">
    <!-- Click the element with id "navLogin" (presumably opens the login form). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='navLogin']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Enter the account name in the element with id "username_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id='username_box']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "password_box". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="//*[@id='password_box']"/>
      <attribute-value>extrapola1</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!--
      Submit the login by clicking the element with id "buttonDiv".
      Appending ['loading-example-btn'] to this expression would not narrow the
      match: a non-empty string literal used as an XPath 1.0 predicate is
      always true, so the selected element is the same with or without it.
      NOTE(review): if the intended target is the element whose id is
      "loading-example-btn", it needs its own [@id='...'] test; confirm
      against the live page before changing the target.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='buttonDiv']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the element with id "dailyImg" (presumably the current issue). -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='dailyImg']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the element with id "dowloadFlipPDF".
      NOTE(review): the id is spelled "dowload" here; presumably that matches
      the page's own spelling, so do not correct it without checking the page.
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='dowloadFlipPDF']"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click the last option inside the element with id "pdfA". -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='pdfA']/option[last()]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Click the second button of the third nested div inside "pdfModal"
      (presumably the confirm/download button; TODO confirm).
    -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='pdfModal']/div/div/div[3]/button[2]"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
<!--
  Quotidiano Energia.
  Steps: click a header link located by an absolute path, enter the
  credentials, click the button of the "login" element, then click a link
  located by a second absolute path.
  NOTE(review): the account credentials are stored in plain text in this
  script; consider supplying them from outside the file.
  NOTE(review): both absolute paths depend on the exact page layout and will
  stop matching if the markup changes.
-->
<target-source url="http://www.quotidianoenergia.it/">
  <execution-element type="flow">
    <!-- Click the second link of a div near the top of the page (presumably the login entry). -->
    <execution-element type="act">
      <locator type="XPATH" value="/html/body/section/div[1]/div[1]/div/div[1]/a[2]"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!--
      Enter the account name in the element with id "login-login". The space
      after the leading slashes is legal XPath whitespace and does not change
      what the expression selects.
    -->
    <execution-element type="data-put">
      <locator type="XPATH" value="// *[@id='login-login']"/>
      <attribute-value>[email protected]</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Enter the password in the element with id "login-pwd". -->
    <execution-element type="data-put">
      <locator type="XPATH" value="// *[@id='login-pwd']"/>
      <attribute-value>newuser117</attribute-value>
      <property>TEXT</property>
    </execution-element>
    <!-- Click the button inside the element with id "login" to submit. -->
    <execution-element type="act">
      <locator type="XPATH" value="//*[@id='login']/button"/>
      <action-type>CLICK</action-type>
    </execution-element>
    <!-- Click a link located by position (presumably the issue download; TODO confirm). -->
    <execution-element type="act">
      <locator type="XPATH" value="/html/body/div[2]/div/div/div[2]/div/div[1]/div/div[1]/a"/>
      <action-type>CLICK</action-type>
    </execution-element>
  </execution-element>
</target-source>
@aino-prashant
Copy link
Author

aino-prashant commented Jan 21, 2019

Il Secolo XIX has 5 editions; we have added one script here. To configure the others, only the URL needs to be changed.

@aino-prashant
Copy link
Author

aino-prashant commented Jan 22, 2019

For the source Il Centro, only the first 6 PDFs are downloaded correctly; the others are downloaded with the content "We're sorry, access to the functions of Ultrazoom and the editions in PDF format is not available."

@aino-prashant
Copy link
Author

aino-prashant commented Feb 19, 2019

Source name: Il Mattino. Editions: Avellino, Nazionale, Caserta, Napoli, Benevento, Salerno, Circondario Nord. To select an edition, change the option index in <locator type="XPATH" value="//*[@id=&quot;selectEdizione&quot;]/option[3]" />
Source name: La Repubblica. Editions: Firenze, Roma, Bologna, Genova, Napoli, Torino, Palermo, Nazionale, Milano, Bari.
Source name: Leggo. Editions: Milano, Roma. To select an edition, change the edition parameter in the URL.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment